Ejemplo n.º 1
0
def get_tok2vec_kwargs():
    """Return a new dict with freshly built "embed" and "encode" layers.

    The layers are constructed on every call (rather than stored at module
    level) because building them actually creates models.
    """
    embedding_layer = MultiHashEmbed(
        width=32,
        rows=[500, 500, 500],
        attrs=["NORM", "PREFIX", "SHAPE"],
        include_static_vectors=False,
    )
    encoding_layer = MaxoutWindowEncoder(
        width=32,
        depth=2,
        maxout_pieces=2,
        window_size=1,
    )
    return {"embed": embedding_layer, "encode": encoding_layer}
Ejemplo n.º 2
0
def my_parser():
    """Build and return a parser model on top of a newly created tok2vec layer.

    The tok2vec layer combines a two-attribute hash embedding with a maxout
    window encoder, both of width 321, and is handed to
    ``build_tb_parser_model`` together with the parser settings below.
    """
    layer_width = 321

    # Embedding over the LOWER and SHAPE attributes, 5432 rows each.
    embedder = MultiHashEmbed(
        width=layer_width,
        attrs=["LOWER", "SHAPE"],
        rows=[5432, 5432],
        include_static_vectors=False,
    )
    encoder = MaxoutWindowEncoder(
        width=layer_width,
        window_size=3,
        maxout_pieces=4,
        depth=2,
    )
    token_vectors = build_Tok2Vec_model(embedder, encoder)

    return build_tb_parser_model(
        tok2vec=token_vectors,
        state_type="parser",
        extra_state_tokens=True,
        hidden_width=65,
        maxout_pieces=5,
        use_upper=True,
    )
Ejemplo n.º 3
0
def test_multi_hash_embed():
    """Check the "hashembed" nodes that MultiHashEmbed creates.

    With three attributes there must be three hash-embedding nodes, each
    reading a distinct column, using a distinct seed, and sized according to
    the corresponding entry of ``rows``.
    """
    attr_names = ("NORM", "PREFIX", "SHAPE")

    def collect_hash_embeds(row_counts):
        # Build a model for the given row counts and gather its hashembed nodes.
        model = MultiHashEmbed(
            width=32,
            rows=row_counts,
            attrs=list(attr_names),
            include_static_vectors=False,
        )
        return [layer for layer in model.walk() if layer.name == "hashembed"]

    # Uniform table sizes: one node per attribute.
    nodes = collect_hash_embeds([500, 500, 500])
    assert len(nodes) == 3

    # Each node looks at its own column.
    columns = sorted(node.attrs["column"] for node in nodes)
    assert columns == [0, 1, 2]

    # Each node hashes with its own seed.
    seeds = {node.attrs["seed"] for node in nodes}
    assert len(seeds) == 3

    # Every node has the requested number of rows.
    assert [node.get_dim("nV") for node in nodes] == [500, 500, 500]

    # Differing table sizes are applied per attribute, in order.
    nodes = collect_hash_embeds([1000, 50, 250])
    assert [node.get_dim("nV") for node in nodes] == [1000, 50, 250]