Example #1
def test_tok2vec_batch_sizes(batch_size, width, embed_size):
    # Tok2Vec should return one output array per Doc in the batch,
    # each of shape (number of tokens, width).
    batch = get_batch(batch_size)
    tok2vec = Tok2Vec(width, embed_size)
    vectors, backprop = tok2vec.begin_update(batch)
    assert len(vectors) == len(batch)
    for doc_vec, doc in zip(vectors, batch):
        assert doc_vec.shape == (len(doc), width)
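In spaCy's test suite, functions like this are normally driven by pytest parametrization and a small get_batch helper that is not shown here. A minimal sketch of that wiring, assuming Tok2Vec lives in spaCy v2's internal spacy._ml module; the parameter values and the get_batch stand-in are illustrative, not the originals:

import pytest
from spacy._ml import Tok2Vec  # internal module in spaCy v2.x (assumption)
from spacy.tokens import Doc
from spacy.vocab import Vocab


def get_batch(batch_size):
    # Hypothetical stand-in for the helper used above: one Doc per size
    # 1..batch_size, with numeric "words" so the docs are easy to tell apart.
    vocab = Vocab()
    return [
        Doc(vocab, words=[str(i) for i in range(size)])
        for size in range(1, batch_size + 1)
    ]


@pytest.mark.parametrize("batch_size,width,embed_size", [(1, 128, 2000), (3, 8, 63)])
def test_tok2vec_batch_sizes(batch_size, width, embed_size):
    ...  # body as in Example #1 above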
Example #2
def test_tok2vec_configs(tok2vec_config):
    docs = get_batch(3)
    tok2vec = Tok2Vec(**tok2vec_config)
    vectors, backprop = tok2vec.begin_update(docs)
    assert len(vectors) == len(docs)
    assert vectors[0].shape == (len(docs[0]), tok2vec_config["width"])
    # Also run the backward pass, to check it does not raise.
    backprop(vectors)
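The tok2vec_config argument is presumably supplied by pytest parametrization as well. A sketch of what such config dicts might look like, assuming Tok2Vec in spaCy v2 accepts keyword options such as char_embed and conv_depth; the exact keys and values here are assumptions, and only width and embed_size are required by the assertions above:

import pytest

TOK2VEC_CONFIGS = [
    # Only "width" and "embed_size" are relied on by the assertions;
    # the remaining keys are illustrative Tok2Vec options.
    {"width": 8, "embed_size": 100, "char_embed": False},
    {"width": 8, "embed_size": 100, "char_embed": True, "conv_depth": 2},
]


@pytest.mark.parametrize("tok2vec_config", TOK2VEC_CONFIGS)
def test_tok2vec_configs(tok2vec_config):
    ...  # body as in Example #2 above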
Example #3
def test_empty_doc():
    # A Doc with no tokens should still produce a (0, width) output array.
    width = 128
    embed_size = 2000
    vocab = Vocab()
    doc = Doc(vocab, words=[])
    tok2vec = Tok2Vec(width, embed_size)
    vectors, backprop = tok2vec.begin_update([doc])
    assert len(vectors) == 1
    assert vectors[0].shape == (0, width)
Example #4
def create_pipeline(width, embed_size, vectors_model):
    print("Load vectors")
    nlp = spacy.load(vectors_model)
    print("Start training")
    textcat = TextCategorizer(
        nlp.vocab,
        labels=["POSITIVE", "NEGATIVE"],
        model=build_textcat_model(Tok2Vec(width=width, embed_size=embed_size),
                                  2, width),
    )

    nlp.add_pipe(textcat)
    return nlp
Example #5
def create_pipeline(lang, width, embed_size, vectors):
    # Start from a blank pipeline when no vectors model is given;
    # otherwise load the vectors model (the original condition was inverted).
    if vectors is None:
        nlp = spacy.blank(lang)
    else:
        print("Load vectors", vectors)
        nlp = spacy.load(vectors)
    print("Start training")
    tok2vec = Tok2Vec(width=width,
                      embed_size=embed_size,
                      pretrained_vectors=vectors)
    textcat = TextCategorizer(
        nlp.vocab,
        labels=["POSITIVE", "NEGATIVE"],
        model=build_textcat_model(tok2vec, 2, width),
    )
    nlp.add_pipe(textcat)
    return nlp
Example #6
def create_pipeline(lang, width, embed_size, vectors):
    if vectors is None:
        nlp = spacy.blank(lang)
    else:
        print("Load vectors", vectors)
        nlp = spacy.load(vectors)
    print("Start training")
    tok2vec = Tok2Vec(
        width=width,
        embed_size=embed_size,
    )
    textcat = TextCategorizer(
        nlp.vocab,
        labels=['1', '2', '3', '4'],
        model=build_textcat_model(tok2vec, 4, width),
    )
    nlp.add_pipe(textcat)
    return nlp
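A minimal usage sketch for these create_pipeline helpers, assuming the spaCy v2-style training API in which component instances are passed to nlp.add_pipe and nlp.begin_training() returns an optimizer; the example text, label scores and printed output are illustrative:

nlp = create_pipeline(lang="en", width=96, embed_size=2000, vectors=None)
optimizer = nlp.begin_training()  # initialises the TextCategorizer weights

losses = {}
nlp.update(
    [nlp.make_doc("A very enjoyable read.")],
    [{"cats": {"1": 1.0, "2": 0.0, "3": 0.0, "4": 0.0}}],
    sgd=optimizer,
    losses=losses,
)

doc = nlp("Another text to classify.")
print(doc.cats)  # e.g. {"1": 0.3, "2": 0.2, "3": 0.4, "4": 0.1}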
Example #7
def tok2vec():
    return Tok2Vec(8, 100)
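Example #7 looks like the body of a pytest fixture. A minimal sketch of how it is typically wired up, again assuming Tok2Vec is importable from spaCy v2's internal spacy._ml module:

import pytest
from spacy._ml import Tok2Vec  # internal module in spaCy v2.x (assumption)


@pytest.fixture
def tok2vec():
    # A small model keeps tests fast: width 8, embedding table of 100 rows.
    return Tok2Vec(8, 100)


def test_tok2vec_fixture(tok2vec):
    # pytest injects the fixture by parameter name.
    assert tok2vec is not None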