Ejemplo n.º 1
0
def test_word2vec():
    """Check relative word2vec distances between three sample sentences.

    The sentences are split into words (with two extra stop words supplied
    via ``add_stop_words``), embedded with the wiki pretrained vectors, and
    the cosine distance from sentence 0 to sentence 1 is asserted to be
    smaller than the distance from sentence 0 to sentence 2.
    """
    corpus = [
        "This is an awesome book to learn NLP.",
        "DistilBERT is an amazing NLP model.",
        "We can interchangeably use embedding, encoding, or vectorizing.",
    ]
    extra_stop_words = ['distilbert', 'vectorizing']

    word_splitter = Splitter()
    word_splitter.sent2words(corpus, add_stop_words=extra_stop_words)

    embedder = Vectorizer()
    embedder.word2vec(
        word_splitter.words,
        pretrained_vectors_path=PRETRAINED_VECTORS_PATH_WIKI,
    )

    # Sentence 0 is the anchor; compare it against sentences 1 and 2.
    cosine = spatial.distance.cosine
    anchor_to_second = cosine(embedder.vectors[0], embedder.vectors[1])
    anchor_to_third = cosine(embedder.vectors[0], embedder.vectors[2])

    assert anchor_to_second < anchor_to_third
Ejemplo n.º 2
0
def test_complete():
    """Compare BERT and word2vec distances for a sentence and its negation.

    Both sentences are embedded first with ``Vectorizer.bert`` and then,
    after word splitting with 'not' passed as ``remove_stop_words``, with
    ``Vectorizer.word2vec`` on the same ``Vectorizer`` instance. The test
    prints both cosine distances and asserts that the word2vec distance is
    the larger of the two.
    """
    sentence_pair = [
        "Alice is in the Wonderland.",
        "Alice is not in the Wonderland.",
    ]

    # --- BERT embeddings -------------------------------------------------
    embedder = Vectorizer()
    embedder.bert(sentence_pair)
    bert_vecs = embedder.vectors
    bert_distance = spatial.distance.cosine(bert_vecs[0], bert_vecs[1])

    # --- word2vec embeddings ---------------------------------------------
    word_splitter = Splitter()
    word_splitter.sent2words(
        sentences=sentence_pair,
        remove_stop_words=['not'],
        add_stop_words=[],
    )
    # The same Vectorizer instance used for BERT above is reused here.
    embedder.word2vec(
        word_splitter.words,
        pretrained_vectors_path=PRETRAINED_VECTORS_PATH_WIKI,
    )
    w2v_vecs = embedder.vectors
    w2v_distance = spatial.distance.cosine(w2v_vecs[0], w2v_vecs[1])

    print('dist_bert: {0}, dist_w2v: {1}'.format(bert_distance, w2v_distance))
    assert w2v_distance > bert_distance
Ejemplo n.º 3
0
def test_models():
    """Embed one sentence in two separate word2vec runs and print their distance.

    The sentence is split and vectorized twice, each time with a fresh
    ``Splitter`` and ``Vectorizer``, and the cosine distance between the two
    resulting sentence vectors is printed. No assertion is made on the value.

    The single sentence vector is selected with ``[0]`` before calling
    ``spatial.distance.cosine``, matching how the other tests in this file
    index ``Vectorizer.vectors`` and satisfying SciPy's requirement that both
    inputs be 1-D.
    """
    sentences = [
        "'Artificial Intelligence: Unorthodox Lessons' is an amazing book to gain insights about AI."
    ]

    def _embed_sentence(pretrained_vectors_path):
        # Split and vectorize `sentences` with fresh objects; return the
        # vector of the only sentence.
        splitter = Splitter()
        splitter.sent2words(sentences=sentences)
        vectorizer = Vectorizer()
        vectorizer.word2vec(splitter.words,
                            pretrained_vectors_path=pretrained_vectors_path)
        return vectorizer.vectors[0]

    vector_wiki = _embed_sentence(PRETRAINED_VECTORS_PATH_WIKI)
    # NOTE(review): this run also loads PRETRAINED_VECTORS_PATH_WIKI, so both
    # vectors come from the same model. The name suggests a fastText model
    # was intended -- confirm and point this at the fastText vectors path.
    vector_fasttext = _embed_sentence(PRETRAINED_VECTORS_PATH_WIKI)

    dist = spatial.distance.cosine(vector_wiki, vector_fasttext)
    print(dist)