Beispiel #1
0
def f(process_id):
    print('Process Id: {}'.format(os.getpid()))
    process = psutil.Process(os.getpid())
    new_model = Word2Vec.load('/tmp/mymodel.pkl')
    vector = new_model.wv["science"]
    annoy_index = AnnoyIndexer()
    annoy_index.load('/tmp/mymodel.index')
    annoy_index.model = new_model
    approximate_neighbors = new_model.wv.most_similar([vector],
                                                      topn=5,
                                                      indexer=annoy_index)
    print('\nMemory used by process {}: {}\n---'.format(
        os.getpid(), process.memory_info()))
def summary(x: str, word2vec, vocabulary):
    '''Summary of the 10 nearest similar words based on Word2Vec'''
    annoy_index = AnnoyIndexer(word2vec, 100)
    vector = vocabulary[x]
    # The instance of AnnoyIndexer we just created is passed
    approximate_neighbors = vocabulary.most_similar(
        [vector], topn=11, indexer=annoy_index)
    # Neatly print the approximate_neighbors and their corresponding cosine similarity values
    print("Approximate Neighbors")
    for neighbor in approximate_neighbors:
        print(neighbor)
    normal_neighbors = vocabulary.most_similar([vector], topn=11)
    print("\nExact Neighbors")
    for neighbor in normal_neighbors:
        print(neighbor)
Beispiel #3
0
#   `here <https://github.com/spotify/annoy#how-does-it-work>`__. The relationship
#   between ``num_trees``\ , build time, and accuracy will be investigated later
#   in the tutorial.
#
# Now that we are ready to make a query, lets find the top 5 most similar words
# to "science" in the Text8 corpus. To make a similarity query we call
# ``Word2Vec.most_similar`` like we would traditionally, but with an added
# parameter, ``indexer``.
#
# Apart from Annoy, Gensim also supports the NMSLIB indexer. NMSLIB is a similar library to
# Annoy – both support fast, approximate searches for similar vectors.
#
from gensim.similarities.annoy import AnnoyIndexer

# 100 trees are being used in this example
annoy_index = AnnoyIndexer(model, 100)
# Derive the vector for the word "science" in our model
vector = wv["science"]
# The instance of AnnoyIndexer we just created is passed
approximate_neighbors = wv.most_similar([vector], topn=11, indexer=annoy_index)
# Neatly print the approximate_neighbors and their corresponding cosine similarity values
print("Approximate Neighbors")
for neighbor in approximate_neighbors:
    print(neighbor)

normal_neighbors = wv.most_similar([vector], topn=11)
print("\nExact Neighbors")
for neighbor in normal_neighbors:
    print(neighbor)

###############################################################################