def f(process_id):
    """Load the saved model and Annoy index, run one query, report memory use.

    The function prints the OS process id, loads the Word2Vec model from
    ``/tmp/mymodel.pkl`` and the Annoy index from ``/tmp/mymodel.index``,
    attaches the model to the indexer, performs a single ``most_similar``
    query for the vector of "science" (top 5, result discarded), and finally
    prints ``psutil``'s memory info for the current process.

    Args:
        process_id: Accepted but not used by the body; the id that gets
            printed is the one returned by ``os.getpid()``.

    Returns:
        None. All results are written to stdout.
    """
    pid = os.getpid()
    print('Process Id: {}'.format(pid))
    proc_handle = psutil.Process(pid)

    # Restore the model and pick the query vector from its keyed vectors.
    w2v = Word2Vec.load('/tmp/mymodel.pkl')
    keyed_vectors = w2v.wv
    query = keyed_vectors["science"]

    # Restore the index from disk, then attach the model that was just loaded.
    indexer = AnnoyIndexer()
    indexer.load('/tmp/mymodel.index')
    indexer.model = w2v

    # The neighbours themselves are not needed; the query is run only so that
    # the memory report below is taken after the index has been used.
    keyed_vectors.most_similar([query], topn=5, indexer=indexer)

    usage = proc_handle.memory_info()
    print('\nMemory used by process {}: {}\n---'.format(pid, usage))
def summary(x: str, word2vec, vocabulary):
    """Print the approximate and the exact nearest neighbours of ``x``.

    An ``AnnoyIndexer`` is built from ``word2vec`` (second argument 100),
    the vector for ``x`` is looked up in ``vocabulary``, and two
    ``most_similar`` queries are issued with ``topn=11``: first through the
    Annoy indexer, then without an indexer. Each result list is printed
    under its own heading, one entry per line.

    Args:
        x: Key used to fetch the query vector via ``vocabulary[x]``.
        word2vec: Object handed to ``AnnoyIndexer``; presumably a trained
            Word2Vec model -- confirm against the caller.
        vocabulary: Object supporting ``vocabulary[x]`` and
            ``most_similar(...)``; presumably the model's keyed vectors --
            confirm against the caller.

    Returns:
        None. Results are written to stdout.
    """

    def _show(heading, hits):
        # Heading first, then every hit on its own line.
        print(heading)
        for hit in hits:
            print(hit)

    indexer = AnnoyIndexer(word2vec, 100)
    query = vocabulary[x]

    # Approximate search: the freshly built indexer is passed to the query.
    approx_hits = vocabulary.most_similar([query], topn=11, indexer=indexer)
    _show("Approximate Neighbors", approx_hits)

    # Exact search: same query, no indexer.
    exact_hits = vocabulary.most_similar([query], topn=11)
    _show("\nExact Neighbors", exact_hits)
# `here <https://github.com/spotify/annoy#how-does-it-work>`__. The relationship # between ``num_trees``\ , build time, and accuracy will be investigated later # in the tutorial. # # Now that we are ready to make a query, lets find the top 5 most similar words # to "science" in the Text8 corpus. To make a similarity query we call # ``Word2Vec.most_similar`` like we would traditionally, but with an added # parameter, ``indexer``. # # Apart from Annoy, Gensim also supports the NMSLIB indexer. NMSLIB is a similar library to # Annoy – both support fast, approximate searches for similar vectors. # from gensim.similarities.annoy import AnnoyIndexer # 100 trees are being used in this example annoy_index = AnnoyIndexer(model, 100) # Derive the vector for the word "science" in our model vector = wv["science"] # The instance of AnnoyIndexer we just created is passed approximate_neighbors = wv.most_similar([vector], topn=11, indexer=annoy_index) # Neatly print the approximate_neighbors and their corresponding cosine similarity values print("Approximate Neighbors") for neighbor in approximate_neighbors: print(neighbor) normal_neighbors = wv.most_similar([vector], topn=11) print("\nExact Neighbors") for neighbor in normal_neighbors: print(neighbor) ###############################################################################