Python AnnoyIndexer.modelの例

プログラミング言語: Python

名前空間/パッケージ名: gensim.similarities.index

クラス/型: AnnoyIndexer

メソッド/関数: model

hotexamples.comのコード掲載数: 13

Python AnnoyIndexer.model - 13件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのgensim.similarities.index.AnnoyIndexer.modelの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

AnnoyIndexer(30)

load(26)

save(19)

model(11)

most_similar(7)

コード例 #1

ファイルを表示

def get_indexer(fpath, model, room_id):
    """Load a saved Annoy index from ``fpath`` and attach ``model`` to it.

    Args:
        fpath: Path of the saved index file.
        model: Object assigned to the loaded indexer's ``model`` attribute.
        room_id: Identifier that only appears in the log message.

    Returns:
        The loaded ``AnnoyIndexer``, or ``None`` when ``fpath`` does not exist.
    """
    if not os.path.exists(fpath):
        # None is the default indexer value, i.e. no saved index is available.
        return None

    logging.info("Use annoy_index :: room_id:%s", room_id)
    loaded_index = AnnoyIndexer()
    loaded_index.load(fpath)
    loaded_index.model = model
    return loaded_index

コード例 #2

ファイルを表示

    def assertLoadedIndexEqual(self, index, model):
        """Save ``index``, load it into a new indexer and compare the two.

        The index is written to the relative path ``'index'``, read back into
        an empty ``AnnoyIndexer``, and ``model`` is attached to the copy.  The
        underlying index's ``f``, the labels and the tree count must all match.
        """
        from gensim.similarities.index import AnnoyIndexer

        saved_path = 'index'
        index.save(saved_path)

        reloaded = AnnoyIndexer()
        reloaded.load(saved_path)
        reloaded.model = model

        self.assertEqual(index.index.f, reloaded.index.f)
        self.assertEqual(index.labels, reloaded.labels)
        self.assertEqual(index.num_trees, reloaded.num_trees)

コード例 #3

ファイルを表示

ファイル: test_similarities.py プロジェクト: leahic/gensim

    def assertLoadedIndexEqual(self, index, model):
        """Check that ``index`` survives a save/load round trip.

        ``index`` is saved under the relative path ``'index'`` and restored
        into an empty ``AnnoyIndexer`` that then receives ``model``.  The
        restored copy must agree with the original on ``index.f``, ``labels``
        and ``num_trees``.
        """
        from gensim.similarities.index import AnnoyIndexer

        target = 'index'
        index.save(target)

        restored = AnnoyIndexer()
        restored.load(target)
        restored.model = model

        self.assertEqual(index.index.f, restored.index.f)
        self.assertEqual(index.labels, restored.labels)
        self.assertEqual(index.num_trees, restored.num_trees)

コード例 #4

ファイルを表示

ファイル: similarity.py プロジェクト: zhaoqinghai/harvester

def predict(text):
    """Print the vectors derived from ``text`` and its two nearest index hits.

    Loads the Doc2Vec model and the saved Annoy index from ``../models``,
    attaches the model to the indexer, then prints, in order: the list of
    per-token vectors, the inferred vector for the whole text, and the result
    of querying the indexer with that inferred vector for 2 neighbours.

    Args:
        text: Raw input passed to ``transform_text`` (with ``strip=False``).
    """
    d2v_model = doc2vec.Doc2Vec.load('../models/doc2vec.model')
    annoy_indexer = AnnoyIndexer()
    annoy_indexer.load('../models/dv_index')
    annoy_indexer.model = d2v_model

    # One model lookup per token produced by transform_text.
    token_vectors = [
        d2v_model[token] for token in transform_text(text, strip=False)
    ]
    print(token_vectors)

    inferred = d2v_model.infer_vector(transform_text(text, strip=False))
    print(inferred)
    print(annoy_indexer.most_similar(inferred, 2))

コード例 #5

ファイルを表示

def f(process_id):
    """Load the saved model and Annoy index, run one query, print memory use.

    Prints the current process id, loads the Word2Vec model and the Annoy
    index from ``/tmp``, performs a top-5 similarity query for the vector of
    "science" through the indexer, and finally prints what
    ``psutil.Process.memory_info()`` reports for this process.

    Args:
        process_id: Not read by the body.
    """
    print('Process Id: {}'.format(os.getpid()))
    this_process = psutil.Process(os.getpid())

    w2v_model = Word2Vec.load('/tmp/mymodel.pkl')
    query_vector = w2v_model.wv["science"]

    indexer = AnnoyIndexer()
    indexer.load('/tmp/mymodel.index')
    indexer.model = w2v_model

    # The query result itself is not used further; the call is made before
    # the memory figures are printed.
    neighbors = w2v_model.wv.most_similar(
        [query_vector], topn=5, indexer=indexer)

    report = '\nMemory used by process {}: {}\n---'.format(
        os.getpid(), this_process.memory_info())
    print(report)

コード例 #6

ファイルを表示

ファイル: test_similarities.py プロジェクト: shy-forked/s2018_02_16_gensim_package_shy

    def assertLoadedIndexEqual(self, index, model):
        """Save ``index`` to a temp file, reload it and compare with the original.

        The path comes from ``get_tmpfile``.  The reloaded indexer receives
        ``model`` and must match ``index`` on the underlying index's ``f``,
        on ``labels`` and on ``num_trees``.
        """
        from gensim.similarities.index import AnnoyIndexer

        tmp_path = get_tmpfile('gensim_similarities.tst.pkl')
        index.save(tmp_path)

        reloaded = AnnoyIndexer()
        reloaded.load(tmp_path)
        reloaded.model = model

        self.assertEqual(index.index.f, reloaded.index.f)
        self.assertEqual(index.labels, reloaded.labels)
        self.assertEqual(index.num_trees, reloaded.num_trees)

コード例 #7

ファイルを表示

ファイル: test_similarities.py プロジェクト: RaRe-Technologies/gensim

    def assertLoadedIndexEqual(self, index, model):
        """Check that ``index`` is unchanged after a save/load round trip.

        ``index`` is written to a path obtained from ``get_tmpfile`` and read
        back into an empty ``AnnoyIndexer`` to which ``model`` is attached.
        The copy must agree with the original on ``index.f``, ``labels`` and
        ``num_trees``.
        """
        from gensim.similarities.index import AnnoyIndexer

        index_file = get_tmpfile('gensim_similarities.tst.pkl')
        index.save(index_file)

        restored = AnnoyIndexer()
        restored.load(index_file)
        restored.model = model

        self.assertEqual(index.index.f, restored.index.f)
        self.assertEqual(index.labels, restored.labels)
        self.assertEqual(index.num_trees, restored.num_trees)

コード例 #8

ファイルを表示

def get_annoy(w2v, embedding_type='w2v'):
    """Return an Annoy indexer for ``w2v``, reusing a saved index file if any.

    The index file name is derived from ``data_dir``, ``dims``,
    ``embedding_type`` and the vocabulary size of ``w2v``.  When that file
    exists the index is loaded and ``w2v`` is attached as its model;
    otherwise a new index is built from ``w2v`` and saved under that name.

    Args:
        w2v: Embedding object exposing ``vocab``; it is both the source of
            the index and the model attached to a loaded index.
        embedding_type: Label embedded in the index file name.

    Returns:
        An ``AnnoyIndexer`` whose model is ``w2v``.
    """
    # NOTE(review): gensim documents the second AnnoyIndexer argument as
    # ``num_trees``, so this value looks like a tree count rather than a
    # vector dimensionality -- confirm before renaming it.
    dims = 100
    annoy_file_name = data_dir + '/annoy_index_' + '_' + str(dims) + '_' + embedding_type + '_' + str(len(w2v.vocab))
    if os.path.exists(annoy_file_name):
        logging.info("Loading Annoy from file: %s", annoy_file_name)
        annoy_index = AnnoyIndexer()
        annoy_index.load(annoy_file_name)
        # Use the embeddings passed in by the caller; the file name above is
        # derived from the same object, so index and model stay in agreement.
        annoy_index.model = w2v
    else:
        logging.info("Creating Annoy")
        annoy_index = AnnoyIndexer(w2v, dims)
        annoy_index.save(annoy_file_name)
        logging.info("Annoy indexing saved to %s", annoy_file_name)
    return annoy_index

コード例 #9

ファイルを表示

ファイル: soricut_and_och_2015.py プロジェクト: UchideHiroki/2018AIa

 def index_vector(self, dimensions=300, save=False):
     """Return an Annoy indexer over ``self.embedding``.

     The index is used by ``is_word_pairs_similar``.  As noted by the
     original author, queries through the Annoy index may be slower than
     through the normal index.

     If ``preprocessed/annoy.index`` exists under the parent of the current
     working directory it is loaded and ``self.embedding`` is attached as
     its model.  Otherwise a new indexer is built from ``self.embedding``
     and ``dimensions``, and written to that path only when ``save`` is true.
     """
     index_file = Path.cwd().parent.joinpath('preprocessed/annoy.index')

     if not index_file.exists():
         built = AnnoyIndexer(self.embedding, dimensions)
         if save:
             built.save(str(index_file))
         return built

     loaded = AnnoyIndexer()
     loaded.load(str(index_file))
     loaded.model = self.embedding
     return loaded

コード例 #10

ファイルを表示

ファイル: w2v_fcst.py プロジェクト: MathewXJ/PycharmProjects

from gensim.models import Word2Vec
from gensim.similarities.index import AnnoyIndexer
import os
from app.common.config import model_path
from app.util.pre_model import W2V_VOCABULARY_SET, VOCABULARY_SET
from app.util.remove_utils import remove_not_sports

# Load the specified model from the model directory.
path_to_model = os.path.join(model_path, 'word2vec')
model = Word2Vec.load(path_to_model)

# Load the Annoy indexer from disk.
path_to_indexer = os.path.join(model_path, 'annoy_indexer_100')
annoy_indexer_100 = AnnoyIndexer()
annoy_indexer_100.load(path_to_indexer)
# The indexer is created empty and loaded from file, so the model is
# attached explicitly afterwards.
annoy_indexer_100.model = model


# 使用模型计算输入词组
# 2018-03-08 使用indexer解决cpu占用问题
def associate_words(words, cont_type, with_model=model, top_n=10):
    words = [w.strip() for w in words]
    words = list(filter(lambda x: x in VOCABULARY_SET, words))
    res = {}
    tops = []
    if words is None or len(words) == 0:
        return res
    for i in range(len(words)):
        try:
            tops = (with_model.most_similar(positive=words[0:(len(words) - i)],
                                            topn=top_n,

コード例 #11

ファイルを表示

import codecs, json
from collections import defaultdict

import numpy as np
from matplotlib import pyplot as plt

from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

from gensim.models import KeyedVectors
from gensim.similarities.index import AnnoyIndexer

# Entity embeddings: read the binary word2vec-format vectors, then load the
# saved Annoy index and attach the vectors to it as its model.
wv_ent = KeyedVectors.load_word2vec_format('entity2vec.bin', binary=True)
annoy_index_ent = AnnoyIndexer()
annoy_index_ent.load('entity2vec.index')
annoy_index_ent.model = wv_ent

# Relation embeddings: same steps with the relation files.
wv_rel = KeyedVectors.load_word2vec_format('relation2vec.bin', binary=True)
annoy_index_rel = AnnoyIndexer()
annoy_index_rel.load('relation2vec.index')
annoy_index_rel.model = wv_rel


def tsne_vis(X, labels, name):
    tsne = TSNE(n_components=2).fit_transform(X)
    plt.figure(figsize=(50, 50))
    for i, label in enumerate(labels):
        x, y = tsne[i, :]
        plt.scatter(x, y)
        plt.annotate(label,
                     xy=(x, y),

コード例 #12

ファイルを表示

    annoy_index.save(annoy_file)


# extend_glove()
# build_word2vec()

# Load the KeyedVectors referenced by `w2v_model` and log the loaded object.
info('loading model')
model = KeyedVectors.load(w2v_model)
info(model)

info('init sims')
model.init_sims()

# build_annoy(model)
# Load the saved Annoy index from `annoy_file` and attach the model to it.
info('loading annoy indexer')
annoy_index = AnnoyIndexer()
annoy_index.load(annoy_file)
annoy_index.model = model

# NOTE(review): the random vector is overwritten on the very next line, so
# both queries below run with an all-zero vector of length DIM -- confirm
# which of the two assignments is intended.
noise = np.random.random([DIM])
noise = np.zeros(DIM)
# Query through the Annoy index (top 3), with logging disabled for the call.
info('querying with Annoy')
with DisableLogger():
    val = model.most_similar([noise, noise], topn=3, indexer=annoy_index)
info(val)

# Same query without an indexer (top 1), for comparison in the log output.
info('querying with gensim')
with DisableLogger():
    val = model.most_similar([noise, noise], topn=1)
info(val)

コード例 #13

ファイルを表示

# You can save and load your indexes from/to disk to prevent having to
# construct them each time. This will create two files on disk, *fname* and
# *fname.d*. Both files are needed to correctly restore all attributes. Before
# loading an index, you will have to create an empty AnnoyIndexer object.
#
fname = '/tmp/mymodel.index'

# Persist index to disk
annoy_index.save(fname)

# Load index back
# NOTE: annoy_index2 is only bound when the file exists; the query below
# relies on the save above having produced it.
import os.path
if os.path.exists(fname):
    annoy_index2 = AnnoyIndexer()
    annoy_index2.load(fname)
    annoy_index2.model = model

# Results should be identical to above
vector = model.wv["science"]
approximate_neighbors2 = model.wv.most_similar([vector],
                                               topn=11,
                                               indexer=annoy_index2)
for neighbor in approximate_neighbors2:
    print(neighbor)

# `approximate_neighbors` is computed earlier, outside this excerpt.
assert approximate_neighbors == approximate_neighbors2

###############################################################################
# Be sure to use the same model at load that was used originally, otherwise you
# will get unexpected behaviors.
#