コード例 #1
0
 def update(self):
     """Rebuild the keyed-vector model and save it to ``self.path``.

     Reads the source word vectors and the ``'word'`` vocabulary, orders the
     words by the first element of their vocabulary entry, and builds an
     embedding matrix whose first row is the mean of all remaining rows.
     The first word in that order is never looked up in the source vectors
     (presumably it is the unknown-word slot -- confirm against the vocab
     builder). Each vocabulary entry is read as ``(index, count)``.
     """
     source_vectors = self.word_vectors_file.get_word_vectors()
     word_vocab = self.vocabs_file.get_vocabs()['word']

     # Order words by the index stored as the first element of each entry.
     ranked_items = sorted(word_vocab.items(), key=lambda item: item[1][0])
     ordered_words = [word for word, _ in ranked_items]

     # Every word except the first is fetched from the source vectors; the
     # first row is filled with the mean of those fetched embeddings.
     known_embs = source_vectors[ordered_words[1:]]
     mean_emb = np.mean(known_embs, axis=0, keepdims=True)
     matrix = np.concatenate((mean_emb, known_embs), axis=0)

     keyed = KeyedVectors(matrix.shape[1])
     keyed.syn0 = matrix
     keyed.vocab = {
         word: Vocab(index=entry[0], count=entry[1])
         for word, entry in word_vocab.items()
     }
     keyed.index2word = ordered_words
     keyed.save(self.path)
コード例 #2
0
def delete_keys(w2v_model: KeyedVectors, del_keys: list):
    """Remove the given words from a word-vector model in place.

    For every key in ``del_keys`` the vocabulary entry is dropped, the
    matching row is removed from ``w2v_model.syn0`` and the word is removed
    from ``w2v_model.index2word``. The remaining vocabulary entries are then
    re-indexed so ``vocab[word].index`` equals the word's new position in
    ``index2word``. Finally the vocabulary size and the shape of the
    embedding matrix are printed.

    Args:
        w2v_model: Model exposing ``vocab``, ``index2word`` and ``syn0``.
        del_keys: Words to delete; each must be present in
            ``w2v_model.vocab``.

    Raises:
        KeyError: If a key is not in the vocabulary. This includes a key
            listed twice, because it is gone after its first removal.
    """
    del_indexes = []
    with click.progressbar(del_keys,
                           length=len(del_keys),
                           label='Deleted keys') as bar:
        for key in bar:
            # pop() fetches the entry and removes it from the vocab at once.
            del_indexes.append(w2v_model.vocab.pop(key).index)

    w2v_model.syn0 = np.delete(w2v_model.syn0, del_indexes, axis=0)

    # Filter by position rather than blanking slots with '' and testing
    # truthiness: a genuine empty-string token must survive, otherwise
    # index2word would fall out of step with syn0 and vocab.
    removed = set(del_indexes)
    w2v_model.index2word = [
        word for i, word in enumerate(w2v_model.index2word)
        if i not in removed
    ]

    # Re-number the surviving entries to match their new row positions.
    for i, word in enumerate(w2v_model.index2word):
        w2v_model.vocab[word].index = i

    # Report on the model that was passed in, not on an unrelated global.
    print(len(w2v_model.vocab), w2v_model.syn0.shape)