def update(self):
    """Rebuild the keyed-vectors model from the vocabulary and save it.

    Reads the word vectors and the ``'word'`` vocabulary from the
    instance's file objects, assembles an embedding matrix ordered by
    vocabulary index, and writes the resulting ``KeyedVectors`` object to
    ``self.path``.

    Each vocabulary value is indexed as ``entry[0]`` (used as the word
    index) and ``entry[1]`` (used as the count).
    """
    vectors = self.word_vectors_file.get_word_vectors()
    vocabulary = self.vocabs_file.get_vocabs()['word']

    # Words ordered by their vocabulary index (first element of each entry).
    ordered_words = sorted(vocabulary, key=lambda word: vocabulary[word][0])

    # The first word gets no lookup in ``vectors``; its row is the mean of
    # all the other rows (presumably the unknown-word slot -- confirm
    # against the vocabulary builder).
    known_embs = vectors[ordered_words[1:]]
    mean_emb = np.mean(known_embs, axis=0, keepdims=True)
    matrix = np.concatenate((mean_emb, known_embs), axis=0)

    model = KeyedVectors(matrix.shape[1])
    model.syn0 = matrix
    model.vocab = {
        word: Vocab(index=entry[0], count=entry[1])
        for word, entry in vocabulary.items()
    }
    model.index2word = ordered_words
    model.save(self.path)
def delete_keys(w2v_model: KeyedVectors, del_keys: list):
    """Remove the given words from a keyed-vectors model, in place.

    For every key in ``del_keys`` the vocabulary entry is deleted, the
    matching row is removed from ``w2v_model.syn0``, and the word is
    dropped from ``w2v_model.index2word``. The remaining vocabulary
    entries are then renumbered so that ``vocab[word].index`` matches the
    word's new position in ``index2word``. Finally the resulting
    vocabulary size and matrix shape are printed.

    Args:
        w2v_model: Model exposing ``vocab``, ``index2word`` and ``syn0``.
        del_keys: Words to remove.

    Raises:
        KeyError: If a key is not in ``w2v_model.vocab`` (including a key
            listed twice, since its entry is gone after the first pass).
    """
    del_indexes = []
    with click.progressbar(del_keys, length=len(del_keys),
                           label='Deleted keys') as bar:
        for key in bar:
            del_indexes.append(w2v_model.vocab[key].index)
            del w2v_model.vocab[key]

    # Filter by position rather than blanking entries to '' and testing
    # truthiness: a genuine empty-string token would otherwise be dropped
    # from index2word while its row stayed in syn0, misaligning the two.
    removed = set(del_indexes)
    w2v_model.syn0 = np.delete(w2v_model.syn0, del_indexes, axis=0)
    w2v_model.index2word = [
        word
        for position, word in enumerate(w2v_model.index2word)
        if position not in removed
    ]

    # Renumber the survivors so vocab indexes match the compacted rows.
    for i, word in enumerate(w2v_model.index2word):
        w2v_model.vocab[word].index = i

    # Report on the model that was passed in (the original read an
    # undefined/global name ``model`` here).
    print(len(w2v_model.vocab), w2v_model.syn0.shape)