Esempio n. 1
0
def create_keyed_vector(old_keyed_vector, new_matrix):
    """Build a ``KeyedVectors`` pairing an existing vocabulary with new vectors.

    The returned object reuses ``old_keyed_vector.vocab`` and
    ``old_keyed_vector.index2word`` (the same objects, not copies) and stores
    ``new_matrix`` as its ``vectors``.

    :param old_keyed_vector: object exposing ``vocab`` and ``index2word``.
    :param new_matrix: matrix exposing ``shape``; must hold exactly one row
        per entry of ``old_keyed_vector.vocab``.
    :returns: a new ``KeyedVectors`` whose ``vector_size`` is
        ``new_matrix.shape[1]``.
    :raises AssertionError: if ``new_matrix.shape`` differs from
        ``(len(old_keyed_vector.vocab), new_matrix.shape[1])``.
    """
    vector_size = new_matrix.shape[1]
    expected_shape = (len(old_keyed_vector.vocab), vector_size)

    # Explicit raise instead of ``assert`` so the guard still runs under
    # ``python -O``. AssertionError is kept so the exception type seen by
    # existing callers does not change. ``new_matrix`` is what gets stored as
    # ``vectors`` below, so checking it up front is equivalent and avoids
    # building an object that would be thrown away.
    if expected_shape != new_matrix.shape:
        raise AssertionError(
            "matrix shape {} does not match expected shape {} "
            "(vocabulary size, vector size)".format(
                new_matrix.shape, expected_shape))

    keyed_vector = KeyedVectors(vector_size)
    keyed_vector.vector_size = vector_size
    keyed_vector.vocab = old_keyed_vector.vocab
    keyed_vector.index2word = old_keyed_vector.index2word
    keyed_vector.vectors = new_matrix
    return keyed_vector
Esempio n. 2
0
def apply_w2v_regression(model, regression):
    """Return a new ``KeyedVectors`` holding ``model``'s vectors mapped through ``regression``.

    The result gets a ``.copy()`` of ``model.vocab``, the same
    ``vector_size``, and the same ``index2word`` object as ``model``. Its
    ``syn0`` is ``regression.predict(model.syn0)`` cast to ``float32``.

    :param model: object exposing ``vocab``, ``vector_size``, ``index2word``
        and ``syn0`` (per the original docs: a gensim ``KeyedVectors`` or
        ``Word2Vec`` instance).
    :param regression: object exposing ``predict`` (per the original docs: a
        ``sklearn.linear_model.LinearRegression`` instance).
    :returns: a gensim ``KeyedVectors`` instance.
    """
    result = KeyedVectors()

    # Carry the vocabulary metadata across; only the vocab mapping is copied,
    # index2word is shared with the source model.
    vocab_copy = model.vocab.copy()
    result.vocab = vocab_copy
    result.vector_size = model.vector_size
    result.index2word = model.index2word

    # Map every source vector through the regression, then store as float32.
    predicted = regression.predict(model.syn0)
    result.syn0 = predicted.astype(np.float32)
    return result
Esempio n. 3
0
def __create_keyed_vector(matrix, orig_vocab):
    """Build a ``KeyedVectors`` from an embedding matrix and a vocabulary object.

    Words are read from ``orig_vocab.token2id`` in ascending id order. Each
    word gets a ``Vocab`` entry carrying its id as ``index`` and
    ``orig_vocab.word_freq[id]`` as ``count``.

    :param matrix: matrix exposing ``shape``; must hold exactly one row per
        word in ``orig_vocab.token2id``.
    :param orig_vocab: object exposing ``token2id`` (word -> id pairs via
        ``.items()``) and ``word_freq`` (indexable by id).
    :returns: a new ``KeyedVectors`` whose ``vector_size`` is
        ``matrix.shape[1]``.
    :raises AssertionError: if ``matrix.shape`` differs from
        ``(number of words, matrix.shape[1])``.
    """
    vocab = {}
    index_to_word = []
    # Visit words by ascending id so index_to_word is ordered by id.
    for word, word_id in sorted(orig_vocab.token2id.items(),
                                key=itemgetter(1)):
        index_to_word.append(word)
        vocab[word] = Vocab(index=word_id, count=orig_vocab.word_freq[word_id])

    vector_size = matrix.shape[1]
    expected_shape = (len(vocab), vector_size)

    # Explicit raise instead of ``assert`` so the guard still runs under
    # ``python -O``. AssertionError is kept so the exception type seen by
    # existing callers does not change. ``matrix`` is what gets stored as
    # ``vectors`` below, so checking it before construction is equivalent.
    if expected_shape != matrix.shape:
        raise AssertionError(
            "matrix shape {} does not match expected shape {} "
            "(vocabulary size, vector size)".format(
                matrix.shape, expected_shape))

    keyed_vector = KeyedVectors(vector_size)
    keyed_vector.vector_size = vector_size
    keyed_vector.vocab = vocab
    keyed_vector.index2word = index_to_word
    keyed_vector.vectors = matrix
    return keyed_vector