コード例 #1
0
def main():
    """Load a trained GloVe model and print a few embedding sanity checks.

    Steps:
      1. Read the run configuration via ``load_config()``.
      2. Unpickle the vocabulary stored as ``vocab.pkl`` inside
         ``config.cooccurrence_dir``.
      3. Rebuild the ``GloVe`` module with the configured hyperparameters and
         restore its parameters from ``config.output_filepath``.
      4. Sum the two embedding tables (``weight`` and ``weight_tilde``) and
         register the result in a ``KeyedVectors`` index keyed by token.
      5. Print pairwise similarities for a few word pairs and the nearest
         neighbours of a few probe words.
    """
    config = load_config()

    vocab_path = os.path.join(config.cooccurrence_dir, "vocab.pkl")
    # NOTE: pickle.load can execute arbitrary code while deserializing; only
    # point this at vocabulary files produced by a trusted pipeline.
    with open(vocab_path, "rb") as f:
        vocab = pickle.load(f)

    model = GloVe(
        vocab_size=config.vocab_size,
        embedding_size=config.embedding_size,
        x_max=config.x_max,
        alpha=config.alpha,
    )
    # map_location="cpu" lets a checkpoint written during a GPU run load on a
    # CPU-only machine; the tensors are converted with .numpy() below, which
    # requires them to live on the CPU anyway.
    state_dict = torch.load(config.output_filepath, map_location="cpu")
    model.load_state_dict(state_dict)

    # One vector per vocabulary index: the element-wise sum of both tables.
    combined_weights = (
        model.weight.weight.detach() + model.weight_tilde.weight.detach()
    ).numpy()
    tokens = [vocab.get_token(index) for index in range(config.vocab_size)]

    keyed_vectors = KeyedVectors(vector_size=config.embedding_size)
    keyed_vectors.add_vectors(keys=tokens, weights=combined_weights)

    # (first, second) as they appear in the printed label; the similarity call
    # receives them in the opposite order, matching the original output.
    for first, second in (("man", "woman"), ("man", "apple"), ("woman", "apple")):
        print(f"How similar is {first} and {second}:")
        print(keyed_vectors.similarity(second, first))

    for word in ["computer", "united", "early"]:
        print(f"Most similar words of {word}:")
        # similar_by_word yields (token, score) pairs; keep only the tokens.
        most_similar_words = [
            neighbour for neighbour, _ in keyed_vectors.similar_by_word(word)
        ]
        print(most_similar_words)
コード例 #2
0
ファイル: main.py プロジェクト: PiotrTrawinski/WordEmbeddings
def user_eval_similarity(model: KeyedVectors):
    """Prompt for two words and print their similarity according to *model*.

    The line read from standard input is split on whitespace. The first two
    tokens are passed to ``model.similarity``; any further tokens are ignored.
    If fewer than two tokens are entered, a short message is printed and the
    function returns without querying the model.

    Args:
        model: Object exposing ``similarity(word_a, word_b)``.

    Returns:
        None. The result is written to standard output.
    """
    ws = input(
        "two words to compare for similarity seperated by space: ").split()
    # Guard against empty or single-word input, which would otherwise fail
    # with an uninformative IndexError when indexing the token list.
    if len(ws) < 2:
        print(f"expected two words separated by a space, got {len(ws)}")
        return
    first_word, second_word = ws[:2]
    # NOTE(review): model.similarity presumably raises for words missing from
    # the vocabulary; that error is left to propagate - confirm desired UX.
    similarity = model.similarity(first_word, second_word)
    print(f"cosine similarity = {similarity}")