コード例 #1
0
#!/usr/bin/env python

import sys
sys.path.append('..')
from common.util import preprocess, create_co_matrix, cos_similarity, most_similar, ppmi
import numpy as np

# Demo script: build a matrix C from a one-sentence corpus and print cosine
# similarities between rows of C selected by word.
# The sentence is lower-cased before being handed to preprocess().
text = 'You say goodbye and I say hello.'.lower()
# preprocess() is imported from common.util; presumably it returns the corpus
# as word ids plus the two lookup tables -- confirm against common.util.
corpus, word_to_id, id_to_word = preprocess(text)

print(corpus)
print(id_to_word)

# The vocabulary size is taken as the number of entries in word_to_id.
vocabulary_size = len(word_to_id)
# Presumably a word co-occurrence matrix (inferred from the helper's name).
C = create_co_matrix(corpus=corpus, vocabulary_size=vocabulary_size)

print(vocabulary_size)
print(C)

# C is indexed by word id, so each lookup below selects the entry of C that
# belongs to one word.
vec_you = C[word_to_id['you']]
vec_i = C[word_to_id['i']]
vec_hello = C[word_to_id['hello']]
vec_say = C[word_to_id['say']]
vec_goodbye = C[word_to_id['goodbye']]
vec_and = C[word_to_id['and']]

# Each comparison is printed as a label line followed by the similarity value.
print('you, i')
print(cos_similarity(vec_you, vec_i))
print('you, hello')
print(cos_similarity(vec_you, vec_hello))
# NOTE(review): the listing stops after this label; the matching
# cos_similarity() print and any use of vec_say / vec_goodbye / vec_and are
# not visible here.
print('you, say')
コード例 #2
0
def cos_similarity(x, y, eps=1e-8):
    """Return the cosine similarity of two vectors.

    Each vector is divided by ``sqrt(sum(v**2) + eps)`` and the dot product
    of the two scaled vectors is returned.

    Args:
        x: First vector; a NumPy array or any array-like of numbers.
        y: Second vector, with a shape compatible with ``x`` for ``np.dot``.
        eps: Small constant added under the square root so that an all-zero
            vector produces 0 instead of a division by zero.

    Returns:
        The similarity as a NumPy scalar.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Integer/bool inputs (e.g. raw count vectors) are promoted to float64
    # before squaring: in an integer dtype ``v**2`` wraps around silently
    # (int32 already overflows at 46341), which corrupts the norm or even
    # makes it negative. Floating-point inputs keep their dtype.
    if x.dtype.kind in 'biu':
        x = x.astype(np.float64)
    if y.dtype.kind in 'biu':
        y = y.astype(np.float64)

    nx = x / np.sqrt(np.sum(x**2) + eps)
    ny = y / np.sqrt(np.sum(y**2) + eps)
    return np.dot(nx, ny)


import sys

sys.path.append('..')
from common.util import preprocess, create_co_matrix, cos_similarity

# Demo: print the cosine similarity between the rows of C that belong to
# the words "you" and "i".
text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

vocab_size = len(word_to_id)
C = create_co_matrix(corpus, vocab_size)

# Resolve both word ids first, then compare the rows of C they select.
you_id = word_to_id['you']
i_id = word_to_id['i']
similarity = cos_similarity(C[you_id], C[i_id])
print(similarity)


def most_similar(query, word_to_id, id_to_word, word_matrix, top=5):
    if query not in word_to_id:
        print('%s is not found' % query)
        return

    print('\n[query] ' + query)
    query_id = word_to_id[query]
    query_vec = word_matrix[query_id]
コード例 #3
0
import sys

sys.path.append('..')
import numpy as np
from common.util import preprocess, create_co_matrix, cos_similarity, most_similar

# Demo: build C from a one-sentence corpus, print the cosine similarity of
# the rows for "say" and "goodbye", then run a most_similar() query.
text = 'You say goodbye I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

vocab_size = len(word_to_id)
C = create_co_matrix(corpus, vocab_size)

# Rows of C are selected by word id, in the order the words are listed.
say_vec, goodbye_vec = (C[word_to_id[word]] for word in ('say', 'goodbye'))
print(cos_similarity(say_vec, goodbye_vec))

# Query for "hello", asking for the top 5 entries.
most_similar('hello', word_to_id, id_to_word, C, top=5)