Esempio n. 1
0
def print_ppmi(line: str, t1: str, t2: str, f):
    """Print ``line`` plus the PPMI cosine similarity of ``t1`` and ``t2``.

    The word vectors are read from the ``ppmi`` matrix via ``ppmi_index``
    (both defined outside this function) and compared with
    ``cosine_similarity``.

    Args:
        line: Input line to echo; trailing whitespace is stripped.
        t1: First word.
        t2: Second word.
        f: Writable text stream that receives the output line.

    The output line is ``line`` and the similarity separated by a tab.
    If the lookup or the similarity computation raises ``KeyError``
    (e.g. a word is absent from ``ppmi_index``), ``-`` is written in place
    of the similarity.
    """
    # Only the lookup/similarity step is guarded, so a KeyError raised while
    # formatting or writing is not mistaken for a missing word.
    try:
        score = cosine_similarity(ppmi[ppmi_index[t1]], ppmi[ppmi_index[t2]])
    except KeyError:
        score = "-"
    print(f"{line.rstrip()}\t{score}", file=f)
Esempio n. 2
0
def print_w2v(line: str, t1: str, t2: str, f):
    """Print ``line`` plus the word2vec cosine similarity of ``t1`` and ``t2``.

    The word vectors are read from the ``w2v`` matrix via ``w2v_index``
    (both defined outside this function) and compared with
    ``cosine_similarity``.

    Args:
        line: Input line to echo; trailing whitespace is stripped.
        t1: First word.
        t2: Second word.
        f: Writable text stream that receives the output line.

    The output line is ``line`` and the similarity separated by a tab.
    If the lookup or the similarity computation raises ``KeyError``
    (e.g. a word is absent from ``w2v_index``), ``-`` is written in place
    of the similarity.
    """
    # Only the lookup/similarity step is guarded, so a KeyError raised while
    # formatting or writing is not mistaken for a missing word.
    try:
        score = cosine_similarity(w2v[w2v_index[t1]], w2v[w2v_index[t2]])
    except KeyError:
        score = "-"
    print(f"{line.rstrip()}\t{score}", file=f)
Esempio n. 3
0
def similar_list(mat: M, t_idx: Dict[str, int], v: np.ndarray) -> Rank:
    """Rank every term in ``t_idx`` by its cosine similarity to ``v``.

    Args:
        mat: Matrix of term vectors, indexed by the row numbers stored in
            ``t_idx``.
        t_idx: Mapping from term to its row index in ``mat``.
        v: Query vector to compare every row against.

    Returns:
        A list of ``(term, similarity)`` pairs covering all terms, sorted by
        similarity in descending order. Callers slice it to get a top-n
        (the query term itself, if present, is included).
    """
    # Use the row index stored for each term rather than the term's position
    # in the dict, so the pairing stays correct even when ``t_idx`` is not
    # ordered by row number.
    scored = [
        (term, cosine_similarity(v, mat[row])) for term, row in t_idx.items()
    ]
    return sorted(scored, key=itemgetter(1), reverse=True)
Esempio n. 4
0
def main():
    """Print up to ten words ranked just below the top match for "England".

    Loads the term matrix from ``knock85.matrix`` and the pickled term
    mapping stored under the Redis key ``knock83.t``, computes the cosine
    similarity between the "England" row and every row, and prints
    ``word<TAB>similarity`` for ranks 1-10 (rank 0 is skipped).
    """
    r = redis.Redis(host="localhost", port=6379, db=0)
    matrix = loadmat("knock85.matrix")["knock85.matrix"]
    # NOTE(review): pickle.loads can execute arbitrary code on a crafted
    # payload; this is only safe while the Redis instance holds trusted data.
    t = pickle.loads(r.get("knock83.t"))

    t_keys = list(t.keys())
    t_index = OrderedDict((key, i) for i, key in enumerate(t_keys))

    eng = matrix[t_index["England"]]
    dist = {
        key: cosine_similarity(eng, matrix[i]) for i, key in enumerate(t_keys)
    }
    rank = sorted(dist.items(), key=itemgetter(1), reverse=True)

    # rank[0] is skipped (presumably "England" itself). Slicing instead of
    # indexing avoids an IndexError when fewer than 11 terms exist.
    for word, score in rank[1:11]:
        print(f"{word}\t{score}")
Esempio n. 5
0
def main():
    """Print words ranked 1-10 by similarity to Spain - Madrid + Athens.

    Loads the term matrix from ``knock85.matrix`` and the pickled term
    mapping stored under the Redis key ``knock83.t``, builds the analogy
    vector, computes its cosine similarity against every row, and prints
    ``word<TAB>similarity`` for ranks 1-10 (rank 0 is skipped).
    """
    r = redis.Redis(host="localhost", port=6379, db=0)
    matrix = loadmat("knock85.matrix")["knock85.matrix"]
    # NOTE(review): pickle.loads can execute arbitrary code on a crafted
    # payload; this is only safe while the Redis instance holds trusted data.
    t = pickle.loads(r.get("knock83.t"))

    t_keys = list(t.keys())
    t_index = OrderedDict((key, i) for i, key in enumerate(t_keys))

    # Spain - Madrid (its capital) + Athens (the capital of Greece)
    vec = (
        matrix[t_index["Spain"]] - matrix[t_index["Madrid"]] + matrix[t_index["Athens"]]
    )
    dist = {
        key: cosine_similarity(vec, matrix[i]) for i, key in enumerate(t_keys)
    }
    rank = sorted(dist.items(), key=itemgetter(1), reverse=True)

    # NOTE(review): rank[0] is skipped as in the single-word query, but for
    # an analogy vector the top hit is not necessarily a query word, so this
    # may drop a genuine answer - confirm this is intended.
    # Slicing instead of indexing avoids an IndexError when fewer than 11
    # terms exist.
    for word, score in rank[1:11]:
        print(f"{word}\t{score}")
Esempio n. 6
0
def main():
    """Run knock86 through knock89 and print their results.

    Calls ``load_word2vec`` on ``../data/w2v.txt``, deserializes the
    ``matrix`` and ``t_index`` objects, then prints:

    * the cosine similarity between the "U.S" and "United_States" rows,
    * entries 1-10 of the similarity ranking for "England",
    * entries 1-10 of the ``multi_vec`` ranking for Spain/Madrid/Athens.
    """

    def show(ranking):
        # Entry 0 is skipped; each line is the left-justified term (width 10)
        # followed by its score.
        for entry in ranking[1:11]:
            print(f"{entry[0].ljust(10)} : {entry[1]}")

    load_word2vec("../data/w2v.txt")

    matrix = deserialize("matrix")
    t_index = deserialize("t_index")

    # knock86: vector of "United_States"
    united_states_vec = matrix[t_index["United_States"]]

    # knock87: similarity between "U.S" and "United_States"
    us_abbrev_vec = matrix[t_index["U.S"]]
    print(cosine_similarity(us_abbrev_vec, united_states_vec))

    # knock88: ranking for "England"
    england_vec = matrix[t_index["England"]]
    show(similar_list(matrix, t_index, england_vec))

    # knock89: ranking produced by multi_vec for Spain, Madrid, Athens
    show(multi_vec(matrix, t_index, "Spain", "Madrid", "Athens"))