예제 #1
0
def main():
    """Print the nine words closest to the vector Spain - England + Athens.

    Loads a pickled word-context matrix from ``t_c_matrix`` and a pickled
    word collection from ``t_set`` (both in the current directory).  The row
    of the matrix at position ``i`` is used as the representation of the
    ``i``-th word of the loaded collection.  Every word is scored against the
    combined vector with ``cos_sim`` (defined elsewhere in the project;
    presumably cosine similarity) and the nine best ``word<TAB>score`` lines
    are printed, highest score first.

    The three query words are not excluded from the ranking.

    Raises:
        IOError/OSError: if either input file cannot be opened.
        ValueError: if "Spain", "England" or "Athens" is not in the word list.
    """
    # Context managers guarantee both handles are closed, even if unpickling
    # fails.
    # NOTE(review): mode "r" is kept from the original; pickles written with a
    # binary protocol (and any pickle read under Python 3) need "rb" -- confirm
    # how these files were written.  Only unpickle files you trust.
    with open("t_c_matrix", "r") as matrix_fh, open("t_set", "r") as set_fh:
        word_context_matrix = pickle.load(matrix_fh)
        # NOTE(review): if the pickled object is a set, this ordering is the
        # set's iteration order and is assumed to match the matrix rows.
        word_set = list(pickle.load(set_fh))

    spain = word_context_matrix[word_set.index("Spain")]
    england = word_context_matrix[word_set.index("England")]
    athens = word_context_matrix[word_set.index("Athens")]
    # Assumes the rows support element-wise arithmetic (e.g. numpy arrays).
    target = spain - england + athens

    scores = {}
    for row, word in enumerate(word_set):
        scores[word] = cos_sim(word_context_matrix[row], target)

    # Negated key keeps the original descending order and tie handling.
    ranked = sorted(scores.items(), key=lambda item: -item[1])
    for word, sim in ranked[:9]:
        # Single-argument parenthesised form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(word + "\t" + str(sim))
예제 #2
0
def main():
    """Print the nine words whose representation is closest to "England".

    Loads a pickled word-context matrix from ``t_c_matrix`` and a pickled
    word collection from ``t_set`` (both in the current directory).  The row
    of the matrix at position ``i`` is used as the representation of the
    ``i``-th word of the loaded collection.  Every word except "England"
    itself is scored against the "England" row with ``cos_sim`` (defined
    elsewhere in the project; presumably cosine similarity) and the nine best
    ``word<TAB>score`` lines are printed, highest score first.

    Raises:
        IOError/OSError: if either input file cannot be opened.
        ValueError: if "England" is not in the word list.
    """
    # Context managers guarantee both handles are closed, even if unpickling
    # fails.
    # NOTE(review): mode "r" is kept from the original; pickles written with a
    # binary protocol (and any pickle read under Python 3) need "rb" -- confirm
    # how these files were written.  Only unpickle files you trust.
    with open("t_c_matrix", "r") as matrix_fh, open("t_set", "r") as set_fh:
        word_context_matrix = pickle.load(matrix_fh)
        # NOTE(review): if the pickled object is a set, this ordering is the
        # set's iteration order and is assumed to match the matrix rows.
        word_set = list(pickle.load(set_fh))

    # Word representation of "England".
    england_vec = word_context_matrix[word_set.index("England")]

    similarities = {}
    for row, word in enumerate(word_set):
        if word == "England":
            # The query word itself is left out of the ranking.
            continue
        similarities[word] = cos_sim(england_vec, word_context_matrix[row])

    # Negated key keeps the original descending order and tie handling.
    ranked = sorted(similarities.items(), key=lambda item: -item[1])
    for word, sim in ranked[:9]:
        # Single-argument parenthesised form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(word + "\t" + str(sim))
예제 #3
0
def main():
    """Rank all words by similarity to "England" and print the top nine.

    Reads two pickles from the current directory: ``t_c_matrix`` (the
    word-context matrix) and ``t_set`` (the word collection).  Row ``i`` of
    the matrix is treated as the representation of the ``i``-th word of the
    loaded collection.  Each word other than "England" is compared with the
    "England" row through ``cos_sim`` (defined elsewhere in the project;
    presumably cosine similarity); the nine highest scores are printed as
    ``word<TAB>score``, best first.

    Raises:
        IOError/OSError: if either input file cannot be opened.
        ValueError: if "England" is not in the word list.
    """
    # Nested ``with`` blocks close both files on every exit path; the original
    # left both handles open.
    # NOTE(review): mode "r" is kept from the original; pickles written with a
    # binary protocol (and any pickle read under Python 3) need "rb" -- confirm
    # how these files were written.  Only unpickle files you trust.
    with open("t_c_matrix", "r") as matrix_file:
        with open("t_set", "r") as words_file:
            word_context_matrix = pickle.load(matrix_file)
            # NOTE(review): if the pickled object is a set, this ordering is
            # the set's iteration order and is assumed to match the matrix
            # rows.
            word_set = list(pickle.load(words_file))

    # Word representation of "England".
    england_representation = word_context_matrix[word_set.index("England")]

    word_dict = {}
    for index, word in enumerate(word_set):
        if word == "England":
            # Do not compare the query word with itself.
            continue
        word_dict[word] = cos_sim(
            england_representation, word_context_matrix[index]
        )

    # Negated key keeps the original descending order and tie handling.
    best_first = sorted(word_dict.items(), key=lambda pair: -pair[1])
    for word, sim in best_first[:9]:
        # Single-argument parenthesised form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(word + "\t" + str(sim))
예제 #4
0
def main():
    """Solve the analogy Spain - England + Athens and print the top nine hits.

    Reads two pickles from the current directory: ``t_c_matrix`` (the
    word-context matrix) and ``t_set`` (the word collection).  Row ``i`` of
    the matrix is treated as the representation of the ``i``-th word of the
    loaded collection.  The query vector is built from the rows of "Spain",
    "England" and "Athens"; every word, including those three, is then scored
    against it with ``cos_sim`` (defined elsewhere in the project; presumably
    cosine similarity).  The nine highest scores are printed as
    ``word<TAB>score``, best first.

    Raises:
        IOError/OSError: if either input file cannot be opened.
        ValueError: if "Spain", "England" or "Athens" is not in the word list.
    """
    # Nested ``with`` blocks close both files on every exit path; the original
    # left both handles open.
    # NOTE(review): mode "r" is kept from the original; pickles written with a
    # binary protocol (and any pickle read under Python 3) need "rb" -- confirm
    # how these files were written.  Only unpickle files you trust.
    with open("t_c_matrix", "r") as matrix_file:
        with open("t_set", "r") as words_file:
            word_context_matrix = pickle.load(matrix_file)
            # NOTE(review): if the pickled object is a set, this ordering is
            # the set's iteration order and is assumed to match the matrix
            # rows.
            word_set = list(pickle.load(words_file))

    vec1 = word_context_matrix[word_set.index("Spain")]
    vec2 = word_context_matrix[word_set.index("England")]
    vec3 = word_context_matrix[word_set.index("Athens")]
    # Assumes the rows support element-wise arithmetic (e.g. numpy arrays).
    query = vec1 - vec2 + vec3

    word_dict = {}
    for index, word in enumerate(word_set):
        word_dict[word] = cos_sim(word_context_matrix[index], query)

    # Negated key keeps the original descending order and tie handling.
    best_first = sorted(word_dict.items(), key=lambda pair: -pair[1])
    for word, sim in best_first[:9]:
        # Single-argument parenthesised form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(word + "\t" + str(sim))
예제 #5
0
def make_sim_dict(word_context_matrix, word_set, objective_word, word_dict):
    """Score every other word against ``objective_word`` and store the results.

    Args:
        word_context_matrix: indexable collection; item ``i`` is used as the
            representation of ``word_set[i]``.
        word_set: sequence of words providing ``index()``.
        objective_word: the word all others are compared with; it is skipped
            and gets no entry of its own.
        word_dict: mapping that receives ``word -> cos_sim(...)`` entries; it
            is modified in place.

    Returns:
        The same ``word_dict`` object that was passed in.

    Raises:
        ValueError: if ``objective_word`` is not in ``word_set``.
    """
    # Vector representation of the word that was passed in.
    target_row = word_set.index(objective_word)
    target_vec = word_context_matrix[target_row]

    for row, candidate in enumerate(word_set):
        if candidate == objective_word:
            continue
        # cos_sim is defined elsewhere in the project (presumably cosine
        # similarity).
        word_dict[candidate] = cos_sim(word_context_matrix[row], target_vec)

    return word_dict
예제 #6
0
def make_sim_dict(word_context_matrix, word_set, objective_word, word_dict):
    """Fill ``word_dict`` with each word's similarity to ``objective_word``.

    For every position ``i`` in ``word_set`` whose word differs from
    ``objective_word``, ``word_context_matrix[i]`` is compared with the
    matrix entry of ``objective_word`` via ``cos_sim`` (defined elsewhere in
    the project; presumably cosine similarity) and the score is stored under
    that word.

    ``word_dict`` is updated in place and also returned.  ``word_set.index``
    raises ``ValueError`` when ``objective_word`` is not present.
    """
    # Vector representation of the word that was passed in.
    reference = word_context_matrix[word_set.index(objective_word)]

    for position, current in enumerate(word_set):
        if current != objective_word:
            score = cos_sim(word_context_matrix[position], reference)
            word_dict[current] = score

    return word_dict