# Example 1
        m_score = 0.0
        for s1 in senses[w1]:
            for s2 in senses[w2]:
                if (sense_vectors.has_key(s1) and sense_vectors.has_key(s2)):
                    m_score = m_score + cos_function(sense_vectors[s1] , sense_vectors[s2])
        if ( len(senses[w1]) > 0.0 and len(senses[w2]) > 0.0):
            m_score = m_score / ( float( len(senses[w1] ) ) * float ( len(senses[w2])) )
        else:
            continue
        if ( m_score > 0.0 ):
            machine_score.append(m_score)
            human_score.append(float(p[2]))
    p_val, p_rel = sci.stats.spearmanr(human_score, machine_score)
    print "Simple Linear Approach", p_val
if __name__ == "__main__":
    # Load the trained embeddings and the human-scored word pairs.
    word_vectors = nlp.read_word_vectors(VECTOR_DIR + VECTOR_NAME)
    word_pairs = nlp.read_csv(CSV_DIR + CSV_NAME)
    # Build the deduplicated, lower-cased vocabulary from both columns of
    # every pair.
    seen = set()
    for pair in word_pairs:
        seen.add(pair[0].lower())
        seen.add(pair[1].lower())
    vocab = list(seen)
    # Fetch the WordNet-style relations for each word and initialise one
    # zero vector per sense.
    for w in vocab:
        word_hypernyms[w] = nlp.read_hypernyms(w)
        word_hyponyms[w] = nlp.read_hyponyms(w)
        word_synonyms[w] = nlp.read_synonyms(w)
        senses[w] = nlp.read_senses(w)
        for s in senses[w]:
            sense_vectors[s] = np.zeros(VECTOR_DIM)
            sense_hypernyms[s] = nlp.read_hypernyms_by_sense(s)
            sense_hyponyms[s] = nlp.read_hyponyms_by_sense(s)
# Example 2
def cost_func(s):
    """Return the weighted squared-distance cost for sense ``s``.

    Sums, over every lemma of the synset, the squared distance between the
    lemma's word-pool vector and the sense vector (scaled by ``alpha``),
    plus a single squared distance between the sense vector and its
    sense-pool vector (scaled by ``beta``); the total is halved.

    Relies on module-level globals: alpha, beta, word_pool, sense_vectors,
    sense_pool.  ``s`` is expected to be a WordNet synset (has .lemmas()).
    """
    result = 0.0
    for lemma in s.lemmas():
        wd = lemma.name()
        # Word-attachment term: accumulate for EVERY lemma.  The original
        # code assigned `dis` inside the loop but only added it to `result`
        # after the loop, so all lemmas but the last were silently dropped
        # (and an empty lemma list raised NameError on `dis`).
        dis = alpha * np.linalg.norm(word_pool[wd] - sense_vectors[s])
        result = result + np.power(dis, 2)
    # Sense-pool regularization term depends only on s, not on any lemma:
    # add it exactly once, outside the loop.
    dis1 = beta * np.linalg.norm(sense_vectors[s] - sense_pool[s])
    result = result + np.power(dis1, 2)
    result = 0.5 * result
    return result


if __name__ == "__main__":
    # NOTE(review): this chunk appears truncated -- the parallel script in
    # example 1 continues past the last line visible here.
    print "VECTOR:", VECTOR_NAME
    # Load the trained embeddings and the human-scored word pairs.
    word_vectors = nlp.read_word_vectors(VECTOR_DIR + VECTOR_NAME)
    word_pairs = nlp.read_csv(CSV_DIR + CSV_NAME)
    print "CSV", CSV_NAME
    # Build the deduplicated, lower-cased vocabulary from both columns of
    # every pair.
    vocab = []
    for p in word_pairs:
        vocab.append(p[0].lower())
        vocab.append(p[1].lower())
    vocab = list(set(vocab))
    # Fetch WordNet-style relations per word and initialise one zero vector
    # per sense.
    for w in vocab:
        word_hypernyms[w] = nlp.read_hypernyms(w)
        word_hyponyms[w] = nlp.read_hyponyms(w)
        word_synonyms[w] = nlp.read_synonyms(w)
        senses[w] = nlp.read_senses(w)
        for s in senses[w]:
            sense_vectors[s] = np.zeros(VECTOR_DIM)
            sense_hypernyms[s] = nlp.read_hypernyms_by_sense(s)
# Example 3
# Location of the evaluation CSV files (relative to this script).
csv_path = "../../csv/"
# Directory holding the trained embedding files.
vector_path = "../test_vector/"
vector_file = "100_6.vec"
# Embedding dimensionalities and context-window sizes to evaluate; the main
# loop below builds one "<dim>_<win>.vec" filename per combination.
vector_dim = [100]
vector_win = [6]
def cos_function(v1, v2):
    """Return the cosine similarity between vectors ``v1`` and ``v2``."""
    numerator = np.dot(v1, np.transpose(v2))
    magnitude = np.linalg.norm(v1) * np.linalg.norm(v2)
    return numerator / magnitude
if __name__ == "__main__":
    # Evaluate each (dimension, window) embedding combination against the
    # human similarity judgements via Spearman rank correlation.
    for dim in vector_dim:
        for win in vector_win:
            # Embedding files are named "<dim>_<win>.vec".
            target_vector = str(dim) + "_" + str(win) + ".vec"
            word_vectors = nlp.read_word_vectors(vector_path + target_vector)
            print "current vec",target_vector
            # NOTE(review): `target_file` is never defined in this chunk --
            # as written, the next two lines raise NameError.  Presumably a
            # CSV filename (or a loop over CSV files) was lost; confirm
            # against the original script.
            print "current file", target_file
            word_pairs = nlp.read_csv(csv_path + target_file)
            m_score = []
            h_score = []
            for p in word_pairs:
                # Score only pairs where both words have an embedding.
                # (dict.has_key is Python-2-only and deprecated; `in` is
                # the preferred spelling.)
                if ( word_vectors.has_key(p[0].lower()) and word_vectors.has_key(p[1].lower())):
                    h_score.append(float(p[2]))
                    m_score.append(cos_function(word_vectors[p[0].lower()], word_vectors[p[1].lower()]))
                else:
                    print "Word1",p[0], "Word2",p[1],"Ignored"
            # spearmanr returns (correlation, p-value); p_val holds the
            # correlation coefficient despite its name.
            p_val,p_dif = sci.stats.spearmanr(h_score,m_score)
            print "Spearmanr Single", str(p_val)

# Example 4
        # --- fragment: tail of a training loop; the matching `if` branch
        # and the enclosing function begin above this chunk ---
        else:
            up_time = 0


#            learning_rate = learning_rate - 0.005
        # Stop once the correlation is high enough or the error has risen
        # for more than four consecutive iterations.
        if (p_rel >= 80.0 or up_time > 4):
            break
        # Per-iteration progress report.
        print "this time error", sum_error
        print "former time error", former_sum_error
        print "up_time", up_time
        print "learning_rate", learning_rate
        print "pearson", sci.stats.pearsonr(human_s, machine_s)
        former_sum_error = sum_error
if __name__ == "__main__":
    print "read vector & score"
    word_vectors = nlplib.read_word_vectors("./test_vector/100_3.vec")
    word_pair_score = nlplib.read_csv("./csv/R&G-65.csv")
    ######Read hypernyms
    print "get hypernyms etc."
    for w_pair_score in word_pair_score:
        word_dictionary.append(w_pair_score[0])
        word_dictionary.append(w_pair_score[1])
#   remove dumplicated word, for we are searching hypernyms, hyponyms, synonyms according to the dictionary.
    word_dictionary = list(set(word_dictionary))
    for w in word_dictionary:
        word_hypernyms[w] = nlplib.read_hypernyms(w)
        word_hyponyms[w] = nlplib.read_hyponyms(w)
        word_synonyms[w] = nlplib.read_synonyms(w)
    print "start training"
    cnn_training()
# Example 5
        # --- fragment: tail of a training loop; the matching `if` branch
        # and the enclosing function begin above this chunk ---
        else:
            up_time = 0
#            learning_rate = learning_rate + 0.3
        # Stop once the correlation is high enough and the error has just
        # started rising.
        if (p_rel >= 80.0 and up_time == 1 ):
            break
        # Per-iteration progress report.
        print "this time error", sum_error
        print "former time error", former_sum_error
        print "up_time",up_time
        print "learning_rate", learning_rate
        print "pearson",  sci.stats.pearsonr(human_s, machine_s)
        print "iteration time", iteration_time
        iteration_time = iteration_time + 1
        former_sum_error = sum_error
if __name__=="__main__":
    print "read vector & score"
    word_vectors = nlplib.read_word_vectors("./test_vector/100_3.vec")
    word_pair_score = nlplib.read_csv("./csv/R&G-65.csv")
######Read hypernyms
    print "get hypernyms etc."
    for w_pair_score in word_pair_score:
        word_dictionary.append(w_pair_score[0])
        word_dictionary.append(w_pair_score[1])
#   remove dumplicated word, for we are searching hypernyms, hyponyms, synonyms according to the dictionary.
    word_dictionary = list(set(word_dictionary))
    for w in word_dictionary:
        word_hypernyms[w] = nlplib.read_hypernyms(w)
        word_hyponyms[w] = nlplib.read_hyponyms(w)
        word_synonyms[w] = nlplib.read_synonyms(w)
    print "start training"
    cnn_training()