# NOTE(review): fragment — this chunk starts inside an enclosing loop over word
# pairs (the loop header, and the initialization of `m_score`, `machine_score`,
# `human_score`, `w1`, `w2`, `p` are outside this view).  Indentation below is
# reconstructed; confirm the dedent levels against the original file.
# Accumulate cosine similarity over every (sense-of-w1, sense-of-w2) pair that
# has a trained sense vector.
for s1 in senses[w1]:
    for s2 in senses[w2]:
        if (sense_vectors.has_key(s1) and sense_vectors.has_key(s2)):
            m_score = m_score + cos_function(sense_vectors[s1] , sense_vectors[s2])
# Average the summed similarity over the number of sense combinations;
# pairs where either word has no senses are skipped entirely.
if ( len(senses[w1]) > 0.0 and len(senses[w2]) > 0.0):
    m_score = m_score / ( float( len(senses[w1] ) ) * float ( len(senses[w2])) )
else:
    continue  # belongs to the enclosing per-pair loop (header not visible here)
# Keep only pairs with a positive machine score; p[2] is the human rating
# column of the CSV row.
if ( m_score > 0.0 ):
    machine_score.append(m_score)
    human_score.append(float(p[2]))
# Rank correlation between human ratings and averaged sense similarities.
p_val, p_rel = sci.stats.spearmanr(human_score, machine_score)
print "Simple Linear Approach", p_val
if __name__ == "__main__":
    # Load pre-trained word vectors and the human-scored word-pair CSV.
    word_vectors = nlp.read_word_vectors(VECTOR_DIR + VECTOR_NAME)
    word_pairs = nlp.read_csv(CSV_DIR + CSV_NAME)
    # Build the lower-cased, de-duplicated vocabulary from both pair columns.
    vocab = []
    for p in word_pairs:
        vocab.append(p[0].lower())
        vocab.append(p[1].lower())
    vocab = list(set(vocab))
    # For each word: cache its WordNet-style relations and senses, and give
    # every sense a zero-initialized vector plus its own relation caches.
    for w in vocab:
        word_hypernyms[w] = nlp.read_hypernyms(w)
        word_hyponyms[w] = nlp.read_hyponyms(w)
        word_synonyms[w] = nlp.read_synonyms(w)
        senses[w] = nlp.read_senses(w)
        for s in senses[w]:
            sense_vectors[s] = np.zeros(VECTOR_DIM)
            sense_hypernyms[s] = nlp.read_hypernyms_by_sense(s)
            sense_hyponyms[s] = nlp.read_hyponyms_by_sense(s)
            sense_synonyms[s] = nlp.read_synonyms_by_sense(s)
            # NOTE(review): the main block appears to continue past the end of
            # this chunk (the scoring code above is presumably invoked later).
# NOTE(review): fragment — the `def` header of this function is outside this
# view, as are `alpha`, `beta`, `word_pool`, `sense_pool` and the parameter
# binding of `s` (presumably a WordNet synset: it exposes .lemmas()).
# Computes 0.5 * sum over lemmas of (alpha*||word - sense||)^2 plus a
# sense-pool term — looks like one half of a regularized objective.
result = 0.0
for l in s.lemmas():
    wd = l.name()
    # Squared, alpha-weighted distance between the lemma's word vector and
    # the sense vector.
    dis = alpha * np.linalg.norm(word_pool[wd] - sense_vectors[s])
    dis = np.power(dis, 2)
    # NOTE(review): dis1 does not depend on the loop variable `l`, yet it is
    # added once per lemma — confirm this weighting by lemma count is intended.
    dis1 = beta * np.linalg.norm(sense_vectors[s] - sense_pool[s])
    dis1 = np.power(dis1, 2)
    result = result + dis + dis1
result = 0.5 * result
return result
if __name__ == "__main__":
    print "VECTOR:", VECTOR_NAME
    # Load pre-trained word vectors and the human-scored word-pair CSV.
    word_vectors = nlp.read_word_vectors(VECTOR_DIR + VECTOR_NAME)
    word_pairs = nlp.read_csv(CSV_DIR + CSV_NAME)
    print "CSV", CSV_NAME
    # Build the lower-cased, de-duplicated vocabulary from both pair columns.
    vocab = []
    for p in word_pairs:
        vocab.append(p[0].lower())
        vocab.append(p[1].lower())
    vocab = list(set(vocab))
    # Cache per-word relations/senses and per-sense relation lists; sense
    # vectors start at zero.
    for w in vocab:
        word_hypernyms[w] = nlp.read_hypernyms(w)
        word_hyponyms[w] = nlp.read_hyponyms(w)
        word_synonyms[w] = nlp.read_synonyms(w)
        senses[w] = nlp.read_senses(w)
        for s in senses[w]:
            sense_vectors[s] = np.zeros(VECTOR_DIM)
            sense_hypernyms[s] = nlp.read_hypernyms_by_sense(s)
            sense_hyponyms[s] = nlp.read_hyponyms_by_sense(s)
            # NOTE(review): chunk ends here — the sibling script also loads
            # synonyms-by-sense, so this view may be truncated.
csv_path = "../../csv/" vector_path = "../test_vector/" vector_file = "100_6.vec" vector_dim = [100] vector_win = [6] def cos_function(v1,v2): result = 0.0 tp = np.dot(v1, np.transpose(v2)) btm = np.linalg.norm(v1) * np.linalg.norm(v2) result = tp/btm return result if __name__ == "__main__": for dim in vector_dim: for win in vector_win: target_vector = str(dim) + "_" + str(win) + ".vec" word_vectors = nlp.read_word_vectors(vector_path + target_vector) print "current vec",target_vector print "current file", target_file word_pairs = nlp.read_csv(csv_path + target_file) m_score = [] h_score = [] for p in word_pairs: if ( word_vectors.has_key(p[0].lower()) and word_vectors.has_key(p[1].lower())): h_score.append(float(p[2])) m_score.append(cos_function(word_vectors[p[0].lower()], word_vectors[p[1].lower()])) else: print "Word1",p[0], "Word2",p[1],"Ignored" p_val,p_dif = sci.stats.spearmanr(h_score,m_score) print "Spearmanr Single", str(p_val)
# NOTE(review): fragment — this is the tail of a training loop; the matching
# `if` of the dangling `else:` and the loop that `break` exits are outside
# this view (presumably inside cnn_training or a similar driver).
else:
    # Error did not increase this iteration: reset the consecutive-rise counter.
    up_time = 0
# learning_rate = learning_rate - 0.005
# Stop when correlation is high enough or the error rose more than 4 times
# in a row.  NOTE(review): `p_rel >= 80.0` — confirm p_rel's scale; scipy
# correlation values are in [-1, 1].
if (p_rel >= 80.0 or up_time > 4):
    break
# Per-iteration progress report.
print "this time error", sum_error
print "former time error", former_sum_error
print "up_time", up_time
print "learning_rate", learning_rate
print "pearson", sci.stats.pearsonr(human_s, machine_s)
former_sum_error = sum_error
if __name__ == "__main__":
    print "read vector & score"
    # Load pre-trained vectors and the R&G-65 human-similarity benchmark.
    word_vectors = nlplib.read_word_vectors("./test_vector/100_3.vec")
    word_pair_score = nlplib.read_csv("./csv/R&G-65.csv")
    ######Read hypernyms
    print "get hypernyms etc."
    for w_pair_score in word_pair_score:
        word_dictionary.append(w_pair_score[0])
        word_dictionary.append(w_pair_score[1])
    # Remove duplicated words, since hypernyms, hyponyms and synonyms are
    # looked up once per dictionary entry.
    word_dictionary = list(set(word_dictionary))
    for w in word_dictionary:
        word_hypernyms[w] = nlplib.read_hypernyms(w)
        word_hyponyms[w] = nlplib.read_hyponyms(w)
        word_synonyms[w] = nlplib.read_synonyms(w)
    print "start training"
    cnn_training()
# NOTE(review): fragment — tail of a training loop; the matching `if` of the
# dangling `else:` and the loop that `break` exits are outside this view.
# Near-duplicate of the sibling script but with a stricter stop rule
# (`and up_time == 1`) and an iteration counter.
else:
    # Error did not increase this iteration: reset the consecutive-rise counter.
    up_time = 0
# learning_rate = learning_rate + 0.3
# Stop only when correlation is high enough AND the error just rose once.
# NOTE(review): `p_rel >= 80.0` — confirm p_rel's scale; scipy correlation
# values are in [-1, 1].
if (p_rel >= 80.0 and up_time == 1 ):
    break
# Per-iteration progress report.
print "this time error", sum_error
print "former time error", former_sum_error
print "up_time", up_time
print "learning_rate", learning_rate
print "pearson", sci.stats.pearsonr(human_s, machine_s)
print "iteration time", iteration_time
iteration_time = iteration_time + 1
former_sum_error = sum_error
if __name__=="__main__":
    print "read vector & score"
    # Load pre-trained vectors and the R&G-65 human-similarity benchmark.
    word_vectors = nlplib.read_word_vectors("./test_vector/100_3.vec")
    word_pair_score = nlplib.read_csv("./csv/R&G-65.csv")
    ######Read hypernyms
    print "get hypernyms etc."
    for w_pair_score in word_pair_score:
        word_dictionary.append(w_pair_score[0])
        word_dictionary.append(w_pair_score[1])
    # Remove duplicated words, since hypernyms, hyponyms and synonyms are
    # looked up once per dictionary entry.
    word_dictionary = list(set(word_dictionary))
    for w in word_dictionary:
        word_hypernyms[w] = nlplib.read_hypernyms(w)
        word_hyponyms[w] = nlplib.read_hyponyms(w)
        word_synonyms[w] = nlplib.read_synonyms(w)
    print "start training"
    cnn_training()