def get_threshold(We, words, Rel, rel, tm, relSize):
    """Choose the score cut-off that best separates the Dev1 tuples.

    Reads tab-separated ``relation, term1, term2, score`` rows from the
    hard-coded Dev1 file, scores each tuple with ``tm.score_func`` and tries
    every observed score as a candidate threshold.  A row in the first half
    of the file counts as correct when ``score >= threshold``; a row in the
    second half counts as correct when ``score < threshold``.

    Args:
        We, words: forwarded unchanged to ``lookupwordID`` (presumably the
            embedding matrix and vocabulary -- confirm against that helper).
        Rel: array indexed as ``Rel[rel[name], :]``; the selected row is
            reshaped to ``relSize`` elements.
        rel: mapping from lower-cased relation name to a row index of ``Rel``.
        tm: model object providing ``prepare_data``, ``GetVector`` and
            ``score_func``.
        relSize: number of elements in one relation vector.

    Returns:
        The smallest candidate threshold reaching the best Dev1 accuracy, or
        ``0`` when no candidate yields a non-zero accuracy.
    """
    with open('../commonsendata/Eval/conceptnet/new_omcs_dev1.txt', 'r') as f:
        lines = f.readlines()

    r_list, t1_list, t2_list = [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)

    x1, x1_mask, x1_length = tm.prepare_data(t1_list)
    x2, x2_mask, x2_length = tm.prepare_data(t2_list)
    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    Exp_S = []
    for j, r in enumerate(r_list):
        v_r = Rel[rel[r.lower()], :].reshape(relSize)
        input_vec = np.concatenate((v1[j], v_r, v2[j]), axis=0)
        Exp_S.append(tm.score_func(input_vec)[0][0])

    total = len(Exp_S)
    half = total // 2
    threshold = 0
    accuracy = 0
    for temp_thr in sorted(Exp_S):
        right = sum(1 for s in Exp_S[:half] if s >= temp_thr)
        right += sum(1 for s in Exp_S[half:] if s < temp_thr)
        # float() forces true division; int / int floors under Python 2,
        # which would pin every candidate's accuracy to 0 or 1.
        candidate = float(right) / total
        if candidate > accuracy:
            accuracy = candidate
            threshold = temp_thr

    print('Dev1-Accurancy %s' % (accuracy,))
    return threshold
def evaluate_conceptNet(We, words, Rel, rel, tm, memsize, relSize, fin):
    """Report Dev2 accuracy using the threshold tuned on Dev1.

    Each Dev2 row (tab-separated ``relation, term1, term2, score``) becomes a
    single sequence ``term1 + '#' + term2``; its encoding from
    ``tm.GetVector`` is concatenated with the relation vector and scored by
    ``tm.GetVectorNew``.  A row in the first half of the file counts as
    correct when ``score >= threshold``; a row in the second half counts as
    correct when ``score < threshold``.

    Args:
        We, words: forwarded unchanged to ``lookupwordID`` and
            ``get_threshold``.
        Rel: array indexed as ``Rel[rel[name], :]``; the row is reshaped to
            ``(1, relSize)``.
        rel: mapping from lower-cased relation name to a row index of ``Rel``.
        tm: model object providing ``prepare_data``, ``GetVector`` and
            ``GetVectorNew``.
        memsize: length of one encoded tuple vector (used to reshape it to
            ``(1, memsize)``).
        relSize: number of elements in one relation vector.
        fin: writable file-like object that receives the accuracy and the
            threshold, one per line.

    Returns:
        The Dev2 accuracy as a float.
    """
    threshold = get_threshold(We, words, Rel, rel, tm, memsize, relSize, fin)

    with open('../../commonsendata/Eval/conceptnet/new_omcs_dev2.txt', 'r') as f:
        lines = f.readlines()

    r_list, t1_list, t2_list = [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)

    # Join both terms into one sequence separated by the id(s) of "#".
    delim = lookupwordID(We, words, "#")
    batchTuple = [a + delim + b for a, b in zip(t1_list, t2_list)]
    xx, xx_mask = tm.prepare_data(batchTuple)
    vector = tm.GetVector(xx, xx_mask)

    Exp_S = []
    for j, r in enumerate(r_list):
        v_r = Rel[rel[r.lower()], :].reshape((1, relSize))
        gvector = vector[j].reshape((1, memsize))
        input_vec = np.concatenate((gvector, v_r), axis=1)
        Exp_S.append(tm.GetVectorNew(input_vec)[0][0])

    total = len(Exp_S)
    half = total // 2
    right = sum(1 for s in Exp_S[:half] if s >= threshold)
    right += sum(1 for s in Exp_S[half:] if s < threshold)
    # float() forces true division; int / int floors under Python 2.
    accuracy = float(right) / total

    print('Dev2-Accurancy %s' % (accuracy,))
    fin.write('Dev2-Accurancy' + str(accuracy) + "\n")
    print('Threshold %s' % (threshold,))
    fin.write('Threshold' + str(threshold) + "\n")
    return accuracy
def evaluate_conceptNet(words, We, rel, Rel, tm, relSize):
    """Tune a threshold on Dev1, then measure bilinear-score accuracy on Dev2.

    Each Dev2 row (tab-separated ``relation, term1, term2, score``) is scored
    as ``inner(dot(v1, M_r), v2)`` where ``M_r`` is the ``relSize``-row slice
    of ``Rel`` belonging to the relation.  A row in the first half of the
    file counts as correct when ``score >= threshold``; a row in the second
    half counts as correct when ``score <= threshold``.

    Args:
        words, We: forwarded to ``lookupwordID`` and ``get_threshold``.
        rel: mapping from lower-cased relation name to a block index in
            ``Rel``.
        Rel: array whose rows ``[k * relSize, (k + 1) * relSize)`` hold the
            matrix of relation ``k``.
        tm: model object providing ``prepare_data`` and ``GetVector``.
        relSize: number of rows in one relation block.

    Returns:
        ``(dev1_accuracy, dev2_accuracy, threshold)``.
    """
    threshold, accurancy1 = get_threshold(
        '../data/conceptnet/new_omcs_dev1.txt', We, words, rel, Rel, tm, relSize)

    with open('../data/conceptnet/new_omcs_dev2.txt', 'r') as f:
        lines = f.readlines()

    T1, T2, R = [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        T1.append(lookupwordID(We, words, t1))
        T2.append(lookupwordID(We, words, t2))
        tp = rel[r.lower()]
        R.append(Rel[tp * relSize:tp * relSize + relSize, :])

    x1, x1_mask = tm.prepare_data(T1)
    x2, x2_mask = tm.prepare_data(T2)
    emb1 = tm.GetVector(x1, x1_mask)
    emb2 = tm.GetVector(x2, x2_mask)

    Exp_S = [
        np.inner(np.dot(emb1[j], v_r), emb2[j])
        for j, v_r in enumerate(R)
    ]

    total = len(Exp_S)
    half = total // 2
    right = sum(1 for s in Exp_S[:half] if s >= threshold)
    right += sum(1 for s in Exp_S[half:] if s <= threshold)
    # float() forces true division; int / int floors under Python 2.
    accurancy2 = float(right) / total
    return accurancy1, accurancy2, threshold
def evaluate_conceptNet(We, words, Rel, rel, tm, relSize):
    """Measure Dev2 accuracy and collect the hidden vector of every tuple.

    Each Dev2 row (tab-separated ``relation, term1, term2, score``) is turned
    into the concatenation ``[v1, v_rel, v2]`` which is scored by
    ``tm.score_func`` and also passed to ``tm.hidden_func``.  A row in the
    first half of the file counts as correct when ``score >= threshold``; a
    row in the second half counts as correct when ``score < threshold``.

    Args:
        We, words: forwarded unchanged to ``lookupwordID`` and
            ``get_threshold``.
        Rel: array indexed as ``Rel[rel[name], :]``; the row is reshaped to
            ``relSize`` elements.
        rel: mapping from lower-cased relation name to a row index of ``Rel``.
        tm: model object providing ``prepare_data``, ``GetVector``,
            ``score_func`` and ``hidden_func``.
        relSize: number of elements in one relation vector.

    Returns:
        ``(accuracy, tuple_words, tuple_vecs)`` where ``tuple_words`` holds
        the tab-split fields of every row and ``tuple_vecs`` the matching
        ``tm.hidden_func`` outputs.
    """
    threshold = get_threshold(We, words, Rel, rel, tm, relSize)

    with open('../commonsendata/Eval/conceptnet/new_omcs_dev2.txt', 'r') as f:
        lines = f.readlines()

    r_list, t1_list, t2_list, tuple_words = [], [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)
        tuple_words.append(fields)

    x1, x1_mask, x1_length = tm.prepare_data(t1_list)
    x2, x2_mask, x2_length = tm.prepare_data(t2_list)
    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    Exp_S = []
    tuple_vecs = []
    for j, r in enumerate(r_list):
        v_r = Rel[rel[r.lower()], :].reshape(relSize)
        input_vec = np.concatenate((v1[j], v_r, v2[j]), axis=0)
        softmaxScore = tm.score_func(input_vec)
        tuple_vecs.append(tm.hidden_func(input_vec))
        Exp_S.append(softmaxScore[0][0])

    total = len(Exp_S)
    half = total // 2
    right = sum(1 for s in Exp_S[:half] if s >= threshold)
    right += sum(1 for s in Exp_S[half:] if s < threshold)
    # float() forces true division; int / int floors under Python 2.
    accuracy = float(right) / total

    print('Dev2-Accurancy %s' % (accuracy,))
    print('Threshold %s' % (threshold,))
    return accuracy, tuple_words, tuple_vecs
def evaluate_conceptNet(We, words, Rel, rel, tm, relSize):
    """Measure bilinear-score accuracy on Dev2 with the Dev1 threshold.

    Each Dev2 row (tab-separated ``relation, term1, term2, score``) is scored
    as ``inner(dot(v1, M_r), v2)`` where ``M_r`` is the ``relSize``-row slice
    of ``Rel`` belonging to the relation.  A row in the first half of the
    file counts as correct when ``score >= threshold``; a row in the second
    half counts as correct when ``score < threshold``.

    Args:
        We, words: forwarded unchanged to ``lookupwordID`` and
            ``get_threshold``.
        Rel: array whose rows ``[k * relSize, (k + 1) * relSize)`` hold the
            matrix of relation ``k``.
        rel: mapping from lower-cased relation name to a block index in
            ``Rel``.
        tm: model object providing ``prepare_data`` and ``GetVector``.
        relSize: number of rows in one relation block.

    Returns:
        The Dev2 accuracy as a float.
    """
    threshold = get_threshold(We, words, Rel, rel, tm, relSize)

    with open('../commonsendata/Eval/conceptnet/new_omcs_dev2.txt', 'r') as f:
        lines = f.readlines()

    r_list, t1_list, t2_list = [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)

    x1, x1_mask, x1_length = tm.prepare_data(t1_list)
    x2, x2_mask, x2_length = tm.prepare_data(t2_list)
    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    Exp_S = []
    for j, r in enumerate(r_list):
        start = rel[r.lower()] * relSize
        v_r = Rel[start:start + relSize, :]
        Exp_S.append(np.inner(np.dot(v1[j], v_r), v2[j]))

    total = len(Exp_S)
    half = total // 2
    right = sum(1 for s in Exp_S[:half] if s >= threshold)
    right += sum(1 for s in Exp_S[half:] if s < threshold)
    # float() forces true division; int / int floors under Python 2.
    accuracy = float(right) / total

    print('Dev2-Accurancy %s' % (accuracy,))
    print('Threshold %s' % (threshold,))
    return accuracy
def get_accu(We, words, Rel, rel, tm, relSize, threshold, filename):
    """Return the accuracy of ``tm.score_func`` on ``filename`` at ``threshold``.

    Rows are tab-separated ``relation, term1, term2, score``.  Each tuple is
    scored on the concatenation ``[v1, v_rel, v2]``.  A row in the first half
    of the list counts as correct when ``score >= threshold``; a row in the
    second half counts as correct when ``score < threshold``.

    Args:
        We, words: forwarded unchanged to ``lookupwordID``.
        Rel: array indexed as ``Rel[rel[name], :]``; the row is reshaped to
            ``relSize`` elements.
        rel: mapping from lower-cased relation name to a row index of ``Rel``.
        tm: model object providing ``prepare_data``, ``GetVector`` and
            ``score_func``.
        relSize: number of elements in one relation vector.
        threshold: decision cut-off applied to the scores.
        filename: path of the evaluation file.

    Returns:
        The fraction of rows classified correctly, as a float.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()

    # NOTE(review): these two hard-coded tuples are appended to whatever the
    # file contains, so they are scored, counted in the second half and shift
    # the half-way split.  Looks like a debugging probe -- confirm it is
    # intended before relying on the returned accuracy.
    lines.append('ReceivesAction\thockey\tplay on ice\t1')
    lines.append('AtLocation\trestroom\trest area\t1')

    r_list, t1_list, t2_list = [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)

    x1, x1_mask, x1_length = tm.prepare_data(t1_list)
    x2, x2_mask, x2_length = tm.prepare_data(t2_list)
    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    Exp_S = []
    for j, r in enumerate(r_list):
        v_r = Rel[rel[r.lower()], :].reshape(relSize)
        input_vec = np.concatenate((v1[j], v_r, v2[j]), axis=0)
        Exp_S.append(tm.score_func(input_vec)[0][0])

    total = len(Exp_S)
    half = total // 2
    right = sum(1 for s in Exp_S[:half] if s >= threshold)
    right += sum(1 for s in Exp_S[half:] if s < threshold)
    # float() forces true division; int / int floors under Python 2.
    return float(right) / total
def get_accu(We, words, Rel, rel, tm, relSize, threshold, filename):
    """Return the bilinear-score accuracy on ``filename`` at ``threshold``.

    Rows are tab-separated ``relation, term1, term2, score``.  Each tuple is
    scored as ``inner(dot(v1, M_r), v2)`` where ``M_r`` is the
    ``relSize``-row slice of ``Rel`` belonging to the relation.  A row in the
    first half of the file counts as correct when ``score >= threshold``; a
    row in the second half counts as correct when ``score < threshold``.

    Args:
        We, words: forwarded unchanged to ``lookupwordID``.
        Rel: array whose rows ``[k * relSize, (k + 1) * relSize)`` hold the
            matrix of relation ``k``.
        rel: mapping from lower-cased relation name to a block index in
            ``Rel``.
        tm: model object providing ``prepare_data`` and ``GetVector``.
        relSize: number of rows in one relation block.
        threshold: decision cut-off applied to the scores.
        filename: path of the evaluation file.

    Returns:
        The fraction of rows classified correctly, as a float.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()

    r_list, t1_list, t2_list = [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)

    x1, x1_mask, x1_length = tm.prepare_data(t1_list)
    x2, x2_mask, x2_length = tm.prepare_data(t2_list)
    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    Exp_S = []
    for j, r in enumerate(r_list):
        start = rel[r.lower()] * relSize
        v_r = Rel[start:start + relSize, :]
        Exp_S.append(np.inner(np.dot(v1[j], v_r), v2[j]))

    total = len(Exp_S)
    half = total // 2
    right = sum(1 for s in Exp_S[:half] if s >= threshold)
    right += sum(1 for s in Exp_S[half:] if s < threshold)
    # float() forces true division; int / int floors under Python 2.
    return float(right) / total
def getpairs2(self, batch, params):
    """Build the positive batch and randomly corrupted batches for ``batch``.

    Every item of ``batch`` is converted with ``convertToIndex`` into
    ``(relation, term1, term2, score)``.  When ``params.type == 'MAX'`` a
    replacement term1, term2 and relation are drawn for each position from
    the entries at *other* positions of the same batch.  Terms are then
    joined as ``term1 + '#' + term2`` and padded with ``self.prepare_data``.

    Args:
        batch: sequence of raw tuples accepted by ``convertToIndex``.
        params: object whose ``type`` attribute selects the sampling mode;
            only ``'MAX'`` triggers sampling, any other value leaves the
            corrupted lists empty.

    Returns:
        ``(R, p3, pTuple, pTupleMask, neTuple1, neTuple1Mask, neTuple2,
        neTuple2Mask)``: the relations, the sampled replacement relations,
        the prepared positive tuples, the tuples with term1 replaced and the
        tuples with term2 replaced (each with its mask).

    Raises:
        ValueError: if ``params.type == 'MAX'`` and the batch holds exactly
            one tuple, because no other position exists to sample from (the
            sampling loop would otherwise never terminate).
    """
    def pick_other(pool, skip):
        # Draw until an entry from a position other than `skip` is found;
        # an entry equal to '' is treated as "nothing picked" and redrawn.
        picked = ''
        while picked == '':
            index = random.randint(0, len(pool) - 1)
            if index != skip:
                picked = pool[index]
        return picked

    Rel = self.getRel()
    we = self.getWe()
    newd = [convertToIndex(i, self.words, we, self.rel, Rel) for i in batch]

    g1, g2, R = [], [], []
    for (r, t1, t2, s) in newd:
        g1.append(t1)
        g2.append(t2)
        R.append(r)

    length = len(batch)
    p11, p22, p3 = [], [], []
    if params.type == 'MAX':
        if length == 1:
            raise ValueError(
                "getpairs2 needs at least two tuples in the batch to sample "
                "negatives from other positions")
        for i in range(length):
            p11.append(pick_other(g1, i))
            p22.append(pick_other(g2, i))
            p3.append(pick_other(R, i))

    delim = lookupwordID(we, self.words, "#")
    pT = [a + delim + b for a, b in zip(g1, g2)]
    pTuple, pTupleMask = self.prepare_data(pT)
    neT1 = [a + delim + b for a, b in zip(p11, g2)]
    neTuple1, neTuple1Mask = self.prepare_data(neT1)
    neT2 = [a + delim + b for a, b in zip(g1, p22)]
    neTuple2, neTuple2Mask = self.prepare_data(neT2)
    return (R, p3, pTuple, pTupleMask, neTuple1, neTuple1Mask, neTuple2,
            neTuple2Mask)
def prepare_aedata(self, list_of_seqs, contextsize, words):
    """Build per-token context windows and a 10-column token-shape matrix.

    Every token of every sequence gets one row in both outputs.  The window
    row holds the ids of the token and its ``contextsize`` neighbours on each
    side, using the sequence returned by ``lookupwordID(words, seq)`` padded
    with ``0`` on the left and ``1`` on the right.  The feature row has at
    most one column set, chosen by the first matching rule:

        0: token is '<@MENTION>'         5: token contains '$'
        1: starts with '#', length > 1   7: token is ':'
        2: token is 'rt'                 8: token is '...'
        3: token contains 'URL'          9: single punctuation character
        4: digits with at most one '.'   6: consists only of punctuation

    An empty token prints 'error' plus the offending sequence and exits the
    process via ``sys.exit()``.

    Returns:
        ``(x, n_samples, D)``: the int32 window matrix, the number of
        sequences and the int32 feature matrix.
    """
    seq_lengths = [len(seq) for seq in list_of_seqs]
    total_tokens = sum(seq_lengths)
    n_samples = len(list_of_seqs)
    window = 2 * contextsize + 1

    D = np.zeros((total_tokens, 10)).astype('int32')
    x = np.zeros((total_tokens, window)).astype('int32')

    row = 0
    for seq_no in range(n_samples):
        seq = list_of_seqs[seq_no]
        padded = [0] * contextsize + lookupwordID(words, seq) + [1] * contextsize
        for pos in range(seq_lengths[seq_no]):
            x[row, :] = padded[pos:pos + window]
            token = seq[pos]
            if len(token) == 0:
                print('error')
                print(seq)
                sys.exit()

            punct_chars = sum(1 for ch in token if ch in string.punctuation)
            only_punct = punct_chars == len(token)

            # Rules are tested in this exact order; the first match wins.
            if token == '<@MENTION>':
                column = 0
            elif token[0] == '#' and len(token) != 1:
                column = 1
            elif token == 'rt':
                column = 2
            elif 'URL' in token:
                column = 3
            elif token.replace('.', '', 1).isdigit():
                column = 4
            elif '$' in token:
                column = 5
            elif token == ':':
                column = 7
            elif token == '...':
                column = 8
            elif len(token) == 1 and token[0] in string.punctuation:
                column = 9
            elif only_punct:
                column = 6
            else:
                column = None

            if column is not None:
                D[row, column] = 1
            row += 1

    return x, n_samples, D
def get_threshold(evafile, We, words, rel, Rel, tm, relSize):
    """Choose the bilinear-score cut-off that best separates ``evafile``.

    Rows are tab-separated ``relation, term1, term2, score``.  Each tuple is
    scored as ``inner(dot(v1, M_r), v2)`` where ``M_r`` is the
    ``relSize``-row slice of ``Rel`` belonging to the relation, and every
    observed score is tried as a candidate threshold.  A row in the first
    half of the file counts as correct when ``score >= threshold``; a row in
    the second half counts as correct when ``score <= threshold``.

    Args:
        evafile: path of the tuning file.
        We, words: forwarded unchanged to ``lookupwordID``.
        rel: mapping from lower-cased relation name to a block index in
            ``Rel``.
        Rel: array whose rows ``[k * relSize, (k + 1) * relSize)`` hold the
            matrix of relation ``k``.
        tm: model object providing ``prepare_data`` and ``GetVector``.
        relSize: number of rows in one relation block.

    Returns:
        ``(threshold, accuracy)``: the smallest candidate reaching the best
        accuracy and that accuracy; ``(0, 0)`` when no candidate yields a
        non-zero accuracy.
    """
    with open(evafile, 'r') as f1:
        lines = f1.readlines()

    T1, T2, R = [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        T1.append(lookupwordID(We, words, t1))
        T2.append(lookupwordID(We, words, t2))
        tp = rel[r.lower()]
        R.append(Rel[tp * relSize:tp * relSize + relSize, :])

    x1, x1_mask = tm.prepare_data(T1)
    x2, x2_mask = tm.prepare_data(T2)
    emb1 = tm.GetVector(x1, x1_mask)
    emb2 = tm.GetVector(x2, x2_mask)

    Exp_S = [
        np.inner(np.dot(emb1[j], v_r), emb2[j])
        for j, v_r in enumerate(R)
    ]

    total = len(Exp_S)
    half = total // 2
    threshold = 0
    accuracy = 0
    for temp_thr in sorted(Exp_S):
        right = sum(1 for s in Exp_S[:half] if s >= temp_thr)
        right += sum(1 for s in Exp_S[half:] if s <= temp_thr)
        # Compare with the same float ratio that gets stored; an int / int
        # comparison floors under Python 2 and would almost never improve.
        candidate = float(right) / total
        if candidate > accuracy:
            accuracy = candidate
            threshold = temp_thr

    return threshold, accuracy
def evaCOPA(evafile, words, We, rel, Rel, evaType, tm, relSize):
    """Evaluate two-alternative COPA-style questions read from ``evafile``.

    Starting at line index 4, every group of six lines is read as one
    question: the first line carries the gold answer after ``alternative=``
    and contains ``effect`` when an effect is asked for; the next three lines
    carry the premise (``<p>``) and the alternatives (``<a1>``, ``<a2>``).
    Questions are encoded in batches of 100.  When a cause is asked for, each
    alternative is scored as ``score(alternative, premise, ...)``; when an
    effect is asked for, as ``score(premise, alternative, ...)``.  The
    alternative with the strictly larger selected score wins, ties go to
    alternative 2.

    Args:
        evafile: path of the question file.
        words, We, rel, Rel, relSize: forwarded unchanged to
            ``lookupwordID`` / ``score``.
        evaType: which component of the ``score`` result to compare:
            ``'max'`` -> index 0, ``'sum'`` -> index 1, ``'cause'`` ->
            index 2 (case-insensitive).
        tm: model object providing ``prepare_data`` and ``GetVector``.

    Returns:
        ``(accuracy, totalScore1, totalScore2, trueAns)``: the float
        accuracy, the full ``score`` results for alternative 1 and 2 of every
        question, and the gold answers as strings.

    Raises:
        ValueError: if ``evaType`` is not one of 'max', 'sum' or 'cause'.
    """
    score_index = {'max': 0, 'sum': 1, 'cause': 2}
    mode = evaType.lower()
    if mode not in score_index:
        raise ValueError(
            "evaType must be 'max', 'sum' or 'cause', got %r" % (evaType,))
    pick = score_index[mode]

    with open(evafile, 'r') as f:
        lines = f.readlines()

    trueAns, q, alter1, alter2, causality = [], [], [], [], []
    for i in range(4, len(lines) - 1, 6):
        header = lines[i]
        # +13 skips 'alternative="'; the -1 drops the character before '>'
        # (presumably the closing quote -- confirm against the file format).
        trueAns.append(
            header[header.find('alternative=') + 13:header.find('>') - 1])
        causality.append(1 if header.find('effect') != -1 else 0)

        # In each case the -1 drops the last character before the closing
        # tag (presumably sentence-final punctuation -- confirm).
        premise = lines[i + 1]
        q.append(lookupwordID(
            We, words, premise[premise.find('<p>') + 3:premise.find('</p>') - 1]))
        first = lines[i + 2]
        alter1.append(lookupwordID(
            We, words, first[first.find('<a1>') + 4:first.find('</a1>') - 1]))
        second = lines[i + 3]
        alter2.append(lookupwordID(
            We, words, second[second.find('<a2>') + 4:second.find('</a2>') - 1]))

    same = 0
    diff = 0
    totalScore1 = []
    totalScore2 = []
    batch_size = 100
    for start in range(0, len(q), batch_size):
        stop = start + batch_size
        qq = q[start:stop]
        causalityq = causality[start:stop]

        x0, x0_mask = tm.prepare_data(qq)
        x1, x1_mask = tm.prepare_data(alter1[start:stop])
        x2, x2_mask = tm.prepare_data(alter2[start:stop])
        emb0 = tm.GetVector(x0, x0_mask)
        emb1 = tm.GetVector(x1, x1_mask)
        emb2 = tm.GetVector(x2, x2_mask)

        # Iterate over the real batch length so a short final batch does not
        # index past its end.
        for j in range(len(qq)):
            if causalityq[j] == 0:
                scores1 = score(emb1[j], emb0[j], words, We, rel, Rel, relSize)
                scores2 = score(emb2[j], emb0[j], words, We, rel, Rel, relSize)
            else:
                scores1 = score(emb0[j], emb1[j], words, We, rel, Rel, relSize)
                scores2 = score(emb0[j], emb2[j], words, We, rel, Rel, relSize)
            totalScore1.append(scores1)
            totalScore2.append(scores2)

            ans = 1 if scores1[pick] > scores2[pick] else 2
            # The gold answer of question j in this batch lives at start + j.
            if ans == int(trueAns[start + j]):
                same += 1
            else:
                diff += 1

    # float() forces true division; int / int floors under Python 2.
    return float(same) / (same + diff), totalScore1, totalScore2, trueAns
def get_threshold(We, words, Rel, rel, tm, relSize, dev1_file):
    """Choose the bilinear-score cut-off that best separates ``dev1_file``.

    Rows are tab-separated ``relation, term1, term2, score``.  Each tuple is
    scored as ``inner(dot(v1, M_r), v2)`` where ``M_r`` is the
    ``relSize``-row slice of ``Rel`` belonging to the relation, and every
    observed score is tried as a candidate threshold.  A row in the first
    half of the file counts as correct when ``score >= threshold``; a row in
    the second half counts as correct when ``score < threshold``.

    Args:
        We, words: forwarded unchanged to ``lookupwordID``.
        Rel: array whose rows ``[k * relSize, (k + 1) * relSize)`` hold the
            matrix of relation ``k``.
        rel: mapping from lower-cased relation name to a block index in
            ``Rel``.
        tm: model object providing ``prepare_data`` and ``GetVector``.
        relSize: number of rows in one relation block.
        dev1_file: path of the tuning file.

    Returns:
        The smallest candidate threshold reaching the best accuracy, or ``0``
        when no candidate yields a non-zero accuracy.
    """
    with open(dev1_file, 'r') as f:
        lines = f.readlines()

    r_list, t1_list, t2_list = [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)

    x1, x1_mask, x1_length = tm.prepare_data(t1_list)
    x2, x2_mask, x2_length = tm.prepare_data(t2_list)
    v1 = tm.GetVector(x1, x1_mask, x1_length)
    v2 = tm.GetVector(x2, x2_mask, x2_length)

    Exp_S = []
    for j, r in enumerate(r_list):
        start = rel[r.lower()] * relSize
        v_r = Rel[start:start + relSize, :]
        Exp_S.append(np.inner(np.dot(v1[j], v_r), v2[j]))

    total = len(Exp_S)
    half = total // 2
    threshold = 0
    accuracy = 0
    for temp_thr in sorted(Exp_S):
        right = sum(1 for s in Exp_S[:half] if s >= temp_thr)
        right += sum(1 for s in Exp_S[half:] if s < temp_thr)
        # float() forces true division; int / int floors under Python 2,
        # which would pin every candidate's accuracy to 0 or 1.
        candidate = float(right) / total
        if candidate > accuracy:
            accuracy = candidate
            threshold = temp_thr

    print('Dev1-Accurancy %s' % (accuracy,))
    return threshold
def get_threshold(We, words, Rel, rel, tm, memsize, relSize, fin):
    """Choose the score cut-off that best separates the Dev1 tuples.

    Each Dev1 row (tab-separated ``relation, term1, term2, score``) becomes a
    single sequence ``term1 + '#' + term2``; its encoding from
    ``tm.GetVector`` is concatenated with the relation vector and scored by
    ``tm.GetVectorNew``.  Every observed score is tried as a candidate
    threshold.  A row in the first half of the file counts as correct when
    ``score >= threshold``; a row in the second half counts as correct when
    ``score < threshold``.

    Args:
        We, words: forwarded unchanged to ``lookupwordID``.
        Rel: array indexed as ``Rel[rel[name], :]``; the row is reshaped to
            ``(1, relSize)``.
        rel: mapping from lower-cased relation name to a row index of ``Rel``.
        tm: model object providing ``prepare_data``, ``GetVector`` and
            ``GetVectorNew``.
        memsize: length of one encoded tuple vector (used to reshape it to
            ``(1, memsize)``).
        relSize: number of elements in one relation vector.
        fin: writable file-like object that receives the Dev1 accuracy.

    Returns:
        The smallest candidate threshold reaching the best Dev1 accuracy, or
        ``0`` when no candidate yields a non-zero accuracy.
    """
    with open('../../commonsendata/Eval/conceptnet/new_omcs_dev1.txt', 'r') as f:
        lines = f.readlines()

    r_list, t1_list, t2_list = [], [], []
    for line in lines:
        fields = line.strip().split('\t')
        r, t1, t2 = fields[0].strip(), fields[1].strip(), fields[2].strip()
        float(fields[3])  # parsed only to reject malformed rows; the value itself is unused
        t1_list.append(lookupwordID(We, words, t1))
        t2_list.append(lookupwordID(We, words, t2))
        r_list.append(r)

    print('t1batch:  %d' % len(t1_list))

    # Join both terms into one sequence separated by the id(s) of "#".
    delim = lookupwordID(We, words, "#")
    batchTuple = [a + delim + b for a, b in zip(t1_list, t2_list)]
    xx, xx_mask = tm.prepare_data(batchTuple)
    vector = tm.GetVector(xx, xx_mask)

    Exp_S = []
    for j, r in enumerate(r_list):
        v_r = Rel[rel[r.lower()], :].reshape((1, relSize))
        vectorg = vector[j].reshape((1, memsize))
        input_vec = np.concatenate((vectorg, v_r), axis=1)
        Exp_S.append(tm.GetVectorNew(input_vec)[0][0])

    total = len(Exp_S)
    half = total // 2
    threshold = 0
    accuracy = 0
    for temp_thr in sorted(Exp_S):
        right = sum(1 for s in Exp_S[:half] if s >= temp_thr)
        right += sum(1 for s in Exp_S[half:] if s < temp_thr)
        # float() forces true division; int / int floors under Python 2,
        # which would pin every candidate's accuracy to 0 or 1.
        candidate = float(right) / total
        if candidate > accuracy:
            accuracy = candidate
            threshold = temp_thr

    print('Dev1-Accurancy %s' % (accuracy,))
    fin.write('Dev1-Accurancy' + str(accuracy) + "\n")
    return threshold