def sim_getCorrelation(We, words, f, weight4ind, scoring_function, params):
    """Score the sentence pairs listed in a file and correlate with gold scores.

    Every line of the file is split on tabs; the first two fields are the two
    sentences and the third field is parsed as a float gold score.

    NOTE(review): another definition named ``sim_getCorrelation`` (with extra
    ``fpc`` / ``test_name`` parameters) is visible further down; if both live
    in the same module the later one replaces this one -- confirm.

    Args:
        We: Forwarded unchanged as the first argument of ``scoring_function``
            (presumably the word-embedding matrix -- confirm against caller).
        words: Forwarded to ``data_io.getSeqs`` (presumably a word-to-index
            lookup -- confirm).
        f: Path of the tab-separated evaluation file.
        weight4ind: Forwarded to ``data_io.seq2weight`` to turn the masks
            returned by ``data_io.prepare_data`` into weights.
        scoring_function: Callable invoked as
            ``scoring_function(We, x1, x2, m1, m2, params)``.
        params: Forwarded unchanged to ``scoring_function``.

    Returns:
        A ``(pearson, spearman)`` tuple: the correlation statistics between
        the squeezed predictions and the gold scores.

    Raises:
        IndexError: If a line has fewer than three tab-separated fields.
        ValueError: If the third field is not parseable as a float.
    """
    # Read everything up front inside a context manager so the handle is
    # always closed, even if parsing or scoring fails later.
    with open(f, 'r') as handle:
        lines = handle.readlines()

    golds = []
    seq1 = []
    seq2 = []
    for line in lines:
        fields = line.split("\t")
        p1 = fields[0]
        p2 = fields[1]
        score = float(fields[2])
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)

    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    scores = scoring_function(We, x1, x2, m1, m2, params)

    # Debug output for the first pair. Single-argument print(...) produces the
    # same text on Python 2 and Python 3.
    print(seq1[0])
    print(seq2[0])
    print(scores[0])

    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
def sim_badSents(We, words, weight4ind, scoring_function, params, fpc, sent1, sent2):
    """Score a single sentence pair and return the rescaled prediction.

    The pair is converted with ``data_io.getSeqs``, batched (batch of one)
    with ``data_io.prepare_data``, weighted with ``data_io.seq2weight`` and
    handed to ``scoring_function(We, x1, x2, m1, m2, params, fpc)``.

    Returns:
        ``np.squeeze(scores) * 2 + 3``.
        NOTE(review): the meaning of the ``* 2 + 3`` rescaling is not visible
        here -- presumably it maps the raw score onto a gold-score range;
        confirm.
    """
    first_ids, second_ids = data_io.getSeqs(sent1, sent2, words)

    # One-element batches: the downstream helpers take lists of sequences.
    left_batch = [first_ids]
    right_batch = [second_ids]

    x1, mask1 = data_io.prepare_data(left_batch)
    x2, mask2 = data_io.prepare_data(right_batch)
    weights1 = data_io.seq2weight(x1, mask1, weight4ind)
    weights2 = data_io.seq2weight(x2, mask2, weight4ind)

    raw_scores = scoring_function(We, x1, x2, weights1, weights2, params, fpc)
    squeezed = np.squeeze(raw_scores)
    return squeezed * 2 + 3
def sim_getCorrelation(We, words, f, weight4ind, scoring_function, params, fpc, test_name):
    """Score the sentence pairs in a file; return Pearson r and RMSE vs. gold.

    Every line of the file is split on tabs; the first two fields are the two
    sentences and the third field is parsed as a float gold score.

    Args:
        We: Forwarded unchanged as the first argument of ``scoring_function``
            (presumably the word-embedding matrix -- confirm against caller).
        words: Forwarded to ``data_io.getSeqs`` (presumably a word-to-index
            lookup -- confirm).
        f: Path of the tab-separated evaluation file.
        weight4ind: Forwarded to ``data_io.seq2weight`` to turn the masks
            returned by ``data_io.prepare_data`` into weights.
        scoring_function: Callable invoked as
            ``scoring_function(We, x1, x2, m1, m2, params, fpc)``.
        params: Forwarded unchanged to ``scoring_function``.
        fpc: Forwarded unchanged to ``scoring_function``.
        test_name: Currently unused; kept so existing callers keep working.

    Returns:
        A ``(pearson, rmse)`` tuple. The second element is the square root of
        ``mean_squared_error(golds, preds)``, i.e. a root-mean-squared error
        rather than a plain MSE.

    Raises:
        IndexError: If a line has fewer than three tab-separated fields.
        ValueError: If the third field is not parseable as a float.
    """
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(f, 'r') as handle:
        lines = handle.readlines()

    golds = []
    seq1 = []
    seq2 = []
    for line in lines:
        fields = line.split("\t")
        p1 = fields[0]
        p2 = fields[1]
        score = float(fields[2])
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)

    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    golds = np.asarray(golds)

    scores = scoring_function(We, x1, x2, m1, m2, params, fpc)
    preds = np.squeeze(scores)
    print(preds)

    rmse = sqrt(mean_squared_error(golds, preds))
    return pearsonr(preds, golds)[0], rmse
def getCorrelation(model,words,f, params=[]):
    """Score the sentence pairs in a file with ``model`` and correlate with gold.

    Every line of the file is split on tabs; the first two fields are the two
    sentences and the third field is parsed as a float gold score.

    Args:
        model: Object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: Forwarded to ``data_io.getSeqs`` (presumably a word-to-index
            lookup -- confirm).
        f: Path of the tab-separated evaluation file.
        params: Optional settings object. When it is truthy and its
            ``weightfile`` attribute is truthy, the masks are replaced by
            ``data_io.seq2weight(..., params.weight4ind)``. The default list
            is only tested for truthiness and never mutated.

    Returns:
        A ``(pearson, spearman)`` tuple: the correlation statistics between
        the squeezed predictions and the gold scores.

    Raises:
        IndexError: If a line has fewer than three tab-separated fields.
        ValueError: If the third field is not parseable as a float.
    """
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(f, 'r') as handle:
        lines = handle.readlines()

    golds = []
    seq1 = []
    seq2 = []
    for line in lines:
        fields = line.split("\t")
        p1 = fields[0]
        p2 = fields[1]
        score = float(fields[2])
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)

    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    if params and params.weightfile:
        m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        m2 = data_io.seq2weight(x2, m2, params.weight4ind)

    scores = model.scoring_function(x1, x2, m1, m2)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]
def _getAcc_score_batch(model, seq1, seq2, params):
    """Score one batch of sequence pairs and return the scores as a list."""
    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    if params and params.weightfile:
        m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        m2 = data_io.seq2weight(x2, m2, params.weight4ind)
    scores = model.scoring_function(x1, x2, m1, m2)
    # np.squeeze collapses a one-pair batch to a 0-d array, whose tolist() is
    # a bare scalar that list.extend cannot consume; atleast_1d keeps it
    # iterable and is a no-op for larger batches.
    return np.atleast_1d(np.squeeze(scores)).tolist()


def getAcc(model, words, f, params=[]):
    """Score the sentence pairs in a file in batches and return ``acc(preds, golds)``.

    Every line of the file is split on tabs; the first two fields are the two
    sentences and the third field is kept as the gold label exactly as read
    (a string, not stripped or converted). Pairs are scored in batches of 100
    lines, with a final partial batch for any remainder.

    Args:
        model: Object exposing ``scoring_function(x1, x2, m1, m2)``.
        words: Forwarded to ``data_io.getSeqs`` (presumably a word-to-index
            lookup -- confirm).
        f: Path of the tab-separated evaluation file.
        params: Optional settings object. When it is truthy and its
            ``weightfile`` attribute is truthy, the masks are replaced by
            ``data_io.seq2weight(..., params.weight4ind)``. The default list
            is only tested for truthiness and never mutated.

    Returns:
        Whatever ``acc(preds, golds)`` returns; ``acc`` is defined outside
        this block.

    Raises:
        IndexError: If a line has fewer than three tab-separated fields.
    """
    batch_size = 100

    # Context manager guarantees the handle is closed even if parsing fails.
    with open(f, 'r') as handle:
        lines = handle.readlines()

    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for ct, line in enumerate(lines, 1):
        fields = line.split("\t")
        p1 = fields[0]
        p2 = fields[1]
        score = fields[2]
        X1, X2 = data_io.getSeqs(p1, p2, words)
        seq1.append(X1)
        seq2.append(X2)
        if ct % batch_size == 0:
            preds.extend(_getAcc_score_batch(model, seq1, seq2, params))
            seq1 = []
            seq2 = []
        golds.append(score)

    # Flush the final partial batch, if any.
    if seq1:
        preds.extend(_getAcc_score_batch(model, seq1, seq2, params))

    return acc(preds, golds)
def sim_getCorrelation1(We, words, file_index, weight4ind, scoring_function, params):
    """Correlate predicted scores with gold scores stored in a separate file.

    ``file_index[0]`` holds one tab-separated sentence pair per line and
    ``file_index[1]`` holds the gold score for the pair on the same line
    number. Both sentences are lower-cased before conversion. A pair is
    skipped when its score cannot be parsed as a float or when
    ``data_io.getSeqs`` fails for it.

    Args:
        We: Forwarded unchanged as the first argument of ``scoring_function``
            (presumably the word-embedding matrix -- confirm against caller).
        words: Forwarded to ``data_io.getSeqs`` (presumably a word-to-index
            lookup -- confirm).
        file_index: Indexable with the sentence-pair path at position 0 and
            the gold-score path at position 1.
        weight4ind: Forwarded to ``data_io.seq2weight`` to turn the masks
            returned by ``data_io.prepare_data`` into weights.
        scoring_function: Callable invoked as
            ``scoring_function(We, x1, x2, m1, m2, params)``.
        params: Forwarded unchanged to ``scoring_function``.

    Returns:
        The first element of ``pearsonr(preds, golds)`` (the correlation
        coefficient).

    Raises:
        IndexError: If the score file has fewer lines than the pair file, or
            a pair line has fewer than two tab-separated fields.
    """
    # Context managers close both handles; previously the first handle was
    # lost when the name was rebound and neither file was ever closed.
    with open(file_index[0], 'r') as pair_file:
        lines = pair_file.readlines()
    with open(file_index[1], 'r') as score_file:
        score_lines = score_file.readlines()

    golds = []
    seq1 = []
    seq2 = []
    for index, line in enumerate(lines):
        raw_score = score_lines[index]
        fields = line.split("\t")
        p1 = fields[0].lower()
        p2 = fields[1].lower()
        try:
            score = float(raw_score)
            X1, X2 = data_io.getSeqs(p1, p2, words)
        except Exception:
            # Best-effort: skip pairs without a usable score or that cannot be
            # converted. Unlike a bare ``except``, this lets KeyboardInterrupt
            # and SystemExit propagate.
            continue
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)

    x1, m1 = data_io.prepare_data(seq1)
    x2, m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    scores = scoring_function(We, x1, x2, m1, m2, params)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0]