def whole_evaluation(self):
    """Evaluate both stored systems and save NDCG@k / MAP@k line plots.

    Loads score1..score5 from ``experiment_system_user`` for
    ``system_type = 1`` (plotted as "W2V") and ``system_type = 2`` (plotted
    as "RSTR"), prints the collected rows and the values returned by
    ``Evaluation.average_ndcg`` / ``Evaluation.MAP``, and writes
    ``image/System_NDCG.png`` and ``image/System_MAP.png``.
    """

    def load_score_rows(query):
        # Run the query and copy the five score columns of every fetched
        # row into a fresh list, keeping the result order.
        self.cursor.execute(query)
        return [[record[col] for col in range(5)]
                for record in self.cursor.fetchall()]

    def save_comparison(title, y_label, w2v_curve, rstr_curve, target):
        # Draw both systems on one figure against x = 1..5, then write it out.
        plt.title(title)
        plt.xlabel("Top K Recommendation")
        plt.ylabel(y_label)
        plt.plot(range(1, 6), w2v_curve, "-v", color='y', label="W2V")
        plt.plot(range(1, 6), rstr_curve, "-v", color='m', label="RSTR")
        plt.legend(loc="best")
        # save image
        plt.savefig(target)
        plt.close()

    e = Evaluation()

    w2v_scores = load_score_rows(
        "SELECT score1, score2, score3, score4, score5 From experiment_system_user Where system_type = 1")
    rstr_scores = load_score_rows(
        "SELECT score1, score2, score3, score4, score5 From experiment_system_user Where system_type = 2")

    print("w2v_scores:" + str(w2v_scores))
    print("length:" + str(len(w2v_scores)))
    print("rstr_scores:" + str(rstr_scores))
    print("length:" + str(len(rstr_scores)))

    w2v_ndcg = e.average_ndcg(w2v_scores)
    w2v_MAP = e.MAP(w2v_scores)
    rstr_ndcg = e.average_ndcg(rstr_scores)
    rstr_MAP = e.MAP(rstr_scores)

    print("\nNDCG:")
    print(w2v_ndcg)
    print(rstr_ndcg)
    save_comparison("Evaluate System Performance NDCG@k", "NDCG@k",
                    w2v_ndcg, rstr_ndcg, 'image/System_NDCG.png')

    print("\nMAP:")
    print(w2v_MAP)
    print(rstr_MAP)
    save_comparison("Evaluate System Performance MAP@k", "MAP@k",
                    w2v_MAP, rstr_MAP, 'image/System_MAP.png')
def evaluate(self, args, data, cnn):
    """Rank the candidates of every query in ``data`` and score the rankings.

    For each ``(idts, idbs, labels)`` item the title and body id matrices are
    embedded through ``self.embedding``, encoded by ``cnn``, and row 0 of the
    encoder output is scored against all remaining rows by dot product.
    Labels are then reordered by descending score.

    Args:
        args: options object; only ``args.cuda`` is read here.
        data: iterable of ``(idts, idbs, labels)`` triples. ``idts`` / ``idbs``
            are indexed as 2-D arrays; ``labels`` must support fancy indexing
            (presumably numpy arrays -- confirm against the data loader).
        cnn: callable taking ``(titles, bodies)`` and returning a 2-D tensor.

    Returns:
        ``(MAP, MRR, P1, P5)`` from ``Evaluation``, each multiplied by 100.
    """

    def embed(ids):
        # Look up all ids at once, then restore the two leading axes of
        # ``ids`` with the embedding width as the last axis.
        flat = self.embedding.forward(ids.ravel())
        return flat.reshape((ids.shape[0], ids.shape[1], self.embedding.n_d))

    res = []
    for idts, idbs, labels in data:
        xt = embed(idts)
        xb = embed(idbs)
        titles = Variable(torch.from_numpy(xt)).float()
        bodies = Variable(torch.from_numpy(xb)).float()
        if args.cuda:
            titles = titles.cuda()
            bodies = bodies.cuda()

        outputs = cnn(titles, bodies)
        pos = outputs[0].view(1, outputs[0].size(0))
        # The product has shape (1, n_candidates). Squeeze only axis 0: a bare
        # squeeze() would also drop the candidate axis when there is exactly
        # one candidate, making len(scores) below raise on a 0-d array.
        scores = torch.mm(pos, outputs[1:].transpose(1, 0)).squeeze(0)
        # .cpu() returns the tensor unchanged when it already lives on the CPU,
        # so one conversion path serves both the cuda and non-cuda case.
        scores = scores.data.cpu().numpy()

        assert len(scores) == len(labels)
        ranks = (-scores).argsort()
        res.append(labels[ranks])

    e = Evaluation(res)
    MAP = e.MAP() * 100
    MRR = e.MRR() * 100
    P1 = e.Precision(1) * 100
    P5 = e.Precision(5) * 100
    return MAP, MRR, P1, P5
def evaluate(self, data, session): # return for each query the labels, ranked results, and scores eval_func = self.score_func all_ranked_labels = [] all_ranked_ids = [] all_ranked_scores = [] query_ids = [] all_MAP, all_MRR, all_Pat1, all_Pat5 = [], [], [], [] for idts, idbs, labels, pid, qids in data: scores = eval_func(idts, idbs, session) assert len(scores) == len(labels) ranks = (-scores).argsort() ranked_scores = np.array(scores)[ranks] ranked_labels = labels[ranks] ranked_ids = np.array(qids)[ranks] query_ids.append(pid) all_ranked_labels.append(ranked_labels) all_ranked_ids.append(ranked_ids) all_ranked_scores.append(ranked_scores) this_ev = Evaluation([ranked_labels]) all_MAP.append(this_ev.MAP()) all_MRR.append(this_ev.MRR()) all_Pat1.append(this_ev.Precision(1)) all_Pat5.append(this_ev.Precision(5)) print 'average all ... ', sum(all_MAP) / len(all_MAP), sum( all_MRR) / len(all_MRR), sum(all_Pat1) / len(all_Pat1), sum( all_Pat5) / len(all_Pat5) return all_MAP, all_MRR, all_Pat1, all_Pat5, all_ranked_labels, all_ranked_ids, query_ids, all_ranked_scores
def evaluate(self, data, sess):
    """Compute ranking metrics and three dev losses over ``data``.

    Each ``(idts, idbs, id_labels)`` item is scored with
    ``self.eval_batch(idts, idbs, sess)``; labels are reordered by
    descending score for the ranking metrics, while the unsorted labels and
    scores are kept for the loss functions.

    Args:
        data: iterable of ``(idts, idbs, id_labels)`` triples.
        sess: session object forwarded unchanged to ``self.eval_batch``.

    Returns:
        ``(MAP, MRR, P1, P5, loss0, loss1, loss2)``. ``loss1`` comes from
        ``dev_entropy_loss`` when ``self.args`` contains a non-zero
        ``mlp_dim`` and from ``devloss1`` otherwise.
    """
    res = []
    all_labels = []
    all_scores = []
    for idts, idbs, id_labels in data:
        cur_scores = self.eval_batch(idts, idbs, sess)
        # The original note says both lengths are 20; only equality is
        # actually enforced here.
        assert len(id_labels) == len(cur_scores)
        all_labels.append(id_labels)
        all_scores.append(cur_scores)
        # Negate before argsort so the highest score ranks first.
        ranks = (-cur_scores).argsort()
        res.append(id_labels[ranks])

    e = Evaluation(res)
    MAP = e.MAP()
    MRR = e.MRR()
    P1 = e.Precision(1)
    P5 = e.Precision(5)

    if 'mlp_dim' in self.args and self.args.mlp_dim != 0:
        loss1 = dev_entropy_loss(all_labels, all_scores)
    else:
        loss1 = devloss1(all_labels, all_scores)
    loss0 = devloss0(all_labels, all_scores)
    loss2 = devloss2(all_labels, all_scores)
    return MAP, MRR, P1, P5, loss0, loss1, loss2
def evaluate(all_ranked_labels):
    """Return ``(MAP, MRR, P1, P5)`` for the given rankings, each times 100.

    ``all_ranked_labels`` is handed unchanged to ``Evaluation``; P1 and P5
    are ``Precision(1)`` and ``Precision(5)``.
    """
    metrics = Evaluation(all_ranked_labels)
    return (
        metrics.MAP() * 100,
        metrics.MRR() * 100,
        metrics.Precision(1) * 100,
        metrics.Precision(5) * 100,
    )
def evaluate(data, labels, model): res = [ ] model.eval() res = compute_scores(data, labels, model) evaluation = Evaluation(res) MAP = evaluation.MAP()*100 MRR = evaluation.MRR()*100 P1 = evaluation.Precision(1)*100 P5 = evaluation.Precision(5)*100 print MAP, MRR, P1, P5 return MAP, MRR, P1, P5
def evaluate(self, data, eval_func):
    """Rank each query's labels by ``eval_func`` score and compute metrics.

    Args:
        data: iterable of ``(idts, idbs, labels)`` triples.
        eval_func: callable ``(idts, idbs) -> scores``; the result must
            support negation and ``argsort`` (presumably a numpy array).

    Returns:
        ``(MAP, MRR, P1, P5)`` from ``Evaluation``, each multiplied by 100.
    """
    rankings = []
    for idts, idbs, labels in data:
        predicted = eval_func(idts, idbs)
        assert len(predicted) == len(labels)
        # Sorting the negated scores yields indices in descending score order.
        order = (-predicted).argsort()
        rankings.append(labels[order])

    metrics = Evaluation(rankings)
    return (
        metrics.MAP() * 100,
        metrics.MRR() * 100,
        metrics.Precision(1) * 100,
        metrics.Precision(5) * 100,
    )
def evaluate(self, data, eval_func):
    """Rank candidates by a weighted average over several scoring passes.

    For every ``(idts, idbs, labels, weights)`` item, one scoring pass is
    made per entry of ``weights[0]``: pass ``i`` scores column ``i`` of
    ``idts`` together with all columns from index ``len(weights[0])``
    onward. The flat score vector of each pass is consumed in consecutive
    chunks, one chunk of ``len(weights[j + 1])`` values per candidate ``j``.
    Candidate ``j``'s final score is ``np.average`` of all its collected
    values, weighted by ``candidate_weight * query_weight``.

    NOTE(review): the inline comments of the original suggest that
    ``weights[0]`` belongs to the original plus generated query questions
    and ``weights[1:]`` to each candidate's variants -- confirm with the
    data producer.

    Returns:
        ``(MAP, MRR, P1, P5)`` from ``Evaluation``, each multiplied by 100.
    """
    rankings = []
    for idts, idbs, labels, weights in data:
        query_weights = weights[0]
        n_query_columns = len(query_weights)
        collected_scores = []
        collected_weights = []

        # One scoring pass per query column.
        for query_idx, query_weight in enumerate(query_weights):
            columns = idts.transpose()
            # This query column followed by every non-query column.
            batch = np.array(
                [columns[query_idx]] + columns[n_query_columns:].tolist(),
                dtype=np.int32).transpose()

            remaining = eval_func(batch)
            # Hand out the flat scores candidate by candidate.
            for cand_idx, cand_weights in enumerate(weights[1:]):
                if len(collected_scores) == cand_idx:
                    # First time this candidate is seen: open its buckets.
                    collected_scores.append([])
                    collected_weights.append([])
                width = len(cand_weights)
                collected_scores[cand_idx] += remaining[:width].tolist()
                collected_weights[cand_idx] += [
                    cand_weight * query_weight for cand_weight in cand_weights
                ]
                remaining = remaining[width:]

        # Collapse every candidate's bucket into one weighted mean.
        scores = [
            np.average(values, weights=value_weights)
            for values, value_weights in zip(collected_scores,
                                             collected_weights)
        ]
        assert len(scores) == len(labels)

        order = (-np.array(scores)).argsort()
        rankings.append(labels[order])

    metrics = Evaluation(rankings)
    return (
        metrics.MAP() * 100,
        metrics.MRR() * 100,
        metrics.Precision(1) * 100,
        metrics.Precision(5) * 100,
    )
def evaluate(self, data, eval_func):
    """Batch each raw example, score its titles and compute ranking metrics.

    Every ``(t, b, labels)`` item is turned into id matrices with
    ``myio.create_one_batch(t, b, self.padding_id)``; only the title matrix
    is passed to ``eval_func``. No check is made that the number of scores
    equals the number of labels (that assertion is disabled in this variant).

    Returns:
        ``(MAP, MRR, P1, P5)`` from ``Evaluation``, each multiplied by 100.
    """
    rankings = []
    for t, b, labels in data:
        title_ids, _body_ids = myio.create_one_batch(t, b, self.padding_id)
        predicted = eval_func(title_ids)
        # Sorting the negated scores yields indices in descending score order.
        order = (-predicted).argsort()
        rankings.append(labels[order])

    metrics = Evaluation(rankings)
    return (
        metrics.MAP() * 100,
        metrics.MRR() * 100,
        metrics.Precision(1) * 100,
        metrics.Precision(5) * 100,
    )
def on_test_epoch_end(self):
    """Print, log and return the test accuracy and ranking metrics.

    Accuracy comes from ``self.testaccuracy.compute()``; the ranking metrics
    come from ``Evaluation(self.eval_res)`` and are multiplied by 100. Each
    value is logged through ``self.log`` under the keys ``test_acc_epoch``,
    ``t_MAP``, ``t_Mrr``, ``t_p1`` and ``t_p5``.

    Returns:
        ``(vacc, MAP, MRR, P1, P5)``.
    """
    print("Calculating test accuracy...")
    vacc = self.testaccuracy.compute()

    metrics = Evaluation(self.eval_res)
    MAP = metrics.MAP() * 100
    MRR = metrics.MRR() * 100
    P1 = metrics.Precision(1) * 100
    P5 = metrics.Precision(5) * 100

    print("Test accuracy:", vacc)
    print(MAP, MRR, P1, P5)

    logged = (
        ('test_acc_epoch', vacc),
        ('t_MAP', MAP),
        ('t_Mrr', MRR),
        ('t_p1', P1),
        ('t_p5', P5),
    )
    for key, value in logged:
        self.log(key, value)
    return vacc, MAP, MRR, P1, P5
def individual_evaluation(self):
    """Evaluate ``system_type = 2`` scores per relationship type and plot.

    Every row of ``experiment_system_user`` with ``system_type = 2`` is
    assigned to a group by looking up ``relationship_type`` of its article
    in ``articles``; only the string codes '1'..'6' are collected, any other
    value is ignored. The groups are printed, passed to
    ``Evaluation.average_ndcg`` / ``Evaluation.MAP``, and the curves are
    saved to ``image/Relationship_NDCG.png`` and
    ``image/Relationship_MAP.png``.
    """
    e = Evaluation()
    sql = "SELECT article_id, score1, score2, score3, score4, score5 From experiment_system_user Where system_type = 2"
    self.cursor.execute(sql)

    # (relationship_type code, console prefix, legend label, line colour)
    groups = (
        ('1', "kinship:", "kinship", 'y'),
        ('2', "remantic:", "romantic relationship", 'm'),
        ('3', "friendship:", "friendship", 'g'),
        ('4', "teacher student:", "teacher student relationship", 'b'),
        ('5', "business:", "business relationship", 'c'),
        ('6', "others:", "others", 'k'),
    )
    buckets = dict((code, []) for code, _, _, _ in groups)

    # All score rows are fetched up front, so the cursor can be reused for
    # the per-article lookup inside the loop.
    for record in self.cursor.fetchall():
        sql = "SELECT relationship_type From articles Where id = " + str(
            record[0])
        self.cursor.execute(sql)
        relationship_type = self.cursor.fetchone()[0]
        print("article_id:" + str(record[0]) + " relationship_type:" +
              str(relationship_type))
        if relationship_type in buckets:
            buckets[relationship_type].append(
                [record[col] for col in range(1, 6)])

    for code, prefix, _, _ in groups:
        print(prefix + str(buckets[code]))
        print("length:" + str(len(buckets[code])))

    # All NDCG values are computed before any MAP value, as before.
    ndcg_curves = [e.average_ndcg(buckets[code]) for code, _, _, _ in groups]
    map_curves = [e.MAP(buckets[code]) for code, _, _, _ in groups]

    def save_figure(title, y_label, curves, target):
        # One line per relationship group against x = 1..5.
        plt.title(title)
        plt.xlabel("Top K Recommendation")
        plt.ylabel(y_label)
        for (_, _, legend, colour), curve in zip(groups, curves):
            plt.plot(range(1, 6), curve, "-v", color=colour, label=legend)
        plt.legend(loc="best")
        # save image
        plt.savefig(target)
        plt.close()

    save_figure("Evaluate Different Relationship Performance NDCG@k",
                "NDCG@k", ndcg_curves, 'image/Relationship_NDCG.png')
    save_figure("Evaluate Different Relationship Performance MAP@k",
                "MAP@k", map_curves, 'image/Relationship_MAP.png')
def evaluate_z(self, data, data_raw, ids_corpus, zeval_func, dump_path=None):
    """Evaluate ranking quality together with statistics of the ``z`` mask.

    Args:
        data: sized, indexable collection of ``(idts, labels)`` pairs.
        data_raw: indexable collection aligned with ``data``; item ``i``
            unpacks to ``(pid, qids, _)``.
        ids_corpus: mapping ``question_id -> (title, body)``; only
            ``len(title)`` is used here.
        zeval_func: callable ``idts -> (scores, p1, z)``. ``z`` is indexed
            like ``idts`` (same ``.T`` / ``[:, k]`` access).
        dump_path: optional file path. When given, the query and its three
            top-ranked candidates of every item are written there, with
            words whose ``z`` value is not 1 replaced by ``__``.

    Returns:
        ``(MAP, MRR, P1, P5, mean_p1, title_portion)`` where the first four
        come from ``Evaluation`` (times 100), ``mean_p1`` is the sum of the
        ``p1`` values divided by ``len(data)``, and ``title_portion`` is the
        summed ``z`` mass attributed to titles divided by the total
        non-padding ``z`` mass (plus 1e-8 to avoid division by zero).
    """
    args = self.args
    padding_id = self.padding_id
    tot_p1 = 0.0
    portion_title = 0.0
    tot_selected = 0.0
    res = []
    output_data = []

    for i, (idts, labels) in enumerate(data):
        pid, qids, _ = data_raw[i]
        scores, p1, z = zeval_func(idts)
        assert len(scores) == len(labels)
        # Negate before argsort so the highest score ranks first.
        ranks = (-scores).argsort()
        res.append(labels[ranks])
        tot_p1 += p1

        # Column 0 is paired with pid, the following columns with qids.
        for wids_i, z_i, question_id in zip(idts.T, z.T, [pid] + qids):
            z2_i = [zv for wid, zv in zip(wids_i, z_i) if wid != padding_id]
            title, body = ids_corpus[question_id]
            n_title = len(title)
            # The title span is read from the front when args.merge == 1 or
            # the question id is even, otherwise from the tail.
            # NOTE(review): presumably this mirrors how title and body were
            # concatenated upstream -- confirm.
            if args.merge == 1 or question_id % 2 == 0:
                portion_title += sum(z2_i[:n_title])
            elif n_title:
                # Guarded: z2_i[-0:] is the whole list, which would count
                # every selected word as title for an empty title.
                portion_title += sum(z2_i[-n_title:])
            tot_selected += sum(z2_i)

        if dump_path is not None:
            output_data.append(("Query: ", idts[:, 0], z[:, 0], pid))
            # Candidate k occupies column k + 1 (column 0 is the query).
            for cand in ranks[:3]:
                output_data.append(("Retrieved: {} label={}".format(
                    scores[cand], labels[cand]), idts[:, cand + 1],
                    z[:, cand + 1], qids[cand]))

    if dump_path is not None:
        embedding_layer = self.embedding_layer
        padding = "<padding>"
        with open(dump_path, "w") as fout:
            for heading, wordids, z_col, question_id in output_data:
                words = embedding_layer.map_to_words(wordids)
                fout.write(heading + "\tID: {}\n".format(question_id))
                fout.write(" " + " ".join(
                    w for w in words if w != padding) + "\n")
                fout.write("------------\n")
                fout.write("Rationale:\n")
                fout.write(" " + " ".join(
                    w if zv == 1 else "__"
                    for w, zv in zip(words, z_col) if w != padding) + "\n")
                fout.write("\n\n")

    e = Evaluation(res)
    MAP = e.MAP() * 100
    MRR = e.MRR() * 100
    P1 = e.Precision(1) * 100
    P5 = e.Precision(5) * 100
    return MAP, MRR, P1, P5, tot_p1 / len(data), portion_title / (
        tot_selected + 1e-8)