def evaluate(model, data, dev=False):
    model.eval()
    labels_list = {}
    for step, one_data in enumerate(data):
        label = one_data[-1].data.numpy()
        predictions = model(one_data[0])
        if torch.cuda.is_available():  # run on GPU
            pred_label = predictions.cpu().data.numpy()
            uc_pairs = one_data[0].cpu().data.numpy()
        else:
            pred_label = predictions.data.numpy()
            uc_pairs = one_data[0].data.numpy()
        # Group (predicted_score, label) pairs by query id (first column of the pair tensor).
        for n in range(len(label)):
            try:
                labels_list[uc_pairs[n, 0]].append((pred_label[n], label[n]))
            except KeyError:
                labels_list[uc_pairs[n, 0]] = [(pred_label[n], label[n])]
    res_map = cal_map(labels_list)
    if dev:
        return res_map
    else:
        res_p1 = cal_precision_N(labels_list, 1)
        res_p5 = cal_precision_N(labels_list, 5)
        res_ndcg5 = cal_ndcg_all(labels_list, 5)
        res_ndcg10 = cal_ndcg_all(labels_list, 10)
        return res_map, res_p1, res_p5, res_ndcg5, res_ndcg10
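
# A minimal sketch (not part of the original code) documenting the input format of the
# ranking-metric helpers: `evaluate` hands them a dict mapping each query id to a list of
# (predicted_score, ground_truth_label) tuples. `cal_map` presumably computes mean average
# precision over that structure, roughly as follows; the helper name below is hypothetical.
def _sketch_cal_map(labels_list):
    average_precisions = []
    for query_id, pairs in labels_list.items():
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)  # sort by score, descending
        hits, precisions = 0, []
        for rank, (_, label) in enumerate(ranked, start=1):
            if label > 0:
                hits += 1
                precisions.append(hits / rank)  # precision at this relevant position
        if precisions:
            average_precisions.append(sum(precisions) / len(precisions))
    return sum(average_precisions) / len(average_precisions) if average_precisions else 0.0
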
def validate(model, data):
    dataloader = DataLoader(data, batch_size=batch_size, shuffle=False,
                            drop_last=False, num_workers=0)
    model.eval()
    model.prepare_test()
    scorelist = {}
    index = 0
    for dialog_users, dialog_sentences, dialog_sentence_lens, dialog_lens, indices in dataloader:
        dialog_users = dialog_users.cuda(non_blocking=True)  # [batch_size, dialog_len]
        dialog_sentences = dialog_sentences.cuda(non_blocking=True)  # [batch_size, dialog_len, sentence_len]
        dialog_sentence_lens = dialog_sentence_lens.cuda(non_blocking=True)  # [batch_size, dialog_len]
        # For each dialog, score its ground-truth target users plus 100 sampled negatives.
        sample_indices = np.zeros([indices.size(0), 128], dtype=np.int64)
        for i in range(len(indices)):
            _index = indices[i]
            _len = len(data.targets[_index])
            sample_set = set()
            for j in range(_len):
                sample_indices[i, j] = data.targets[_index][j]
                sample_set.add(data.targets[_index][j])
            for j in range(_len, _len + 100):
                # Rejection-sample a negative user that is not a ground-truth target.
                while True:
                    rand_index = random.randint(0, user_num - 1)
                    if rand_index not in sample_set:
                        sample_indices[i, j] = rand_index
                        sample_set.add(rand_index)
                        break
        sample_indices_tensor = torch.from_numpy(sample_indices).cuda()
        probs = model(dialog_users, dialog_sentences, dialog_sentence_lens,
                      dialog_lens, sample_indices_tensor)  # [batch_size, 128] candidate scores
        probs = probs.tolist()
        # Positives are labelled 1, sampled negatives 0.
        for i in range(len(indices)):
            temp = []
            _index = indices[i]
            _len = len(data.targets[_index])
            for j in range(_len):
                temp.append((probs[i][j], 1))
            for j in range(_len, _len + 100):
                temp.append((probs[i][j], 0))
            scorelist[index] = temp
            index += 1
    MAP = rank_eval.cal_map(scorelist)
    Precision_1 = rank_eval.cal_precision_N(scorelist, 1)
    Precision_5 = rank_eval.cal_precision_N(scorelist, 5)
    NDCG_10 = rank_eval.cal_ndcg_all(scorelist, 10)
    MRR = rank_eval.cal_mrr(scorelist)
    print("MAP :", MAP)
    print("precision@1 :", Precision_1)
    print("precision@5 :", Precision_5)
    print("nDCG@10 :", NDCG_10)
    print("MRR :", MRR)
    return MAP, Precision_1, Precision_5, NDCG_10, MRR
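
# Hedged usage sketch (assumed, not from the original script): `validate` is presumably
# called on the dev split after each training epoch, and the returned metrics collected
# into a per-metric dict keyed by checkpoint id, which is the `result_dict` format that
# `test_best_model` below consumes. `dev_data` and `num_epochs` are illustrative names.
def _sketch_validation_loop(model, dev_data, num_epochs):
    map_results = {}
    for epoch in range(num_epochs):
        # ... one epoch of training would go here ...
        MAP, P1, P5, NDCG10, MRR = validate(model, dev_data)
        map_results[epoch] = MAP  # checkpoint id -> validation MAP
    return map_results
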
def test_best_model(result_dict, metric):
    assert len(result_dict) > 0
    # Pick the checkpoint with the best validation score for the given metric.
    best_model_id = 0
    best_value = 0
    for k in result_dict:
        if result_dict[k] > best_value:
            best_model_id = k
            best_value = result_dict[k]
    model = RRN(user_num, vocabulary_size, max_sentence_len)
    model.load_state_dict(
        torch.load(os.path.join(model_dir, model_prefix + str(best_model_id)),
                   map_location=torch.device('cuda'))['model_state_dict'])
    model.cuda()
    test_data = RecommendationData(test_data_file[dataset], 'test', test_dialogtag_id,
                                   username_id, stoi, max_sentence_len)
    dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False,
                            drop_last=False, num_workers=0)
    model.eval()
    model.prepare_test()
    scorelist = {}
    index = 0
    for dialog_users, dialog_sentences, dialog_sentence_lens, dialog_lens, indices in dataloader:
        dialog_users = dialog_users.cuda(non_blocking=True)  # [batch_size, dialog_len]
        dialog_sentences = dialog_sentences.cuda(non_blocking=True)  # [batch_size, dialog_len, sentence_len]
        dialog_sentence_lens = dialog_sentence_lens.cuda(non_blocking=True)  # [batch_size, dialog_len]
        # Same candidate construction as validate(): ground-truth targets plus 100 sampled negatives.
        sample_indices = np.zeros([indices.size(0), 128], dtype=np.int64)
        for i in range(len(indices)):
            _index = indices[i]
            _len = len(test_data.targets[_index])
            sample_set = set()
            for j in range(_len):
                sample_indices[i, j] = test_data.targets[_index][j]
                sample_set.add(test_data.targets[_index][j])
            for j in range(_len, _len + 100):
                while True:
                    rand_index = random.randint(0, user_num - 1)
                    if rand_index not in sample_set:
                        sample_indices[i, j] = rand_index
                        sample_set.add(rand_index)
                        break
        sample_indices_tensor = torch.from_numpy(sample_indices).cuda()
        probs = model(dialog_users, dialog_sentences, dialog_sentence_lens,
                      dialog_lens, sample_indices_tensor)  # [batch_size, 128] candidate scores
        probs = probs.tolist()
        for i in range(len(indices)):
            temp = []
            _index = indices[i]
            _len = len(test_data.targets[_index])
            for j in range(_len):
                temp.append((probs[i][j], 1))
            for j in range(_len, _len + 100):
                temp.append((probs[i][j], 0))
            scorelist[index] = temp
            index += 1
    MAP = rank_eval.cal_map(scorelist)
    Precision_1 = rank_eval.cal_precision_N(scorelist, 1)
    Precision_5 = rank_eval.cal_precision_N(scorelist, 5)
    NDCG_10 = rank_eval.cal_ndcg_all(scorelist, 10)
    MRR = rank_eval.cal_mrr(scorelist)
    print('Best model on ' + metric + ' : ' + model_prefix + str(best_model_id))
    print(metric + ' on validation : ' + str(best_value))
    print("MAP :", MAP)
    print("precision@1 :", Precision_1)
    print("precision@5 :", Precision_5)
    print("nDCG@10 :", NDCG_10)
    print("MRR :", MRR)
    return MAP, Precision_1, Precision_5, NDCG_10, MRR
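
# Assumed checkpoint format (a sketch, inferred from the torch.load call in test_best_model):
# each checkpoint is expected at model_dir/model_prefix + str(id) and must contain a dict
# with a 'model_state_dict' entry, so the save side presumably looks something like this.
def _sketch_save_checkpoint(model, epoch):
    torch.save({'model_state_dict': model.state_dict()},
               os.path.join(model_dir, model_prefix + str(epoch)))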