예제 #1
0
def evaluate(model, data, dev=False):
    """Score ``model`` on ``data`` and compute ranking metrics.

    Every element of ``data`` is treated as one batch: item ``0`` is fed to
    the model and its first column is used as the grouping key, item ``-1``
    holds the labels.  Results are collected as
    ``{key: [(prediction, label), ...]}`` and handed to the ``cal_*`` helpers.

    Args:
        model: Callable model; switched to eval mode before scoring.
        data: Iterable of batches (presumably a DataLoader -- confirm at the
            call site).
        dev: When true, only the result of ``cal_map`` is returned.

    Returns:
        ``cal_map(...)`` if ``dev`` is true, otherwise the 5-tuple
        ``(cal_map, cal_precision_N@1, cal_precision_N@5, cal_ndcg_all@5,
        cal_ndcg_all@10)``.
    """
    model.eval()
    labels_list = {}
    for one_data in data:
        inputs = one_data[0]
        # .cpu() is a no-op for tensors that already live on the CPU, so no
        # device branch is needed.  Labels are moved as well: calling
        # .numpy() on a CUDA tensor raises.
        label = one_data[-1].data.cpu().numpy()
        pred_label = model(inputs).data.cpu().numpy()
        uc_pairs = inputs.data.cpu().numpy()
        # range() instead of xrange() so the loop runs on Python 2 and 3.
        for n in range(len(label)):
            group = labels_list.setdefault(uc_pairs[n, 0], [])
            group.append((pred_label[n], label[n]))

    res_map = cal_map(labels_list)
    if dev:
        return res_map

    res_p1 = cal_precision_N(labels_list, 1)
    res_p5 = cal_precision_N(labels_list, 5)
    res_ndcg5 = cal_ndcg_all(labels_list, 5)
    res_ndcg10 = cal_ndcg_all(labels_list, 10)
    return res_map, res_p1, res_p5, res_ndcg5, res_ndcg10
예제 #2
0
def validate(model, data, num_negatives=100):
    """Rank each sample's targets against random negatives and print metrics.

    For every sample the candidate list is its entry in ``data.targets``
    followed by ``num_negatives`` distinct user ids drawn uniformly from
    ``0 .. user_num - 1`` that are not among the targets.  The model scores
    every candidate; targets are labelled 1, negatives 0, and the labelled
    scores are passed to ``rank_eval``.

    Args:
        model: Model exposing ``eval()``, ``prepare_test()`` and a forward
            call taking ``(dialog_users, dialog_sentences,
            dialog_sentence_lens, dialog_lens, candidate_indices)``.
        data: Dataset accepted by ``DataLoader`` whose ``targets`` attribute
            is indexable by the per-sample indices yielded with each batch.
        num_negatives: Negatives sampled per sample.  Defaults to 100, the
            value that used to be hard-coded.

    Returns:
        Tuple ``(MAP, precision@1, precision@5, nDCG@10, MRR)``.

    Raises:
        ValueError: If a sample leaves fewer than ``num_negatives`` user ids
            available as negatives (the sampling loop could never finish).
    """
    # Column count that was previously fixed; kept as a floor so the model
    # sees the same tensor shape whenever the old code worked.
    min_width = 128

    dataloader = DataLoader(data,
                            batch_size=batch_size,
                            shuffle=False,
                            drop_last=False,
                            num_workers=0)
    model.eval()
    model.prepare_test()
    scorelist = {}
    for dialog_users, dialog_sentences, dialog_sentence_lens, dialog_lens, indices in dataloader:
        dialog_users = dialog_users.cuda(
            non_blocking=True)  # [batch_size, dialog_len]
        dialog_sentences = dialog_sentences.cuda(
            non_blocking=True)  # [batch_size, dialog_len, sentence_len]
        dialog_sentence_lens = dialog_sentence_lens.cuda(
            non_blocking=True)  # [batch_size, dialog_len]

        batch_targets = [data.targets[sample_index] for sample_index in indices]
        # Widen the matrix when a sample has too many targets to fit in
        # min_width columns; a fixed width raised IndexError in that case.
        width = max([min_width] +
                    [len(targets) + num_negatives for targets in batch_targets])
        sample_indices = np.zeros([len(batch_targets), width], dtype=np.int64)

        for row, targets in enumerate(batch_targets):
            taken = set()
            for col, target in enumerate(targets):
                sample_indices[row, col] = target
                taken.add(target)

            # Rejection sampling below only terminates if enough ids are free.
            free_ids = user_num - sum(1 for t in taken if 0 <= t < user_num)
            if free_ids < num_negatives:
                raise ValueError(
                    "cannot draw %d distinct negatives: only %d of %d user "
                    "ids are not targets" % (num_negatives, free_ids, user_num))

            first_negative = len(targets)
            for col in range(first_negative, first_negative + num_negatives):
                while True:
                    rand_index = random.randint(0, user_num - 1)
                    if rand_index not in taken:
                        sample_indices[row, col] = rand_index
                        taken.add(rand_index)
                        break

        sample_indices_tensor = torch.from_numpy(sample_indices).cuda()
        # Evaluation only: no autograd graph is needed for the forward pass.
        with torch.no_grad():
            probs = model(dialog_users, dialog_sentences, dialog_sentence_lens,
                          dialog_lens, sample_indices_tensor)
        probs = probs.tolist()  # indexed below as probs[row][candidate]

        for row, targets in enumerate(batch_targets):
            first_negative = len(targets)
            scores = probs[row]
            labelled = [(score, 1) for score in scores[:first_negative]]
            labelled += [
                (score, 0)
                for score in scores[first_negative:first_negative + num_negatives]
            ]
            # Keys are consecutive integers in evaluation order.
            scorelist[len(scorelist)] = labelled

    MAP = rank_eval.cal_map(scorelist)
    Precision_1 = rank_eval.cal_precision_N(scorelist, 1)
    Precision_5 = rank_eval.cal_precision_N(scorelist, 5)
    NDCG_10 = rank_eval.cal_ndcg_all(scorelist, 10)
    MRR = rank_eval.cal_mrr(scorelist)
    print("MAP :", MAP)
    print("precision@1 :", Precision_1)
    print("precision@5 :", Precision_5)
    print("nDCG@10 :", NDCG_10)
    print("MRR :", MRR)
    return MAP, Precision_1, Precision_5, NDCG_10, MRR
예제 #3
0
def test_best_model(result_dict, metric, num_negatives=100):
    """Load the best checkpoint by validation score and evaluate it on test data.

    The checkpoint id with the highest value in ``result_dict`` is loaded
    from ``model_dir`` / ``model_prefix + id``.  Each test sample's targets
    are then ranked against ``num_negatives`` distinct random user ids that
    are not targets; targets are labelled 1, negatives 0, and the labelled
    scores are passed to ``rank_eval``.

    Args:
        result_dict: Non-empty mapping ``checkpoint id -> validation score``.
        metric: Name of the validation metric; used only in printed output.
        num_negatives: Negatives sampled per sample.  Defaults to 100, the
            value that used to be hard-coded.

    Returns:
        Tuple ``(MAP, precision@1, precision@5, nDCG@10, MRR)`` on test data.

    Raises:
        AssertionError: If ``result_dict`` is empty.
        ValueError: If a sample leaves fewer than ``num_negatives`` user ids
            available as negatives (the sampling loop could never finish).
    """
    assert len(result_dict) > 0
    # Select among the ids that actually exist.  Starting from id 0 / score 0
    # picked a possibly missing checkpoint whenever no score exceeded 0.
    best_model_id = max(result_dict, key=lambda k: result_dict[k])
    best_value = result_dict[best_model_id]

    model = RRN(user_num, vocabulary_size, max_sentence_len)
    checkpoint = torch.load(
        os.path.join(model_dir, model_prefix + str(best_model_id)),
        map_location=torch.device('cuda'))
    model.load_state_dict(checkpoint['model_state_dict'])
    model.cuda()

    test_data = RecommendationData(test_data_file[dataset], 'test',
                                   test_dialogtag_id, username_id, stoi,
                                   max_sentence_len)
    dataloader = DataLoader(test_data,
                            batch_size=batch_size,
                            shuffle=False,
                            drop_last=False,
                            num_workers=0)
    model.eval()
    model.prepare_test()

    # Column count that was previously fixed; kept as a floor so the model
    # sees the same tensor shape whenever the old code worked.
    min_width = 128
    scorelist = {}
    for dialog_users, dialog_sentences, dialog_sentence_lens, dialog_lens, indices in dataloader:
        dialog_users = dialog_users.cuda(
            non_blocking=True)  # [batch_size, dialog_len]
        dialog_sentences = dialog_sentences.cuda(
            non_blocking=True)  # [batch_size, dialog_len, sentence_len]
        dialog_sentence_lens = dialog_sentence_lens.cuda(
            non_blocking=True)  # [batch_size, dialog_len]

        batch_targets = [
            test_data.targets[sample_index] for sample_index in indices
        ]
        # Widen the matrix when a sample has too many targets to fit in
        # min_width columns; a fixed width raised IndexError in that case.
        width = max([min_width] +
                    [len(targets) + num_negatives for targets in batch_targets])
        sample_indices = np.zeros([len(batch_targets), width], dtype=np.int64)

        for row, targets in enumerate(batch_targets):
            taken = set()
            for col, target in enumerate(targets):
                sample_indices[row, col] = target
                taken.add(target)

            # Rejection sampling below only terminates if enough ids are free.
            free_ids = user_num - sum(1 for t in taken if 0 <= t < user_num)
            if free_ids < num_negatives:
                raise ValueError(
                    "cannot draw %d distinct negatives: only %d of %d user "
                    "ids are not targets" % (num_negatives, free_ids, user_num))

            first_negative = len(targets)
            for col in range(first_negative, first_negative + num_negatives):
                while True:
                    rand_index = random.randint(0, user_num - 1)
                    if rand_index not in taken:
                        sample_indices[row, col] = rand_index
                        taken.add(rand_index)
                        break

        sample_indices_tensor = torch.from_numpy(sample_indices).cuda()
        # Evaluation only: no autograd graph is needed for the forward pass.
        with torch.no_grad():
            probs = model(dialog_users, dialog_sentences, dialog_sentence_lens,
                          dialog_lens, sample_indices_tensor)
        probs = probs.tolist()  # indexed below as probs[row][candidate]

        for row, targets in enumerate(batch_targets):
            first_negative = len(targets)
            scores = probs[row]
            labelled = [(score, 1) for score in scores[:first_negative]]
            labelled += [
                (score, 0)
                for score in scores[first_negative:first_negative + num_negatives]
            ]
            # Keys are consecutive integers in evaluation order.
            scorelist[len(scorelist)] = labelled

    MAP = rank_eval.cal_map(scorelist)
    Precision_1 = rank_eval.cal_precision_N(scorelist, 1)
    Precision_5 = rank_eval.cal_precision_N(scorelist, 5)
    NDCG_10 = rank_eval.cal_ndcg_all(scorelist, 10)
    MRR = rank_eval.cal_mrr(scorelist)
    print('Best model on ' + metric + ' : ' + model_prefix +
          str(best_model_id))
    print(metric + ' on validation : ' + str(best_value))
    print("MAP :", MAP)
    print("precision@1 :", Precision_1)
    print("precision@5 :", Precision_5)
    print("nDCG@10 :", NDCG_10)
    print("MRR :", MRR)
    return MAP, Precision_1, Precision_5, NDCG_10, MRR