Example #1
    def compute(self, config):

        # the per-split train/test/output paths are passed to the model directly
        # below rather than through train_kwargs
        train_kwargs = {
            'sep': self.sep_write,
            'model': config['type'],
            'config': config
        }

        if self.predictor == 'rating':

            results = {'MAE': [], 'RMSE': []}

            for i in range(self.n_splits):

                # positional args for evaluate_with_files(prediction_file, test_file);
                # a tuple keeps the argument order deterministic (a set would not)
                eval_args = (self.pred_paths[i], self.test_paths[i])

                RatingModel(train_file=self.train_paths[i],
                            test_file=self.test_paths[i],
                            output_file=self.pred_paths[i],
                            **train_kwargs)

                this_result = RatingPredictionEvaluation(
                    verbose=False).evaluate_with_files(*eval_args)

                results['MAE'].append(this_result['MAE'])
                results['RMSE'].append(this_result['RMSE'])

                print("Split {} Loss: {}".format(i,
                                                 results[self.eval_metric][i]))

                if i > self.early_stop_split:
                    if mean(results[self.eval_metric]) > self.best_loss:
                        break

        else:

            train_kwargs['rank_length'] = self.rank_length
            results = {self.eval_metric: []}

            for i in range(self.n_splits):

                eval_args = {
                    'prediction_file': self.pred_paths[i],
                    'test_file': self.test_paths[i]
                }
                #                 print(eval_args)
                #                 print(train_kwargs)
                ItemModel(train_file=self.train_paths[i],
                          test_file=self.test_paths[i],
                          output_file=self.pred_paths[i],
                          **train_kwargs)
                this_result = ItemRecommendationEvaluation(
                    verbose=False).evaluate_with_files(**eval_args)
                results[self.eval_metric].append(this_result[self.eval_metric])

                print("Split {} Loss: {}".format(i,
                                                 results[self.eval_metric][i]))

                if i > self.early_stop_split and self.iteration > 0:
                    if max(results[self.eval_metric]) < self.best_loss:
                        break
        return results
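Example #1 trains a CaseRecommender model on each split and then scores the written prediction files with evaluate_with_files. Below is a minimal standalone sketch of that evaluation step, assuming the prediction and test files already exist on disk; the file names are placeholders and the import paths follow recent CaseRecommender releases.

# Minimal sketch of the evaluate_with_files pattern from Example #1.
# 'predictions.dat', 'rank.dat' and 'test.dat' are placeholder paths.
from caserec.evaluation.rating_prediction import RatingPredictionEvaluation
from caserec.evaluation.item_recommendation import ItemRecommendationEvaluation

# rating-prediction metrics (MAE, RMSE) computed from files on disk
rating_metrics = RatingPredictionEvaluation().evaluate_with_files(
    prediction_file='predictions.dat', test_file='test.dat')

# ranking metrics computed from a written top-N ranking file
ranking_metrics = ItemRecommendationEvaluation(n_ranks=[10]).evaluate_with_files(
    prediction_file='rank.dat', test_file='test.dat')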
Example #2
    def evaluate(self, predictions, test_set):
        """
        Method to calculate all the metrics for the rating prediction scenario using dictionaries of
        predictions and the test set. Use read() in ReadFile to transform your prediction and test files into dicts

        :param predictions: Dict of predictions
        :type predictions: dict

        :param test_set: Dictionary with test set information.
        :type test_set: dict

        :return: Dictionary with all evaluation metrics and results
        :rtype: dict

        """

        eval_results = {}
        predictions_list = []
        test_list = []

        if not self.as_rank:
            # Create All but one set, selecting only one sample from the test set for each user
            if self.all_but_one_eval:
                for user in test_set['users']:
                    # select a random item
                    item = random.choice(test_set['feedback'][user])
                    test_set['feedback'][user] = {
                        item: test_set['feedback'][user][item]
                    }

            for user in predictions:
                for item in predictions[user]:
                    rui_predict = predictions[user][item]
                    rui_test = test_set["feedback"].get(user, {}).get(item, np.nan)
                    if not np.isnan(rui_test):
                        predictions_list.append(rui_predict)
                        test_list.append(float(rui_test))

            eval_results.update({
                'MAE': round(mean_absolute_error(test_list, predictions_list), 6),
                'RMSE': round(np.sqrt(mean_squared_error(test_list, predictions_list)), 6)
            })

            if self.verbose:
                self.print_results(eval_results)

        else:
            new_predict_set = []
            new_test_set = {}

            for user in predictions:
                partial_predictions = []
                for item in predictions[user]:

                    if predictions[user][item] > 3:
                        partial_predictions.append(
                            [user, item, predictions[user][item]])

                    if test_set["feedback"].get(user, {}).get(item, 0) > 3:
                        new_test_set.setdefault(user, []).append(item)

                partial_predictions = sorted(partial_predictions,
                                             key=lambda x: -x[2])
                new_predict_set += partial_predictions

            # expose the per-user relevant-item lists under the key expected by the
            # item-recommendation evaluator (the dict intentionally references itself)
            new_test_set['items_seen_by_user'] = new_test_set
            new_test_set['users'] = test_set['users']

            # delegate to the ranking evaluator and keep its metrics as the result
            eval_results = ItemRecommendationEvaluation(
                n_ranks=self.n_rank,
                all_but_one_eval=self.all_but_one_eval).evaluate_recommender(
                    new_predict_set, new_test_set)

        return eval_results
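The docstring above suggests building the prediction and test dicts with ReadFile before calling evaluate. A small sketch of that call path, assuming recent CaseRecommender import locations and placeholder file names:

# Sketch of the dict-based call described in the docstring; paths are placeholders.
from caserec.utils.process_data import ReadFile
from caserec.evaluation.rating_prediction import RatingPredictionEvaluation

predictions_data = ReadFile(input_file='predictions.dat').read()
test_data = ReadFile(input_file='test.dat').read()

# predictions_data['feedback'] maps each user to {item: predicted rating}
metrics = RatingPredictionEvaluation(verbose=False).evaluate(
    predictions_data['feedback'], test_data)
print(metrics)  # e.g. {'MAE': ..., 'RMSE': ...}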
Example #3
    def evaluate(self, measures):
        res = ItemRecommendationEvaluation().evaluation_ranking(self.ranking, self.test_file)
        evaluation = 'Eval:: '
        for measure in measures:
            evaluation += measure + ': ' + str(res[measure]) + ' '
        print(evaluation)
def case_rec_evaluate(FLAGS,
                      model,
                      eval_iter,
                      eval_dict,
                      all_dicts,
                      logger,
                      i,
                      eval_descending=True,
                      is_report=False):

    # Evaluate
    total_batches = len(eval_iter)
    # processing bar
    pbar = tqdm(total=total_batches)
    pbar.set_description("Run Eval")

    model.eval()
    model.disable_grad()

    results = []
    for u_ids in eval_iter:
        u_var = to_gpu(V(torch.LongTensor(u_ids)))
        # batch * item
        scores = model.evaluate(u_var)
        preds = zip(u_ids, scores.data.cpu().numpy())

        results.extend(
            evalRecProcess(list(preds),
                           eval_dict,
                           all_dicts=all_dicts,
                           descending=eval_descending,
                           num_processes=FLAGS.num_processes,
                           topn=FLAGS.topn,
                           queue_limit=FLAGS.max_queue))

        pbar.update(1)
    pbar.close()

    # each entry holds (pred[0], top_ids, gold), where gold is the test item
    predictions = [result[5] for result in results]
    print("Saving predictions. Size: {}.".format(str(len(predictions))))

    predictions_output_filepath = os.path.join(
        FLAGS.log_path, FLAGS.experiment_name + '_pred.dat')
    print_list = []
    for triple in predictions:
        u_id = triple[0]
        top_ids = triple[1]
        #gold = triple[2]
        for rank, i_id in enumerate(top_ids):
            # reciprocal-rank style score: 1.0 for the top item, 0.5 for the next, ...
            score = 1.0 / (rank + 1)
            print_list.append((u_id, i_id, score))
    WriteFile(predictions_output_filepath, data=print_list, sep='\t').write()

    # Using CaseRecommender ReadFile class to read test_set from file
    dataset_path = os.path.join(FLAGS.data_path, FLAGS.dataset)
    eval_files = FLAGS.rec_test_files.split(':')
    test_path = os.path.join(dataset_path, eval_files[i])
    eval_data = ReadFile(input_file=test_path).read()
    predictions_data = ReadFile(input_file=predictions_output_filepath).read()
    print("Reading predictions. Size: {}.".format(
        str(len(predictions_data['feedback']))))

    # Creating CaseRecommender evaluator with item-recommendation parameters
    evaluator = ItemRecommendationEvaluation(n_ranks=[10])
    item_rec_metrics = evaluator.evaluate(predictions_data['feedback'],
                                          eval_data)
    print("From CaseRecommender evaluator: {}.".format(str(item_rec_metrics)))
    logger.info("From CaseRecommender evaluator: {}.".format(
        str(item_rec_metrics)))

    # Creating kg-summ-rec evaluator with diversity parameters
    dataset_name = os.path.basename(
        os.path.dirname(os.path.dirname(FLAGS.log_path)))
    tags = dataset_name.split('_')
    if tags[0] == 'ml-sun':
        evaluator2 = DiversityEvaluation(n_ranks=[10])
        dataset_path = os.path.normpath(FLAGS.data_path + os.sep + os.pardir)
        #tags = dataset_name.split('-')
        #if len(tags) > 2:
        #    mode = dataset_name.split('-')[2]
        #    ratio = dataset_name.split('-')[4]
        #else:
        #    mode = 'sv'
        #    ratio = '100'
        dataset_path = os.path.normpath(FLAGS.data_path + os.sep + os.pardir +
                                        os.sep + os.pardir + os.sep + tags[0] +
                                        '_' + tags[1] + '_' + 'oKG')
        mode = 'sv'
        ratio = '100'
        i2genre_map = read_i2genre_map(dataset_path, mode, ratio)
        diversity_metrics = evaluator2.evaluate(predictions_data['feedback'],
                                                eval_data, i2genre_map)
        print("From kg-summ-rec diversity evaluator: {}.".format(
            str(diversity_metrics)))
        logger.info("From kg-summ-rec diversity evaluator: {}.".format(
            str(diversity_metrics)))

    model.enable_grad()
    return item_rec_metrics
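case_rec_evaluate writes the ranked (user, item, score) triples with WriteFile and then reads them back for ItemRecommendationEvaluation. A condensed sketch of that round trip, with a toy ranking and placeholder paths:

# Condensed sketch of the WriteFile -> ReadFile -> evaluate round trip above.
# The toy ranking and file paths are placeholders.
from caserec.utils.process_data import ReadFile, WriteFile
from caserec.evaluation.item_recommendation import ItemRecommendationEvaluation

ranked_triples = [(1, 10, 1.0), (1, 11, 0.5), (2, 10, 1.0)]  # (user, item, score)
WriteFile('rank_pred.dat', data=ranked_triples, sep='\t').write()

predictions_data = ReadFile(input_file='rank_pred.dat').read()
test_data = ReadFile(input_file='test.dat').read()

metrics = ItemRecommendationEvaluation(n_ranks=[10]).evaluate(
    predictions_data['feedback'], test_data)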
class modelEvaluation():
    def __init__(self, recommen):
        self.recommen = recommen

    def evaluate(self, predictions_file, test_file):
        # compute ranking metrics directly from the prediction and test files
        return ItemRecommendationEvaluation().evaluate_with_files(predictions_file,
                                                                   test_file)
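A possible way to exercise the wrapper class above; the recommender name and file paths are placeholders.

# Hypothetical usage; 'bprmf' and the file paths are placeholders.
wrapper = modelEvaluation(recommen='bprmf')
metrics = wrapper.evaluate('rank_pred.dat', 'test.dat')
print(metrics)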