def compute(self, config):
    # Per-split train/test/output files are passed explicitly to the model
    # constructors below; config['type'] selects the CaseRecommender model.
    # Requires `mean` at module level (e.g. from statistics import mean).
    train_kwargs = {
        'sep': self.sep_write,
        'model': config['type'],
        'config': config
    }

    if self.predictor == 'rating':
        results = {'MAE': [], 'RMSE': []}
        for i in range(self.n_splits):
            eval_args = {
                'prediction_file': self.pred_paths[i],
                'test_file': self.test_paths[i]
            }
            RatingModel(train_file=self.train_paths[i],
                        test_file=self.test_paths[i],
                        output_file=self.pred_paths[i],
                        **train_kwargs)
            this_result = RatingPredictionEvaluation(
                verbose=False).evaluate_with_files(**eval_args)
            results['MAE'].append(this_result['MAE'])
            results['RMSE'].append(this_result['RMSE'])
            print("Split {} Loss: {}".format(i, results[self.eval_metric][i]))
            # Early stopping: rating metrics are errors (lower is better), so
            # abandon this configuration once the running mean exceeds the
            # best loss found so far.
            if i > self.early_stop_split:
                if mean(results[self.eval_metric]) > self.best_loss:
                    break
    else:
        train_kwargs['rank_length'] = self.rank_length
        results = {self.eval_metric: []}
        for i in range(self.n_splits):
            eval_args = {
                'prediction_file': self.pred_paths[i],
                'test_file': self.test_paths[i]
            }
            ItemModel(train_file=self.train_paths[i],
                      test_file=self.test_paths[i],
                      output_file=self.pred_paths[i],
                      **train_kwargs)
            this_result = ItemRecommendationEvaluation(
                verbose=False).evaluate_with_files(**eval_args)
            results[self.eval_metric].append(this_result[self.eval_metric])
            print("Split {} Loss: {}".format(i, results[self.eval_metric][i]))
            # Ranking metrics are scores (higher is better), so stop early
            # when even the best split so far cannot beat the incumbent.
            if i > self.early_stop_split and self.iteration > 0:
                if max(results[self.eval_metric]) < self.best_loss:
                    break

    return results
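# A minimal usage sketch of compute(). Everything here is illustrative: the
# owning object (`tuner`), the 'ItemKNN' model name, and the hyperparameter
# key besides 'type' are hypothetical; the averaging simply mirrors the
# early-stop logic above.
from statistics import mean

config = {
    'type': 'ItemKNN',    # hypothetical CaseRecommender model name
    'k_neighbors': 30     # illustrative model-specific hyperparameter
}
results = tuner.compute(config)
print('Mean {}: {}'.format(tuner.eval_metric, mean(results[tuner.eval_metric])))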
def evaluate(self, predictions, test_set):
    """
    Method to calculate all the metrics for the item recommendation scenario
    using dictionaries of ranking and test set. Use read() in ReadFile to
    transform your prediction and test files into a dict.

    :param predictions: Dict of predictions
    :type predictions: dict

    :param test_set: Dictionary with test set information.
    :type test_set: dict

    :return: Dictionary with all evaluation metrics and results
    :rtype: dict
    """
    # Requires at module level: import random; import numpy as np;
    # from sklearn.metrics import mean_absolute_error, mean_squared_error
    eval_results = {}
    predictions_list = []
    test_list = []

    if not self.as_rank:
        # All-but-one protocol: keep only one randomly chosen sample from the
        # test set for each user.
        if self.all_but_one_eval:
            for user in test_set['users']:
                item = random.choice(list(test_set['feedback'][user]))
                test_set['feedback'][user] = {
                    item: test_set['feedback'][user][item]
                }

        # Collect aligned (prediction, ground truth) pairs for the ratings
        # present in both dictionaries.
        for user in predictions:
            for item in predictions[user]:
                rui_predict = predictions[user][item]
                rui_test = test_set["feedback"].get(user, {}).get(item, np.nan)
                if not np.isnan(rui_test):
                    predictions_list.append(rui_predict)
                    test_list.append(float(rui_test))

        eval_results.update({
            'MAE': round(mean_absolute_error(test_list, predictions_list), 6),
            'RMSE': round(np.sqrt(mean_squared_error(test_list,
                                                     predictions_list)), 6)
        })

        if self.verbose:
            self.print_results(eval_results)
    else:
        # Rank-based evaluation: treat ratings above 3 as relevant and reduce
        # the problem to item recommendation.
        new_predict_set = []
        items_seen_by_user = {}

        for user in predictions:
            partial_predictions = []
            for item in predictions[user]:
                if predictions[user][item] > 3:
                    partial_predictions.append(
                        [user, item, predictions[user][item]])
                if test_set["feedback"].get(user, {}).get(item, 0) > 3:
                    items_seen_by_user.setdefault(user, []).append(item)

            partial_predictions = sorted(partial_predictions,
                                         key=lambda x: -x[2])
            new_predict_set += partial_predictions

        new_test_set = {
            'items_seen_by_user': items_seen_by_user,
            'users': test_set['users']
        }

        eval_results = ItemRecommendationEvaluation(
            n_ranks=self.n_rank,
            all_but_one_eval=self.all_but_one_eval).evaluate_recommender(
                new_predict_set, new_test_set)

    return eval_results
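# Usage sketch following the docstring above. Paths are placeholders, the
# ReadFile import path is assumed from CaseRecommender's layout as used
# elsewhere in this codebase, and the method is assumed to belong to
# RatingPredictionEvaluation (matching its use in compute() above).
from caserec.utils.process_data import ReadFile

predictions = ReadFile(input_file='predictions.dat').read()['feedback']
test_set = ReadFile(input_file='test.dat').read()
metrics = RatingPredictionEvaluation(verbose=False).evaluate(predictions,
                                                             test_set)
print(metrics)  # e.g. {'MAE': ..., 'RMSE': ...}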
def evaluate(self, measures):
    # Compute the ranking metrics once, then print only the requested measures.
    res = ItemRecommendationEvaluation().evaluation_ranking(self.ranking,
                                                            self.test_file)
    evaluation = 'Eval:: '
    for measure in measures:
        evaluation += measure + ': ' + str(res[measure]) + ' '
    print(evaluation)
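# Usage sketch (hypothetical wrapper instance). The measure names assume
# CaseRecommender's "METRIC@N" result keys; adjust to whatever keys the
# evaluator actually returns.
model.evaluate(['PREC@10', 'RECALL@10', 'NDCG@10'])
# -> Eval:: PREC@10: ... RECALL@10: ... NDCG@10: ...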
def case_rec_evaluate(FLAGS, model, eval_iter, eval_dict, all_dicts, logger, i,
                      eval_descending=True, is_report=False):
    # Requires at module level: import os; import torch; from tqdm import tqdm.
    # V is assumed to be torch.autograd.Variable; to_gpu, evalRecProcess,
    # WriteFile, ReadFile, ItemRecommendationEvaluation, DiversityEvaluation
    # and read_i2genre_map come from the project's own modules.

    # Evaluate
    total_batches = len(eval_iter)
    # processing bar
    pbar = tqdm(total=total_batches)
    pbar.set_description("Run Eval")

    model.eval()
    model.disable_grad()

    results = []
    for u_ids in eval_iter:
        u_var = to_gpu(V(torch.LongTensor(u_ids)))
        # batch * item
        scores = model.evaluate(u_var)
        preds = zip(u_ids, scores.data.cpu().numpy())
        results.extend(
            evalRecProcess(list(preds), eval_dict, all_dicts=all_dicts,
                           descending=eval_descending,
                           num_processes=FLAGS.num_processes,
                           topn=FLAGS.topn, queue_limit=FLAGS.max_queue))
        pbar.update(1)
    pbar.close()

    # [(pred[0], top_ids, gold), ...], gold is test
    predictions = [result[5] for result in results]
    print("Saving predictions. Size: {}.".format(str(len(predictions))))

    # Write the rankings as (user, item, score) triples, encoding each item's
    # rank as a reciprocal score so that higher means better.
    predictions_output_filepath = os.path.join(
        FLAGS.log_path, FLAGS.experiment_name + '_pred.dat')
    print_list = []
    for triple in predictions:
        u_id = triple[0]
        top_ids = triple[1]
        for rank, i_id in enumerate(top_ids):
            score = 1.0 / (rank + 1)
            print_list.append((u_id, i_id, score))
    WriteFile(predictions_output_filepath, data=print_list, sep='\t').write()

    # Using CaseRecommender ReadFile class to read test_set from file
    dataset_path = os.path.join(FLAGS.data_path, FLAGS.dataset)
    eval_files = FLAGS.rec_test_files.split(':')
    test_path = os.path.join(dataset_path, eval_files[i])
    eval_data = ReadFile(input_file=test_path).read()
    predictions_data = ReadFile(input_file=predictions_output_filepath).read()
    print("Reading predictions. Size: {}.".format(
        str(len(predictions_data['feedback']))))

    # Creating CaseRecommender evaluator with item-recommendation parameters
    evaluator = ItemRecommendationEvaluation(n_ranks=[10])
    item_rec_metrics = evaluator.evaluate(predictions_data['feedback'],
                                          eval_data)
    print("From CaseRecommender evaluator: {}.".format(str(item_rec_metrics)))
    logger.info("From CaseRecommender evaluator: {}.".format(
        str(item_rec_metrics)))

    # Creating kg-summ-rec evaluator with diversity parameters
    dataset_name = os.path.basename(
        os.path.dirname(os.path.dirname(FLAGS.log_path)))
    tags = dataset_name.split('_')
    if tags[0] == 'ml-sun':
        evaluator2 = DiversityEvaluation(n_ranks=[10])
        # Genre maps live alongside the original (non-summarized) '<dataset>_oKG' data.
        dataset_path = os.path.normpath(
            os.path.join(FLAGS.data_path, os.pardir, os.pardir,
                         tags[0] + '_' + tags[1] + '_' + 'oKG'))
        mode = 'sv'
        ratio = '100'
        i2genre_map = read_i2genre_map(dataset_path, mode, ratio)
        diversity_metrics = evaluator2.evaluate(predictions_data['feedback'],
                                                eval_data, i2genre_map)
        print("From kg-summ-rec diversity evaluator: {}.".format(
            str(diversity_metrics)))
        logger.info("From kg-summ-rec diversity evaluator: {}.".format(
            str(diversity_metrics)))

    model.enable_grad()
    return item_rec_metrics
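# Minimal demonstration of the rank-to-score encoding used when writing the
# prediction file above (item ids are illustrative).
top_ids = [42, 7, 19]
scores = [1.0 / (rank + 1) for rank, _ in enumerate(top_ids)]
assert scores == [1.0, 0.5, 1.0 / 3]  # rank 0 -> 1.0, rank 1 -> 0.5, ...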
class modelEvaluation():

    def __init__(self, recommender, predictions_file, test_file):
        self.recommender = recommender
        self.predictions_file = predictions_file
        self.test_file = test_file

    def evaluate(self):
        # evaluate_with_files reads both files and returns the metrics dict.
        return ItemRecommendationEvaluation().evaluate_with_files(
            self.predictions_file, self.test_file)
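# Usage sketch with placeholder paths; the recommender argument is optional
# context here, since evaluate_with_files only needs the two files.
evaluator = modelEvaluation(recommender=None,
                            predictions_file='predictions.dat',
                            test_file='test.dat')
print(evaluator.evaluate())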