import math
import random
import sys

# ReadFile, check_error_file and return_list_info are helper utilities from
# the surrounding package; their import paths are assumed and not shown here.


def divide_dataset(self):
    tp = ReadFile(self.dataset, space_type=self.space_type)
    tp.triple_information()
    random.shuffle(tp.triple_dataset)

    # Get the number of interactions that each partition should have.
    partition_size = int(float(tp.number_interactions) / float(self.n_folds))

    list_folds = list()
    last = 0

    # Slice the shuffled dataset into n_folds contiguous, disjoint partitions;
    # each slice starts exactly where the previous one ended.
    for p in range(self.n_folds):
        final = (p + 1) * partition_size
        list_folds.append(tp.triple_dataset[last:final])
        last = final

    # For each fold, the test set is that partition and the train set is the
    # union of all remaining partitions.
    for fold in range(self.n_folds):
        train_set = list()
        for fold_train in range(self.n_folds):
            if fold_train != fold:
                train_set += list_folds[fold_train]
        train_set.sort()
        list_folds[fold].sort()
        self.dict_folds[fold] = {'train': train_set, 'test': list_folds[fold]}
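# A minimal, self-contained sketch of the k-fold partitioning idea used by
# divide_dataset above, on plain index lists instead of ReadFile triples.
# The function name and toy data here are illustrative, not part of the module.
def kfold_partitions(n_interactions, n_folds):
    indices = list(range(n_interactions))
    random.shuffle(indices)
    size = n_interactions // n_folds
    return [indices[f * size:(f + 1) * size] for f in range(n_folds)]

# Example: kfold_partitions(10, 5) yields 5 disjoint partitions of size 2;
# each partition serves once as test while the other four form the train set.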
def read_ranking_files(self):
    # Load every ranking file and store, per file: the ranked interactions,
    # the [min, max] score pair for later normalization, the average score,
    # and the number of interactions per user.
    for ranking_file in self.list_rank_files:
        ranking = ReadFile(ranking_file, space_type=self.space_type)
        rank_interaction, list_interaction = ranking.read_rankings()
        self.rankings.append(rank_interaction)
        self.normalization.append([min(list_interaction), max(list_interaction)])
        self.averages.append(ranking.average_scores)
        self.num_user_int.append(ranking.num_user_interactions)
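# read_ranking_files stores a [min, max] pair per ranking file; a natural use
# for it is min-max scaling of scores onto [0, 1]. Sketch only; this helper is
# illustrative and not defined elsewhere in the module.
def min_max_scale(score, low, high):
    # Guard against a constant ranking, where high == low.
    return 0.0 if high == low else (score - low) / float(high - low)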
def test_env(self, ranking, test_file):
    ranking_dict = dict()
    test = ReadFile(test_file, space_type=self.space_type)
    test.main_information_item_recommendation()
    # Group the (user, item) ranking tuples into a per-user item list.
    for sample in ranking:
        ranking_dict.setdefault(sample[0], list()).append(sample[1])
    return self.default_evaluation(ranking_dict, test)
def treat_interactions(self):
    # Read each training file and accumulate its (user, item, feedback)
    # triples into one combined dataset.
    for interaction_file in self.list_train_files:
        interaction = ReadFile(interaction_file, space_type=self.space_type)
        interaction.triple_information()
        self.individual_datasets.append(interaction.triple_dataset)
        self.final_dataset += interaction.triple_dataset

    self.dict_item, self.dict_not_item, self.list_users, self.list_items, \
        self.dict_index = return_list_info(self.final_dataset)
    self.list_users = list(self.list_users)
    self.list_items = list(self.list_items)
def treat_interactions(self):
    for num, interaction_file in enumerate(self.list_train_files):
        interaction = ReadFile(interaction_file, space_type=self.space_type)
        interaction.triple_information()
        self.individual_datasets.append(interaction.triple_dataset)
        self.final_dataset += interaction.triple_dataset

        # For the last training file only, count how often each of the first
        # two fields of a triple occurs (tag-frequency information).
        if num + 1 == len(self.list_train_files):
            for triple in interaction.triple_dataset:
                self.dict_item_tag[triple[0]] = self.dict_item_tag.get(triple[0], 0) + 1
                self.dict_item_tag[triple[1]] = self.dict_item_tag.get(triple[1], 0) + 1

    self.dict_item, self.dict_not_item, self.list_users, self.list_items, \
        self.dict_index = return_list_info(self.final_dataset)
    self.list_users = list(self.list_users)
    self.list_items = list(self.list_items)
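# The tag-frequency loop in treat_interactions above is equivalent to updating
# a collections.Counter over the first two fields of each triple. Sketch for
# clarity; the dict-based version is what the class actually uses.
from collections import Counter

def count_first_two_fields(triples):
    counts = Counter()
    for triple in triples:
        counts[triple[0]] += 1
        counts[triple[1]] += 1
    return counts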
def all_but_one_evaluation(self, file_result, file_test, n_ranks=(1, 3, 5, 10)):
    # Verify that the files are valid.
    check_error_file(file_result)
    check_error_file(file_test)

    predict = ReadFile(file_result, space_type=self.space_type)
    predict.main_information_item_recommendation()
    test = ReadFile(file_test, space_type=self.space_type)
    test.main_information_item_recommendation()

    num_user = len(test.list_users)
    final_values = list()

    # All-but-one protocol: keep a single held-out item per user in the test set.
    for user in test.list_users:
        test.dict_users[user] = [test.dict_users[user][0]]

    for n in n_ranks:
        if n < 1:
            print('Error: N must be >= 1.')
            sys.exit()

        partial_precision = list()
        partial_recall = list()
        avg_prec_total = list()

        for user in test.list_users:
            num_user_interactions = len(test.dict_users[user])
            hit_cont = 0
            avg_prec_sum = 0

            try:
                # Generate user intersection list between the recommended items and test.
                intersection = list(set(predict.dict_users[user][:n]).intersection(
                    set(test.dict_users[user])))

                if len(intersection) > 0:
                    partial_precision.append(float(len(intersection)) / float(n))
                    partial_recall.append(float(len(intersection)) / float(num_user_interactions))

                    for item in intersection:
                        hit_cont += 1
                        avg_prec_sum += float(hit_cont) / float(test.dict_users[user].index(item) + 1)

                    avg_prec_total.append(float(avg_prec_sum) / float(num_user_interactions))

            except KeyError:
                pass

        if not self.only_map:
            final_precision = sum(partial_precision) / float(num_user)
            final_values.append(final_precision)
            final_recall = sum(partial_recall) / float(num_user)
            final_values.append(final_recall)

        final_map = sum(avg_prec_total) / float(num_user)
        final_values.append(final_map)

    return final_values
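# Self-contained recomputation of precision@N and recall@N for one user,
# matching the set-intersection logic in all_but_one_evaluation. Toy data
# only; the helper below is illustrative and not part of the module.
def precision_recall_at_n(ranked, relevant, n):
    hits = set(ranked[:n]).intersection(relevant)
    return len(hits) / float(n), len(hits) / float(len(relevant))

# Example: precision_recall_at_n([7, 3, 9], [3], 3) -> (0.333..., 1.0).
# Under the all-but-one protocol each user keeps a single test item, so
# recall@N is 1.0 exactly when that held-out item appears in the top N.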
def simple_evaluation(self, file_result, file_test):
    # Verify that the files are valid.
    check_error_file(file_result)
    check_error_file(file_test)

    predict = ReadFile(file_result, space_type=self.space_type)
    predict.main_information()
    test = ReadFile(file_test, space_type=self.space_type)
    test.main_information()

    rmse = 0
    mae = 0
    count_comp = 0

    # Accumulate squared and absolute errors over the (user, item) pairs
    # present in both the prediction file and the test file.
    for user in test.list_users:
        for item in test.dict_users[user]:
            try:
                rui_predict = float(predict.dict_users[user][item])
                rui_test = float(test.dict_users[user][item])
                rmse += math.pow(rui_predict - rui_test, 2)
                mae += math.fabs(rui_predict - rui_test)
                count_comp += 1
            except KeyError:
                pass

    if count_comp != 0:
        rmse = math.sqrt(float(rmse) / float(count_comp))
        # MAE is the plain mean absolute error; no square root is taken.
        mae = float(mae) / float(count_comp)

    return rmse, mae
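# Hand check of the two error measures, assuming the usual definitions:
# RMSE takes the square root of the mean squared error, while MAE is the
# plain mean absolute error. Illustrative helper, not part of the module.
def rmse_mae(pairs):
    # pairs: iterable of (predicted, observed) rating tuples.
    se = sum((p - o) ** 2 for p, o in pairs)
    ae = sum(abs(p - o) for p, o in pairs)
    n = float(len(pairs))
    return math.sqrt(se / n), ae / n

# Example: rmse_mae([(3.0, 4.0), (5.0, 4.0)]) -> (1.0, 1.0).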
def divide_dataset(self):
    tp = ReadFile(self.dataset, space_type=self.space_type)
    tp.split_dataset()

    for fold in range(self.n_folds):
        dict_feedback = list()
        tp.triple_dataset = list(set(tp.triple_dataset))
        random.shuffle(tp.triple_dataset)

        # Shuffled hold-out split: the first (1 - test_ratio) of the triples
        # go to train, the rest to test.
        sp = int((1 - self.test_ratio) * len(tp.triple_dataset))
        train = tp.triple_dataset[:sp]
        test = tp.triple_dataset[sp:]
        train.sort()
        test.sort(key=lambda x: x[0])

        train_set = list()
        test_set = list()

        # Map each train / test triple back to the feedback value it carries
        # in each individual interaction file.
        for i in range(len(self.dataset)):
            dict_individual = dict()

            for triple in train:
                try:
                    dict_individual.setdefault(triple[0], {}).update(
                        {triple[1]: tp.individual_interaction[i][triple[0]][triple[1]]})
                    train_set.append([triple[0], triple[1],
                                      tp.individual_interaction[i][triple[0]][triple[1]]])
                except KeyError:
                    pass

            for triple_test in test:
                try:
                    test_set.append([triple_test[0], triple_test[1],
                                     tp.individual_interaction[i][triple_test[0]][triple_test[1]]])
                except KeyError:
                    pass

            dict_feedback.append(dict_individual)

        self.dict_feedback_folds[fold] = dict_feedback
        self.dict_folds[fold] = {'train': train_set, 'test': test_set}
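# Minimal sketch of the shuffled hold-out split performed per fold above,
# stripped of the per-file feedback lookup. Names are illustrative.
def holdout_split(triples, test_ratio):
    data = list(set(triples))
    random.shuffle(data)
    cut = int((1 - test_ratio) * len(data))
    return data[:cut], data[cut:]

# Example: holdout_split(triples, 0.1) returns a 90/10 train/test split.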
def simple_evaluation(self, file_result, file_test):
    # Verify that the files are valid.
    check_error_file(file_result)
    check_error_file(file_test)

    predict = ReadFile(file_result, space_type=self.space_type)
    predict.main_information_item_recommendation()
    test = ReadFile(file_test, space_type=self.space_type)
    test.main_information_item_recommendation()

    return self.default_evaluation(predict.dict_users, test)