def divide_dataset(self):
        tp = ReadFile(self.dataset, space_type=self.space_type)
        tp.triple_information()
        random.shuffle(tp.triple_dataset)

        # Get the number of interactions that each partition should have.
        partition_size = int(float(tp.number_interactions) / float(self.n_folds))

        list_folds = list()
        last = -1

        for p in xrange(self.n_folds):
            initial = 1 + last
            final = (p + 1) * partition_size
            list_folds.append(tp.triple_dataset[initial:final])
            last = final

        for fold in xrange(self.n_folds):
            train_set = list()
            for fold_train in xrange(self.n_folds):
                if fold_train != fold:
                    train_set += list_folds[fold_train]
                train_set.sort()

            list_folds[fold].sort()
            self.dict_folds[fold] = {'train': train_set, 'test': list_folds[fold]}
 def read_ranking_files(self):
     for ranking_file in self.list_rank_files:
         ranking = ReadFile(ranking_file, space_type=self.space_type)
         rank_interaction, list_interaction = ranking.read_rankings()
         self.rankings.append(rank_interaction)
         self.normalization.append([min(list_interaction), max(list_interaction)])
         self.averages.append(ranking.average_scores)
         self.num_user_int.append(ranking.num_user_interactions)
    def test_env(self, ranking, test_file):
        ranking_dict = dict()
        test = ReadFile(test_file, space_type=self.space_type)
        test.main_information_item_recommendation()

        for sample in ranking:
            ranking_dict.setdefault(sample[0], list()).append(sample[1])

        return self.default_evaluation(ranking_dict, test)
    def treat_interactions(self):
        for num, interaction_file in enumerate(self.list_train_files):
            interaction = ReadFile(interaction_file, space_type=self.space_type)
            interaction.triple_information()
            self.individual_datasets.append(interaction.triple_dataset)
            self.final_dataset += interaction.triple_dataset

        self.dict_item, self.dict_not_item, self.list_users, self.list_items, \
            self.dict_index = return_list_info(self.final_dataset)

        self.list_users = list(self.list_users)
        self.list_items = list(self.list_items)
    def treat_interactions(self):
        for num, interaction_file in enumerate(self.list_train_files):
            interaction = ReadFile(interaction_file, space_type=self.space_type)
            interaction.triple_information()
            self.individual_datasets.append(interaction.triple_dataset)
            self.final_dataset += interaction.triple_dataset

            if num + 1 == len(self.list_train_files):
                for triple in interaction.triple_dataset:
                    self.dict_item_tag[triple[0]] = self.dict_item_tag.get(triple[0], 0) + 1
                    self.dict_item_tag[triple[1]] = self.dict_item_tag.get(triple[1], 0) + 1

        self.dict_item, self.dict_not_item, self.list_users, self.list_items, \
            self.dict_index = return_list_info(self.final_dataset)

        self.list_users = list(self.list_users)
        self.list_items = list(self.list_items)
    def all_but_one_evaluation(self, file_result, file_test, n_ranks=list([1, 3, 5, 10])):
        check_error_file(file_result)
        check_error_file(file_test)

        predict = ReadFile(file_result, space_type=self.space_type)
        predict.main_information_item_recommendation()
        test = ReadFile(file_test, space_type=self.space_type)
        test.main_information_item_recommendation()

        num_user = len(test.list_users)
        final_values = list()

        for user in test.list_users:
            test.dict_users[user] = [test.dict_users[user][0]]

        for i, n in enumerate(n_ranks):
            if n < 1:
                print('Error: N must >= 1.')
                sys.exit()

            partial_precision = list()
            partial_recall = list()
            avg_prec_total = list()

            for user in test.list_users:
                num_user_interactions = len(test.dict_users[user])
                hit_cont = 0
                avg_prec_sum = 0

                try:
                    # Generate user intersection list between the recommended items and test.
                    intersection = list(set(predict.dict_users[user][:n]).intersection(
                        set(test.dict_users[user])))

                    if len(intersection) > 0:
                        partial_precision.append((float(len(intersection)) / float(n)))
                        partial_recall.append((float(len(intersection)) / float(num_user_interactions)))

                        for item in intersection:
                            hit_cont += 1
                            avg_prec_sum += (float(hit_cont) / float(test.dict_users[user].index(item) + 1))

                        avg_prec_total.append(float(avg_prec_sum) / float(num_user_interactions))

                except KeyError:
                    pass

            if not self.only_map:
                final_precision = sum(partial_precision) / float(num_user)
                final_values.append(final_precision)
                final_recall = sum(partial_recall) / float(num_user)
                final_values.append(final_recall)
            final_map = sum(avg_prec_total) / float(num_user)
            final_values.append(final_map)

        return final_values
    def simple_evaluation(self, file_result, file_test):
        # Verify that the files are valid
        check_error_file(file_result)
        check_error_file(file_test)

        predict = ReadFile(file_result, space_type=self.space_type)
        predict.main_information()
        test = ReadFile(file_test, space_type=self.space_type)
        test.main_information()

        rmse = 0
        mae = 0
        count_comp = 0
        for user in test.list_users:
            for item in test.dict_users[user]:
                try:
                    rui_predict = float(predict.dict_users[user][item])
                    rui_test = float(test.dict_users[user][item])
                    rmse += math.pow((rui_predict - rui_test), 2)
                    mae += math.fabs(rui_predict - rui_test)
                    count_comp += 1
                except KeyError:
                    pass

        if count_comp != 0:
            rmse = math.sqrt(float(rmse) / float(count_comp))
            mae = math.sqrt(float(mae) / float(count_comp))

        return rmse, mae
Ejemplo n.º 8
0
    def divide_dataset(self):
        tp = ReadFile(self.dataset, space_type=self.space_type)
        tp.split_dataset()

        for fold in xrange(self.n_folds):
            dict_feedback = list()
            tp.triple_dataset = list(set(tp.triple_dataset))
            random.shuffle(tp.triple_dataset)
            sp = int((1-self.test_ratio) * len(tp.triple_dataset))
            train = tp.triple_dataset[:sp]
            test = tp.triple_dataset[sp:]
            train.sort()
            test.sort(key=lambda x: x[0])
            train_set = list()
            test_set = list()

            for i, feedback in enumerate(self.dataset):
                dict_individual = dict()

                for triple in train:
                    try:
                        dict_individual.setdefault(triple[0], {}).update(
                            {triple[1]: tp.individual_interaction[i][triple[0]][triple[1]]})
                        train_set.append([triple[0], triple[1], tp.individual_interaction[i][triple[0]][triple[1]]])
                    except KeyError:
                        pass

                for triple_test in test:
                    try:
                        test_set.append([triple_test[0], triple_test[1],
                                         tp.individual_interaction[i][triple_test[0]][triple_test[1]]])
                    except KeyError:
                        pass

                dict_feedback.append(dict_individual)

            self.dict_feedback_folds[fold] = dict_feedback
            self.dict_folds[fold] = {'train': train_set, 'test': test_set}
    def simple_evaluation(self, file_result, file_test):
        # Verify that the files are valid
        check_error_file(file_result)
        check_error_file(file_test)

        predict = ReadFile(file_result, space_type=self.space_type)
        predict.main_information_item_recommendation()
        test = ReadFile(file_test, space_type=self.space_type)
        test.main_information_item_recommendation()

        return self.default_evaluation(predict.dict_users, test)