def simple_evaluation(self, file_result, file_test):
        """
        Compute RMSE and MAE between a prediction file and a test file.

        Only (user, item) pairs present in both files are compared; pairs
        of the test file that are missing from the predictions are skipped.

        :param file_result: (file) prediction file to evaluate
        :param file_test: (file) test file
        :return: tuple (rmse, mae); both are 0 when no pair could be compared
        """
        predict = ReadFile(file_result,
                           space_type=self.space_type).return_information()
        test = ReadFile(file_test,
                        space_type=self.space_type).return_information()

        squared_error = 0.0
        absolute_error = 0.0
        count_comp = 0
        for user in test['users']:
            for item in test['feedback'][user]:
                try:
                    rui_predict = float(predict['feedback'][user][item])
                    rui_test = float(test['feedback'][user][item])
                except KeyError:
                    # No prediction for this pair: leave it out of the metrics.
                    continue
                error = rui_predict - rui_test
                squared_error += error * error
                absolute_error += math.fabs(error)
                count_comp += 1

        if count_comp == 0:
            return 0, 0

        rmse = math.sqrt(squared_error / count_comp)
        # MAE is the plain mean of the absolute errors (no square root).
        mae = absolute_error / count_comp

        return rmse, mae
Exemple #2
0
 def __init__(self,
              train_file,
              test_file=None,
              ranking_file=None,
              similarity_metric="correlation",
              neighbors=30,
              rank_number=10,
              implicit=False,
              space_type='\t'):
     """
     Load the training (and optional test) data and store the settings.

     :param train_file: (file) training file
     :param test_file: (file) optional test file
     :param ranking_file: stored as ``self.ranking_file``
     :param similarity_metric: stored as ``self.similarity_metric``
     :param neighbors: stored as ``self.k``
     :param rank_number: stored as ``self.rank_number``
     :param implicit: forwarded to ``return_information`` for the training file
     :param space_type: column separator used to read both input files
     """
     self.train_set = ReadFile(
         train_file, space_type=space_type).return_information(implicit)
     self.test_file = test_file
     self.users = self.train_set['users']
     self.items = self.train_set['items']
     if self.test_file is not None:
         # Read the test file with the same separator as the training file.
         self.test_set = ReadFile(
             test_file, space_type=space_type).return_information()
         # Union through sets so ids present in both files appear only once.
         self.users = sorted(
             set(self.train_set['users']) | set(self.test_set['users']))
         self.items = sorted(
             set(self.train_set['items']) | set(self.test_set['items']))
     self.k = neighbors
     self.similarity_metric = similarity_metric
     self.ranking_file = ranking_file
     self.rank_number = rank_number
     self.space_type = space_type
     self.matrix = self.train_set['matrix']
     self.ranking = list()
     self.si_matrix = None
 def read_ranking_files(self):
     """
     Read every file of ``self.list_rank_files``.

     For each file the first value returned by ``read_rankings`` is appended
     to ``self.rankings`` and the [min, max] of the second value is appended
     to ``self.normalization``.
     """
     for path in self.list_rank_files:
         reader = ReadFile(path, space_type=self.space_type)
         rank_interaction, list_interaction = reader.read_rankings()
         self.rankings.append(rank_interaction)
         lowest = min(list_interaction)
         highest = max(list_interaction)
         self.normalization.append([lowest, highest])
Exemple #4
0
    def treat_interactions(self):
        """
        Read every training file and build the combined dataset structures.

        Each file's triples are stored in ``self.individual_datasets`` and
        concatenated into ``self.final_dataset``; the combined dataset is then
        summarised by ``return_list_info``.
        """
        for interaction_file in self.list_train_files:
            reader = ReadFile(interaction_file, space_type=self.space_type)
            reader.triple_information()
            self.individual_datasets.append(reader.triple_dataset)
            self.final_dataset += reader.triple_dataset

        summary = return_list_info(self.final_dataset)
        self.dict_item, self.dict_not_item, users, items, self.dict_index = summary

        # Keep users and items as lists.
        self.list_users = list(users)
        self.list_items = list(items)
 def __init__(self, train_file, test_file=None, ranking_file=None, rank_number=10, space_type='\t'):
     """
     Load the training (and optional test) data and store the settings.

     :param train_file: (file) training file
     :param test_file: (file) optional test file
     :param ranking_file: stored as ``self.ranking_file``
     :param rank_number: stored as ``self.rank_number``
     :param space_type: column separator used to read both input files
     """
     self.train_set = ReadFile(train_file, space_type=space_type).return_information()
     self.test_file = test_file
     self.users = self.train_set['users']
     self.items = self.train_set['items']
     if self.test_file is not None:
         # Read the test file with the same separator as the training file.
         self.test_set = ReadFile(test_file, space_type=space_type).return_information()
         # Union through sets so ids present in both files appear only once.
         self.users = sorted(set(self.train_set['users']) | set(self.test_set['users']))
         self.items = sorted(set(self.train_set['items']) | set(self.test_set['items']))
     self.ranking_file = ranking_file
     self.rank_number = rank_number
     self.space_type = space_type
     self.ranking = list()
Exemple #6
0
    def divide_dataset(self):
        """
        Build ``self.n_folds`` random train/test splits of the dataset.

        For every fold the (deduplicated) triples of ``tp.triple_dataset`` are
        shuffled and cut at ``(1 - self.test_ratio)``; the values attached to
        each pair are then looked up in ``tp.individual_interaction[i]`` for
        every entry ``i`` of ``self.dataset``. Results are stored in
        ``self.dict_folds`` and ``self.dict_feedback_folds`` and, when
        ``self.dir_folds`` is set, written out through ``WriteFile``.
        """
        tp = ReadFile(self.dataset, space_type=self.space_type)
        # NOTE(review): split_dataset presumably fills tp.triple_dataset and
        # tp.individual_interaction, which are read below -- confirm in ReadFile.
        tp.split_dataset()

        for fold in range(self.n_folds):
            # One dict per entry of self.dataset (despite the name, this is a list).
            dict_feedback = list()
            # Drop duplicated triples before shuffling.
            tp.triple_dataset = list(set(tp.triple_dataset))
            random.shuffle(tp.triple_dataset)
            # Index separating the train part from the test part.
            sp = int((1 - self.test_ratio) * len(tp.triple_dataset))
            train = tp.triple_dataset[:sp]
            test = tp.triple_dataset[sp:]
            train.sort()
            # The test part is ordered by its first field only.
            test.sort(key=lambda x: x[0])
            # These two lists accumulate rows for ALL entries of self.dataset;
            # they are not reset inside the loop below.
            train_set = list()
            test_set = list()

            for i, feedback in enumerate(self.dataset):
                dict_individual = dict()

                for triple in train:
                    try:
                        # setdefault runs before the lookup, so a pair missing
                        # from individual_interaction[i] still leaves an
                        # (possibly empty) entry for triple[0] in dict_individual.
                        dict_individual.setdefault(triple[0], {}).update({
                            triple[1]:
                            tp.individual_interaction[i][triple[0]][triple[1]]
                        })
                        train_set.append([
                            triple[0], triple[1],
                            tp.individual_interaction[i][triple[0]][triple[1]]
                        ])
                    except KeyError:
                        # Pair not present in this interaction source: skip it.
                        pass

                for triple_test in test:
                    try:
                        test_set.append([
                            triple_test[0], triple_test[1],
                            tp.individual_interaction[i][triple_test[0]][
                                triple_test[1]]
                        ])
                    except KeyError:
                        # Pair not present in this interaction source: skip it.
                        pass

                dict_feedback.append(dict_individual)

            self.dict_feedback_folds[fold] = dict_feedback
            self.dict_folds[fold] = {'train': train_set, 'test': test_set}

        # Persist the folds only when an output directory was configured.
        if self.dir_folds is not None:
            WriteFile(self.dir_folds, self.dict_folds,
                      self.space_type).split_dataset(self.dict_feedback_folds,
                                                     self.dataset)
Exemple #7
0
    def divide_dataset(self):
        """
        Split the dataset into ``self.n_folds`` cross-validation folds.

        The feedback list is shuffled and cut into ``n_folds`` contiguous,
        non-overlapping partitions; the last partition also receives the
        remainder when the number of interactions is not a multiple of
        ``n_folds``. Fold ``k`` uses partition ``k`` as test set and the
        concatenation of all other partitions as training set. The result is
        stored in ``self.dict_folds`` and, when ``self.dir_folds`` is set,
        written out through ``WriteFile``.
        """
        self.tp = ReadFile(self.dataset, space_type=self.space_type).return_information()
        random.shuffle(self.tp['list_feedback'])

        # Base number of interactions that each partition should have.
        partition_size = int(float(self.tp['ni']) / float(self.n_folds))

        list_folds = list()
        for p in range(self.n_folds):
            initial = p * partition_size
            if p + 1 == self.n_folds:
                # Last partition: run to the end so no interaction is dropped.
                final = None
            else:
                final = (p + 1) * partition_size
            list_folds.append(self.tp['list_feedback'][initial:final])

        for fold in range(self.n_folds):
            train_set = list()
            for fold_train in range(self.n_folds):
                if fold_train != fold:
                    train_set += list_folds[fold_train]
            # Sort once, after every training partition has been gathered.
            train_set.sort()

            list_folds[fold].sort()
            self.dict_folds[fold] = {'train': train_set, 'test': list_folds[fold]}

        if self.dir_folds is not None:
            WriteFile(self.dir_folds, self.dict_folds, self.space_type).cross_fold_validation()
Exemple #8
0
    def execute(self,
                measures=('Prec@5', 'Prec@10', 'NDCG@5', 'NDCG@10', 'MAP@5',
                          'MAP@10')):
        """
        Run the algorithm: report dataset statistics, obtain the similarity
        matrix, predict and, when a test file was given, evaluate.

        The similarity matrix is read from ``self.similarity_matrix_file``
        when that is set and computed otherwise; prediction runs in both
        cases.

        :param measures: measures forwarded to ``self.evaluate``
        """
        print(
            "[Case Recommender: Item Recommendation > Item Attribute KNN Algorithm]\n"
        )
        print("training data:: ", len(self.train_set['users']), " users and ",
              len(self.train_set['items']), " items and ",
              self.train_set['ni'], " interactions | sparsity ",
              self.train_set['sparsity'])

        if self.test_file is not None:
            # The statistics come from self.test_set; no need to re-read the file.
            print("test data:: ", len(self.test_set['users']), " users and ",
                  len(self.test_set['items']), " items and ",
                  (self.test_set['ni']), " interactions | sparsity ",
                  self.test_set['sparsity'])

        if self.similarity_matrix_file is not None:
            print("training time:: ", timed(self.read_matrix), " sec")
        else:
            print("training time:: ", timed(self.compute_similarity), " sec")
        # Prediction must run whichever way the similarity matrix was obtained.
        print("prediction_time:: ", timed(self.predict), " sec\n")

        if self.test_file is not None:
            self.evaluate(measures)
    def __init__(self,
                 train_file,
                 test_file=None,
                 metadata_file=None,
                 similarity_matrix_file=None,
                 ranking_file=None,
                 neighbors=30,
                 rank_number=10,
                 similarity_metric="correlation",
                 space_type='\t'):
        """
        Initialise the KNN recommender with metadata or a similarity matrix.

        At least one of ``metadata_file`` and ``similarity_matrix_file`` is
        required; otherwise a message is printed and the process exits with a
        non-zero status.

        :param train_file: (file) training file, forwarded to ``UserKNN``
        :param test_file: (file) optional test file, forwarded to ``UserKNN``
        :param metadata_file: (file) metadata read with ``read_metadata``;
            its matrix replaces ``self.matrix``
        :param similarity_matrix_file: stored as ``self.similarity_matrix_file``
        :param ranking_file: forwarded to ``UserKNN``
        :param neighbors: forwarded to ``UserKNN``
        :param rank_number: forwarded to ``UserKNN``
        :param similarity_metric: forwarded to ``UserKNN``
        :param space_type: column separator used to read the input files
        """
        # Validate first so a misconfiguration fails before any file is loaded.
        if metadata_file is None and similarity_matrix_file is None:
            print(
                "This algorithm needs a similarity matrix or a metadata file!")
            # Non-zero status: this is an error, not a successful termination.
            sys.exit(1)

        UserKNN.__init__(self,
                         train_file,
                         test_file=test_file,
                         ranking_file=ranking_file,
                         neighbors=neighbors,
                         rank_number=rank_number,
                         similarity_metric=similarity_metric,
                         space_type=space_type)

        if metadata_file is not None:
            self.metadata = ReadFile(
                metadata_file, space_type=space_type).read_metadata(self.users)
            self.matrix = self.metadata['matrix']
        self.similarity_matrix_file = similarity_matrix_file
Exemple #10
0
    def __init__(self,
                 train_file,
                 test_file,
                 metadata_file=None,
                 similarity_matrix_file=None,
                 prediction_file=None,
                 neighbors=30,
                 similarity_metric="correlation",
                 space_type='\t'):
        """
        Initialise the KNN predictor with metadata or a similarity matrix.

        At least one of ``metadata_file`` and ``similarity_matrix_file`` is
        required; otherwise a message is printed and the process exits with a
        non-zero status.

        :param train_file: (file) training file, forwarded to ``ItemKNN``
        :param test_file: (file) test file, forwarded to ``ItemKNN``
        :param metadata_file: (file) metadata read with ``read_metadata``;
            the transpose of its matrix replaces ``self.matrix``
        :param similarity_matrix_file: stored as ``self.similarity_matrix_file``
        :param prediction_file: forwarded to ``ItemKNN``
        :param neighbors: forwarded to ``ItemKNN``
        :param similarity_metric: forwarded to ``ItemKNN``
        :param space_type: column separator used to read the input files
        """
        # Validate first so a misconfiguration fails before any file is loaded.
        if metadata_file is None and similarity_matrix_file is None:
            print(
                "This algorithm needs a similarity matrix or a metadata file!")
            # Non-zero status: this is an error, not a successful termination.
            sys.exit(1)

        ItemKNN.__init__(self,
                         train_file,
                         test_file,
                         prediction_file=prediction_file,
                         neighbors=neighbors,
                         similarity_metric=similarity_metric,
                         space_type=space_type)

        if metadata_file is not None:
            self.metadata = ReadFile(
                metadata_file, space_type=space_type).read_metadata(self.items)
            self.matrix = self.metadata['matrix'].T
        self.similarity_matrix_file = similarity_matrix_file
Exemple #11
0
 def __init__(self,
              train_file,
              test_file,
              prediction_file=None,
              similarity_metric="correlation",
              neighbors=30,
              space_type='\t'):
     """
     Read the train and test files and set up the KNN state.

     :param train_file: (file) training file
     :param test_file: (file) test file
     :param prediction_file: stored as ``self.prediction_file``
     :param similarity_metric: stored as ``self.similarity_metric``
     :param neighbors: stored as ``self.k``
     :param space_type: column separator used to read both files
     """
     train_reader = ReadFile(train_file, space_type=space_type)
     self.train_set = train_reader.return_information()
     test_reader = ReadFile(test_file, space_type=space_type)
     self.test_set = test_reader.return_information()
     BaseKNNRecommenders.__init__(self, self.train_set, self.test_set)

     # Algorithm settings.
     self.k = neighbors
     self.similarity_metric = similarity_metric
     self.prediction_file = prediction_file

     # Filled in later.
     self.predictions = []
     self.su_matrix = None
Exemple #12
0
    def simple_evaluation(self, file_result, file_test):
        """
        Evaluate a ranking file against a test file.

        :param file_result: (file) ranking file to evaluate
        :param file_test: (file) test file
        :return: whatever ``self.default_evaluation`` returns for the two files
        """

        # Both paths must pass the file check before anything is read.
        for path in (file_result, file_test):
            check_error_file(path)

        ranking_info = ReadFile(
            file_result, space_type=self.space_type).return_information()
        test_info = ReadFile(
            file_test, space_type=self.space_type).return_information()

        return self.default_evaluation(ranking_info, test_info)
Exemple #13
0
    def evaluation_ranking(self, ranking, test_file):
        """
        Evaluate an in-memory ranking against a test file.

        :param ranking: iterable of samples; ``sample[0]`` is used as the key
            and ``sample[1]`` is appended to that key's list
        :param test_file: (file) test file
        :return: whatever ``self.default_evaluation`` returns
        """
        test = ReadFile(test_file, space_type=self.space_type).return_information()

        # Group the second field of every sample by its first field,
        # keeping the order of the ranking.
        du_order = {}
        for sample in ranking:
            du_order.setdefault(sample[0], []).append(sample[1])

        return self.default_evaluation({'du_order': du_order}, test)
Exemple #14
0
    def __init__(self,
                 train_file,
                 test_file,
                 prediction_file=None,
                 factors=10,
                 init_mean=0.1,
                 init_stdev=0.1,
                 space_type='\t'):
        """
        Read the train and test files and prepare the id <-> index mappings.

        :param train_file: (file) training file
        :param test_file: (file) test file
        :param prediction_file: stored as ``self.prediction_file``
        :param factors: stored as ``self.factors``
        :param init_mean: stored as ``self.init_mean``
        :param init_stdev: stored as ``self.init_stdev``
        :param space_type: column separator used to read both files
        """
        self.train_set = ReadFile(
            train_file, space_type=space_type).return_information()
        self.test_set = ReadFile(
            test_file, space_type=space_type).return_information()

        self.prediction_file = prediction_file
        self.factors = factors
        self.init_mean = init_mean
        self.init_stdev = init_stdev

        # Sorted, duplicate-free union of the ids found in both files.
        all_users = list(self.train_set["users"]) + list(self.test_set["users"])
        all_items = list(self.train_set["items"]) + list(self.test_set["items"])
        self.users = sorted(set(all_users))
        self.items = sorted(set(all_items))
        self.number_users = len(self.users)
        self.number_items = len(self.items)

        self.metadata = None
        self.number_metadata = None

        # id -> position and position -> id, for items and users.
        self.map_items = {item: i for i, item in enumerate(self.items)}
        self.map_items_index = dict(enumerate(self.items))
        self.map_users = {user: u for u, user in enumerate(self.users)}
        self.map_users_index = dict(enumerate(self.users))

        # internal vars
        self.x = None
        self.p = None
        self.q = None
        self.w = None
        self.b = None
        self.c = None
        self.last_rmse = 0
        self.predictions = []
    def treat_interactions(self):
        """
        Read every training file and build the combined dataset structures.

        Each file's triples are stored in ``self.individual_datasets`` and
        concatenated into ``self.final_dataset``. For the last file only, the
        first two fields of every triple are counted in ``self.dict_item_tag``.
        """
        last_position = len(self.list_train_files) - 1

        for position, interaction_file in enumerate(self.list_train_files):
            reader = ReadFile(interaction_file, space_type=self.space_type)
            reader.triple_information()
            self.individual_datasets.append(reader.triple_dataset)
            self.final_dataset += reader.triple_dataset

            if position != last_position:
                continue

            # Last file: count occurrences of both leading fields.
            for triple in reader.triple_dataset:
                for key in (triple[0], triple[1]):
                    self.dict_item_tag[key] = self.dict_item_tag.get(key, 0) + 1

        summary = return_list_info(self.final_dataset)
        self.dict_item, self.dict_not_item, users, items, self.dict_index = summary

        # Keep users and items as lists.
        self.list_users = list(users)
        self.list_items = list(items)
Exemple #16
0
    def __init__(self, list_ranks, test_file, write_file=""):
        """
        Read the rankings, combine them and write the result.

        Construction runs the whole pipeline: after the state is set up,
        ``self.ensemble()`` and ``self.write_results()`` are called.

        :param list_ranks: passed to ``ReadFile(...).ensemble()``
        :param test_file: stored as ``self.test_file``
        :param write_file: stored as ``self.write_file``
        """
        self.list_ranks = list_ranks
        self.test_file = test_file
        self.write_file = write_file

        reader = ReadFile(self.list_ranks)
        self.dict_ranks = reader.ensemble()
        self.list_users = set()
        self.final_ranking = {}

        # Run the pipeline immediately.
        self.ensemble()
        self.write_results()
Exemple #17
0
    def all_but_one_evaluation(self, file_result, file_test):
        """
        All-but-one protocol: keep a single test item per user and evaluate
        the ranking against it.

        :param file_result: (file) ranking file to evaluate
        :param file_test: (file) test file
        :return: whatever ``self.default_evaluation`` returns
        """

        # Both paths must pass the file check before anything is read.
        for path in (file_result, file_test):
            check_error_file(path)

        predict = ReadFile(
            file_result, space_type=self.space_type).return_information()
        test = ReadFile(
            file_test, space_type=self.space_type).return_information()

        # Reduce each user's test items to the first one only.
        for user in test['users']:
            kept_item = list(test['du'][user])[0]
            test['du'][user] = [kept_item]

        return self.default_evaluation(predict, test)
    def simple_evaluation_item_cold_start(self,
                                          file_result,
                                          file_test,
                                          file_train,
                                          min_feedback=10):
        """
        Compute RMSE and MAE restricted to items with few training feedbacks.

        An item is selected when it has at most ``min_feedback`` entries in
        the training file. Only (user, item) pairs of the test file whose item
        is selected and that also appear in the predictions are compared.

        :param file_result: (file) prediction file to evaluate
        :param file_test: (file) test file
        :param file_train: (file) training file used to select the items
        :param min_feedback: (int) maximum number of training feedbacks an
            item may have to be selected
        :return: tuple (rmse, mae); both are 0 when no pair could be compared
        """
        predict = ReadFile(file_result,
                           space_type=self.space_type).return_information()
        test = ReadFile(file_test,
                        space_type=self.space_type).return_information()
        train = ReadFile(file_train,
                         space_type=self.space_type).return_information()

        new_items = {
            item
            for item in train['items']
            if len(train['di'][item]) <= min_feedback
        }

        print(len(new_items))

        squared_error = 0.0
        absolute_error = 0.0
        count_comp = 0
        for user in test['users']:
            for item in test['feedback'][user]:
                if item not in new_items:
                    continue
                try:
                    rui_predict = float(predict['feedback'][user][item])
                    rui_test = float(test['feedback'][user][item])
                except KeyError:
                    # No prediction for this pair: leave it out of the metrics.
                    continue
                error = rui_predict - rui_test
                squared_error += error * error
                absolute_error += math.fabs(error)
                count_comp += 1

        if count_comp == 0:
            return 0, 0

        rmse = math.sqrt(squared_error / count_comp)
        # MAE is the plain mean of the absolute errors (no square root).
        mae = absolute_error / count_comp

        return rmse, mae
    def read_training_data(self):
        """
        Load the training files and initialise mappings and factor sets.

        Fills ``self.train_info``, the user/item lists and their counts, the
        ``map_user`` / ``map_item`` position lookups and one set of factors
        in ``self.rf`` per training file.
        """
        reader = ReadFile(self.list_train_files)
        (self.train_info, self.list_users, self.list_items,
         self.num_events, _) = reader.ensemble_test()
        self.number_users = len(self.list_users)
        self.number_items = len(self.list_items)

        # NOTE(review): hard-coded override of the value just read from the
        # files; the original comment here said "remove" -- confirm whether
        # this is a debugging leftover.
        self.num_events = 5000

        for position, user in enumerate(self.list_users):
            self.map_user[user] = position
        for position, item in enumerate(self.list_items):
            self.map_item[item] = position

        # One independent set of factors per training file.
        for r, _ in enumerate(self.list_train_files):
            p, q, bias, beta = self._create_factors()
            self.rf[r] = {"p": p, "q": q, "bias": bias, "beta": beta}
Exemple #20
0
    def __init__(self, training_file, k_row=5, l_col=5, density_low=0.008):
        """
        :param training_file: (string:: file) training file, read with
            ``implicit=True``
        :param k_row: (int) number of row clusters
        :param l_col: (int) number of column clusters
        :param density_low: (float) threshold to change the density matrix values
        """

        reader = ReadFile(training_file)
        self.training_set = reader.return_information(implicit=True)
        self.k_row = k_row
        self.l_col = l_col
        self.density_low = density_low

        # One empty list per row cluster and per column cluster.
        self.list_row = [[] for _ in range(self.k_row)]
        self.list_col = [[] for _ in range(self.l_col)]

        self.count_total = []
        self.count_ones = []
        self.density = None
        self.delta_entropy = []
Exemple #21
0
    def __init__(self,
                 train_file,
                 test_file,
                 metadata_file,
                 prediction_file=None,
                 steps=30,
                 learn_rate=0.01,
                 delta=0.015,
                 factors=10,
                 init_mean=0.1,
                 init_stdev=0.1,
                 alpha=0.001,
                 batch=False,
                 n2=10,
                 learn_rate2=0.01,
                 delta2=0.015,
                 space_type='\t'):
        """
        Initialise the base model, load the item metadata and precompute the
        per-item helpers used later.

        :param train_file: (file) training file, forwarded to ``BaseNSVD1``
        :param test_file: (file) test file, forwarded to ``BaseNSVD1``
        :param metadata_file: (file) metadata read with ``read_metadata``
        :param prediction_file: forwarded to ``BaseNSVD1``
        :param factors: forwarded to ``BaseNSVD1``
        :param init_mean: forwarded to ``BaseNSVD1``
        :param init_stdev: forwarded to ``BaseNSVD1``
        :param space_type: column separator used to read the input files

        ``steps``, ``learn_rate``, ``delta``, ``alpha``, ``batch``, ``n2``,
        ``learn_rate2`` and ``delta2`` are stored on the instance under the
        same names.
        """
        BaseNSVD1.__init__(self, train_file, test_file, prediction_file,
                           factors, init_mean, init_stdev, space_type)

        metadata_reader = ReadFile(metadata_file, space_type=space_type)
        self.metadata = metadata_reader.read_metadata(self.items)
        self.number_metadata = len(self.metadata["metadata"])

        # Training settings.
        self.batch = batch
        self.steps = steps
        self.learn_rate = learn_rate
        self.delta = delta
        self.alpha = alpha
        self.n2 = n2
        self.learn_rate2 = learn_rate2
        self.delta2 = delta2

        # Internal
        self.x = self.metadata['matrix']
        self.non_zero_x = []
        self.d = []
        for i in range(self.number_items):
            row = self.x[i]
            # Positions of the non-zero entries of this row.
            self.non_zero_x.append(list(np.where(row != 0)[0]))
            # Reciprocal of the row's dot product with itself; division
            # warnings are silenced for rows where that product is zero.
            with np.errstate(divide='ignore'):
                self.d.append(1 / np.dot(row.T, row))
Exemple #22
0
    def execute(self,
                measures=('Prec@5', 'Prec@10', 'NDCG@5', 'NDCG@10', 'MAP@5',
                          'MAP@10')):
        """
        Run the algorithm: report dataset statistics, create the factors,
        train, predict and, when a test file was given, evaluate.

        :param measures: measures forwarded to ``self.evaluate``
        """
        # methods
        print("[Case Recommender: Item Recommendation > BPR MF Algorithm]\n")
        print("training data:: ", len(self.train_set['users']), " users and ",
              len(self.train_set['items']), " items and ",
              self.train_set['ni'], " interactions | sparsity ",
              self.train_set['sparsity'])

        if self.test_file is not None:
            # The statistics come from self.test_set; no need to re-read the file.
            print("test data:: ", len(self.test_set['users']), " users and ",
                  len(self.test_set['items']), " items and ",
                  (self.test_set['ni']), " interactions | sparsity ",
                  self.test_set['sparsity'])

        self._create_factors()
        print("training time:: ", timed(self.train_model), " sec")
        print("prediction_time:: ", timed(self.predict), " sec\n")

        if self.test_file is not None:
            self.evaluate(measures)
Exemple #23
0
 def read_matrix(self):
     """Load ``self.similarity_matrix_file`` into ``self.si_matrix``."""
     reader = ReadFile(self.similarity_matrix_file)
     self.si_matrix = reader.read_matrix()
Exemple #24
0
    def __init__(self,
                 train_file,
                 test_file=None,
                 ranking_file=None,
                 factors=10,
                 learn_rate=0.05,
                 num_interactions=30,
                 num_events=None,
                 predict_items_number=10,
                 init_mean=0.1,
                 init_stdev=0.1,
                 reg_u=0.0025,
                 reg_i=0.0025,
                 reg_j=0.00025,
                 reg_bias=0,
                 use_loss=True,
                 rank_number=10,
                 space_type='\t'):
        """
        Load the training (and optional test) data, store the settings and
        build the id <-> index mappings.

        :param train_file: (file) training file
        :param test_file: (file) optional test file
        :param ranking_file: stored as ``self.ranking_file``
        :param num_events: stored as ``self.num_events``; defaults to the
            number of training interactions (``train_set['ni']``) when None
        :param space_type: column separator used to read both input files

        The remaining parameters (``factors``, ``learn_rate``,
        ``num_interactions``, ``predict_items_number``, ``init_mean``,
        ``init_stdev``, ``reg_u``, ``reg_i``, ``reg_j``, ``reg_bias``,
        ``use_loss``, ``rank_number``) are stored on the instance under the
        same names.
        """
        # external vars
        self.train_set = ReadFile(train_file,
                                  space_type=space_type).return_information()
        self.test_file = test_file
        self.ranking_file = ranking_file
        self.factors = factors
        self.learn_rate = learn_rate
        self.predict_items_number = predict_items_number
        self.init_mean = init_mean
        self.init_stdev = init_stdev
        self.num_interactions = num_interactions
        self.reg_bias = reg_bias
        self.reg_u = reg_u
        self.reg_i = reg_i
        self.reg_j = reg_j
        self.use_loss = use_loss
        self.rank_number = rank_number
        self.train_set["users"] = list(self.train_set['users'])
        self.train_set["items"] = list(self.train_set['items'])
        self.users = list(self.train_set['users'])
        self.items = list(self.train_set['items'])
        if self.test_file is not None:
            # Read the test file with the same separator as the training file.
            self.test_set = ReadFile(
                test_file, space_type=space_type).return_information()
            self.test_set['users'] = list(self.test_set['users'])
            self.test_set['items'] = list(self.test_set['items'])
            # Union through sets: an id present in both files must get a
            # single index, otherwise the two mapping directions disagree.
            self.users = sorted(
                set(self.train_set['users']) | set(self.test_set['users']))
            self.items = sorted(
                set(self.train_set['items']) | set(self.test_set['items']))
        if num_events is None:
            self.num_events = self.train_set['ni']
        else:
            self.num_events = num_events

        # internal vars
        self.loss = None
        self.loss_sample = list()
        self.ranking = list()

        # id -> position and position -> id, for items and users.
        self.map_items = {item: i for i, item in enumerate(self.items)}
        self.map_items_index = dict(enumerate(self.items))
        self.map_users = {user: u for u, user in enumerate(self.users)}
        self.map_users_index = dict(enumerate(self.users))
    def __init__(self,
                 train_file,
                 test_file,
                 prediction_file=None,
                 steps=30,
                 learn_rate=0.01,
                 delta=0.015,
                 factors=10,
                 init_mean=0.1,
                 init_stdev=0.1,
                 baseline=False,
                 bias_learn_rate=0.005,
                 delta_bias=0.002,
                 random_seed=0):
        """
        Load the train and test data, build the id <-> index mappings,
        re-index the training feedback and create the factors.

        :param train_file: (file) training file
        :param test_file: (file) test file
        :param prediction_file: stored as ``self.prediction_file``
        :param random_seed: seed given to ``np.random.seed``; the value 0
            leaves the random generator unseeded

        The remaining parameters (``steps``, ``learn_rate``, ``delta``,
        ``factors``, ``init_mean``, ``init_stdev``, ``baseline``,
        ``bias_learn_rate``, ``delta_bias``) are stored on the instance under
        the same names.
        """
        self.train_set = ReadFile(train_file).return_information()
        self.test_set = ReadFile(test_file).return_information()
        self.prediction_file = prediction_file
        self.steps = steps
        self.learn_rate = learn_rate
        self.delta = delta
        self.factors = factors
        self.init_mean = init_mean
        self.init_stdev = init_stdev
        self.baseline = baseline
        self.predictions = list()
        self.map_items = dict()
        self.map_items_index = dict()
        self.map_users = dict()
        self.map_users_index = dict()
        self.bias_learn_rate = bias_learn_rate
        self.delta_bias = delta_bias

        if random_seed != 0:
            # Honour the seed requested by the caller.
            np.random.seed(random_seed)

        self.p = None
        self.q = None
        self.bu = None
        self.bi = None

        # Sorted, duplicate-free union of the ids found in both files.
        self.users = sorted(
            set(list(self.train_set['users']) + list(self.test_set['users'])))
        self.items = sorted(
            set(list(self.train_set['items']) + list(self.test_set['items'])))

        for i, item in enumerate(self.items):
            self.map_items.update({item: i})
            self.map_items_index.update({i: item})
        for u, user in enumerate(self.users):
            self.map_users.update({user: u})
            self.map_users_index.update({u: user})

        # Rewrite the training feedback in terms of positions and record,
        # per user position, the item positions that user interacted with.
        list_feedback = list()
        self.dict_index = dict()
        for user, item, feedback in self.train_set['list_feedback']:
            list_feedback.append(
                (self.map_users[user], self.map_items[item], feedback))
            self.dict_index.setdefault(self.map_users[user],
                                       []).append(self.map_items[item])
        self.train_set['list_feedback'] = list_feedback
        self._create_factors()
 def read_rankings(self):
     """
     Read ``self.list_rankings_files`` through ``ensemble_test``.

     The first returned value is stored in ``self.rankings_info`` and the
     fifth in ``self.ir``; the three in between are discarded.
     """
     reader = ReadFile(self.list_rankings_files)
     ensemble_info = reader.ensemble_test()
     self.rankings_info, _, _, _, self.ir = ensemble_info