def __init__(self, input_file, sep='\t', header=None, names=None, as_binary=False, binary_col=2):
        """
        Store the reading options for one input file and validate the path.

        The default layout is a tab-delimited text file with one interaction
        per line, e.g. ``user<TAB>item<TAB>score``.

        :param input_file: Path of the input file (at least 2 columns expected).
        :type input_file: str

        :param sep: Column delimiter used in the input file.
        :type sep: str, default tab character

        :param header: Header row option; documented as only used by
            ``read_with_pandas`` (that method is not visible here).
        :type header: int, default None

        :param names: Column names; documented as only used by
            ``read_with_pandas``.
        :type names: str, default None  (NOTE(review): possibly a list of
            names -- confirm against read_with_pandas)

        :param as_binary: If True, explicit feedback is meant to be turned
            into binary feedback by the reading methods.
        :type as_binary: bool, default False

        :param binary_col: Index of the column read as binary; documented as
            only used by ``read_with_pandas``.
        :type binary_col: int, default 2

        """

        # Where to read from and how the columns are delimited / labelled.
        self.input_file = input_file
        self.sep = sep
        self.header = header
        self.names = names

        # Binary-feedback options.
        self.as_binary = as_binary
        self.binary_col = binary_col

        # Validate the path last, once every option has been stored.
        check_error_file(self.input_file)
Beispiel #2
0
 def read_matrix(self):
     """
     Read ``self.file_read`` as a numeric matrix, one row per non-blank line.

     Every line is split on ``self.space_type``; the last field of the split
     is discarded and the remaining fields are converted to floats.

     :return: ``numpy`` array built from the list of parsed rows
     """
     check_error_file(self.file_read)
     rows = []
     with open(self.file_read) as infile:
         for raw_line in infile:
             if not raw_line.strip():
                 continue
             cells = np.array(raw_line.split(self.space_type))
             # Drop the final field before the numeric conversion.
             rows.append(np.delete(cells, -1).astype(float))
     return np.array(rows)
Beispiel #3
0
    def triple_information(self):
        """
        Load ``[user, item, feedback]`` triples from ``self.file_read``.

        Blank lines are skipped. A line that does not split into more than
        one field on ``self.space_type`` prints an error and exits. Lines
        whose user or item field is not an integer are ignored. The feedback
        field is kept as a string with newline characters removed.

        Each accepted line is appended to ``self.triple_dataset`` and counted
        in ``self.number_interactions``.
        """
        check_error_file(self.file_read)
        with open(self.file_read) as infile:
            for raw_line in infile:
                if not raw_line.strip():
                    continue

                fields = raw_line.split(self.space_type)
                if len(fields) == 1:
                    print("Error: Space type is invalid!")
                    sys.exit()

                try:
                    user_id = int(fields[0])
                    item_id = int(fields[1])
                except ValueError:
                    # Non-integer ids (e.g. a header row): skip the line.
                    continue

                value = fields[2].replace("\n", "")
                self.triple_dataset.append([user_id, item_id, value])
                self.number_interactions += 1
Beispiel #4
0
    def simple_evaluation(self, file_result, file_test):
        """
        Evaluate a ranking file against a test file.

        Both paths are validated, each file is loaded with ``ReadFile`` using
        ``self.space_type`` as delimiter, and the two loaded structures are
        handed to ``self.default_evaluation``.

        :param file_result: (file) ranking file to evaluate
        :param file_test: (file) test file
        :return: whatever ``self.default_evaluation`` returns
        """

        # Validate both paths before any file is read.
        for path in (file_result, file_test):
            check_error_file(path)

        result_reader = ReadFile(file_result, space_type=self.space_type)
        predict = result_reader.return_information()

        test_reader = ReadFile(file_test, space_type=self.space_type)
        test = test_reader.return_information()

        return self.default_evaluation(predict, test)
Beispiel #5
0
    def all_but_one_evaluation(self, file_result, file_test):
        """
        All-but-one protocol: keep a single test item per user, then evaluate.

        After loading both files, every user's entry in ``test['du']`` is
        replaced by a one-element list holding the first item obtained when
        that entry is converted to a list. The reduced test data is then
        evaluated with ``self.default_evaluation``.

        :param file_result: (file) ranking file to evaluate
        :param file_test: (file) test file
        :return: whatever ``self.default_evaluation`` returns
        """

        # Validate both paths before any file is read.
        for path in (file_result, file_test):
            check_error_file(path)

        predict = ReadFile(file_result, space_type=self.space_type).return_information()
        test = ReadFile(file_test, space_type=self.space_type).return_information()

        items_by_user = test['du']
        for user in test['users']:
            first_item = list(items_by_user[user])[0]
            items_by_user[user] = [first_item]

        return self.default_evaluation(predict, test)
Beispiel #6
0
    def split_dataset(self):
        """
        Read every file listed in ``self.file_read`` and collect interactions.

        For each file a fresh ``self.dict_users`` mapping
        ``{user: {item: feedback}}`` is built and, once the file is fully
        read, appended to ``self.individual_interaction``. Every parsed
        ``(user, item)`` pair is also appended to ``self.triple_dataset`` and
        counted in ``self.number_interactions``.

        Blank lines are skipped. A line that does not split into more than
        one field on ``self.space_type`` prints an error and exits. Lines
        whose user/item are not integers or whose feedback is not a float are
        skipped and are NOT counted as interactions, so the counter matches
        the number of pairs actually stored (as in the other readers).
        """
        for path in self.file_read:
            self.dict_users = dict()
            check_error_file(path)
            with open(path) as infile:
                for line in infile:
                    if not line.strip():
                        continue

                    inline = line.split(self.space_type)
                    if len(inline) == 1:
                        print("Error: Space type is invalid!")
                        sys.exit()

                    try:
                        user = int(inline[0])
                        item = int(inline[1])
                        score = float(inline[2])
                    except ValueError:
                        # Header or malformed row: ignore it entirely.
                        continue

                    self.triple_dataset.append((user, item))
                    self.dict_users.setdefault(user, {})[item] = score
                    # Count only rows that were successfully parsed and stored.
                    self.number_interactions += 1

            self.individual_interaction.append(self.dict_users)
0
    def read_rankings(self):
        """
        Read ``user item score`` lines from ``self.file_read``.

        For every parsed line the score is stored in
        ``self.dict_users[user][item]``, added to
        ``self.average_scores[user]`` (a running sum per user) and counted in
        ``self.num_user_interactions[user]``.

        Blank lines are skipped. A line that does not split into more than
        one field on ``self.space_type`` prints an error and exits. Lines
        whose fields fail the int/int/float conversion are ignored.

        :return: tuple ``(self.dict_users, list of all parsed scores)``
        """
        scores = []
        check_error_file(self.file_read)
        with open(self.file_read) as infile:
            for raw_line in infile:
                if not raw_line.strip():
                    continue

                fields = raw_line.split(self.space_type)
                if len(fields) == 1:
                    print("Error: Space type is invalid!")
                    sys.exit()

                try:
                    user = int(fields[0])
                    item = int(fields[1])
                    score = float(fields[2])
                except ValueError:
                    continue

                self.dict_users.setdefault(user, {})[item] = score
                scores.append(score)

                # Per-user running total and interaction count.
                running_total = self.average_scores.get(user, 0)
                self.average_scores[user] = running_total + score
                seen_so_far = self.num_user_interactions.get(user, 0)
                self.num_user_interactions[user] = seen_so_far + 1

        return self.dict_users, scores
Beispiel #8
0
    def return_information(self, implicit=False):
        """
        Read ``self.file_read`` and return a dictionary describing the dataset.

        Each non-blank line is split on ``self.space_type`` and parsed as
        ``int user, int item, float feedback``. A line that yields a single
        field prints an error and exits; lines that fail the numeric
        conversion are silently skipped.

        The method accumulates into instance attributes (``triple_dataset``,
        ``dict_users``, ``dict_items``, ``list_users``, ``list_items``,
        ``mean_feedback``, ``number_interactions``), which are assumed to be
        initialised elsewhere -- TODO confirm in the constructor.

        :param implicit: If True, every observed (user, item) cell of the
            returned matrix is set to 1 instead of the feedback value.
        :return: dict with the keys ``feedback``, ``users``, ``items``,
            ``du``, ``dir``, ``mean_rates``, ``list_feedback``, ``ni``,
            ``max``, ``min``, ``sparsity``, ``not_seen``, ``matrix``,
            ``map_user``, ``map_item``, ``mu``, ``mi``, ``du_order``, ``di``.

        NOTE(review): if no line parses and ``self.number_interactions`` is
        still 0, the mean computation divides by zero; ``max``/``min`` of the
        empty feedback list would raise as well.
        """
        check_error_file(self.file_read)
        dict_file = dict()
        d_feedback = dict()      # user -> {item: feedback}
        list_feedback = list()   # every parsed feedback value, in file order
        not_seen = dict()        # user -> items the user has no interaction with
        map_user = dict()        # user id -> row index
        map_index_user = dict()  # row index -> user id
        du_feed = dict()         # user -> items in the order they were read

        with open(self.file_read) as infile:
            for line in infile:
                if line.strip():
                    inline = line.split(self.space_type)
                    if len(inline) == 1:
                        print("Error: Space type is invalid!")
                        sys.exit()
                    try:
                        user, item, feedback = int(inline[0]), int(
                            inline[1]), float(inline[2])
                        d_feedback.setdefault(user,
                                              {}).update({item: feedback})
                        self.triple_dataset.append((user, item, feedback))
                        self.dict_users.setdefault(user, set()).add(item)
                        self.dict_items.setdefault(item, set()).add(user)
                        du_feed.setdefault(user, list()).append(item)
                        self.list_users.add(user)
                        self.list_items.add(item)
                        self.mean_feedback += feedback
                        list_feedback.append(feedback)
                        self.number_interactions += 1
                    except ValueError:
                        # Conversion failed (e.g. header row): skip the line.
                        pass

        self.triple_dataset = sorted(self.triple_dataset)
        # mean_feedback held the running sum so far; turn it into the mean.
        self.mean_feedback /= float(self.number_interactions)
        # The sorted lists are wrapped in set() again, so both attributes
        # remain sets; the sort only fixes the insertion order of the rebuild.
        self.list_users = set(sorted(list(self.list_users)))
        self.list_items = set(sorted(list(self.list_items)))

        for user in self.list_users:
            not_seen[user] = list(
                set(self.list_items) - set(self.dict_users[user]))

        # Row index of each user follows the iteration order of the set.
        for u, user in enumerate(self.list_users):
            map_user[user] = u
            map_index_user[u] = user

        map_item = dict()        # item id -> column index
        map_index_item = dict()  # column index -> item id
        self.list_items = set(sorted(list(self.list_items)))

        # Column index of each item follows the iteration order of the set.
        for i, item in enumerate(self.list_items):
            map_item[item] = i
            map_index_item[i] = item

        matrix = np.zeros((len(self.list_users), len(self.list_items)))

        # Snapshot of item -> users keyed by raw item id, taken BEFORE the
        # loop below adds index-keyed entries to self.dict_items.
        di = copy.deepcopy(self.dict_items)

        for user in self.list_users:
            for item in self.dict_users[user]:
                if implicit:
                    matrix[map_user[user]][map_item[item]] = 1
                else:
                    matrix[map_user[user]][
                        map_item[item]] = d_feedback[user][item]
                # Adds entries keyed by column index with row-index values to
                # the same dict that already holds raw-id keys.
                # NOTE(review): raw ids and indices can collide here --
                # confirm this mixing is what callers of 'dir' expect.
                self.dict_items.setdefault(map_item[item],
                                           set()).add(map_user[user])

        # Percentage of (user, item) cells without an interaction.
        sparsity = (1 -
                    (self.number_interactions /
                     float(len(self.list_users) * len(self.list_items)))) * 100
        dict_file.update({
            'feedback': d_feedback,
            'users': self.list_users,
            'items': self.list_items,
            'du': self.dict_users,
            'dir': self.dict_items,
            'mean_rates': self.mean_feedback,
            'list_feedback': self.triple_dataset,
            'ni': self.number_interactions,
            'max': max(list_feedback),
            'min': min(list_feedback),
            'sparsity': sparsity,
            'not_seen': not_seen,
            'matrix': matrix,
            'map_user': map_index_user,
            'map_item': map_index_item,
            'mu': map_user,
            'mi': map_item,
            'du_order': du_feed,
            'di': di
        })

        return dict_file
Beispiel #9
0
    def read_metadata(self, l_items):
        """
        Read a metadata file and build a matrix over ``l_items`` x metadata.

        Each non-blank line of ``self.file_read`` is split on
        ``self.space_type`` and parsed as ``int, int, float``. A line that
        yields a single field prints an error and exits; lines that fail the
        numeric conversion are skipped.

        Rows of the returned matrix are the entries of ``l_items``; columns
        are the second-column ids collected in ``self.list_items``. Entries
        of ``l_items`` with no line in the file keep an all-zero row.

        :param l_items: iterable of ids used as the rows of the matrix
        :return: dict with the keys ``feedback``, ``items``, ``metadata``,
            ``di``, ``dm``, ``mean_rates``, ``list_feedback``, ``ni``,
            ``max``, ``min`` and ``matrix``
        """
        check_error_file(self.file_read)

        feedback_by_row = {}
        scores = []

        with open(self.file_read) as infile:
            for raw_line in infile:
                if not raw_line.strip():
                    continue

                fields = raw_line.split(self.space_type)
                if len(fields) == 1:
                    print("Error: Space type is invalid in metadata file!")
                    print(fields, self.space_type)
                    sys.exit()

                try:
                    row_id = int(fields[0])
                    col_id = int(fields[1])
                    score = float(fields[2])
                except ValueError:
                    continue

                feedback_by_row.setdefault(row_id, {})[col_id] = score
                self.triple_dataset.append((row_id, col_id, score))
                self.dict_users.setdefault(row_id, set()).add(col_id)
                self.dict_items.setdefault(col_id, set()).add(row_id)
                self.list_items.add(col_id)
                self.mean_feedback += score
                scores.append(score)
                self.number_interactions += 1

        self.triple_dataset = sorted(self.triple_dataset)
        # mean_feedback held the running sum so far; turn it into the mean.
        self.mean_feedback /= float(self.number_interactions)
        # Both attributes are rebuilt as sets from the sorted ids.
        self.list_users = set(sorted(l_items))
        self.list_items = set(sorted(self.list_items))

        # Matrix coordinates follow the iteration order of the two sets.
        row_of = {key: pos for pos, key in enumerate(self.list_users)}
        col_of = {key: pos for pos, key in enumerate(self.list_items)}

        matrix = np.zeros((len(self.list_users), len(self.list_items)))

        for key in self.list_users:
            try:
                for meta, score in feedback_by_row[key].items():
                    matrix[row_of[key]][col_of[meta]] = score
            except KeyError:
                # No line in the file for this row id: leave the row at zero.
                pass

        return {
            'feedback': feedback_by_row,
            'items': self.list_users,
            'metadata': self.list_items,
            'di': self.dict_users,
            'dm': self.dict_items,
            'mean_rates': self.mean_feedback,
            'list_feedback': self.triple_dataset,
            'ni': self.number_interactions,
            'max': max(scores),
            'min': min(scores),
            'matrix': matrix
        }