def rating_prediction(self):
    """Parse ``self.file_read`` as (user, item, feedback) rows and summarise it.

    Each non-blank line is split on ``self.space_type``; the first two
    columns are read as ints and the third as a float. While reading, the
    instance accumulators (``number_interactions``, ``triple_dataset``,
    ``dict_users``, ``dict_items``, ``list_users``, ``list_items``,
    ``mean_feedback``) are updated in place.

    Returns:
        dict with the keys ``feedback`` (user -> {item: feedback}),
        ``users`` / ``items`` (sorted ids), ``du`` (user -> set of items),
        ``di`` (item -> set of users), ``mean_rates``, ``list_feedback``
        (sorted triples), ``ni`` (number of rows read), ``max`` and ``min``.

    Raises:
        ZeroDivisionError: if the file holds no non-blank line, because the
            mean is divided by the number of interactions.
    """
    ratings_by_user = {}
    feedback_values = []

    check_error_file(self.file_read)

    with open(self.file_read) as infile:
        for raw_line in infile:
            if not raw_line.strip():
                # Blank lines carry no interaction.
                continue

            columns = raw_line.split(self.space_type)
            self.number_interactions += 1

            user = int(columns[0])
            item = int(columns[1])
            feedback = float(columns[2])

            ratings_by_user.setdefault(user, {})[item] = feedback
            self.triple_dataset.append((user, item, feedback))
            self.dict_users.setdefault(user, set()).add(item)
            self.dict_items.setdefault(item, set()).add(user)
            self.list_users.add(user)
            self.list_items.add(item)

            # Running sum for now; turned into a mean after the loop.
            self.mean_feedback += feedback
            feedback_values.append(feedback)

    self.triple_dataset = sorted(self.triple_dataset)
    self.mean_feedback /= float(self.number_interactions)
    self.list_users = sorted(self.list_users)
    self.list_items = sorted(self.list_items)

    return {
        'feedback': ratings_by_user,
        'users': self.list_users,
        'items': self.list_items,
        'du': self.dict_users,
        'di': self.dict_items,
        'mean_rates': self.mean_feedback,
        'list_feedback': self.triple_dataset,
        'ni': self.number_interactions,
        'max': max(feedback_values),
        'min': min(feedback_values),
    }
def simple_evaluation(self, file_result, file_test):
    """Compute RMSE and MAE of a prediction file against a test file.

    Both files are loaded with ``ReadFile(...).main_information()``. Only
    (user, item) pairs of the test set that also have a prediction take
    part in the metrics; pairs without a prediction are skipped.

    NOTE(review): assumes ``main_information()`` fills ``list_users`` and
    ``dict_users`` as ``user -> {item: rating}`` -- confirm against ReadFile.

    Args:
        file_result: path of the file holding the predicted ratings.
        file_test: path of the file holding the ground-truth ratings.

    Returns:
        Tuple ``(rmse, mae)``. Both are ``0`` when no test pair has a
        matching prediction.
    """
    # Verify that the files are valid
    check_error_file(file_result)
    check_error_file(file_test)

    predict = ReadFile(file_result, space_type=self.space_type)
    predict.main_information()
    test = ReadFile(file_test, space_type=self.space_type)
    test.main_information()

    squared_error_sum = 0
    absolute_error_sum = 0
    count_comp = 0

    for user in test.list_users:
        for item in test.dict_users[user]:
            try:
                # Only this lookup can miss: the pair comes from the test set.
                rui_predict = float(predict.dict_users[user][item])
            except KeyError:
                continue
            rui_test = float(test.dict_users[user][item])
            error = rui_predict - rui_test
            squared_error_sum += math.pow(error, 2)
            absolute_error_sum += math.fabs(error)
            count_comp += 1

    rmse = squared_error_sum
    mae = absolute_error_sum
    if count_comp != 0:
        rmse = math.sqrt(float(squared_error_sum) / float(count_comp))
        # MAE is the plain mean of the absolute errors; the square root
        # belongs to RMSE only.
        mae = float(absolute_error_sum) / float(count_comp)

    return rmse, mae
def triple_information(self):
    """Load ``self.file_read`` into ``self.triple_dataset`` as triples.

    Every non-blank line is split on ``self.space_type``. The first two
    columns are converted to int; the third is kept as a string with any
    newline characters removed. Each row is appended as a
    ``[user, item, feedback]`` list and ``self.number_interactions`` is
    incremented once per row.
    """
    check_error_file(self.file_read)

    with open(self.file_read) as infile:
        for raw_line in infile:
            if not raw_line.strip():
                continue

            columns = raw_line.split(self.space_type)
            self.number_interactions += 1

            user = int(columns[0])
            item = int(columns[1])
            # The feedback column stays textual; only the newline is stripped.
            feedback = columns[2].replace("\n", "")

            self.triple_dataset.append([user, item, feedback])
def all_but_one_evaluation(self, file_result, file_test, n_ranks=(1, 3, 5, 10)):
    """Evaluate rankings keeping a single test item per user.

    Both files are loaded with
    ``ReadFile(...).main_information_item_recommendation()``. For every
    test user only the first test item is kept, then precision, recall and
    MAP are computed for each cut-off in ``n_ranks``. Every metric is
    averaged over all test users, including users without a hit or without
    any recommendation.

    Args:
        file_result: path of the file holding the recommended items.
        file_test: path of the file holding the test interactions.
        n_ranks: iterable of cut-off positions (each must be >= 1). The
            default is an immutable tuple so it cannot be shared and
            mutated between calls.

    Returns:
        Flat list of floats. For each cut-off, in order: precision and
        recall (omitted when ``self.only_map`` is true), then MAP.

    Raises:
        SystemExit: when a cut-off lower than 1 is met (after printing an
            error message).
        ZeroDivisionError: when the test file yields no user.
    """
    check_error_file(file_result)
    check_error_file(file_test)

    predict = ReadFile(file_result, space_type=self.space_type)
    predict.main_information_item_recommendation()
    test = ReadFile(file_test, space_type=self.space_type)
    test.main_information_item_recommendation()

    num_user = len(test.list_users)
    final_values = []

    # All-but-one protocol: keep only the first test item of each user.
    for user in test.list_users:
        test.dict_users[user] = [test.dict_users[user][0]]

    for n in n_ranks:
        if n < 1:
            print('Error: N must >= 1.')
            sys.exit()

        partial_precision = []
        partial_recall = []
        avg_prec_total = []

        for user in test.list_users:
            held_out = test.dict_users[user]
            num_user_interactions = len(held_out)

            try:
                recommended = predict.dict_users[user][:n]
            except KeyError:
                # No recommendation for this user: contributes zero.
                continue

            # Items that are both in the top-n recommendations and in test.
            intersection = list(set(recommended).intersection(set(held_out)))
            if not intersection:
                continue

            partial_precision.append(len(intersection) / float(n))
            partial_recall.append(len(intersection) / float(num_user_interactions))

            avg_prec_sum = 0.0
            for hit_cont, item in enumerate(intersection, start=1):
                avg_prec_sum += hit_cont / float(held_out.index(item) + 1)
            avg_prec_total.append(avg_prec_sum / float(num_user_interactions))

        if not self.only_map:
            final_values.append(sum(partial_precision) / float(num_user))
            final_values.append(sum(partial_recall) / float(num_user))

        final_values.append(sum(avg_prec_total) / float(num_user))

    return final_values
def simple_evaluation(self, file_result, file_test):
    """Load a ranking file and a test file and run the default evaluation.

    Args:
        file_result: path of the file holding the recommended items.
        file_test: path of the file holding the test interactions.

    Returns:
        Whatever ``self.default_evaluation`` returns for the predicted
        ``dict_users`` mapping and the loaded test reader.
    """
    # Verify that the files are valid
    for path in (file_result, file_test):
        check_error_file(path)

    predicted_reader = ReadFile(file_result, space_type=self.space_type)
    predicted_reader.main_information_item_recommendation()

    test_reader = ReadFile(file_test, space_type=self.space_type)
    test_reader.main_information_item_recommendation()

    return self.default_evaluation(predicted_reader.dict_users, test_reader)
def read_matrix(self):
    """Read ``self.file_read`` as a numeric matrix, one row per line.

    Each non-blank line is split on ``self.space_type``; the last field of
    the split is discarded and the remaining fields are converted to float.

    Returns:
        ``np.array`` built from the list of per-line float arrays.
    """
    check_error_file(self.file_read)

    rows = []
    with open(self.file_read) as infile:
        for raw_line in infile:
            if not raw_line.strip():
                continue

            fields = np.array(raw_line.split(self.space_type))
            # The final field of every line is dropped before conversion.
            rows.append(fields[:-1].astype(float))

    return np.array(rows)
def read_metadata(self, l_items):
    """Parse an (item, metadata, value) file and build its dense matrix.

    Rows are read from ``self.file_read`` like rating triples (int, int,
    float split on ``self.space_type``). The first column plays the role
    of the matrix rows and the second of the columns. The row ids do not
    come from the file: they are taken from ``l_items``, so ids absent
    from the file get an all-zero row.

    Args:
        l_items: iterable of ids used (sorted) as the rows of the matrix;
            it is stored in ``self.list_users``.

    Returns:
        dict with the keys ``feedback`` (first id -> {second id: value}),
        ``items`` (sorted ``l_items``), ``metadata`` (sorted second-column
        ids), ``di``, ``dm``, ``mean_rates``, ``list_feedback`` (sorted
        triples), ``ni``, ``max``, ``min`` and ``matrix``.

    Raises:
        ZeroDivisionError: if the file holds no non-blank line.
    """
    values_by_row = {}
    feedback_values = []

    check_error_file(self.file_read)

    with open(self.file_read) as infile:
        for raw_line in infile:
            if not raw_line.strip():
                continue

            columns = raw_line.split(self.space_type)
            self.number_interactions += 1

            user = int(columns[0])
            item = int(columns[1])
            feedback = float(columns[2])

            values_by_row.setdefault(user, {})[item] = feedback
            self.triple_dataset.append((user, item, feedback))
            self.dict_users.setdefault(user, set()).add(item)
            self.dict_items.setdefault(item, set()).add(user)
            self.list_items.add(item)

            # Running sum for now; turned into a mean after the loop.
            self.mean_feedback += feedback
            feedback_values.append(feedback)

    self.triple_dataset = sorted(self.triple_dataset)
    self.mean_feedback /= float(self.number_interactions)
    self.list_users = sorted(l_items)
    self.list_items = sorted(self.list_items)

    # Position of every id along its matrix axis.
    row_of = {user: idx for idx, user in enumerate(self.list_users)}
    col_of = {item: idx for idx, item in enumerate(self.list_items)}

    matrix = np.zeros((len(self.list_users), len(self.list_items)))
    for user in self.list_users:
        row_values = values_by_row.get(user)
        if row_values is None:
            # Id listed in l_items but absent from the file: row stays zero.
            continue
        for item, value in row_values.items():
            matrix[row_of[user]][col_of[item]] = value

    return {
        'feedback': values_by_row,
        'items': self.list_users,
        'metadata': self.list_items,
        'di': self.dict_users,
        'dm': self.dict_items,
        'mean_rates': self.mean_feedback,
        'list_feedback': self.triple_dataset,
        'ni': self.number_interactions,
        'max': max(feedback_values),
        'min': min(feedback_values),
        'matrix': matrix,
    }
def read_rankings(self):
    """Read (user, item, score) rows from ``self.file_read``.

    For each non-blank line the score is stored in
    ``self.dict_users[user][item]``, added to the running total kept in
    ``self.average_scores[user]`` (a sum, not divided here) and counted in
    ``self.num_user_interactions[user]``.

    Returns:
        Tuple ``(self.dict_users, scores)`` where ``scores`` lists every
        score in file order.
    """
    scores = []

    check_error_file(self.file_read)

    with open(self.file_read) as infile:
        for raw_line in infile:
            if not raw_line.strip():
                continue

            columns = raw_line.split(self.space_type)
            user = int(columns[0])
            item = int(columns[1])
            feedback = float(columns[2])

            self.dict_users.setdefault(user, {})[item] = feedback
            scores.append(feedback)

            previous_total = self.average_scores.get(user, 0)
            self.average_scores[user] = previous_total + feedback

            previous_count = self.num_user_interactions.get(user, 0)
            self.num_user_interactions[user] = previous_count + 1

    return self.dict_users, scores
def split_dataset(self):
    """Read every file listed in ``self.file_read`` into its own mapping.

    ``self.file_read`` is iterated as a collection of paths. For each path
    ``self.dict_users`` is reset to a fresh dict, filled as
    ``user -> {item: feedback}`` from the file's non-blank lines, and then
    appended to ``self.individual_interaction``. ``self.triple_dataset``
    receives one ``(user, item)`` pair per row and
    ``self.number_interactions`` counts rows across all files.
    """
    for path in self.file_read:
        # Fresh mapping per file; the previous one is already stored.
        self.dict_users = dict()
        check_error_file(path)

        with open(path) as infile:
            for raw_line in infile:
                if not raw_line.strip():
                    continue

                columns = raw_line.split(self.space_type)
                self.number_interactions += 1

                user = int(columns[0])
                item = int(columns[1])
                value = float(columns[2])

                self.triple_dataset.append((user, item))
                self.dict_users.setdefault(user, {})[item] = value

        self.individual_interaction.append(self.dict_users)
def return_matrix(self, implicit=False):
    """Build a dense user x item matrix from ``self.file_read``.

    Rows follow the sorted user ids and columns the sorted item ids found
    in the file. ``self.dict_items`` is filled with matrix indices (column
    index -> set of row indices), not with the raw ids.

    Args:
        implicit: when true every observed cell is set to 1; otherwise the
            cell holds the feedback value read from the file.

    Returns:
        dict with the keys ``matrix``, ``map_user`` (row index -> user id),
        ``map_item`` (column index -> item id), ``number_interactions``,
        ``di``, ``mu`` (user id -> row index), ``users``, ``items`` and
        ``feedback`` (user -> {item: feedback}).
    """
    check_error_file(self.file_read)

    with open(self.file_read) as infile:
        for raw_line in infile:
            if not raw_line.strip():
                continue

            columns = raw_line.split(self.space_type)
            self.number_interactions += 1

            user = int(columns[0])
            item = int(columns[1])
            feedback = float(columns[2])

            self.list_users.add(user)
            self.list_items.add(item)
            self.dict_users.setdefault(user, {})[item] = feedback

    self.list_users = sorted(self.list_users)
    self.list_items = sorted(self.list_items)

    # Forward (id -> index) and reverse (index -> id) lookups per axis.
    row_of_user = {user: idx for idx, user in enumerate(self.list_users)}
    user_of_row = dict(enumerate(self.list_users))
    col_of_item = {item: idx for idx, item in enumerate(self.list_items)}
    item_of_col = dict(enumerate(self.list_items))

    matrix = np.zeros((len(self.list_users), len(self.list_items)))

    for user in self.list_users:
        row = row_of_user[user]
        for item, value in self.dict_users[user].items():
            col = col_of_item[item]
            matrix[row][col] = 1 if implicit else value
            self.dict_items.setdefault(col, set()).add(row)

    return {
        "matrix": matrix,
        "map_user": user_of_row,
        "map_item": item_of_col,
        "number_interactions": self.number_interactions,
        "di": self.dict_items,
        "mu": row_of_user,
        "users": self.list_users,
        "items": self.list_items,
        "feedback": self.dict_users,
    }
def main_information_item_recommendation(self):
    """Collect user/item interactions from ``self.file_read``.

    Only the first two columns of each non-blank line are used (user and
    item, both ints). The method fills ``self.dict_users`` as
    ``user -> [items in file order]``, counts interactions per user and
    per item, and finally replaces ``self.list_users`` and
    ``self.list_items`` by sorted lists.

    Raises:
        SystemExit: when a user or item column cannot be parsed as an int,
            after printing an error about the separator.
    """
    check_error_file(self.file_read)

    with open(self.file_read) as infile:
        for raw_line in infile:
            if not raw_line.strip():
                continue

            columns = raw_line.split(self.space_type)
            self.number_interactions += 1

            try:
                user = int(columns[0])
                item = int(columns[1])
            except ValueError:
                # A wrong separator leaves several fields glued together.
                print('Error: Space type is invalid!')
                sys.exit()

            user_count = self.num_user_interactions.get(user, 0)
            self.num_user_interactions[user] = user_count + 1

            item_count = self.num_items_interactions.get(item, 0)
            self.num_items_interactions[item] = item_count + 1

            self.list_users.add(user)
            self.list_items.add(item)
            self.dict_users.setdefault(user, []).append(item)

    self.list_users = sorted(self.list_users)
    self.list_items = sorted(self.list_items)
def return_bprmf(self):
    """Read interactions and list, per user, the items never interacted with.

    Each non-blank line of ``self.file_read`` must hold a user (int), an
    item (int) and a feedback value parseable as float; the feedback value
    itself is not stored.

    Returns:
        dict with the keys ``users`` and ``items`` (sorted ids),
        ``feedback`` (user -> list of items in file order), ``not_seen``
        (user -> list of items absent from that user's interactions) and
        ``number_interactions``.
    """
    check_error_file(self.file_read)

    with open(self.file_read) as infile:
        for raw_line in infile:
            if not raw_line.strip():
                continue

            columns = raw_line.split(self.space_type)
            self.number_interactions += 1

            user = int(columns[0])
            item = int(columns[1])
            # Parsed only so a malformed third column still raises.
            float(columns[2])

            self.list_users.add(user)
            self.list_items.add(item)
            self.dict_users.setdefault(user, list()).append(item)

    self.list_users = sorted(self.list_users)
    self.list_items = sorted(self.list_items)

    every_item = set(self.list_items)
    not_seen = {
        user: list(every_item - set(self.dict_users[user]))
        for user in self.list_users
    }

    return {
        "users": self.list_users,
        "items": self.list_items,
        "feedback": self.dict_users,
        "not_seen": not_seen,
        "number_interactions": self.number_interactions,
    }