def eval_fit_recsys(cls, recsys: RecSys, split_list: List[Split], test_items_list: List[pd.DataFrame]):
    """
    Method, usually called by the 'PredictionCalculator' module, that generates recommendation lists.

    For every user, the items that will be ranked are specified by the 'test_items_list' parameter.
    The generated rankings will be stored into a class attribute (rank_truth_list), which is a list of
    Split objects: every object has two DataFrames, the first one containing the recommendation lists
    for every user, the second one containing the 'ground truth' for every user.

    If the class attribute is non-empty, the 'AlreadyFittedRecSys' exception is raised, so remember to
    clean the class attribute by calling the private method '_clean_pred_truth_list(...)' before every
    new evaluation

    Args:
        recsys (RecSys): Recommender system which will generate the predictions that will later be
            evaluated
        split_list (List[Split]): List of Split objects where every Split contains two DataFrames,
            the first with the 'train set' for every user, the second with the 'test set' for every
            user
        test_items_list (List[pd.DataFrame]): List of DataFrames, one for every Split object inside
            the split_list parameter, where every DataFrame contains, for every user, the list of
            items that must be ranked

    Raises:
        AlreadyFittedRecSys: when the class attribute 'rank_truth_list' is non-empty, meaning that
            recommendation lists have already been calculated
    """
    if len(cls.rank_truth_list) != 0:
        raise AlreadyFittedRecSys

    for counter, (split, test_items_frame) in enumerate(zip(split_list, test_items_list), start=1):
        train = split.train
        test = split.test

        rank_truth = Split()
        rank_truth.truth = test

        frame_to_concat = []

        user_list_to_fit = set(train.from_id)

        for user in progbar(user_list_to_fit,
                            prefix='Calculating rank for user {} - split {}'.format('{}', counter),
                            substitute_with_current=True):
            user_ratings_train = train.loc[train['from_id'] == user]

            test_items = list(test_items_frame.query('from_id == @user').to_id)

            result = recsys._eval_fit_rank(user_ratings_train, test_items)

            frame_to_concat.append(result)

        rank_truth.pred = pd.concat(frame_to_concat)

        cls.rank_truth_list.append(rank_truth)
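# Hedged usage sketch (illustration only, not part of the module), assuming 'OwnerClass' stands
# for whatever class this classmethod is defined on (the owner class is not shown in this excerpt)
# and that 'recsys', 'split_list' and 'test_items_list' are built elsewhere:
#
#   OwnerClass._clean_pred_truth_list(...)     # empty the class attribute before a new evaluation
#   OwnerClass.eval_fit_recsys(recsys, split_list, test_items_list)
#   for split in OwnerClass.rank_truth_list:   # one Split per input Split
#       pred, truth = split.pred, split.truth  # recommendation lists vs. ground truth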
def test_perform_both(self):
    # Save both graphs when it makes sense (e.g. when they are different)
    metric = PopRecsCorrelation('test_pop_recs/both_yes', mode='both')
    metric.perform(split_i21_missing_in_recs)

    self.assertTrue(os.path.isfile(os.path.join('test_pop_recs/both_yes',
                                                'pop_recs_correlation.png')))
    # If the 'no-zeros' graph is created, its file name will be: file_name = file_name + '_no_zeros'
    self.assertTrue(os.path.isfile(os.path.join('test_pop_recs/both_yes',
                                                'pop_recs_correlation_no_zeros.png')))

    truth = pd.DataFrame({
        'from_id': ['u1', 'u1', 'u2', 'u2', 'u2'],
        'to_id': ['i2', 'i1', 'i3', 'i5', 'i4'],
        'score': [5, 3, 3.6, 4, 2.2]}
    )

    recs = pd.DataFrame({
        'from_id': ['u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2'],
        'to_id': ['i1', 'i2', 'inew1', 'inew2', 'i5', 'i4', 'i3'],
        'score': [300, 250, 200, 400, 350, 300, 100]}
    )

    # All items in the truth set have been recommended, so there is no 'zero' recommendation
    split_no_zero_present = Split(recs, truth)

    metric = PopRecsCorrelation('test_pop_recs/both_identical', mode='both')
    metric.perform(split_no_zero_present)

    self.assertTrue(os.path.isfile(os.path.join('test_pop_recs/both_identical',
                                                'pop_recs_correlation.png')))
    # If the 'no-zeros' graph is created, its file name will be: file_name = file_name + '_no_zeros'
    self.assertTrue(os.path.isfile(os.path.join('test_pop_recs/both_identical',
                                                'pop_recs_correlation_no_zeros.png')))
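# Hedged sketch (an assumption about the intuition behind PopRecsCorrelation, not the library's
# actual implementation): for every item in the ground truth, pair its popularity with the number
# of times it was recommended; the 'no zeros' variant keeps only items recommended at least once.
def pop_recs_points(recs: pd.DataFrame, truth: pd.DataFrame):
    popularity = truth['to_id'].value_counts()
    recommended = recs['to_id'].value_counts()

    points = pd.DataFrame({
        'popularity': popularity,
        'recommended': recommended.reindex(popularity.index).fillna(0)
    })

    # items never recommended produce the 'zero' points dropped by the no-zeros graph
    points_no_zeros = points[points['recommended'] > 0]
    return points, points_no_zeros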
def test_perform_w_new_items(self): metric_top_n = self.metric_top_n metric_k_sampling = self.metric_k_sampling user_pred_w_new_items = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2' ], 'to_id': [ 'i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i4', 'i6', 'i1', 'i8' ], 'score': [650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100, 50] }) split_w_new_items = Split(user_pred_w_new_items, user_truth) result_top_n = float( metric_top_n.perform(split_w_new_items)[str(metric_top_n)]) self.assertTrue(0 <= result_top_n <= 100) result_k_sampling = float( metric_k_sampling.perform(split_w_new_items)[str( metric_k_sampling)]) self.assertTrue(0 <= result_k_sampling <= 100)
def test_perform_equi_top_n(self):
    metric_top_n = self.metric_top_n

    # i1, i2 and i3 are recommended equally often to users
    user_pred_equi = pd.DataFrame({
        'from_id': ['u1', 'u1', 'u1', 'u2', 'u2', 'u2'],
        'to_id': ['i1', 'i2', 'i3', 'i2', 'i3', 'i1'],
        'score': [650, 600, 500, 750, 700, 680]
    })

    split_pred_equi = Split(user_pred_equi, user_truth)

    result = float(metric_top_n.perform(split_pred_equi)[str(metric_top_n)])

    # In the top 2, i2 is recommended more often, so there's no equality
    self.assertTrue(0 < result <= 1)

    metric_top_3 = GiniIndex(top_n=3)

    result = float(metric_top_3.perform(split_pred_equi)[str(metric_top_3)])

    # In the top 3 (the total length of the rec lists) there's equality
    self.assertEqual(0, result)
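# Hedged sketch (one standard Gini coefficient formulation, which may differ in details from the
# library's GiniIndex): compute inequality over how many times each item appears in the
# recommendation lists, 0 meaning every recommended item appears equally often. Truncating each
# user's list to its first top_n rows is an assumption about what the top_n parameter does and
# relies on the frames being sorted by score per user, as in the tests above.
def gini_of_recommendations(pred: pd.DataFrame, top_n: int = None) -> float:
    if top_n is not None:
        pred = pred.groupby('from_id').head(top_n)  # keep the first top_n recs of every user

    counts = sorted(pred['to_id'].value_counts())   # occurrences of every item, ascending
    n = len(counts)
    total = sum(counts)
    if n == 0 or total == 0:
        return 0.0

    # Gini = sum_i (2i - n - 1) * x_i / (n * sum(x)), with x sorted ascending and i = 1..n
    return sum((2 * i - n - 1) * c for i, c in enumerate(counts, start=1)) / (n * total)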
def test_perform_increased_pop_percentage(self): truth = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3', 'u3', 'u4', 'u4', 'u4', 'u5', 'u5', 'u5' ], 'to_id': [ 'i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i4', 'i6', 'i1', 'i8', 'i2', 'i4', 'i3', 'i20', 'i3', 'i1', 'i21', 'i3', 'i5', 'i1' ], 'score': [ 650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100, 50, 500, 400, 300, 200, 150, 100, 50, 800, 600, 500 ] }) recs = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3', 'u3', 'u4', 'u4', 'u4', 'u5', 'u5', 'u5', 'u5', 'u5' ], 'to_id': [ 'i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i4', 'i6', 'i1', 'i5', 'i35', 'i2', 'i4', 'i3', 'i20', 'i3', 'i1', 'i3', 'i5', 'i1', 'i9', 'i36', 'i6' ], 'score': [ 650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100, 50, 25, 500, 400, 300, 200, 350, 100, 50, 800, 600, 500, 400, 300 ] }) split = Split(recs, truth) result_pop_normal = DeltaGap(user_groups={ 'a': 0.3, 'b': 0.3, 'c': 0.4 }).perform(split) result_pop_increased = DeltaGap(user_groups={ 'a': 0.3, 'b': 0.3, 'c': 0.4 }, pop_percentage=0.6).perform(split) result_pop_normal = np.array(result_pop_normal) result_pop_increased = np.array(result_pop_increased) result_pop_normal.sort(axis=0) result_pop_increased.sort(axis=0) # Just check that results with pop_percentage increased are different, # since users are put into groups differently self.assertFalse( np.array_equal(result_pop_normal, result_pop_increased))
def test_perform_0_gap(self):
    # DeltaGap with 2 equal frames should return 0 for every group
    split = Split(user_truth, user_truth)

    metric = DeltaGap(user_groups={'a': 0.5, 'b': 0.5})

    result = metric.perform(split)

    # every numeric value of the result should be 0 (non-numeric columns, if any, are skipped)
    for col in result.columns:
        self.assertTrue(all(v == 0 for v in result[col] if isinstance(v, (int, float))))
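# Hedged sketch (a common formulation of Delta GAP, possibly different in details from the
# library's DeltaGap): the GAP of a user group is the average, over its users, of the mean
# popularity of the items in their lists; Delta GAP compares recommendations against profiles,
# so two identical frames, as in the test above, yield (GAP_recs - GAP_profile) / GAP_profile = 0.
def delta_gap_sketch(recs: pd.DataFrame, truth: pd.DataFrame, group_users: set) -> float:
    # item popularity estimated from the ground truth: how many users rated each item
    popularity = truth['to_id'].value_counts()

    def group_avg_pop(frame: pd.DataFrame) -> float:
        per_user = (frame[frame['from_id'].isin(group_users)]
                    .groupby('from_id')['to_id']
                    .apply(lambda items: popularity.reindex(items).fillna(0).mean()))
        return float(per_user.mean())

    gap_profile = group_avg_pop(truth)
    gap_recs = group_avg_pop(recs)
    return (gap_recs - gap_profile) / gap_profile if gap_profile != 0 else 0.0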
def setUpClass(cls) -> None: cls.recs = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2' ], 'to_id': [ 'i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i4', 'i6', 'i1', 'i8' ], 'score': [650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100, 50] }) cls.split = Split(cls.recs, user_truth)
def test_perform_only_new(self): metric = self.metric user_pred_only_new_items = pd.DataFrame({ 'from_id': ['u1', 'u1', 'u2', 'u2'], 'to_id': ['inew1', 'inew2', 'inew3', 'inew4'], 'score': [650, 600, 500, 650] }) split_only_new = Split(user_pred_only_new_items, user_truth) expected = 0 result = float(metric.perform(split_only_new)[str(metric)]) self.assertEqual(expected, result)
def test_perform_equi(self):
    metric = self.metric

    # i1, i2 and i3 are recommended equally often to users
    user_pred_equi = pd.DataFrame({
        'from_id': ['u1', 'u1', 'u1', 'u2', 'u2', 'u2'],
        'to_id': ['i1', 'i2', 'i3', 'i2', 'i3', 'i1'],
        'score': [650, 600, 500, 750, 700, 680]
    })

    split_pred_equi = Split(user_pred_equi, user_truth)

    expected = 0

    result = float(metric.perform(split_pred_equi)[str(metric)])

    self.assertEqual(expected, result)
def test_perform_all_items(self): metric = self.metric user_pred_all_items = pd.DataFrame({ 'from_id': ['u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2'], 'to_id': [ 'i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i4', 'i10', 'i7' ], 'score': [650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100] }) split_all = Split(user_pred_all_items, user_truth) expected = 100 result = float(metric.perform(split_all)[str(metric)]) self.assertEqual(expected, result)
def test_perform_all_items_and_plus(self): catalog = {'i2'} metric_top_n = CatalogCoverage(catalog, top_n=2) metric_k_sampling = CatalogCoverage(catalog, k=1) user_pred_all_items_and_plus = pd.DataFrame({ 'from_id': ['u1', 'u1', 'u2', 'u2'], 'to_id': ['i2', 'i1', 'i2', 'i7'], 'score': [650, 600, 200, 100] }) split_all = Split(user_pred_all_items_and_plus, user_truth) expected = 100 result = float(metric_top_n.perform(split_all)[str(metric_top_n)]) self.assertEqual(expected, result) result = float( metric_k_sampling.perform(split_all)[str(metric_k_sampling)]) self.assertEqual(expected, result)
def test_perform_all_items_and_plus(self):
    catalog = {'i1'}
    metric = PredictionCoverage(catalog)

    # The whole catalog plus additional items is recommended
    user_pred_all_items_and_plus = pd.DataFrame({
        'from_id': ['u1', 'u1', 'u1', 'u1'],
        'to_id': ['i2', 'i1', 'i4', 'i5'],
        'score': [650, 600, 500, 400]
    })

    split_all = Split(user_pred_all_items_and_plus, user_truth)

    expected = 100

    result = float(metric.perform(split_all)[str(metric)])

    self.assertEqual(expected, result)
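# Hedged sketch (an assumption about the idea the two coverage tests above rely on, not
# necessarily the library's exact computation): coverage as the percentage of catalog items
# that appear at least once in the recommendation frame; recommended items outside the
# catalog simply do not count.
def coverage_percentage_sketch(pred: pd.DataFrame, catalog: set) -> float:
    covered = set(pred['to_id']) & catalog
    return 100 * len(covered) / len(catalog) if catalog else 0.0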
def test_perform_only_one(self): metric = self.metric metric_top_n = self.metric_top_n user_pred_only_one_item = pd.DataFrame({ 'from_id': ['u1'], 'to_id': ['i4'], 'score': [650] }) split_only_one = Split(user_pred_only_one_item, user_truth) expected = 0 result = float(metric.perform(split_only_one)[str(metric)]) self.assertEqual(expected, result) # Even if there's only one element and top_n = 2, everything works correctly expected = 0 result = float(metric_top_n.perform(split_only_one)[str(metric_top_n)]) self.assertEqual(expected, result)
def test_perform_mixed(self): metric = self.metric metric_top_n = self.metric_top_n user_pred_mixed = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2' ], 'to_id': [ 'i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i2', 'i6', 'i1', 'i8' ], 'score': [650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100, 50] }) split_mixed = Split(user_pred_mixed, user_truth) result = float(metric.perform(split_mixed)[str(metric)]) self.assertTrue(0 < result <= 1) result = float(metric_top_n.perform(split_mixed)[str(metric_top_n)]) self.assertTrue(0 < result <= 1)
def setUpClass(cls) -> None: rank1 = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3' ], 'to_id': [ 'i9', 'i6', 'inew1', 'inew2', 'i6', 'i2', 'i1', 'i8', 'i10', 'inew3', 'i2', 'i1', 'i8', 'i4', 'i9', 'i3', 'i12', 'i2' ], 'score': [ 500, 450, 400, 350, 300, 250, 200, 150, 400, 300, 200, 100, 50, 25, 10, 100, 50, 20 ] }) score1 = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3' ], 'to_id': [ 'i9', 'i6', 'inew1', 'inew2', 'i6', 'i2', 'i1', 'i8', 'i10', 'inew3', 'i2', 'i1', 'i8', 'i4', 'i9', 'i3', 'i12', 'i2' ], 'score': [ 4.36, 2.55, 1.23, 4.36, 3.55, 2.58, 5, 4.2, 3.56, 4.22, 4.25, 1.4, 4.4, 3.33, 2.53, 2.21, 1.53, 3.32 ] }) truth1 = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3', 'u3', 'u3' ], 'to_id': [ 'i1', 'i2', 'i6', 'i8', 'i9', 'i1', 'i2', 'i4', 'i9', 'i10', 'i2', 'i3', 'i12', 'imissing3', 'imissing4' ], 'score': [3, 3, 4, 1, 1, 5, 3, 3, 4, 4, 4, 2, 3, 3, 3] }) rank2 = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3' ], 'to_id': [ 'i10', 'i5', 'i4', 'i3', 'i7', 'i70', 'i3', 'i71', 'i8', 'i11', 'i10', 'i1', 'i4' ], 'score': [500, 400, 300, 200, 100, 400, 300, 200, 100, 50, 150, 100, 50] }) score2 = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3' ], 'to_id': [ 'i10', 'i5', 'i4', 'i3', 'i7', 'i70', 'i3', 'i71', 'i8', 'i11', 'i10', 'i1', 'i4' ], 'score': [ 4.4, 3.35, 2.22, 2.56, 3.1, 2.55, 1.89, 4.3, 3.77, 3.89, 4.23, 4.56, 5 ] }) truth2 = pd.DataFrame({ 'from_id': [ 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3', 'u3', 'u3' ], 'to_id': [ 'i3', 'i4', 'i5', 'i7', 'i10', 'i3', 'i70', 'i71', 'i8', 'i11', 'i4', 'i1', 'i10', 'imissing1', 'imissing2' ], 'score': [4, 2, 2, 5, 1, 5, 4, 4, 3, 4, 2, 3, 1, 1, 1] }) rank_split1 = Split(rank1, truth1) rank_split2 = Split(rank2, truth2) score_split1 = Split(score1, truth1) score_split2 = Split(score2, truth2) cls.rank_split_list = [rank_split1, rank_split2] cls.score_split_list = [score_split1, score_split2] catalog = ['i' + str(num) for num in range(100)] cls.catalog = set(catalog)
{'from_id': ['u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3', 'u3', 'u4', 'u4', 'u4', 'u5', 'u5', 'u5'], 'to_id': ['i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i4', 'i6', 'i1', 'i8', 'i2', 'i4', 'i3', 'i20', 'i3', 'i1', 'i21', 'i3', 'i5', 'i1'], 'score': [5, 3, 3.6, 4, 2.2, 1, 1.5, 3.2, 3.6, 4, 5, 3.5, 2.2, 2.8, 4, 5, 4.5, 3.5, 5, 4, 4.5, 3.3]}) recs = pd.DataFrame( {'from_id': ['u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2', 'u2', 'u3', 'u3', 'u3', 'u3', 'u4', 'u4', 'u4', 'u5', 'u5', 'u5', 'u5', 'u5'], 'to_id': ['i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9', 'i4', 'i6', 'i1', 'i8', 'i35', 'i2', 'i4', 'i3', 'i20', 'i3', 'i1', 'i3', 'i5', 'i1', 'i9', 'i36', 'i6'], 'score': [650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100, 50, 25, 500, 400, 300, 200, 350, 100, 50, 800, 600, 500, 400, 300]}) split_i21_missing_in_recs = Split(recs, truth) class TestLongTail(TestCase): def test_string_error(self): # Test invalid string passed with self.assertRaises(StringNotSupported): LongTailDistr('.', on='invalid') def test_perform(self): # Save on same folder metric = LongTailDistr(on='truth') metric.perform(split_i21_missing_in_recs) # The graph is created with file_name = file_name + '_truth' self.assertTrue(os.path.isfile('./long_tail_distr_truth.png'))
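# Hedged sketch (an assumption about the data behind a LongTailDistr plot, not the library's
# plotting code): items ranked by how many rows mention them in the chosen frame
# (on='truth' -> ratings, on='pred' -> recommendations), in descending order, which is the
# classic long-tail shape the saved graph visualises.
def long_tail_counts(frame: pd.DataFrame) -> pd.Series:
    return frame['to_id'].value_counts().sort_values(ascending=False)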
user_pred_only_one_item = pd.DataFrame( {'from_id': ['u1', 'u2'], 'to_id': ['i4', 'i8'], 'score': [650, 600]}) user_pred_i1_i4_missing = pd.DataFrame( {'from_id': ['u1', 'u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2'], 'to_id': ['i2', 'i5', 'i6', 'i3', 'i8', 'i9', 'i6', 'i1', 'i8'], 'score': [600, 400, 300, 220, 100, 50, 200, 100, 50]}) user_truth = pd.DataFrame({'from_id': ['u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2'], 'to_id': ['i1', 'i2', 'i3', 'i4', 'i6', 'i1', 'i8', 'i4'], 'score': [3, 2, 3, 1, 2, 4, 3, 3]}) split_only_new = Split(user_pred_only_new_items, user_truth) split_w_new_items = Split(user_pred_w_new_items, user_truth) split_only_one = Split(user_pred_only_one_item, user_truth) split_missing = Split(user_pred_i1_i4_missing, user_truth) class TestPrecision(TestCase): @classmethod def setUpClass(cls) -> None: cls.metric_macro = Precision(relevant_threshold=3, sys_average='macro') cls.metric_micro = Precision(relevant_threshold=3, sys_average='micro') cls.metric_mean = Precision(sys_average='macro') def test_perform_only_new(self): metric_macro = self.metric_macro
'score': [650, 600, 500, 400, 300, 220, 100, 50, 350, 200, 100, 50] }) user_pred_only_one_item = pd.DataFrame({ 'from_id': ['u1', 'u2'], 'to_id': ['i4', 'i8'], 'score': [650, 600] }) user_truth = pd.DataFrame({ 'from_id': ['u1', 'u1', 'u1', 'u1', 'u1', 'u2', 'u2', 'u2'], 'to_id': ['i1', 'i2', 'i3', 'i4', 'i6', 'i1', 'i8', 'i4'], 'score': [3, 2, 3, 1, 2, 4, 3, 3] }) split_only_new = Split(user_pred_only_new_items, user_truth) split_w_new_items = Split(user_pred_w_new_items, user_truth) split_only_one = Split(user_pred_only_one_item, user_truth) def for_each_method(test_func): def wrapper(self, *args, **kwargs): for method in self.methods_list: with self.subTest(current_method=method): test_func(*((self, method) + args), **kwargs) return wrapper class TestNDCG(unittest.TestCase): @classmethod
("003", "tt0113041", 3, "54654675"), ("003", "tt0112281", 5, "54654675")], columns=["from_id", "to_id", "score", "timestamp"]) train2 = pd.DataFrame.from_records( [("001", "tt0112641", 2, "54654675"), ("001", "tt0112760", 1, "54654675"), ("002", "tt0112641", 3, "54654675"), ("002", "tt0112896", 2, "54654675"), ("003", "tt0113041", 3, "54654675"), ("003", "tt0112281", 5, "54654675")], columns=["from_id", "to_id", "score", "timestamp"]) test2 = pd.DataFrame.from_records( [("001", "tt0112281", 3.5, "54654675"), ("001", "tt0112302", 4.5, "54654675"), ("002", "tt0112346", 4, "54654675"), ("003", "tt0112453", 2, "54654675")], columns=["from_id", "to_id", "score", "timestamp"]) split_list = [Split(train1, test1), Split(train2, test2)] class TestMethodology(TestCase): def test_get_item_to_predict(self): result_list = TestRatingsMethodology().get_item_to_predict(split_list) # for every user get the items in its test_set1 expected_list = [ test1[['from_id', 'to_id']], test2[['from_id', 'to_id']] ] self.assertTrue(len(expected_list), len(result_list)) for expected, result in zip(expected_list, result_list):