def test_perform_0_gap(self):
    """DeltaGap computed on identical truth/prediction frames must be 0 for every group."""
    # Predictions == truth -> recommendation popularity equals profile
    # popularity, so every group's delta GAP is exactly 0.
    split = Split(user_truth, user_truth)
    metric = DeltaGap(user_groups={'a': 0.5, 'b': 0.5})

    result = metric.perform(split)

    for col in result.columns:
        # BUG FIX: the original passed a bare generator expression to
        # assertTrue, which is always truthy regardless of its contents,
        # so the check could never fail. Wrap it in all() so each value
        # is actually compared against 0.
        self.assertTrue(all(v == 0 for v in result[col]))
def test_perform_2_users_2_groups(self):
    """DeltaGap over two users split 50/50 into groups 'a' and 'b'."""
    metric = DeltaGap(user_groups={'a': 0.5, 'b': 0.5})
    result = metric.perform(self.split)

    pop_by_item_truth = Counter(list(user_truth['to_id']))

    # Group assignment by popularity ratio (higher ratio -> first group):
    #   group_a = {u2},  group_b = {u1}
    #
    # Average recommendation popularity per user (uses pop_by_item_pred,
    # since with this methodology recommended items may differ from truth):
    #   u2 -> 6/4,  u1 -> 8/8        (sum_pop_item_rated / n_item_rated)
    #
    # Average profile popularity per user (uses pop_by_item_truth, since
    # truth items may differ from the recommendation lists):
    #   u2 -> 5/3,  u1 -> 7/5        (sum_pop_item_rated / n_item_rated)
    #
    # GAP of a group = sum of its members' averages / number of members.
    recs_gap_a = (6 / 4) / 1
    recs_gap_b = (8 / 8) / 1
    profile_gap_a = (5 / 3) / 1
    profile_gap_b = (7 / 5) / 1

    expected_a = (recs_gap_a - profile_gap_a) / profile_gap_a
    expected_b = (recs_gap_b - profile_gap_b) / profile_gap_b

    actual_a = float(result["{} | a".format(str(metric))])
    actual_b = float(result["{} | b".format(str(metric))])

    self.assertAlmostEqual(expected_a, actual_a)
    self.assertAlmostEqual(expected_b, actual_b)
def test_perform_increased_pop_percentage(self):
    """Raising pop_percentage regroups users, so results must differ."""
    truth = pd.DataFrame({
        'from_id': ['u1'] * 8 + ['u2'] * 4 + ['u3'] * 4 + ['u4'] * 3 + ['u5'] * 3,
        'to_id': ['i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9',
                  'i4', 'i6', 'i1', 'i8',
                  'i2', 'i4', 'i3', 'i20',
                  'i3', 'i1', 'i21',
                  'i3', 'i5', 'i1'],
        'score': [650, 600, 500, 400, 300, 220, 100, 50,
                  350, 200, 100, 50,
                  500, 400, 300, 200,
                  150, 100, 50,
                  800, 600, 500]
    })

    recs = pd.DataFrame({
        'from_id': ['u1'] * 8 + ['u2'] * 5 + ['u3'] * 4 + ['u4'] * 3 + ['u5'] * 5,
        'to_id': ['i2', 'i1', 'i4', 'i5', 'i6', 'i3', 'i8', 'i9',
                  'i4', 'i6', 'i1', 'i5', 'i35',
                  'i2', 'i4', 'i3', 'i20',
                  'i3', 'i1', 'i3',
                  'i5', 'i1', 'i9', 'i36', 'i6'],
        'score': [650, 600, 500, 400, 300, 220, 100, 50,
                  350, 200, 100, 50, 25,
                  500, 400, 300, 200,
                  350, 100, 50,
                  800, 600, 500, 400, 300]
    })

    split = Split(recs, truth)

    normal = DeltaGap(user_groups={'a': 0.3, 'b': 0.3, 'c': 0.4}).perform(split)
    increased = DeltaGap(user_groups={'a': 0.3, 'b': 0.3, 'c': 0.4},
                         pop_percentage=0.6).perform(split)

    # Sort both result matrices so the comparison ignores column order.
    normal_arr = np.array(normal)
    increased_arr = np.array(increased)
    normal_arr.sort(axis=0)
    increased_arr.sort(axis=0)

    # With a larger popular-items percentage the users are partitioned into
    # groups differently, so the two runs must not produce equal results.
    self.assertFalse(np.array_equal(normal_arr, increased_arr))
def test_calculate_gap(self):
    """GAP of a group is the mean of its members' average popularities."""
    # Per-user average popularity: this is the inner fraction of the GAP
    # numerator, normally produced by GroupFairnessMetric.get_avg_pop_by_users();
    # hard-coded here for the sake of the test.
    avg_pop_by_users = {'u1': 2, 'u2': 1.78, 'u3': 3.5, 'u4': 1.1}

    for group in ({'u1', 'u3'}, {'u2', 'u4'}):
        expected = sum(avg_pop_by_users[u] for u in group) / len(group)
        self.assertAlmostEqual(expected,
                               DeltaGap.calculate_gap(group, avg_pop_by_users))
def test_calculate_delta_gap(self):
    """DeltaGAP = (GAP_recs - GAP_profile) / GAP_profile."""
    gap_profile = 2.32
    gap_recs = 3

    expected = (gap_recs - gap_profile) / gap_profile
    actual = DeltaGap.calculate_delta_gap(gap_recs, gap_profile)

    self.assertAlmostEqual(expected, actual)
def test_all(self):
    """Smoke test: fit an EvalModel wired with (almost) every available metric."""
    ratings_filename = os.path.join(contents_path, '..', 'datasets',
                                    'examples', 'new_ratings.csv')
    ratings_frame = RatingsImporter(CSVFile(ratings_filename)).import_ratings()

    rs = ContentBasedRS(
        LinearPredictor({"Plot": ['tfidf', 'embedding']}, SkLinearRegression()),
        ratings_frame,
        items_dir
    )

    # The catalog is every serialized item ('.xz' file) found in items_dir.
    catalog = {os.path.splitext(f)[0]
               for f in os.listdir(items_dir)
               if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('xz')}

    metrics = [
        Precision(sys_average='micro'),
        PrecisionAtK(1, sys_average='micro'),
        RPrecision(),
        Recall(),
        RecallAtK(3, ),
        FMeasure(1, sys_average='macro'),
        FMeasureAtK(2, beta=1, sys_average='micro'),

        NDCG(),
        NDCGAtK(3),
        MRR(),
        MRRAtK(5, ),
        Correlation('pearson', top_n=5),
        Correlation('kendall', top_n=3),
        Correlation('spearman', top_n=4),

        MAE(),
        MSE(),
        RMSE(),

        CatalogCoverage(catalog),
        CatalogCoverage(catalog, k=2),
        CatalogCoverage(catalog, top_n=3),
        GiniIndex(),
        GiniIndex(top_n=3),
        DeltaGap({'primo': 0.5, 'secondo': 0.5})
    ]

    em = EvalModel(rs,
                   KFoldPartitioning(),
                   metric_list=metrics,
                   methodology=TestItemsMethodology())

    result = em.fit()
def test_perform_top_3(self):
    """DeltaGap restricted to the top-3 recommendations, single group."""
    metric = DeltaGap(user_groups={'a': 1}, top_n=3)
    result = metric.perform(self.split)

    pop_by_item_truth = Counter(list(user_truth['to_id']))

    # With a single group: group_a = {u2, u1}.
    #
    # Average popularity of each user's top-3 recommendations (uses
    # pop_by_item_pred, since with this methodology the recommended items
    # may differ from the truth items):
    #   u2 -> 5/3,  u1 -> 5/3       (sum_pop_item_rated / n_item_rated)
    #
    # Average popularity of each user's profile (uses pop_by_item_truth,
    # since truth items may differ from the recommendation lists):
    #   u2 -> 5/3,  u1 -> 7/5       (sum_pop_item_rated / n_item_rated)
    #
    # GAP of the group = sum of its members' averages / number of members.
    recs_gap_a = ((5 / 3) + (5 / 3)) / 2
    profile_gap_a = ((5 / 3) + (7 / 5)) / 2

    expected_a = (recs_gap_a - profile_gap_a) / profile_gap_a
    actual_a = float(result["{} | a".format(str(metric))])

    self.assertAlmostEqual(expected_a, actual_a)
def test_graph(self):
    """Smoke test: evaluate a graph-based recommender (NXPageRank) end to end."""
    catalog = set(ratings.to_id)

    users_dir = os.path.join(dir_test_files, 'complex_contents', 'users_codified/')

    graph = NXFullGraph(
        ratings,
        user_contents_dir=users_dir,
        item_contents_dir=items_dir,
        item_exo_representation="dbpedia",
        user_exo_representation='local',
        item_exo_properties=['starring'],
        # Column '1' of the users .DAT file identifies the gender.
        user_exo_properties=['1']
    )

    graph_rs = GraphBasedRS(NXPageRank(), graph)

    metrics = [
        Precision(relevant_threshold=3),
        Recall(),
        FMeasure(beta=1),
        FMeasure(beta=2, sys_average='micro'),
        MRR(),
        Correlation('pearson'),
        GiniIndex(),
        DeltaGap({'popular': 0.5, 'niche': 0.5}),
        PredictionCoverage(catalog),
        PopProfileVsRecs(user_groups={'popular': 0.5, 'niche': 0.5},
                         out_dir='plots/'),
        LongTailDistr('plots/', format='svg'),
        PopRecsCorrelation('plots/')
    ]

    em = EvalModel(graph_rs,
                   KFoldPartitioning(),
                   metric_list=metrics,
                   verbose_predictions=True,
                   methodology=TestItemsMethodology())

    em.fit()
def test_eval_ranking_needed_metrics_implicit_split(self):
    """Ranking metrics must run when the split list is set via the class attribute."""
    calculator = MetricCalculator()

    # Supply the splits implicitly through the class attribute instead of
    # passing them to the calculator.
    RankingNeededMetric.rank_truth_list = self.rank_split_list

    ranking_metrics = [
        Precision(),
        PrecisionAtK(2),
        RPrecision(),
        Recall(),
        RecallAtK(2),
        FMeasure(),
        FMeasureAtK(2),
        NDCG(),
        NDCGAtK(2),
        MRR(),
        MRRAtK(2),
        Correlation('pearson'),
        Correlation('kendall'),
        Correlation('spearman'),
        PredictionCoverage(self.catalog),
        CatalogCoverage(self.catalog, top_n=2),
        GiniIndex(),
        DeltaGap(user_groups={'a': 0.5, 'b': 0.5}),
        LongTailDistr(out_dir='test_plot'),
        PopProfileVsRecs(user_groups={'a': 0.5, 'b': 0.5}, out_dir='test_plot'),
        PopRecsCorrelation(out_dir='test_plot')
    ]

    system_res, each_user_res = calculator.eval_metrics(ranking_metrics)

    self.assertIsInstance(system_res, pd.DataFrame)
    self.assertIsInstance(each_user_res, pd.DataFrame)
def test_invalid_percentage(self):
    """Constructing DeltaGap with pop_percentage outside (0, 1] must raise PercentageError."""
    # BUG FIX: the original put all three constructions inside a single
    # assertRaises block, so once the first call raised, the remaining two
    # were never executed (and thus never verified). Give each invalid
    # value its own assertRaises context.
    for invalid_pop in (-0.5, 0, 1.5):
        with self.assertRaises(PercentageError):
            DeltaGap(user_groups={'a': 0.5}, pop_percentage=invalid_pop)