Beispiel #1
0
 def test_perform_no_zeros(self):
     # With mode='no_zeros' only the file carrying the '_no_zeros' suffix must be written
     out_dir = 'test_pop_recs/no_zeros'
     PopRecsCorrelation(out_dir, mode='no_zeros').perform(split_i21_missing_in_recs)

     # The plot with the plain file name must NOT be there
     plain_plot = os.path.join(out_dir, 'pop_recs_correlation.png')
     self.assertFalse(os.path.isfile(plain_plot))

     # The 'no-zeros' plot is saved with file_name = file_name + '_no_zeros'
     no_zeros_plot = os.path.join(out_dir, 'pop_recs_correlation_no_zeros.png')
     self.assertTrue(os.path.isfile(no_zeros_plot))
Beispiel #2
0
 def test_perform_w_zeros(self):
     # With mode='w_zeros' only the file with the plain file name must be written
     out_dir = 'test_pop_recs/w_zeros'
     PopRecsCorrelation(out_dir, mode='w_zeros').perform(split_i21_missing_in_recs)

     # The plot with the plain file name must be there
     plain_plot = os.path.join(out_dir, 'pop_recs_correlation.png')
     self.assertTrue(os.path.isfile(plain_plot))

     # Had the 'no-zeros' plot been created, it would be named file_name + '_no_zeros'
     no_zeros_plot = os.path.join(out_dir, 'pop_recs_correlation_no_zeros.png')
     self.assertFalse(os.path.isfile(no_zeros_plot))
Beispiel #3
0
    def test_perform_both(self):
        # With mode='both' the plain plot and the '_no_zeros' plot are both expected on disk.
        # The 'no-zeros' plot is saved with file_name = file_name + '_no_zeros'
        expected_plots = ('pop_recs_correlation.png', 'pop_recs_correlation_no_zeros.png')

        # First scenario: the module-level split 'split_i21_missing_in_recs'
        first_dir = 'test_pop_recs/both_yes'
        PopRecsCorrelation(first_dir, mode='both').perform(split_i21_missing_in_recs)
        for plot_name in expected_plots:
            self.assertTrue(os.path.isfile(os.path.join(first_dir, plot_name)))

        # Second scenario: every item of the truth set (i1..i5) also appears in the
        # recommendations, so there is no 'zero' recommendation
        truth_columns = {
            'from_id': ['u1', 'u1', 'u2', 'u2', 'u2'],
            'to_id': ['i2', 'i1', 'i3', 'i5', 'i4'],
            'score': [5, 3, 3.6, 4, 2.2],
        }
        truth = pd.DataFrame(truth_columns)

        recs_columns = {
            'from_id': ['u1', 'u1', 'u1', 'u2', 'u2', 'u2', 'u2'],
            'to_id': ['i1', 'i2', 'inew1', 'inew2', 'i5', 'i4', 'i3'],
            'score': [300, 250, 200, 400, 350, 300, 100],
        }
        recs = pd.DataFrame(recs_columns)

        split_no_zero_present = Split(recs, truth)

        second_dir = 'test_pop_recs/both_identical'
        PopRecsCorrelation(second_dir, mode='both').perform(split_no_zero_present)
        for plot_name in expected_plots:
            self.assertTrue(os.path.isfile(os.path.join(second_dir, plot_name)))
Beispiel #4
0
    def test_graph(self):
        # Smoke test: build a full graph, wrap it in a graph-based recommender and
        # fit an EvalModel configured with a broad list of metrics. There is no
        # assertion: the test only requires that fit() completes.
        catalog = set(ratings.to_id)

        users_dir = os.path.join(dir_test_files, 'complex_contents', 'users_codified/')

        # '1' is the column in the users .DAT which identifies the gender
        user_properties = ['1']
        full_graph = NXFullGraph(
            ratings,
            user_contents_dir=users_dir,
            item_contents_dir=items_dir,
            item_exo_representation="dbpedia",
            user_exo_representation='local',
            item_exo_properties=['starring'],
            user_exo_properties=user_properties,
        )

        recsys = GraphBasedRS(NXPageRank(), full_graph)

        # Collaborators are created in the same order in which the original
        # single-expression call evaluated them
        partitioning = KFoldPartitioning()
        metrics = [
            Precision(relevant_threshold=3),
            Recall(),
            FMeasure(beta=1),
            FMeasure(beta=2, sys_average='micro'),
            MRR(),
            Correlation('pearson'),
            GiniIndex(),
            DeltaGap({'popular': 0.5, 'niche': 0.5}),
            PredictionCoverage(catalog),
            PopProfileVsRecs(user_groups={'popular': 0.5, 'niche': 0.5},
                             out_dir='plots/'),
            LongTailDistr('plots/', format='svg'),
            PopRecsCorrelation('plots/'),
        ]
        methodology = TestItemsMethodology()

        eval_model = EvalModel(recsys,
                               partitioning,
                               metric_list=metrics,
                               verbose_predictions=True,
                               methodology=methodology)
        eval_model.fit()
    def test_eval_ranking_needed_metrics_implicit_split(self):
        # The split list is not passed explicitly: it is set directly on the
        # RankingNeededMetric class attribute before evaluating the metrics
        calculator = MetricCalculator()
        RankingNeededMetric.rank_truth_list = self.rank_split_list

        metrics = [
            Precision(),
            PrecisionAtK(2),
            RPrecision(),
            Recall(),
            RecallAtK(2),
            FMeasure(),
            FMeasureAtK(2),
            NDCG(),
            NDCGAtK(2),
            MRR(),
            MRRAtK(2),
            Correlation('pearson'),
            Correlation('kendall'),
            Correlation('spearman'),
            PredictionCoverage(self.catalog),
            CatalogCoverage(self.catalog, top_n=2),
            GiniIndex(),
            DeltaGap(user_groups={'a': 0.5, 'b': 0.5}),
            LongTailDistr(out_dir='test_plot'),
            PopProfileVsRecs(user_groups={'a': 0.5, 'b': 0.5},
                             out_dir='test_plot'),
            PopRecsCorrelation(out_dir='test_plot'),
        ]
        results = calculator.eval_metrics(metrics)
        system_res, each_user_res = results

        # Both the system-wide and the per-user results must be DataFrames
        for result_frame in (system_res, each_user_res):
            self.assertIsInstance(result_frame, pd.DataFrame)
Beispiel #6
0
    def test_overwrite(self):
        # Checks the file naming of PopRecsCorrelation when the output files already exist:
        # by default a ' (n)' suffix is appended to the file name, while with
        # overwrite=True the original file names are reused.
        # The 'no-zeros' plot is always saved with file_name = file_name + '_no_zeros'
        out_dir = 'test_pop_recs/overwrite'

        # First run: both plots are saved with their plain names
        metric = PopRecsCorrelation(out_dir, mode='both')
        metric.perform(split_i21_missing_in_recs)
        self.assertTrue(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation.png')))
        self.assertTrue(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation_no_zeros.png')))

        # Second run: both plots already exist, so ' (1)' copies are expected
        metric = PopRecsCorrelation(out_dir, mode='both')
        metric.perform(split_i21_missing_in_recs)
        self.assertTrue(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation (1).png')))
        self.assertTrue(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation_no_zeros (1).png')))

        # Third run: plain and ' (1)' files already exist, so ' (2)' copies are expected
        metric = PopRecsCorrelation(out_dir, mode='both')
        metric.perform(split_i21_missing_in_recs)
        self.assertTrue(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation (2).png')))
        self.assertTrue(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation_no_zeros (2).png')))

        # Fourth run with overwrite=True: the plain files are still there and no ' (3)' copy is created
        metric = PopRecsCorrelation(out_dir, mode='both', overwrite=True)
        metric.perform(split_i21_missing_in_recs)
        self.assertTrue(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation.png')))
        self.assertTrue(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation_no_zeros.png')))
        self.assertFalse(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation (3).png')))
        # The original repeated the previous assertion here, leaving the
        # '_no_zeros' counterpart unchecked
        self.assertFalse(os.path.isfile(os.path.join(out_dir, 'pop_recs_correlation_no_zeros (3).png')))
Beispiel #7
0
    def test_perform(self):
        # Exercises the output location, file name and format options of PopRecsCorrelation.
        # Every run is expected to produce the plain plot and its '_no_zeros' variant.

        # No arguments: plots are saved in the current folder as png
        PopRecsCorrelation().perform(split_i21_missing_in_recs)
        for plot_path in ('./pop_recs_correlation.png',
                          './pop_recs_correlation_no_zeros.png'):
            self.assertTrue(os.path.isfile(plot_path))

        # Current folder again, but with a different format (svg)
        PopRecsCorrelation(format='svg').perform(split_i21_missing_in_recs)
        for plot_path in ('./pop_recs_correlation.svg',
                          './pop_recs_correlation_no_zeros.svg'):
            self.assertTrue(os.path.isfile(plot_path))

        # A not existent folder with a specified file_name
        custom_metric = PopRecsCorrelation('test_pop_recs', file_name='pop_recs_custom_name')
        custom_metric.perform(split_i21_missing_in_recs)
        for plot_name in ('pop_recs_custom_name.png',
                          'pop_recs_custom_name_no_zeros.png'):
            self.assertTrue(os.path.isfile(os.path.join('test_pop_recs', plot_name)))

        # The now existent folder, same specified file_name but a different format (svg)
        custom_svg_metric = PopRecsCorrelation('test_pop_recs', file_name='pop_recs_custom_name',
                                               format='svg')
        custom_svg_metric.perform(split_i21_missing_in_recs)
        for plot_name in ('pop_recs_custom_name.svg',
                          'pop_recs_custom_name_no_zeros.svg'):
            self.assertTrue(os.path.isfile(os.path.join('test_pop_recs', plot_name)))