def test2_no_pruning_correlation(self):
    """Check ``hierarchy_based_filter`` with the correlation metric and pruning off.

    Builds boolean type features for four DBpedia city resources with
    ``direct_type_generator`` and compares the filtered frame against the
    stored expected CSV (column/row order is ignored via ``check_like``).
    """
    df = pd.DataFrame({
        'entities': ['Paris', 'Buenos Aires', 'Mannheim', "München"],
        'link': ['http://dbpedia.org/resource/Paris', 'http://dbpedia.org/resource/Buenos_Aires',
                 'http://dbpedia.org/resource/Mannheim', 'http://dbpedia.org/resource/Munich']
        })

    # Forward slashes on purpose: in a non-raw string literal "\f" is a form
    # feed and "\d" an invalid escape, so the backslash-separated path never
    # pointed at the fixture. "/" also works on Windows.
    expected_df = pd.read_csv("test/data/feature_selection/hierarchy_based_test2_expected.csv")

    input_df = direct_type_generator(df, ["link"], regex_filter=['A'], result_type="boolean", bundled_mode=True, hierarchy=True)

    # NOTE(review): `input_DG` is not assigned anywhere in this method, so it
    # must come from an enclosing/module scope -- otherwise this line raises
    # NameError. Presumably it should be the hierarchy graph produced by the
    # generator call above; confirm and bind it explicitly.
    output_df = hierarchy_based_filter(input_df, "link", threshold=0.99, G=input_DG, metric="correlation", pruning=False)

    pd.testing.assert_frame_equal(output_df, expected_df, check_like=True)
    def test8_nan(self):
        """Filter the ``hill_climbing_test3`` fixture using info gain with pruning.

        The hierarchy is a hand-built directed graph over eight ``http://``
        labels; the result must match the stored expected CSV (ordering of
        rows/columns is ignored via ``check_like``).
        NOTE(review): the fixture presumably contains NaN values, as the test
        name suggests -- verify against the CSV.
        """
        features = pd.read_csv("test/data/feature_selection/hill_climbing_test3_input.csv")

        hierarchy_nodes = [
            'http://chancellor',
            'http://president',
            'http://European_politician',
            'http://head_of_state',
            'http://politician',
            'http://man',
            'http://person',
            'http://being',
        ]
        # Each pair is one directed edge, in the order handed to networkx.
        hierarchy_edges = [
            ('http://chancellor', 'http://politician'),
            ('http://president', 'http://politician'),
            ('http://chancellor', 'http://head_of_state'),
            ('http://president', 'http://head_of_state'),
            ('http://head_of_state', 'http://person'),
            ('http://European_politician', 'http://politician'),
            ('http://politician', 'http://person'),
            ('http://man', 'http://person'),
            ('http://person', 'http://being'),
        ]
        graph = nx.DiGraph()
        graph.add_nodes_from(hierarchy_nodes)
        graph.add_edges_from(hierarchy_edges)

        expected = pd.read_csv("test/data/feature_selection/hierarchy_based_test8_expected.csv")

        result = hierarchy_based_filter(
            features,
            'class',
            G=graph,
            threshold=0.99,
            metric="info_gain",
            pruning=True,
        )

        pd.testing.assert_frame_equal(result, expected, check_like=True)
    def test9_callable_function(self):
        """Pass a user-defined callable as ``metric`` to ``hierarchy_based_filter``.

        Uses the ``hill_climbing_test1`` fixture and a hand-built directed
        graph over eight ``http://`` labels; the result must match the stored
        expected CSV (ordering of rows/columns is ignored via ``check_like``).
        """
        features = pd.read_csv("test/data/feature_selection/hill_climbing_test1_input.csv")

        hierarchy_nodes = [
            'http://chancellor',
            'http://president',
            'http://European_politician',
            'http://head_of_state',
            'http://politician',
            'http://man',
            'http://person',
            'http://being',
        ]
        # Each pair is one directed edge, in the order handed to networkx.
        hierarchy_edges = [
            ('http://chancellor', 'http://politician'),
            ('http://president', 'http://politician'),
            ('http://chancellor', 'http://head_of_state'),
            ('http://president', 'http://head_of_state'),
            ('http://head_of_state', 'http://person'),
            ('http://European_politician', 'http://politician'),
            ('http://politician', 'http://person'),
            ('http://man', 'http://person'),
            ('http://person', 'http://being'),
        ]
        graph = nx.DiGraph()
        graph.add_nodes_from(hierarchy_nodes)
        graph.add_edges_from(hierarchy_edges)

        # Parameter names are kept as-is in case the filter passes them by keyword.
        def fake_metric(df_from_hierarchy, l, d):
            """Return the fraction of rows where columns ``l`` and ``d`` are equal."""
            same = df_from_hierarchy[l] == df_from_hierarchy[d]
            matches = same.sum()
            return matches/len(same)

        expected = pd.read_csv("test/data/feature_selection/hierarchy_based_test9_expected.csv")

        result = hierarchy_based_filter(
            features,
            'uri_bool_http://class',
            G=graph,
            threshold=0.99,
            metric=fake_metric,
            pruning=True,
        )

        pd.testing.assert_frame_equal(result, expected, check_like=True)
 def transform(self, X, y=None):
     """Run ``hierarchy_based_filter`` on ``X`` with this instance's settings.

     Args:
         X: Data handed to ``hierarchy_based_filter`` as its first argument.
         y: Not used by this method. Presumably accepted for scikit-learn
             transformer compatibility -- confirm against the class.

     Returns:
         The value returned by ``hierarchy_based_filter``.
     """
     # Arguments are positional and must stay in this order.
     return hierarchy_based_filter(
         X,
         self.label_column,
         self.G,
         self.threshold,
         self.metric,
         self.pruning,
         self.all_remove,
         self.progress,
     )