Exemple #1
0
    def test_cife(self):
        """Run the fraction selector with the 'cife' criterion on generated data.

        After fitting, scoring and plotting, the selection order must be a
        list with one entry per cost, and the costs of the selected variables
        must add up to the sum of all generated costs.
        """
        # Given
        classifier = LogisticRegression()

        # When
        generator = MatrixGenerator()
        X, y, costs = generator.generate(
            n_rows=1000,
            n_basic_cols=5,
            noise_sigmas=[0.1, 0.5],
            seed=2,
        )

        selector = FractionVariableSelector()
        selector.fit(
            data=X,
            target_variable=y,
            costs=costs,
            r=1,
            j_criterion_func='cife',
            beta=0.5,
        )
        selector.score(model=classifier, scoring_function=roc_auc_score)
        selector.plot_scores(
            compare_no_cost_method=True,
            model=classifier,
            annotate=True,
        )

        # Then
        selection_order = selector.variables_selected_order
        self.assertIsInstance(selection_order, list)
        self.assertEqual(len(selection_order), len(costs))

        selected_cost_total = sum(selector.cost_variables_selected_order)
        self.assertAlmostEqual(sum(costs), selected_cost_total)
Exemple #2
0
    def test_theoretical_output(self):
        """With 'mim' and equal unit costs, column index 2 is selected first."""
        # One row per observation, one column per candidate variable.
        features = np.array([
            [0, 1, 0],
            [0, 1, 0],
            [0, 1, 2],
            [0, 1, 3],
            [1, 1, 5],
        ])
        labels = np.array([0, 0, 0, 0, 1])

        selector = FractionVariableSelector()
        selector.fit(
            data=features,
            target_variable=labels,
            costs=[1, 1, 1],
            r=1,
            j_criterion_func='mim',
        )

        first_selected = selector.variables_selected_order[0]
        self.assertEqual(first_selected, 2)
Exemple #3
0
    def test_pandas_input(self):
        """``fit`` accepts a DataFrame together with a dict of per-column costs.

        The resulting selection order must be a list with one entry per cost.
        """
        # Draw the feature values first, then the target, from the global RNG.
        raw_values = np.random.randint(0, 10, (100, 3))
        frame = pd.DataFrame(raw_values, columns=['AA', 'BB', 'CC'])
        labels = np.random.randint(0, 2, 100)
        cost_by_column = {'AA': 10, 'BB': 1, 'CC': 1.5}

        selector = FractionVariableSelector()
        selector.fit(
            data=frame,
            target_variable=labels,
            costs=cost_by_column,
            r=1,
            j_criterion_func='mim',
        )

        selection_order = selector.variables_selected_order
        self.assertIsInstance(selection_order, list)
        self.assertEqual(len(selection_order), len(cost_by_column))
Exemple #4
0
    def test_numpy_input(self):
        """``fit`` accepts a NumPy matrix together with a plain list of costs.

        The resulting selection order must be a list with one entry per cost.
        """
        # Draw the feature values first, then the target, from the global RNG.
        features = np.random.randint(0, 10, (100, 10))
        labels = np.random.randint(0, 10, 100)
        cost_list = [
            1.76, 0.19, -0.36, 0.96, 0.41,
            0.17, -0.36, 0.75, 0.79, -1.38,
        ]

        selector = FractionVariableSelector()
        selector.fit(
            data=features,
            target_variable=labels,
            costs=cost_list,
            r=1,
            j_criterion_func='mim',
        )

        selection_order = selector.variables_selected_order
        self.assertIsInstance(selection_order, list)
        self.assertEqual(len(selection_order), len(cost_list))
Exemple #5
0
    def test_stop_budget(self):
        """With ``stop_budget=True`` the selected costs stay below the budget.

        Also checks that no more than two variables are selected.
        """
        budget = 2

        # Draw the feature values first, then the target, from the global RNG.
        raw_values = np.random.randint(0, 10, (100, 3))
        frame = pd.DataFrame(raw_values, columns=['AA', 'BB', 'CC'])
        labels = pd.Series(np.random.randint(0, 2, 100))
        cost_by_column = {'AA': 2, 'BB': 1.1, 'CC': 1.5}

        selector = FractionVariableSelector()
        selector.fit(
            data=frame,
            target_variable=labels,
            costs=cost_by_column,
            r=1,
            j_criterion_func='mim',
            budget=budget,
            stop_budget=True,
        )

        # The total selected cost must be strictly below the budget.
        spent = sum(selector.cost_variables_selected_order)
        self.assertGreater(budget, spent)

        n_selected = len(selector.variables_selected_order)
        self.assertGreaterEqual(2, n_selected)
Exemple #6
0
    def test_score(self):
        """After ``score``, ``total_scores`` holds one entry per cost."""
        # Draw the feature values first, then the target, from the global RNG.
        features = np.random.randint(0, 10, (100, 10))
        labels = np.random.randint(0, 2, 100)
        cost_list = [
            1.76, 0.19, -0.36, 0.96, 0.41,
            0.17, -0.36, 0.75, 0.79, -1.38,
        ]

        selector = FractionVariableSelector()
        selector.fit(
            data=features,
            target_variable=labels,
            costs=cost_list,
            r=1,
            j_criterion_func='mim',
        )
        selector.score(LogisticRegression(), scoring_function=roc_auc_score)

        self.assertEqual(len(selector.total_scores), len(cost_list))
Exemple #7
0
    def test_regard_to_cost_is_better_cife(self):
        """Cost-aware 'cife' selection beats the no-cost baseline at least half the time.

        For every index ``i`` of the no-cost results, the entry of
        ``total_costs`` nearest to ``no_cost_total_costs[i]`` is located and
        the score at that position is compared with ``no_cost_total_scores[i]``.
        The test passes when the cost-aware score is strictly higher in at
        least 50% of those comparisons.
        """
        # Given
        n_cols = 3
        n_rows = 1000
        model = LogisticRegression()
        sigmas = [1, 10, 100]

        # When
        mg = MatrixGenerator()
        X, y, costs = mg.generate(n_rows=n_rows,
                                  n_basic_cols=n_cols,
                                  basic_cost=1,
                                  noise_sigmas=sigmas,
                                  seed=42)
        r = 0.8

        fvs = FractionVariableSelector()
        fvs.fit(data=X,
                target_variable=y,
                costs=costs,
                r=r,
                j_criterion_func='cife',
                beta=0.05)
        fvs.score(model=model, scoring_function=roc_auc_score)
        fvs.plot_scores(compare_no_cost_method=True, model=model)

        def find_nearest_idx(values, target):
            # Index of the element of `values` closest to `target`.
            # (Parameter renamed from `list` to stop shadowing the builtin.)
            return np.abs(np.asarray(values) - target).argmin()

        def cost_aware_wins(i):
            # Compare the cost-aware score at the nearest cost against
            # the no-cost score at index `i`.
            nearest = find_nearest_idx(fvs.total_costs,
                                       fvs.no_cost_total_costs[i])
            return bool(fvs.total_scores[nearest] > fvs.no_cost_total_scores[i])

        when_better = [cost_aware_wins(i) for i in range(fvs.data.shape[1])]

        # Then
        # assertGreaterEqual reports the actual ratio when the check fails.
        win_ratio = sum(when_better) / len(when_better)
        self.assertGreaterEqual(win_ratio, 0.5)