def test_simple_input_mifs_no_beta_provided(self):
        """Check that a warning is emitted when ``mifs`` is used without ``beta``.

        Random integer data is passed to ``no_cost_find_best_feature`` together
        with a list of already selected columns, and no ``beta`` keyword is
        supplied. The call is expected to raise a ``Warning`` (sub)class.
        """
        n_samples, n_features = 100, 10
        features = np.random.randint(0, 10, (n_samples, n_features))
        labels = np.random.randint(0, 10, n_samples)
        already_selected = [3, 4, 5]
        still_available = [0, 1, 2, 6, 7, 8, 9]
        feature_costs = [
            1.76, 0.19, -0.36, 0.96, 0.41, 0.17, -0.36, 0.75, 0.79, -1.38
        ]

        # ``beta`` is deliberately left out: that omission is what is tested.
        with self.assertWarns(Warning):
            no_cost_find_best_feature(
                j_criterion_func=mifs,
                data=features,
                target_variable=labels,
                possible_variables_index=still_available,
                costs=feature_costs,
                prev_variables_index=already_selected)
Ejemplo n.º 2
0
    def fit(self,
            data,
            target_variable,
            costs,
            j_criterion_func='cife',
            **kwargs):
        """Rank every feature column by repeated greedy selection.

        The arguments are forwarded unchanged to the parent ``fit``
        (presumably it stores them on ``self``; confirm against the base
        class). Afterwards ``no_cost_find_best_feature`` is called once per
        column of ``self.data``; each call receives the features picked so
        far and the ones still available.

        Results are written to:
            ``self.variables_selected_order``: feature indices, in the order
                they were picked.
            ``self.cost_variables_selected_order``: the cost returned for
                each picked feature, in the same order.
        """
        super().fit(data, target_variable, costs, j_criterion_func, **kwargs)

        chosen = set()
        remaining = set(range(self.data.shape[1]))

        self.variables_selected_order = []
        self.cost_variables_selected_order = []

        # Keep going until every column has been ranked.
        while remaining:
            best, _, best_cost = no_cost_find_best_feature(
                j_criterion_func=self.j_criterion_func,
                data=self.data,
                target_variable=self.target_variable,
                prev_variables_index=list(chosen),
                possible_variables_index=list(remaining),
                costs=self.costs)
            chosen.add(best)
            self.variables_selected_order.append(best)
            self.cost_variables_selected_order.append(best_cost)
            # Build a fresh set without the picked feature for the next round.
            remaining = remaining - {best}
Ejemplo n.º 3
0
    def fit(self,
            data,
            target_variable,
            costs,
            j_criterion_func='cife',
            **kwargs):
        """Greedily pick features until ``self.number_of_features`` are chosen.

        The arguments are forwarded unchanged to the parent ``fit``
        (presumably it stores them on ``self``; confirm against the base
        class). At most one selection round per column of ``self.data`` is
        run, with progress reported through ``tqdm``; the loop stops early as
        soon as the number of chosen features equals
        ``self.number_of_features``.

        Results are written to:
            ``self.variables_selected_order``: feature indices, in the order
                they were picked.
            ``self.cost_variables_selected_order``: the cost returned for
                each picked feature, in the same order.
        """
        super().fit(data, target_variable, costs, j_criterion_func, **kwargs)
        chosen = set()
        remaining = set(range(self.data.shape[1]))

        self.variables_selected_order = []
        self.cost_variables_selected_order = []

        rounds = tqdm(range(len(remaining)), desc='Scoring No-cost Features')
        for _round in rounds:
            best, _, best_cost = no_cost_find_best_feature(
                j_criterion_func=self.j_criterion_func,
                data=self.data,
                target_variable=self.target_variable,
                prev_variables_index=list(chosen),
                possible_variables_index=list(remaining),
                costs=self.costs)
            chosen.add(best)
            self.variables_selected_order.append(best)
            self.cost_variables_selected_order.append(best_cost)
            # Build a fresh set without the picked feature for the next round.
            remaining = remaining - {best}
            # Stop once the requested number of features has been reached.
            if len(chosen) == self.number_of_features:
                break
Ejemplo n.º 4
0
    def _fit_no_cost(self, stop_budget=False, **kwargs):
        """Greedily select features, optionally stopping at a cost budget.

        Runs up to ``self.number_of_features`` selection rounds. Each round
        asks ``no_cost_find_best_feature`` for the next feature given the
        ones already picked, passing ``self.beta`` along.

        Args:
            stop_budget: When exactly ``True``, the loop ends *before*
                recording a feature whose cost would bring the accumulated
                cost to or above ``self.budget``. A falsy ``self.budget`` is
                replaced by infinity, i.e. no limit.
            **kwargs: Accepted but not used by this method.

        Results are written to:
            ``self.no_cost_variables_selected_order``: picked feature
                indices, in selection order.
            ``self.no_cost_cost_variables_selected_order``: the cost returned
                for each picked feature, in the same order.
        """
        chosen = set()
        remaining = set(range(self.data.shape[1]))

        self.no_cost_variables_selected_order = []
        self.no_cost_cost_variables_selected_order = []

        rounds = tqdm(range(self.number_of_features),
                      desc='Selecting No-cost Features')
        for _round in rounds:
            best, _, best_cost = no_cost_find_best_feature(
                j_criterion_func=self.j_criterion_func,
                data=self.data,
                target_variable=self.target_variable,
                prev_variables_index=list(chosen),
                possible_variables_index=list(remaining),
                costs=self.costs,
                beta=self.beta)
            chosen.add(best)

            # The budget is only consulted when the caller asked for it.
            if stop_budget is True:
                spent_with_candidate = (
                    sum(self.no_cost_cost_variables_selected_order) +
                    best_cost)
                limit = self.budget or np.inf
                if spent_with_candidate >= limit:
                    break

            self.no_cost_variables_selected_order.append(best)
            self.no_cost_cost_variables_selected_order.append(best_cost)
            # Build a fresh set without the picked feature for the next round.
            remaining = remaining - {best}
            if len(chosen) == self.number_of_features:
                break
    def test_simple_input_mim(self):
        """Check the result types returned for the ``mim`` criterion.

        Random integer data is passed to ``no_cost_find_best_feature`` with
        no previously selected features. The three returned values are
        expected to be an ``int`` (selected feature), a ``float`` (criterion
        value) and a ``float`` (cost).
        """
        n_samples, n_features = 100, 10
        features = np.random.randint(0, 10, (n_samples, n_features))
        labels = np.random.randint(0, 10, n_samples)
        still_available = [0, 1, 2, 6, 7, 8, 9]
        feature_costs = [
            1.76, 0.19, -0.36, 0.96, 0.41, 0.17, -0.36, 0.75, 0.79, -1.38
        ]

        result = no_cost_find_best_feature(
            j_criterion_func=mim,
            data=features,
            target_variable=labels,
            possible_variables_index=still_available,
            costs=feature_costs)
        picked, score, picked_cost = result

        self.assertIsInstance(picked, int)
        self.assertIsInstance(score, float)
        self.assertIsInstance(picked_cost, float)
Ejemplo n.º 6
0
    def _no_cost_scoreCV(self, **kwargs):
        """Rank all features, then cross-validate every prefix of the ranking.

        Phase 1 repeatedly calls ``no_cost_find_best_feature`` (passing
        ``self.beta``) until every column of ``self.data`` has been ranked.
        Phase 2 walks the ranking and, for the first 1, 2, ... features,
        records the mean of ``cross_val_score`` for ``self.model`` together
        with the accumulated cost of those features.

        Args:
            **kwargs: Accepted but not used by this method.

        Results are written to:
            ``self.no_cost_variables_selected_order``: ranked feature indices.
            ``self.no_cost_cost_variables_selected_order``: the cost returned
                for each ranked feature.
            ``self.no_cost_total_scores``: mean CV score per ranking prefix.
            ``self.no_cost_total_costs``: cumulative ``self.costs`` per
                ranking prefix.
        """
        # Phase 1: build the full feature ranking.
        chosen = set()
        remaining = set(range(self.data.shape[1]))

        self.no_cost_variables_selected_order = []
        self.no_cost_cost_variables_selected_order = []

        while remaining:
            best, _, best_cost = no_cost_find_best_feature(
                j_criterion_func=self.j_criterion_func,
                data=self.data,
                target_variable=self.target_variable,
                prev_variables_index=list(chosen),
                possible_variables_index=list(remaining),
                costs=self.costs,
                beta=self.beta)
            chosen.add(best)
            self.no_cost_variables_selected_order.append(best)
            self.no_cost_cost_variables_selected_order.append(best_cost)
            # Build a fresh set without the picked feature for the next round.
            remaining = remaining - {best}

        # Phase 2: score each growing prefix of the ranking.
        running_cost = 0
        self.no_cost_total_scores = []
        self.no_cost_total_costs = []

        ranking = self.no_cost_variables_selected_order
        for n_used, feature in enumerate(ranking, start=1):
            prefix = ranking[:n_used]
            fold_scores = cross_val_score(
                estimator=self.model,
                X=self.data[:, prefix],
                y=self.target_variable,
                cv=self.cv_kwargs.get('cv'),
                scoring=self.cv_kwargs.get('scoring'),
                n_jobs=self.cv_kwargs.get('n_jobs'))
            mean_score = fold_scores.mean()
            running_cost += self.costs[feature]
            self.no_cost_total_scores.append(mean_score)
            self.no_cost_total_costs.append(running_cost)