def test_simple_input_mifs_no_beta_provided(self):
    """Check that using ``mifs`` without passing ``beta`` emits a warning."""
    # Random 100x10 integer feature matrix and a 100-element integer target.
    features = np.random.randint(0, 10, (100, 10))
    labels = np.random.randint(0, 10, 100)

    call_kwargs = {
        'j_criterion_func': mifs,
        'data': features,
        'target_variable': labels,
        'possible_variables_index': [0, 1, 2, 6, 7, 8, 9],
        'costs': [1.76, 0.19, -0.36, 0.96, 0.41,
                  0.17, -0.36, 0.75, 0.79, -1.38],
        'prev_variables_index': [3, 4, 5],
        # NOTE: ``beta`` is intentionally left out; that is the case under test.
    }

    with self.assertWarns(Warning):
        no_cost_find_best_feature(**call_kwargs)
def fit(self, data, target_variable, costs, j_criterion_func='cife', **kwargs):
    """Rank every column of ``self.data`` by greedy forward selection.

    After delegating to the parent ``fit``, the method repeatedly asks
    ``no_cost_find_best_feature`` for the best remaining column until none
    are left.

    Results are stored on the instance:
        variables_selected_order: column indices in the order they were picked.
        cost_variables_selected_order: the cost returned for each pick.

    All arguments are forwarded unchanged to the parent ``fit``.
    """
    # Presumably the parent populates self.data, self.target_variable,
    # self.costs and self.j_criterion_func -- confirm against the base class.
    super().fit(data, target_variable, costs, j_criterion_func, **kwargs)

    picked = set()
    remaining = set(range(self.data.shape[1]))
    self.variables_selected_order = []
    self.cost_variables_selected_order = []

    while remaining:
        best, _, best_cost = no_cost_find_best_feature(
            j_criterion_func=self.j_criterion_func,
            data=self.data,
            target_variable=self.target_variable,
            prev_variables_index=list(picked),
            possible_variables_index=list(remaining),
            costs=self.costs)

        picked.add(best)
        self.variables_selected_order.append(best)
        self.cost_variables_selected_order.append(best_cost)
        # Build a fresh candidate set without the feature just chosen.
        remaining = remaining - {best}
def fit(self, data, target_variable, costs, j_criterion_func='cife', **kwargs):
    """Greedily pick features, stopping at ``self.number_of_features`` picks.

    After delegating to the parent ``fit``, the method asks
    ``no_cost_find_best_feature`` for the best remaining column once per
    iteration, showing progress through ``tqdm``. The loop runs at most once
    per column and exits early as soon as the number of picked features
    equals ``self.number_of_features``.

    Results are stored on the instance:
        variables_selected_order: column indices in the order they were picked.
        cost_variables_selected_order: the cost returned for each pick.

    All arguments are forwarded unchanged to the parent ``fit``.
    """
    # Presumably the parent populates self.data, self.target_variable,
    # self.costs and self.j_criterion_func -- confirm against the base class.
    super().fit(data, target_variable, costs, j_criterion_func, **kwargs)

    picked = set()
    remaining = set(range(self.data.shape[1]))
    self.variables_selected_order = []
    self.cost_variables_selected_order = []

    # The iteration count is fixed up front from the initial candidate count.
    max_rounds = len(remaining)
    for _ in tqdm(range(max_rounds), desc='Scoring No-cost Features'):
        best, _, best_cost = no_cost_find_best_feature(
            j_criterion_func=self.j_criterion_func,
            data=self.data,
            target_variable=self.target_variable,
            prev_variables_index=list(picked),
            possible_variables_index=list(remaining),
            costs=self.costs)

        picked.add(best)
        self.variables_selected_order.append(best)
        self.cost_variables_selected_order.append(best_cost)
        remaining = remaining - {best}

        if len(picked) == self.number_of_features:
            break
def _fit_no_cost(self, stop_budget=False, **kwargs):
    """Select up to ``self.number_of_features`` features via ``no_cost_find_best_feature``.

    Args:
        stop_budget: when exactly ``True``, selection stops *before*
            recording a feature if the cost already recorded plus that
            feature's cost would be greater than or equal to ``self.budget``.
        **kwargs: accepted but not used by this method.

    Results are stored on the instance:
        no_cost_variables_selected_order: picked column indices, in order.
        no_cost_cost_variables_selected_order: the cost returned for each pick.
    """
    picked = set()
    remaining = set(range(self.data.shape[1]))
    self.no_cost_variables_selected_order = []
    self.no_cost_cost_variables_selected_order = []

    for _ in tqdm(range(self.number_of_features),
                  desc='Selecting No-cost Features'):
        best, _, best_cost = no_cost_find_best_feature(
            j_criterion_func=self.j_criterion_func,
            data=self.data,
            target_variable=self.target_variable,
            prev_variables_index=list(picked),
            possible_variables_index=list(remaining),
            costs=self.costs,
            beta=self.beta)
        picked.add(best)

        if stop_budget is True:
            spent_with_candidate = (
                sum(self.no_cost_cost_variables_selected_order) + best_cost)
            # A falsy budget (None or 0) is treated as "no limit".
            limit = self.budget or np.inf
            if spent_with_candidate >= limit:
                # The candidate is dropped: it is not recorded in the results.
                break

        self.no_cost_variables_selected_order.append(best)
        self.no_cost_cost_variables_selected_order.append(best_cost)
        remaining = remaining - {best}

        if len(picked) == self.number_of_features:
            break
def test_simple_input_mim(self):
    """Check the return types of ``no_cost_find_best_feature`` when used with ``mim``."""
    # Random 100x10 integer feature matrix and a 100-element integer target.
    features = np.random.randint(0, 10, (100, 10))
    labels = np.random.randint(0, 10, 100)
    candidate_columns = [0, 1, 2, 6, 7, 8, 9]
    feature_costs = [1.76, 0.19, -0.36, 0.96, 0.41,
                     0.17, -0.36, 0.75, 0.79, -1.38]

    best_feature, best_score, best_cost = no_cost_find_best_feature(
        j_criterion_func=mim,
        data=features,
        target_variable=labels,
        possible_variables_index=candidate_columns,
        costs=feature_costs)

    # Expected result triple: (feature index, criterion value, cost).
    expectations = (
        (best_feature, int),
        (best_score, float),
        (best_cost, float),
    )
    for value, expected_type in expectations:
        self.assertIsInstance(value, expected_type)
def _no_cost_scoreCV(self, **kwargs):
    """Rank all features, then cross-validate each growing prefix of the ranking.

    Step 1 orders every column of ``self.data`` by repeatedly calling
    ``no_cost_find_best_feature``. Step 2 evaluates ``self.model`` with
    ``cross_val_score`` on the first 1, 2, ... ranked features and records
    the mean score together with the cumulative cost of those features.

    Results are stored on the instance:
        no_cost_variables_selected_order: column indices in ranked order.
        no_cost_cost_variables_selected_order: the cost returned for each pick.
        no_cost_total_scores: mean CV score for each prefix of the ranking.
        no_cost_total_costs: running sum of ``self.costs`` over each prefix.

    ``**kwargs`` is accepted but not used by this method.
    """
    # Rank variables with NoCostVariableSelector
    picked = set()
    remaining = set(range(self.data.shape[1]))
    self.no_cost_variables_selected_order = []
    self.no_cost_cost_variables_selected_order = []

    while remaining:
        best, _, best_cost = no_cost_find_best_feature(
            j_criterion_func=self.j_criterion_func,
            data=self.data,
            target_variable=self.target_variable,
            prev_variables_index=list(picked),
            possible_variables_index=list(remaining),
            costs=self.costs,
            beta=self.beta)
        picked.add(best)
        self.no_cost_variables_selected_order.append(best)
        self.no_cost_cost_variables_selected_order.append(best_cost)
        remaining = remaining - {best}

    # Score each prefix of the ranking and track its accumulated cost.
    self.no_cost_total_scores = []
    self.no_cost_total_costs = []
    running_cost = 0
    ranking = self.no_cost_variables_selected_order

    for prefix_len, feature in enumerate(ranking, start=1):
        prefix = ranking[:prefix_len]
        mean_score = cross_val_score(
            estimator=self.model,
            X=self.data[:, prefix],
            y=self.target_variable,
            cv=self.cv_kwargs.get('cv'),
            scoring=self.cv_kwargs.get('scoring'),
            n_jobs=self.cv_kwargs.get('n_jobs')).mean()
        running_cost += self.costs[feature]
        self.no_cost_total_scores.append(mean_score)
        self.no_cost_total_costs.append(running_cost)