def __init__(self):
    """Initialise the feature-selection component with empty bookkeeping state."""
    super(BaseHeteroFeatureSelection, self).__init__()
    self.transfer_variable = HeteroFeatureSelectionTransferVariable()

    # Column bookkeeping
    self.cols = []  # indexes of the columns currently subject to selection
    self.left_cols = {}  # final result
    self.left_cols_index = []
    self.header, self.original_header = [], []
    self.schema = {}
    self.party_name = 'Base'

    # Per-filter meta / param collections
    self.filter_meta_list, self.filter_param_list = [], []

    # Possible previous model
    self.binning_model = None

    self.model_param = FeatureSelectionParam()

    # All possible meta objects; none is available until it is set later
    self.unique_meta = self.iv_value_meta = self.iv_percentile_meta = None
    self.variance_coe_meta = self.outlier_meta = None

    # Used to save each model's result
    self.results = []
def __init__(self):
    """Initialise the feature-selection component with fresh, empty state."""
    super(BaseHeteroFeatureSelection, self).__init__()
    self.transfer_variable = HeteroFeatureSelectionTransferVariable()

    # Properties of the selection in progress and the accumulated results
    self.curt_select_properties = SelectionProperties()
    self.completed_selection_result = CompletedSelectionResults()

    self.schema = None
    self.party_name = 'Base'

    # Possible previous model and statistics object; both unset at start
    self.binning_model = self.static_obj = None

    self.model_param = FeatureSelectionParam()
    self.meta_dicts = {}
def _run_filter(self, data_table):
    """Fit the percentage-value filter on *data_table* and check the result.

    The filter is configured with ``upper_pct = 0.2`` and is given selection
    properties listing every index of ``self.header``. After fitting, the
    remaining column names must equal the header entries at indexes 3 and 4.
    """
    param = FeatureSelectionParam()
    param.percentage_value_param.upper_pct = 0.2
    percentage_filter = get_filter(consts.PERCENTAGE_VALUE, param)

    # Seed the filter with properties that list every header index.
    properties = SelectionProperties()
    properties.set_header(self.header)
    properties.set_last_left_col_indexes(list(range(len(self.header))))
    properties.set_select_all_cols()
    percentage_filter.set_selection_properties(properties)

    fitted_filter = percentage_filter.fit(data_table, suffix='')
    remaining_names = fitted_filter.selection_properties.all_left_col_names

    expected_names = [self.header[idx] for idx in (3, 4)]
    self.assertEqual(remaining_names, expected_names)
def test_unique_logic(self):
    """Check that the unique-value filter keeps only the expected column.

    Builds the ``consts.UNIQUE_VALUE`` filter from a default
    ``FeatureSelectionParam``, hands it selection properties listing every
    index of ``self.header``, fits it on a generated table, and expects
    ``self.header[1]`` to be the only remaining column name.
    """
    data_table = self.gen_data(1000, 48)
    try:
        select_param = FeatureSelectionParam()
        filter_obj = get_filter(consts.UNIQUE_VALUE, select_param)

        select_properties = SelectionProperties()
        select_properties.set_header(self.header)
        select_properties.set_last_left_col_indexes(
            list(range(len(self.header))))
        select_properties.set_select_all_cols()
        filter_obj.set_selection_properties(select_properties)

        res_select_properties = filter_obj.fit(
            data_table, suffix='').selection_properties
        self.assertEqual(res_select_properties.all_left_col_names,
                         [self.header[1]])
    finally:
        # Destroy the table even when the filter or the assertion fails,
        # so a failing run does not leave it behind.
        data_table.destroy()
def test_left_logic(self):
    """Check the manual filter when columns to keep are given explicitly.

    The filter is configured with ``left_col_indexes = [0, 1]`` and
    ``left_col_names = ['3', '2']``; after fitting, the remaining column
    names must be ``['0', '1', '2', '3']``.
    """
    # NOTE(review): data_table is never destroyed in this test -- confirm
    # whether cleanup is handled elsewhere.
    data_table = self.gen_data(1000, 10, 48)

    param = FeatureSelectionParam()
    param.manually_param.left_col_indexes = [0, 1]
    param.manually_param.left_col_names = ['3', '2']
    manual_filter = get_filter(consts.MANUALLY_FILTER, param)

    # Seed the filter with properties that list every header index.
    properties = SelectionProperties()
    properties.set_header(self.header)
    properties.set_last_left_col_indexes(list(range(len(self.header))))
    properties.set_select_all_cols()
    manual_filter.set_selection_properties(properties)

    fitted_filter = manual_filter.fit(data_table, suffix='')
    remaining_names = fitted_filter.selection_properties.all_left_col_names

    expected_names = ['0', '1', '2', '3']
    self.assertEqual(remaining_names, expected_names)
def test_unique_logic(self):
    """Check the coefficient-of-variation threshold filter.

    The filter is built for ``consts.COEFFICIENT_OF_VARIATION_VALUE_THRES``
    with ``value_threshold = 0.1``. The remaining column names must equal
    the header entries whose value in ``self.coe_list`` is at least the
    threshold, and exactly 9 columns must remain.
    """
    data_table = self.gen_data(1000, 10, 48)
    try:
        select_param = FeatureSelectionParam()
        select_param.variance_coe_param.value_threshold = 0.1
        filter_obj = get_filter(
            consts.COEFFICIENT_OF_VARIATION_VALUE_THRES, select_param)

        select_properties = SelectionProperties()
        select_properties.set_header(self.header)
        select_properties.set_last_left_col_indexes(
            list(range(len(self.header))))
        select_properties.set_select_all_cols()
        filter_obj.set_selection_properties(select_properties)

        res_select_properties = filter_obj.fit(
            data_table, suffix='').selection_properties

        threshold = select_param.variance_coe_param.value_threshold
        result = [self.header[idx]
                  for idx, x in enumerate(self.coe_list)
                  if x >= threshold]
        self.assertEqual(res_select_properties.all_left_col_names, result)
        self.assertEqual(len(res_select_properties.all_left_col_names), 9)
    finally:
        # Destroy the table even when the filter or an assertion fails,
        # so a failing run does not leave it behind.
        data_table.destroy()
def test_filter_logic(self):
    """Check the outlier-columns filter.

    The filter is built for ``consts.OUTLIER_COLS`` with
    ``percentile = 0.9`` and ``upper_threshold = 99``. After fitting on a
    generated table, the remaining column names must be the first 9 entries
    of ``self.header``.
    """
    data_table = self.gen_data(1000, 10, 48)
    try:
        select_param = FeatureSelectionParam()
        select_param.outlier_param.percentile = 0.9
        select_param.outlier_param.upper_threshold = 99
        filter_obj = get_filter(consts.OUTLIER_COLS, select_param)

        select_properties = SelectionProperties()
        select_properties.set_header(self.header)
        select_properties.set_last_left_col_indexes(
            list(range(len(self.header))))
        select_properties.set_select_all_cols()
        filter_obj.set_selection_properties(select_properties)

        res_select_properties = filter_obj.fit(
            data_table, suffix='').selection_properties

        self.assertEqual(res_select_properties.all_left_col_names,
                         [self.header[x] for x in range(9)])
        self.assertEqual(len(res_select_properties.all_left_col_names), 9)
    finally:
        # Destroy the table even when the filter or an assertion fails,
        # so a failing run does not leave it behind.
        data_table.destroy()