Ejemplo n.º 1
0
    def __init__(self):
        """Create the selector with empty bookkeeping and no previous model.

        Every container starts empty and every optional meta/model slot
        starts as ``None``; nothing is read from outside the instance.
        """
        super(BaseHeteroFeatureSelection, self).__init__()
        self.transfer_variable = HeteroFeatureSelectionTransferVariable()

        # Column bookkeeping: ``cols`` holds the column indexes currently up
        # for selection, ``left_cols`` is the final result.
        self.cols, self.left_cols, self.left_cols_index = [], {}, []

        # Header / schema information.
        self.header, self.original_header, self.schema = [], [], {}
        self.party_name = 'Base'

        # Per-filter meta and param entries.
        self.filter_meta_list, self.filter_param_list = [], []

        # A previous (binning) model may be supplied later.
        self.binning_model = None
        self.model_param = FeatureSelectionParam()

        # Every meta that may be produced; none exists yet.
        self.unique_meta = self.iv_value_meta = self.iv_percentile_meta = \
            self.variance_coe_meta = self.outlier_meta = None

        # Collects the result of each model.
        self.results = []
Ejemplo n.º 2
0
    def __init__(self):
        """Create the selector with fresh selection state and no prior models.

        The current selection properties and the completed-selection record
        are new, empty objects; schema and the optional previous models start
        as ``None``.
        """
        super(BaseHeteroFeatureSelection, self).__init__()
        self.transfer_variable = HeteroFeatureSelectionTransferVariable()

        # State of the selection in progress, and the record of finished ones.
        self.curt_select_properties = SelectionProperties()
        self.completed_selection_result = CompletedSelectionResults()

        self.schema = None
        self.party_name = 'Base'

        # Models from a possible previous step; none are attached yet.
        self.binning_model = self.static_obj = None

        self.model_param = FeatureSelectionParam()
        self.meta_dicts = {}
Ejemplo n.º 3
0
 def _run_filter(self, data_table):
     """Fit a percentage-value filter on ``data_table`` and assert the result.

     The filter is configured with ``upper_pct = 0.2`` and starts with every
     header column selected; the columns at header positions 3 and 4 are
     expected to be the ones left afterwards.
     """
     param = FeatureSelectionParam()
     param.percentage_value_param.upper_pct = 0.2
     pct_filter = get_filter(consts.PERCENTAGE_VALUE, param)

     # Begin with all header columns both "left" and selected.
     properties = SelectionProperties()
     properties.set_header(self.header)
     properties.set_last_left_col_indexes(list(range(len(self.header))))
     properties.set_select_all_cols()
     pct_filter.set_selection_properties(properties)

     fitted = pct_filter.fit(data_table, suffix='')
     remaining_names = fitted.selection_properties.all_left_col_names
     expected_names = [self.header[position] for position in (3, 4)]
     self.assertEqual(remaining_names, expected_names)
Ejemplo n.º 4
0
 def test_unique_logic(self):
     """Check that the unique-value filter keeps only ``self.header[1]``.

     A default ``FeatureSelectionParam`` is used and every header column is
     selected before fitting. The generated table is destroyed even when the
     fit or the assertion fails, so a failing run does not leak it.
     """
     data_table = self.gen_data(1000, 48)
     try:
         select_param = FeatureSelectionParam()
         filter_obj = get_filter(consts.UNIQUE_VALUE, select_param)

         # Begin with all header columns both "left" and selected.
         select_properties = SelectionProperties()
         select_properties.set_header(self.header)
         select_properties.set_last_left_col_indexes(
             list(range(len(self.header))))
         select_properties.set_select_all_cols()
         filter_obj.set_selection_properties(select_properties)

         res_select_properties = filter_obj.fit(
             data_table, suffix='').selection_properties
         self.assertEqual(res_select_properties.all_left_col_names,
                          [self.header[1]])
     finally:
         # Runs before tearDown, so the table is released on failure too.
         data_table.destroy()
Ejemplo n.º 5
0
 def test_left_logic(self):
     """Check the manual filter when left columns are given two ways.

     ``left_col_indexes`` is ``[0, 1]`` and ``left_col_names`` is
     ``['3', '2']``; the columns left after fitting are expected to be
     ``['0', '1', '2', '3']``.
     """
     table = self.gen_data(1000, 10, 48)

     param = FeatureSelectionParam()
     manual = param.manually_param
     manual.left_col_indexes = [0, 1]
     manual.left_col_names = ['3', '2']
     manual_filter = get_filter(consts.MANUALLY_FILTER, param)

     # Begin with all header columns both "left" and selected.
     properties = SelectionProperties()
     properties.set_header(self.header)
     properties.set_last_left_col_indexes(list(range(len(self.header))))
     properties.set_select_all_cols()
     manual_filter.set_selection_properties(properties)

     fitted = manual_filter.fit(table, suffix='')
     expected_names = ['0', '1', '2', '3']
     self.assertEqual(fitted.selection_properties.all_left_col_names,
                      expected_names)
Ejemplo n.º 6
0
    def test_unique_logic(self):
        """Check the coefficient-of-variation threshold filter.

        NOTE: despite the method name, this exercises
        ``consts.COEFFICIENT_OF_VARIATION_VALUE_THRES`` with a
        ``value_threshold`` of 0.1. The columns expected to remain are those
        whose entry in ``self.coe_list`` is at least the threshold, and
        there must be exactly 9 of them.

        The generated table is destroyed even when the fit or an assertion
        fails, so a failing run does not leak it.
        """
        data_table = self.gen_data(1000, 10, 48)
        try:
            select_param = FeatureSelectionParam()
            select_param.variance_coe_param.value_threshold = 0.1
            filter_obj = get_filter(
                consts.COEFFICIENT_OF_VARIATION_VALUE_THRES, select_param)

            # Begin with all header columns both "left" and selected.
            select_properties = SelectionProperties()
            select_properties.set_header(self.header)
            select_properties.set_last_left_col_indexes(
                list(range(len(self.header))))
            select_properties.set_select_all_cols()
            filter_obj.set_selection_properties(select_properties)

            res_select_properties = filter_obj.fit(
                data_table, suffix='').selection_properties

            threshold = select_param.variance_coe_param.value_threshold
            result = [self.header[idx]
                      for idx, coe in enumerate(self.coe_list)
                      if coe >= threshold]

            left_names = res_select_properties.all_left_col_names
            self.assertEqual(left_names, result)
            self.assertEqual(len(left_names), 9)
        finally:
            # Runs before tearDown, so the table is released on failure too.
            data_table.destroy()
Ejemplo n.º 7
0
    def test_filter_logic(self):
        """Check that the outlier filter keeps the first nine header columns.

        The filter is configured with ``percentile = 0.9`` and
        ``upper_threshold = 99`` and starts with every header column
        selected.

        The generated table is destroyed even when the fit or an assertion
        fails, so a failing run does not leak it.
        """
        data_table = self.gen_data(1000, 10, 48)
        try:
            select_param = FeatureSelectionParam()
            select_param.outlier_param.percentile = 0.9
            select_param.outlier_param.upper_threshold = 99
            filter_obj = get_filter(consts.OUTLIER_COLS, select_param)

            # Begin with all header columns both "left" and selected.
            select_properties = SelectionProperties()
            select_properties.set_header(self.header)
            select_properties.set_last_left_col_indexes(
                list(range(len(self.header))))
            select_properties.set_select_all_cols()
            filter_obj.set_selection_properties(select_properties)

            res_select_properties = filter_obj.fit(
                data_table, suffix='').selection_properties

            left_names = res_select_properties.all_left_col_names
            self.assertEqual(left_names,
                             [self.header[x] for x in range(9)])
            self.assertEqual(len(left_names), 9)
        finally:
            # Runs before tearDown, so the table is released on failure too.
            data_table.destroy()