def replace_outlier_value(self, input_data_features, mode="fit"):
    """Replace outlier values in the features when outlier replacement is on.

    When ``self.outlier_replace`` is falsy the input is returned untouched and
    no attribute is modified.

    Args:
        input_data_features: Feature data handed to the imputer.
        mode: ``"fit"`` fits the imputer and stores the replacement values it
            returns; any other value applies the stored
            ``self.outlier_replace_value`` through ``transform``.

    Returns:
        The feature data produced by the imputer, or the original input when
        outlier replacement is disabled.
    """
    if not self.outlier_replace:
        return input_data_features

    # Imported lazily, exactly where the feature is actually needed.
    from federatedml.feature.imputer import Imputer

    outlier_imputer = Imputer(self.outlier_impute)
    if mode != "fit":
        input_data_features = outlier_imputer.transform(
            input_data_features,
            transform_value=self.outlier_replace_value,
        )
    else:
        fit_output = outlier_imputer.fit(
            input_data_features,
            replace_method=self.outlier_replace_method,
            replace_value=self.outlier_replace_value,
        )
        input_data_features, self.outlier_replace_value = fit_output
        # No explicit outlier list was configured: remember the one the
        # imputer reports after fitting.
        if self.outlier_impute is None:
            self.outlier_impute = outlier_imputer.get_missing_value_list()

    self.outlier_replace_rate = outlier_imputer.get_impute_rate(mode)
    return input_data_features
def test_transform_float(self):
    """Check ``Imputer.transform`` with float output against reference rows.

    The expected rows are built by ``self.fit_test_data_float`` from the raw
    test data, using the same per-column replacement values and the same
    missing-value markers that are given to the imputer.
    """
    missing_markers = ['', 'none', 'na', 'null', "10000", "-10000"]
    replacement_per_column = [
        0.963102, 1.467675, 0.829202, 0.772457, 1.000835,
        0.962702, 1.077099, 1.053586, 2.996525, 0.961696,
    ]

    imputer = Imputer(missing_value_list=missing_markers)
    transformed_table = imputer.transform(
        self.table_instance,
        replacement_per_column,
        output_format="float",
    )
    expected_rows = self.fit_test_data_float(
        self.test_data,
        replacement_per_column,
        missing_markers,
    )

    self.assertListEqual(self.table_to_list(transformed_table), expected_rows)
def transform(self, data):
    """Impute missing values in ``data`` using the stored default values.

    A new ``Imputer`` is built from ``self.missing_impute`` and applied with
    ``self.default_value`` as the transform value, leaving ``self.skip_cols``
    to the imputer. As side effects, ``self.missing_impute`` is back-filled
    from the imputer when it was ``None`` and ``self.missing_impute_rate`` is
    set to the transform-mode impute rate.

    Args:
        data: Data handed to ``Imputer.transform``.

    Returns:
        The imputed data returned by the imputer.
    """
    LOGGER.info(f"Enter Feature Imputation transform")

    imputer = Imputer(self.missing_impute)
    transform_kwargs = {
        "transform_value": self.default_value,
        "skip_cols": self.skip_cols,
    }
    result = imputer.transform(data, **transform_kwargs)

    # No missing-value list was configured: keep the one the imputer reports.
    if self.missing_impute is None:
        self.missing_impute = imputer.get_missing_value_list()

    self.missing_impute_rate = imputer.get_impute_rate("transform")
    return result
def test_get_impute_rate(self):
    """Check the per-column impute rate reported after fit and after transform.

    The same table is used for both steps, so both modes are expected to
    report the same per-column rates.
    """
    imputer_value = ['', 'none', 'na', 'null', "10000", "-10000"]
    imputer = Imputer(missing_value_list=imputer_value)
    cols_impute_rate_ground_true = [0, 0.3, 0.1, 0.1, 0.1, 0.1, 0, 0.1, 0, 0]

    # Fit phase: the rate is read back in "fit" mode.
    _, _ = imputer.fit(self.table_instance, "median", output_format='str')
    cols_fit_impute_rate = imputer.get_impute_rate(mode="fit")
    self.assertListEqual(cols_fit_impute_rate, cols_impute_rate_ground_true)

    # Transform phase: the rate must be read back in "transform" mode.
    # Re-reading the "fit" rate here would only repeat the assertion above
    # and leave the transform-side bookkeeping untested.
    cols_transform_value_ground_true = [
        -0.606584, -0.193332, -0.620475, -0.591332, -0.327392,
        -0.519504, -0.610669, -0.768581, -0.28757, -0.247477,
    ]
    _ = imputer.transform(self.table_instance, cols_transform_value_ground_true)
    cols_transform_impute_rate = imputer.get_impute_rate(mode="transform")
    self.assertListEqual(cols_transform_impute_rate, cols_impute_rate_ground_true)
def replace_outlier_value(self, input_data_features, mode="fit"):
    """Replace outlier values in the features when outlier replacement is on.

    When ``self.outlier_replace`` is falsy the input is returned untouched and
    no attribute is modified.

    Args:
        input_data_features: Feature data handed to the imputer.
        mode: ``"fit"`` fits the imputer and stores the replacement values it
            returns; any other value re-applies ``self.outlier_replace_value``
            with ``self.outlier_replace_method`` through ``transform``.

    Returns:
        The feature data produced by the imputer, or the original input when
        outlier replacement is disabled.
    """
    if not self.outlier_replace:
        return input_data_features

    # Imported lazily, exactly where the feature is actually needed.
    from federatedml.feature.imputer import Imputer

    outlier_imputer = Imputer(self.outlier_impute)
    if mode != "fit":
        return outlier_imputer.transform(
            input_data_features,
            replace_method=self.outlier_replace_method,
            transform_value=self.outlier_replace_value,
        )

    fit_output = outlier_imputer.fit(
        input_data_features,
        replace_method=self.outlier_replace_method,
        replace_value=self.outlier_replace_value,
    )
    input_data_features, self.outlier_replace_value = fit_output
    # No explicit outlier list was configured: remember the one the imputer
    # reports after fitting.
    if self.outlier_impute is None:
        self.outlier_impute = outlier_imputer.get_imputer_value_list()
    return input_data_features
def fill_missing_value(self, input_data_features, mode="fit"):
    """Fill missing values in the features when missing-value filling is on.

    When ``self.missing_fill`` is falsy the input is returned untouched and
    no attribute is modified.

    Args:
        input_data_features: Feature data handed to the imputer.
        mode: ``"fit"`` fits the imputer and stores the fill values it returns
            in ``self.default_value``; any other value re-applies
            ``self.default_value`` with ``self.missing_fill_method`` through
            ``transform``.

    Returns:
        The feature data produced by the imputer, or the original input when
        missing-value filling is disabled.
    """
    if not self.missing_fill:
        return input_data_features

    # Imported lazily, exactly where the feature is actually needed.
    from federatedml.feature.imputer import Imputer

    imputer_processor = Imputer(self.missing_impute)
    if mode == "fit":
        input_data_features, self.default_value = imputer_processor.fit(
            input_data_features,
            replace_method=self.missing_fill_method,
            replace_value=self.default_value,
        )
    else:
        input_data_features = imputer_processor.transform(
            input_data_features,
            replace_method=self.missing_fill_method,
            transform_value=self.default_value,
        )

    # One back-fill for both modes: the original repeated this check, which
    # was redundant. When no missing-value list was configured, keep the one
    # the imputer reports.
    if self.missing_impute is None:
        self.missing_impute = imputer_processor.get_imputer_value_list()

    return input_data_features
def fill_missing_value(self, input_data, tags_dict, mode="fit"):
    """Impute missing values for tag-formatted input data.

    The values are first converted with ``self.change_tag_to_str``, a schema
    built from ``self.header``, ``self.sid_name`` and ``self.label_name`` is
    attached, the imputer is fitted or applied, and the result is converted
    back with ``self.change_str_to_tag``.

    Side effects: ``self.default_value`` is updated in fit mode,
    ``self.missing_impute`` is back-filled from the imputer when it was
    ``None``, and ``self.missing_impute_rate`` is set for the given mode.

    Args:
        input_data: Table-like object exposing ``mapValues``.
        tags_dict: Tag mapping forwarded to both conversion helpers.
        mode: ``"fit"`` fits the imputer; any other value applies the stored
            ``self.default_value`` through ``transform``.

    Returns:
        The imputed data, converted back to the tag representation.
    """
    # Options shared by the tag -> str and str -> tag conversions.
    shared_options = {
        "tags_dict": tags_dict,
        "delimitor": self.delimitor,
        "tag_value_delimitor": self.tag_value_delimitor,
    }

    to_str = functools.partial(
        self.change_tag_to_str,
        with_label=self.with_label,
        **shared_options,
    )
    input_data = input_data.mapValues(to_str)
    set_schema(input_data, make_schema(self.header, self.sid_name, self.label_name))

    from federatedml.feature.imputer import Imputer

    imputer = Imputer()
    if mode != "fit":
        data = imputer.transform(input_data, transform_value=self.default_value)
    else:
        data, self.default_value = imputer.fit(
            input_data,
            replace_method=self.missing_fill_method,
            replace_value=self.default_value,
        )
        LOGGER.debug("self.default_value is {}".format(self.default_value))

    # No missing-value list was configured: keep the one the imputer reports.
    if self.missing_impute is None:
        self.missing_impute = imputer.get_missing_value_list()
    LOGGER.debug("self.missing_impute is {}".format(self.missing_impute))

    self.missing_impute_rate = imputer.get_impute_rate(mode)

    to_tag = functools.partial(self.change_str_to_tag, **shared_options)
    return data.mapValues(to_tag)