def fill_missing_value(self, input_data_features, mode="fit"):
    """Impute missing feature values when missing-value filling is enabled.

    If ``self.missing_fill`` is falsy the input is returned untouched and no
    attribute is modified.  Otherwise an ``Imputer`` is built from
    ``self.missing_impute`` and either:

    * ``mode == "fit"``: fitted on the data using ``self.missing_fill_method``
      and the current ``self.default_value``; ``self.default_value`` is then
      overwritten with the value returned by the imputer, or
    * any other ``mode``: used to transform the data with the stored
      ``self.default_value``.

    In both modes ``self.missing_impute`` is taken from the imputer when it
    was ``None``, and ``self.missing_impute_rate`` is refreshed for ``mode``.

    Args:
        input_data_features: Feature data handed to the imputer.
        mode: ``"fit"`` to fit the imputer; any other value transforms.

    Returns:
        The data returned by the imputer, or ``input_data_features`` itself
        when filling is disabled.
    """
    if not self.missing_fill:
        return input_data_features

    # Function-scope import: only needed when filling is enabled.
    from federatedml.feature.imputer import Imputer

    imputer_processor = Imputer(self.missing_impute)
    if mode == "fit":
        input_data_features, self.default_value = imputer_processor.fit(
            input_data_features,
            replace_method=self.missing_fill_method,
            replace_value=self.default_value,
        )
    else:
        input_data_features = imputer_processor.transform(
            input_data_features,
            transform_value=self.default_value,
        )

    # A single check after the branch covers both fit and transform.
    if self.missing_impute is None:
        self.missing_impute = imputer_processor.get_missing_value_list()

    self.missing_impute_rate = imputer_processor.get_impute_rate(mode)
    # NOTE: reporting "missing_value_ratio" / "missing_value_list" through a
    # tracker callback is currently disabled.
    return input_data_features
def transform(self, data):
    """Run imputation on ``data`` using the settings stored on this object.

    An ``Imputer`` is created from ``self.missing_impute`` and asked to
    transform ``data`` with ``self.default_value`` as the fill value and
    ``self.skip_cols`` as the columns to skip.  Afterwards
    ``self.missing_impute`` is taken from the imputer if it was ``None`` and
    ``self.missing_impute_rate`` is set to the imputer's "transform" rate.

    Args:
        data: Data handed to ``Imputer.transform``.

    Returns:
        Whatever ``Imputer.transform`` returns for ``data``.
    """
    LOGGER.info(f"Enter Feature Imputation transform")

    processor = Imputer(self.missing_impute)
    result = processor.transform(
        data,
        transform_value=self.default_value,
        skip_cols=self.skip_cols,
    )

    # Only adopt the imputer's missing-value list when none was configured.
    if self.missing_impute is None:
        self.missing_impute = processor.get_missing_value_list()

    self.missing_impute_rate = processor.get_impute_rate("transform")
    return result
def replace_outlier_value(self, input_data_features, mode="fit"):
    """Replace outlier values when outlier replacement is enabled.

    If ``self.outlier_replace`` is falsy the input is returned untouched.
    Otherwise an ``Imputer`` built from ``self.outlier_impute`` is used:

    * ``mode == "fit"``: the imputer is fitted with
      ``self.outlier_replace_method`` / ``self.outlier_replace_value``; the
      returned replacement value is stored back on
      ``self.outlier_replace_value`` and ``self.outlier_impute`` is taken
      from the imputer if it was ``None``.
    * any other ``mode``: the data is transformed with the stored
      ``self.outlier_replace_value``.

    ``self.outlier_replace_rate`` is refreshed for ``mode`` in both cases.

    Args:
        input_data_features: Feature data handed to the imputer.
        mode: ``"fit"`` to fit the imputer; any other value transforms.

    Returns:
        The data returned by the imputer, or ``input_data_features`` itself
        when outlier replacement is disabled.
    """
    if not self.outlier_replace:
        return input_data_features

    # Function-scope import: only needed when replacement is enabled.
    from federatedml.feature.imputer import Imputer

    outlier_imputer = Imputer(self.outlier_impute)

    if mode != "fit":
        input_data_features = outlier_imputer.transform(
            input_data_features,
            transform_value=self.outlier_replace_value,
        )
    else:
        fit_result = outlier_imputer.fit(
            input_data_features,
            replace_method=self.outlier_replace_method,
            replace_value=self.outlier_replace_value,
        )
        input_data_features, self.outlier_replace_value = fit_result
        # The outlier list is only picked up from the imputer during fit.
        if self.outlier_impute is None:
            self.outlier_impute = outlier_imputer.get_missing_value_list()

    self.outlier_replace_rate = outlier_imputer.get_impute_rate(mode)
    return input_data_features
def fill_missing_value(self, input_data, tags_dict, mode="fit"):
    """Impute missing values on tag-formatted data and convert it back.

    Steps, in order:

    1. Map every value of ``input_data`` through ``self.change_tag_to_str``.
    2. Attach a schema built from ``self.header``, ``self.sid_name`` and
       ``self.label_name`` to the mapped data.
    3. Fit (``mode == "fit"``) or transform (any other mode) with an
       ``Imputer``; fitting overwrites ``self.default_value``.
    4. Take ``self.missing_impute`` from the imputer if it was ``None`` and
       refresh ``self.missing_impute_rate`` for ``mode``.
    5. Map every value of the imputed data through ``self.change_str_to_tag``.

    Args:
        input_data: Table-like object exposing ``mapValues``.
        tags_dict: Passed through to both conversion helpers.
        mode: ``"fit"`` to fit the imputer; any other value transforms.

    Returns:
        The imputed data after conversion by ``self.change_str_to_tag``.
    """
    to_str = functools.partial(
        self.change_tag_to_str,
        tags_dict=tags_dict,
        delimitor=self.delimitor,
        with_label=self.with_label,
        tag_value_delimitor=self.tag_value_delimitor,
    )
    input_data = input_data.mapValues(to_str)

    set_schema(
        input_data,
        make_schema(self.header, self.sid_name, self.label_name),
    )

    from federatedml.feature.imputer import Imputer

    # NOTE(review): the imputer is created without self.missing_impute here,
    # unlike other imputation entry points -- confirm this is intentional.
    imputer = Imputer()

    if mode == "fit":
        fit_result = imputer.fit(
            input_data,
            replace_method=self.missing_fill_method,
            replace_value=self.default_value,
        )
        data, self.default_value = fit_result
        LOGGER.debug("self.default_value is {}".format(self.default_value))
    else:
        data = imputer.transform(
            input_data,
            transform_value=self.default_value,
        )

    if self.missing_impute is None:
        self.missing_impute = imputer.get_missing_value_list()
    LOGGER.debug("self.missing_impute is {}".format(self.missing_impute))

    self.missing_impute_rate = imputer.get_impute_rate(mode)

    to_tag = functools.partial(
        self.change_str_to_tag,
        tags_dict=tags_dict,
        delimitor=self.delimitor,
        tag_value_delimitor=self.tag_value_delimitor,
    )
    return data.mapValues(to_tag)
def fit(self, data):
    """Fit the imputer on ``data`` and record the fitted state on ``self``.

    The header is read from ``data`` (before imputation) and every key of
    ``self.col_missing_fill_method`` must appear in it.  The imputer is then
    fitted with ``self.missing_fill_method``, ``self.default_value`` and the
    per-column methods.  After fitting, the following attributes are
    updated: ``default_value``, ``missing_impute`` (only if it was ``None``),
    ``missing_impute_rate``, ``cols_replace_method`` and ``skip_cols``; the
    summary is refreshed via ``set_summary(get_summary())``.

    Args:
        data: Data handed to ``get_header`` and ``Imputer.fit``.

    Returns:
        The imputed data returned by ``Imputer.fit``.

    Raises:
        ValueError: If a key of ``self.col_missing_fill_method`` is not in
            the data header.
    """
    LOGGER.info(f"Enter Feature Imputation fit")

    processor = Imputer(self.missing_impute)
    self.header = get_header(data)

    per_column_methods = self.col_missing_fill_method
    if per_column_methods:
        known_columns = self.header
        for k in per_column_methods:
            if k in known_columns:
                continue
            raise ValueError(
                f"{k} not found in data header. Please check col_missing_fill_method keys."
            )

    fit_result = processor.fit(
        data,
        replace_method=self.missing_fill_method,
        replace_value=self.default_value,
        col_replace_method=per_column_methods,
    )
    imputed_data, self.default_value = fit_result

    # Only adopt the imputer's missing-value list when none was configured.
    if self.missing_impute is None:
        self.missing_impute = processor.get_missing_value_list()

    self.missing_impute_rate = processor.get_impute_rate("fit")
    self.cols_replace_method = processor.cols_replace_method
    self.skip_cols = processor.get_skip_cols()

    self.set_summary(self.get_summary())
    return imputed_data