def test_cols_select_fit_and_transform_repeat(self):
    """
    Check column-wise min-max scaling when the selected column list
    contains repeated indexes.

    The expected matrix is produced by the reference scaler ``MMS``
    (presumably scikit-learn's MinMaxScaler -- confirm against the file's
    imports), rounded to 4 decimals; every column that is not listed in
    ``scale_column_idx`` keeps its raw value from ``self.test_data``.
    Both the output of ``fit`` and the output of ``transform`` must equal
    that expected matrix.
    """
    scale_column_idx = [1, 1, 2, 2, 4, 5, 5]
    column_scaler = MinMaxScaler(
        mode='normal',
        area='col',
        scale_column_idx=scale_column_idx,
        feat_upper=None,
        feat_lower=None,
        out_upper=None,
        out_lower=None,
    )
    fitted_table, fit_params = column_scaler.fit(self.table_instance)

    # Reference result: scale every column, then restore the columns that
    # were not selected for scaling.
    reference = MMS()
    reference.fit(self.test_data)
    scaled_rows = np.around(reference.transform(self.test_data), 4).tolist()
    expected = [
        [
            scaled_value if col in scale_column_idx else self.test_data[row][col]
            for col, scaled_value in enumerate(scaled_row)
        ]
        for row, scaled_row in enumerate(scaled_rows)
    ]

    self.assertListEqual(self.get_table_instance_feature(fitted_table), expected)

    transformed_table = column_scaler.transform(self.table_instance, fit_params)
    self.assertListEqual(self.get_table_instance_feature(transformed_table), expected)
def transform(self, data, fit_config):
    """
    Transform input data using scale with fit results

    Parameters
    ----------
    data: data_instance, input data

    fit_config: list, the fit results information of scale. For the
        min-max method it is handed to ``MinMaxScaler.transform`` as is;
        for the standard method its first three items are used as mean,
        scale and scale_column_idx respectively.

    Returns
    ----------
    transform_data:data_instance, data after transform. The input is
        returned unchanged when ``fit_config`` is empty or when the
        configured method is neither min-max nor standard scale.
    """
    LOGGER.info("Start scale data transform ...")

    self.header = data.schema.get('header')
    if len(fit_config) == 0:
        LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
        # Actually return, as the warning promises: falling through would
        # index into an empty fit_config in the standard-scale branch.
        return data

    if self.scale_param.method == consts.MINMAXSCALE:
        min_max_scaler = MinMaxScaler()
        data = min_max_scaler.transform(data, fit_config)
    elif self.scale_param.method == consts.STANDARDSCALE:
        standard_scaler = StandardScaler()
        data = standard_scaler.transform(data, mean=fit_config[0], scale=fit_config[1],
                                         scale_column_idx=fit_config[2])
    else:
        LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

    # Re-attach the header captured from the input schema to the result.
    data.schema['header'] = self.header
    LOGGER.info("End transform data ...")

    return data
def test_transform_all(self):
    """
    With ``area='all'`` and explicit feature/output bounds, transforming
    the table with the values returned by ``fit`` must reproduce the
    features that ``fit`` itself returned.
    """
    bounds = {
        'feat_upper': 8,
        'feat_lower': 3,
        'out_upper': 2,
        'out_lower': -1,
    }
    scaler = MinMaxScaler(mode='normal', area='all', **bounds)

    fitted_table, fit_params = scaler.fit(self.table_instance)
    transformed_table = scaler.transform(self.table_instance, fit_params)

    fitted_features = self.get_table_instance_feature(fitted_table)
    transformed_features = self.get_table_instance_feature(transformed_table)
    self.assertListEqual(fitted_features, transformed_features)
def transform(self, data, fit_config):
    """
    Transform input data using scale with fit results

    Parameters
    ----------
    data: input data to be scaled

    fit_config: list, the fit results information of scale. For the
        min-max method only its first item is handed to
        ``MinMaxScaler.transform``; for the standard method its first two
        items are used as mean and scale respectively.

    Returns
    ----------
    The data after transform. The input is returned unchanged when
    ``fit_config`` is empty or when the configured method is neither
    min-max nor standard scale.
    """
    LOGGER.info("Start scale data transform ...")

    if len(fit_config) == 0:
        LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
        # Actually return, as the warning promises: both scaling branches
        # below index fit_config[0] and would raise IndexError on an
        # empty fit_config.
        return data

    if self.scale_param.method == consts.MINMAXSCALE:
        min_max_scaler = MinMaxScaler()
        data = min_max_scaler.transform(data, fit_config[0])
    elif self.scale_param.method == consts.STANDARDSCALE:
        standard_scaler = StandardScaler()
        data = standard_scaler.transform(data, mean=fit_config[0], scale=fit_config[1])
    else:
        LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

    LOGGER.info("End transform data ...")

    return data