def test_transform1(self):
    # Fit with both centering and scaling enabled, then re-apply the fitted
    # statistics through transform(); both paths must yield the same features.
    scaler = StandardScaler(with_mean=True, with_std=True)
    fitted, mean, std = scaler.fit(self.table_instance)
    transformed = scaler.transform(self.table_instance, mean, std)

    actual_features = self.get_table_instance_feature(transformed)
    expected_features = self.get_table_instance_feature(fitted)
    self.assertListEqual(actual_features, expected_features)
def test_cols_select_fit_and_transform_repeat(self):
    # Column selection deliberately contains repeated indices.
    requested_columns = [1, 1, 2, 2, 4, 5, 5]
    col_scaler = StandardScaler(
        area='col',
        scale_column_idx=requested_columns,
        with_mean=True,
        with_std=True,
    )
    fitted, scale_conf = col_scaler.fit(self.table_instance)
    mean = scale_conf[0]
    std = scale_conf[1]
    # Column indices as reported back by fit(); used for everything below.
    selected_columns = scale_conf[2]

    # Reference result: scale every column with SSL, rounded to 4 decimals.
    reference = SSL(with_mean=True, with_std=True)
    reference.fit(self.test_data)
    fully_scaled = np.around(reference.transform(self.test_data), 4).tolist()

    # Columns that were not selected keep their raw values from the test data.
    expected = [
        [
            scaled if col in selected_columns else self.test_data[row][col]
            for col, scaled in enumerate(values)
        ]
        for row, values in enumerate(fully_scaled)
    ]

    self.assertListEqual(self.get_table_instance_feature(fitted), expected)

    # transform() with the fitted configuration must match the same reference.
    transformed = col_scaler.transform(
        self.table_instance, mean, std, selected_columns)
    self.assertListEqual(
        self.get_table_instance_feature(transformed), expected)
def transform(self, data, fit_config):
    """
    Transform input data using scale with fit results

    Dispatches on ``self.scale_param.method``: min-max scaling receives the
    whole ``fit_config``; standard scaling reads ``fit_config[0]`` as mean,
    ``fit_config[1]`` as scale and ``fit_config[2]`` as the scaled column
    indices. Any other method leaves the data untouched.

    Parameters
    ----------
    data: data_instance, input data

    fit_config: list, the fit results information of scale

    Returns
    ----------
    transform_data:data_instance, data after transform. If fit_config is
        empty, the input data is returned unchanged.
    """
    LOGGER.info("Start scale data transform ...")

    self.header = data.schema.get('header')

    if len(fit_config) == 0:
        LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
        # Actually return here: falling through would index into the empty
        # config (e.g. fit_config[0]) and raise IndexError.
        return data

    method = self.scale_param.method
    if method == consts.MINMAXSCALE:
        min_max_scaler = MinMaxScaler()
        data = min_max_scaler.transform(data, fit_config)
    elif method == consts.STANDARDSCALE:
        standard_scaler = StandardScaler()
        data = standard_scaler.transform(data,
                                         mean=fit_config[0],
                                         scale=fit_config[1],
                                         scale_column_idx=fit_config[2])
    else:
        LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

    # Write the header captured above back onto the (possibly new) data object.
    data.schema['header'] = self.header
    LOGGER.info("End transform data ...")

    return data
def test_transform4(self):
    # With centering and scaling both disabled, transform() driven by the
    # fitted configuration must still reproduce the output of fit().
    scaler = StandardScaler(with_mean=False, with_std=False)
    fitted, scale_conf = scaler.fit(self.table_instance)
    mean = scale_conf[0]
    std = scale_conf[1]
    scale_column_idx = scale_conf[2]

    transformed = scaler.transform(
        self.table_instance, mean, std, scale_column_idx)

    actual_features = self.get_table_instance_feature(transformed)
    expected_features = self.get_table_instance_feature(fitted)
    self.assertListEqual(actual_features, expected_features)
def transform(self, data, fit_config):
    """
    Transform input data using scale with fit results

    Dispatches on ``self.scale_param.method``: min-max scaling receives
    ``fit_config[0]``; standard scaling reads ``fit_config[0]`` as mean and
    ``fit_config[1]`` as scale. Any other method leaves the data untouched.

    Parameters
    ----------
    data: data_instance, input data

    fit_config: list, the fit results information of scale

    Returns
    ----------
    transform_data:data_instance, data after transform. If fit_config is
        empty, the input data is returned unchanged.
    """
    LOGGER.info("Start scale data transform ...")

    if len(fit_config) == 0:
        LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
        # Actually return here: both scaling branches index fit_config[0],
        # which would raise IndexError on an empty config.
        return data

    method = self.scale_param.method
    if method == consts.MINMAXSCALE:
        min_max_scaler = MinMaxScaler()
        data = min_max_scaler.transform(data, fit_config[0])
    elif method == consts.STANDARDSCALE:
        standard_scaler = StandardScaler()
        data = standard_scaler.transform(data, mean=fit_config[0], scale=fit_config[1])
    else:
        LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

    LOGGER.info("End transform data ...")

    return data