def test_cols_select_fit_and_transform(self):
    """Check column-restricted scaling against the ``SSL`` reference scaler.

    ``StandardScale`` is configured to scale only columns 1, 2 and 4. The
    expected table uses the reference-scaled value (rounded to 4 decimals)
    for every column listed in the scaler's ``scale_column_idx`` and the raw
    ``self.test_data`` value for every other column. Both ``fit`` and a
    follow-up ``transform`` of the same table must produce that result.
    """
    params = self.get_scale_param()
    params.scale_column_idx = [1, 2, 4]
    standard_scaler = StandardScale(params)
    fitted_table = standard_scaler.fit(self.table_instance)
    selected_cols = standard_scaler.scale_column_idx

    # Reference result: scale every column, then restore the unselected ones.
    reference = SSL(with_mean=True, with_std=True)
    reference.fit(self.test_data)
    scaled_rows = np.around(reference.transform(self.test_data), 4).tolist()
    expected = [
        [
            scaled if col in selected_cols else raw_row[col]
            for col, scaled in enumerate(scaled_row)
        ]
        for scaled_row, raw_row in zip(scaled_rows, self.test_data)
    ]

    self.assertListEqual(
        self.get_table_instance_feature(fitted_table), expected)

    transformed_table = standard_scaler.transform(self.table_instance)
    self.assertListEqual(
        self.get_table_instance_feature(transformed_table), expected)
def test_cols_select_fit_and_transform_repeat(self):
    """Check column-restricted scaling when the index list has duplicates.

    ``StandardScaler`` is built with ``scale_column_idx`` containing repeated
    entries. The column selection used for the expected table is the one
    returned by ``fit`` in ``scale_conf[2]``: selected columns take the
    ``SSL`` reference value (rounded to 4 decimals), all others keep the raw
    ``self.test_data`` value. ``transform`` is then called with the returned
    mean, std and column selection and must give the same table.
    """
    requested_cols = [1, 1, 2, 2, 4, 5, 5]
    standard_scaler = StandardScaler(
        area='col',
        scale_column_idx=requested_cols,
        with_mean=True,
        with_std=True,
    )
    fitted_table, scale_conf = standard_scaler.fit(self.table_instance)
    mean = scale_conf[0]
    std = scale_conf[1]
    selected_cols = scale_conf[2]

    # Reference result: scale every column, then restore the unselected ones.
    reference = SSL(with_mean=True, with_std=True)
    reference.fit(self.test_data)
    scaled_rows = np.around(reference.transform(self.test_data), 4).tolist()
    expected = [
        [
            scaled if col in selected_cols else raw_row[col]
            for col, scaled in enumerate(scaled_row)
        ]
        for scaled_row, raw_row in zip(scaled_rows, self.test_data)
    ]

    self.assertListEqual(
        self.get_table_instance_feature(fitted_table), expected)

    transformed_table = standard_scaler.transform(
        self.table_instance, mean, std, selected_cols)
    self.assertListEqual(
        self.get_table_instance_feature(transformed_table), expected)
def test_fit10(self):
    """Check ``mode="cap"`` capping combined with column-restricted scaling.

    The scaler is configured with ``feat_upper=0.8``, ``feat_lower=0.2``,
    ``mode="cap"`` and ``area="col"`` for columns 1, 2 and 4. The expected
    per-column bounds are hard-coded in ``gt_cap_lower_list`` /
    ``gt_cap_upper_list`` (presumably the 0.2 / 0.8 quantiles of the fixture
    data; confirm against the fixture).

    The reference result is produced by clamping the selected columns of the
    raw data to those bounds, scaling with ``SSL`` and rounding to 4
    decimals; unselected columns must keep their raw values. The scaler's
    ``mean``, ``std``, ``column_max_value`` and ``column_min_value`` are
    checked too, and ``transform`` must reproduce the ``fit`` output.
    """
    scale_column_idx = [1, 2, 4]
    scale_param = self.get_scale_param()
    scale_param.feat_upper = 0.8
    scale_param.feat_lower = 0.2
    scale_param.with_mean = True
    scale_param.with_std = True
    scale_param.mode = "cap"
    scale_param.scale_column_idx = scale_column_idx
    scale_param.area = "col"

    standard_scaler = StandardScale(scale_param)
    fit_instance = standard_scaler.fit(self.table_instance)
    mean = standard_scaler.mean
    std = standard_scaler.std
    column_max_value = standard_scaler.column_max_value
    column_min_value = standard_scaler.column_min_value

    gt_cap_lower_list = [0, 2, 2, 2, 3, 1]
    gt_cap_upper_list = [1, 8, 8, 8, 7, 8]

    # Clamp into a fresh copy rather than editing self.test_data in place,
    # so the shared fixture stays intact for anything that reads it later.
    capped_data = [
        [
            min(max(value, gt_cap_lower_list[j]), gt_cap_upper_list[j])
            if j in scale_column_idx else value
            for j, value in enumerate(row)
        ]
        for row in self.test_data
    ]

    scaler = SSL(with_mean=True, with_std=True)
    scaler.fit(capped_data)
    scaled_rows = np.around(scaler.transform(capped_data), 4).tolist()

    # Unselected columns are expected to come through with their raw values.
    transform_data = [
        [
            scaled if j in scale_column_idx else raw_row[j]
            for j, scaled in enumerate(scaled_row)
        ]
        for scaled_row, raw_row in zip(scaled_rows, self.test_data)
    ]

    self.assertListEqual(self.get_table_instance_feature(fit_instance),
                         transform_data)
    self.assertEqual(column_max_value, gt_cap_upper_list)
    self.assertEqual(column_min_value, gt_cap_lower_list)
    self.assertListEqual(list(np.around(mean, 6)),
                         list(np.around(scaler.mean_, 6)))
    self.assertListEqual(list(np.around(std, 6)),
                         list(np.around(scaler.scale_, 6)))

    raw_data_transform = standard_scaler.transform(self.table_instance)
    self.assertListEqual(
        self.get_table_instance_feature(fit_instance),
        self.get_table_instance_feature(raw_data_transform))
def test_fit4(self):
    """Check fitting with both ``with_mean`` and ``with_std`` disabled.

    The fitted table must match the ``SSL`` reference output (rounded to 4
    decimals), the returned mean must be all zeros and the returned std all
    ones, one entry per column of ``self.test_data``.
    """
    standard_scaler = StandardScaler(with_mean=False, with_std=False)
    fitted_table, mean, std = standard_scaler.fit(self.table_instance)

    reference = SSL(with_mean=False, with_std=False)
    reference.fit(self.test_data)

    actual = self.get_table_instance_feature(fitted_table)
    expected = np.around(reference.transform(self.test_data), 4).tolist()
    self.assertListEqual(actual, expected)

    n_cols = len(self.test_data[0])
    self.assertEqual(mean, [0] * n_cols)
    self.assertEqual(std, [1] * n_cols)
def test_fit3(self):
    """Check fitting with centering enabled and std scaling disabled.

    The fitted table must match the ``SSL(with_std=False)`` reference output
    (rounded to 4 decimals), the returned mean must match the reference
    ``mean_`` and the returned std must be all ones.
    """
    standard_scaler = StandardScaler(with_mean=True, with_std=False)
    fitted_table, mean, std = standard_scaler.fit(self.table_instance)

    reference = SSL(with_std=False)
    reference.fit(self.test_data)

    actual = self.get_table_instance_feature(fitted_table)
    expected = np.around(reference.transform(self.test_data), 4).tolist()
    self.assertListEqual(actual, expected)

    rounded_mean = list(np.around(mean, 4))
    reference_mean = list(np.around(reference.mean_, 4))
    self.assertListEqual(rounded_mean, reference_mean)

    rounded_std = list(np.around(std, 4))
    self.assertListEqual(rounded_std, [1] * len(std))
def test_fit2(self):
    """Check fitting with centering disabled and std scaling enabled.

    The fitted table must match the ``SSL(with_mean=False)`` reference output
    (rounded to 4 decimals), the mean in ``scale_conf[0]`` must be all zeros
    and the std in ``scale_conf[1]`` must match the reference ``scale_``.
    """
    standard_scaler = StandardScaler(with_mean=False, with_std=True)
    fitted_table, scale_conf = standard_scaler.fit(self.table_instance)
    mean = scale_conf[0]
    std = scale_conf[1]

    reference = SSL(with_mean=False)
    reference.fit(self.test_data)

    actual = self.get_table_instance_feature(fitted_table)
    expected = np.around(reference.transform(self.test_data), 4).tolist()
    self.assertListEqual(actual, expected)

    rounded_mean = list(np.around(mean, 4))
    self.assertListEqual(rounded_mean, [0] * len(mean))

    rounded_std = list(np.around(std, 4))
    reference_std = list(np.around(reference.scale_, 4))
    self.assertListEqual(rounded_std, reference_std)
def test_fit9(self):
    """Check explicit per-column bounds combined with column-restricted scaling.

    The scaler is configured with ``feat_upper`` = 2 and ``feat_lower`` = 1
    for every column, ``area="col"`` and columns 1, 2 and 4 selected.

    The reference result is produced by clamping the selected columns of the
    raw data to [1, 2], scaling with ``SSL`` and rounding to 4 decimals;
    unselected columns must keep their raw values. The scaler's ``mean``,
    ``std``, ``column_max_value`` and ``column_min_value`` are checked too,
    and ``transform`` must reproduce the ``fit`` output.
    """
    scale_column_idx = [1, 2, 4]
    cap_lower, cap_upper = 1, 2

    scale_param = self.get_scale_param()
    scale_param.feat_upper = [2, 2, 2, 2, 2, 2]
    scale_param.feat_lower = [1, 1, 1, 1, 1, 1]
    scale_param.with_mean = True
    scale_param.with_std = True
    scale_param.scale_column_idx = scale_column_idx
    scale_param.area = "col"

    standard_scaler = StandardScale(scale_param)
    fit_instance = standard_scaler.fit(self.table_instance)
    mean = standard_scaler.mean
    std = standard_scaler.std
    column_max_value = standard_scaler.column_max_value
    column_min_value = standard_scaler.column_min_value

    # Clamp into a fresh copy rather than editing self.test_data in place,
    # so the shared fixture stays intact for anything that reads it later.
    capped_data = [
        [
            min(max(value, cap_lower), cap_upper)
            if j in scale_column_idx else value
            for j, value in enumerate(row)
        ]
        for row in self.test_data
    ]

    scaler = SSL(with_mean=True, with_std=True)
    scaler.fit(capped_data)
    scaled_rows = np.around(scaler.transform(capped_data), 4).tolist()

    # Unselected columns are expected to come through with their raw values.
    transform_data = [
        [
            scaled if j in scale_column_idx else raw_row[j]
            for j, scaled in enumerate(scaled_row)
        ]
        for scaled_row, raw_row in zip(scaled_rows, self.test_data)
    ]

    self.assertListEqual(self.get_table_instance_feature(fit_instance),
                         transform_data)
    self.assertListEqual(list(np.around(mean, 6)),
                         list(np.around(scaler.mean_, 6)))
    self.assertListEqual(list(np.around(std, 6)),
                         list(np.around(scaler.scale_, 6)))
    self.assertEqual(column_max_value, [1, 2, 2, 10, 2, 10])
    self.assertEqual(column_min_value, [0, 1, 1, 2, 2, -100])

    raw_data_transform = standard_scaler.transform(self.table_instance)
    self.assertListEqual(
        self.get_table_instance_feature(fit_instance),
        self.get_table_instance_feature(raw_data_transform))
def test_fit1(self):
    """Check fitting with the parameters returned by ``get_scale_param``.

    The fitted table must match the default ``SSL()`` reference output
    (rounded to 4 decimals), and the scaler's ``mean`` / ``std`` must match
    the reference ``mean_`` / ``scale_`` to 4 decimals.
    """
    params = self.get_scale_param()
    standard_scaler = StandardScale(params)
    fitted_table = standard_scaler.fit(self.table_instance)
    mean = standard_scaler.mean
    std = standard_scaler.std

    reference = SSL()
    reference.fit(self.test_data)

    actual = self.get_table_instance_feature(fitted_table)
    expected = np.around(reference.transform(self.test_data), 4).tolist()
    self.assertListEqual(actual, expected)

    rounded_mean = list(np.around(mean, 4))
    reference_mean = list(np.around(reference.mean_, 4))
    self.assertListEqual(rounded_mean, reference_mean)

    rounded_std = list(np.around(std, 4))
    reference_std = list(np.around(reference.scale_, 4))
    self.assertListEqual(rounded_std, reference_std)
def test_fit6(self):
    """Check ``area='col'`` fitting with no explicit column selection.

    The fitted table must match the default ``SSL()`` reference output
    (rounded to 4 decimals), and the mean / std returned in ``scale_conf``
    must match the reference ``mean_`` / ``scale_`` to 4 decimals.
    """
    # NOTE(review): a second method named `test_fit6` is defined further
    # down in this file; if both sit in the same class, the later definition
    # replaces this one and this test never runs. Confirm and rename one.
    standard_scaler = StandardScaler(
        area='col',
        with_mean=True,
        with_std=True,
    )
    fitted_table, scale_conf = standard_scaler.fit(self.table_instance)
    mean = scale_conf[0]
    std = scale_conf[1]
    # The third entry is read but not asserted on.
    selected_cols = scale_conf[2]

    reference = SSL()
    reference.fit(self.test_data)

    actual = self.get_table_instance_feature(fitted_table)
    expected = np.around(reference.transform(self.test_data), 4).tolist()
    self.assertListEqual(actual, expected)

    rounded_mean = list(np.around(mean, 4))
    reference_mean = list(np.around(reference.mean_, 4))
    self.assertListEqual(rounded_mean, reference_mean)

    rounded_std = list(np.around(std, 4))
    reference_std = list(np.around(reference.scale_, 4))
    self.assertListEqual(rounded_std, reference_std)
def test_fit6(self):
    """Check ``area="col"`` fitting with an empty column selection.

    With ``scale_column_idx = []`` the fitted table is expected to equal the
    raw ``self.test_data`` rounded to 4 decimals, while the scaler's ``mean``
    / ``std`` must still match the ``SSL()`` reference ``mean_`` / ``scale_``
    to 4 decimals.
    """
    # NOTE(review): an earlier method in this file is also named
    # `test_fit6`; if both sit in the same class, this definition replaces
    # the earlier one. Confirm and rename one of them.
    params = self.get_scale_param()
    params.scale_column_idx = []
    params.area = "col"
    standard_scaler = StandardScale(params)
    fitted_table = standard_scaler.fit(self.table_instance)
    mean = standard_scaler.mean
    std = standard_scaler.std

    reference = SSL()
    reference.fit(self.test_data)

    actual = self.get_table_instance_feature(fitted_table)
    expected = np.around(self.test_data, 4).tolist()
    self.assertListEqual(actual, expected)

    rounded_mean = list(np.around(mean, 4))
    reference_mean = list(np.around(reference.mean_, 4))
    self.assertListEqual(rounded_mean, reference_mean)

    rounded_std = list(np.around(std, 4))
    reference_std = list(np.around(reference.scale_, 4))
    self.assertListEqual(rounded_std, reference_std)