def test_fit1(self):
    """Check MinMaxScale with feat_lower=1 / feat_upper=2 against the MMS reference.

    ``self.test_data`` is clamped in place to the range [1, 2] before the
    reference scaler (``MMS``) is fitted on it.  The test then asserts that:

    * the features produced by ``MinMaxScale.fit`` equal the reference
      transform rounded to 6 decimals,
    * the per-column min/max recorded by ``MinMaxScale`` equal the
      reference scaler's ``data_min_`` / ``data_max_``,
    * ``MinMaxScale.transform`` reproduces the output of ``fit``.
    """
    lower_cap, upper_cap = 1, 2

    param = self.get_scale_param()
    # NOTE(review): the sibling tests configure `scale_col_indexes`; confirm
    # that `scale_column_idx` is the attribute MinMaxScale actually reads.
    param.scale_column_idx = []
    param.feat_upper = upper_cap
    param.feat_lower = lower_cap

    min_max_scale = MinMaxScale(param)
    fitted = min_max_scale.fit(self.table_instance)
    fitted_min = min_max_scale.column_min_value
    fitted_max = min_max_scale.column_max_value

    # Clamp the reference data in place so the reference scaler sees the
    # same capped values the scaler under test is expected to use.
    for row_idx, row in enumerate(self.test_data):
        for col_idx, cell in enumerate(row):
            if cell > upper_cap:
                self.test_data[row_idx][col_idx] = upper_cap
            elif cell < lower_cap:
                self.test_data[row_idx][col_idx] = lower_cap

    reference = MMS()
    reference.fit(self.test_data)

    fitted_features = self.get_table_instance_feature(fitted)
    expected_features = np.around(reference.transform(self.test_data), 6).tolist()
    self.assertListEqual(fitted_features, expected_features)

    reference_min = list(reference.data_min_)
    reference_max = list(reference.data_max_)
    self.assertListEqual(fitted_min, reference_min)
    self.assertListEqual(fitted_max, reference_max)

    # transform() on the same table must give the same result as fit().
    transformed = min_max_scale.transform(self.table_instance)
    self.assertListEqual(
        self.get_table_instance_feature(fitted),
        self.get_table_instance_feature(transformed),
    )
def test_fit5(self):
    """Check MinMaxScale in "cap" mode with columns chosen by name and index.

    The scaler is configured with ``mode="cap"``, ``feat_lower=0.2``,
    ``feat_upper=0.8``, ``scale_names=['fid1', 'fid2', 'fid1000']`` and
    ``scale_col_indexes=[2, 4]``.  The expected result treats columns
    1, 2 and 4 as scaled and every other column as left untouched.

    ``self.test_data`` is clamped in place to hard-coded per-column bounds
    (presumably the 0.2 / 0.8 quantiles of the fixture -- confirm against
    the fixture data) before the reference scaler (``MMS``) is fitted.
    """
    # Columns the expected output treats as scaled.
    scale_column_idx = [1, 2, 4]
    scale_names = ['fid1', 'fid2', 'fid1000']

    scale_param = self.get_scale_param()
    scale_param.mode = "cap"
    scale_param.feat_upper = 0.8
    scale_param.feat_lower = 0.2
    scale_param.scale_names = scale_names
    scale_param.scale_col_indexes = [2, 4]

    scale_obj = MinMaxScale(scale_param)
    fit_instance = scale_obj.fit(self.table_instance)
    column_min_value = scale_obj.column_min_value
    column_max_value = scale_obj.column_max_value

    # Keep an untouched copy: unscaled columns must equal the raw input.
    raw_data = copy.deepcopy(self.test_data)

    # Hard-coded ground-truth caps, one entry per feature column.
    gt_cap_lower_list = [0, 2, 2, 2, 3, 1]
    gt_cap_upper_list = [1, 8, 8, 8, 7, 8]
    for i, line in enumerate(self.test_data):
        for j, value in enumerate(line):
            if value > gt_cap_upper_list[j]:
                self.test_data[i][j] = gt_cap_upper_list[j]
            elif value < gt_cap_lower_list[j]:
                self.test_data[i][j] = gt_cap_lower_list[j]

    scaler = MMS()
    scaler.fit(self.test_data)
    sklearn_transform_data = np.around(scaler.transform(self.test_data), 6).tolist()

    # Columns outside scale_column_idx are expected to pass through
    # unchanged, so overwrite the reference output with the raw values.
    for i, line in enumerate(sklearn_transform_data):
        for j, _ in enumerate(line):
            if j not in scale_column_idx:
                line[j] = raw_data[i][j]

    fit_data = np.round(self.get_table_instance_feature(fit_instance), 6).tolist()
    self.assertListEqual(fit_data, sklearn_transform_data)

    data_min = list(scaler.data_min_)
    data_max = list(scaler.data_max_)
    self.assertListEqual(column_min_value, data_min)
    self.assertListEqual(column_max_value, data_max)

    # transform() on the same table must give the same result as fit().
    transform_data = scale_obj.transform(self.table_instance)
    self.assertListEqual(
        self.get_table_instance_feature(fit_instance),
        self.get_table_instance_feature(transform_data),
    )
def test_fit4(self):
    """Check MinMaxScale restricted to columns 1, 2 and 4 with caps 1 and 2.

    The scaler is configured with ``feat_lower=1``, ``feat_upper=2`` and
    ``scale_col_indexes=[1, 2, 4]``.  Only those columns of
    ``self.test_data`` are clamped (in place) to [1, 2] before the
    reference scaler (``MMS``) is fitted; all other columns are expected
    to come back from ``MinMaxScale`` unchanged.
    """
    scale_column_idx = [1, 2, 4]

    scale_param = self.get_scale_param()
    scale_param.feat_upper = 2
    scale_param.feat_lower = 1
    scale_param.scale_col_indexes = scale_column_idx

    scale_obj = MinMaxScale(scale_param)
    fit_instance = scale_obj.fit(self.table_instance)
    column_min_value = scale_obj.column_min_value
    column_max_value = scale_obj.column_max_value

    # Keep an untouched copy: unscaled columns must equal the raw input.
    raw_data = copy.deepcopy(self.test_data)

    # Clamp only the selected columns of the reference data, in place.
    for i, line in enumerate(self.test_data):
        for j, value in enumerate(line):
            if j in scale_column_idx:
                if value > 2:
                    self.test_data[i][j] = 2
                elif value < 1:
                    self.test_data[i][j] = 1

    scaler = MMS()
    scaler.fit(self.test_data)
    sklearn_transform_data = np.around(scaler.transform(self.test_data), 6).tolist()

    # Columns outside scale_column_idx are expected to pass through
    # unchanged, so overwrite the reference output with the raw values.
    for i, line in enumerate(sklearn_transform_data):
        for j, _ in enumerate(line):
            if j not in scale_column_idx:
                line[j] = raw_data[i][j]

    self.assertListEqual(self.get_table_instance_feature(fit_instance),
                         sklearn_transform_data)

    data_min = list(scaler.data_min_)
    data_max = list(scaler.data_max_)
    self.assertListEqual(column_min_value, data_min)
    self.assertListEqual(column_max_value, data_max)

    # transform() on the same table must give the same result as fit().
    transform_data = scale_obj.transform(self.table_instance)
    self.assertListEqual(
        self.get_table_instance_feature(fit_instance),
        self.get_table_instance_feature(transform_data),
    )
def test_fit_instance_default(self):
    """Check MinMaxScale with ``scale_col_indexes = -1`` against the MMS reference.

    No capping is applied to ``self.test_data`` here: the reference scaler
    (``MMS``) is fitted on the data as-is.  The test asserts that the
    fitted features, the recorded per-column min/max, and a subsequent
    ``transform`` all agree with the reference.
    """
    param = self.get_scale_param()
    # NOTE(review): -1 presumably selects every column -- confirm against
    # MinMaxScale's parameter handling.
    param.scale_col_indexes = -1

    min_max_scale = MinMaxScale(param)
    fitted = min_max_scale.fit(self.table_instance)
    fitted_min = min_max_scale.column_min_value
    fitted_max = min_max_scale.column_max_value

    reference = MMS()
    reference.fit(self.test_data)

    fitted_features = self.get_table_instance_feature(fitted)
    expected_features = np.around(reference.transform(self.test_data), 6).tolist()
    self.assertListEqual(fitted_features, expected_features)

    reference_min = list(reference.data_min_)
    reference_max = list(reference.data_max_)
    self.assertListEqual(fitted_min, reference_min)
    self.assertListEqual(fitted_max, reference_max)

    # transform() on the same table must give the same result as fit().
    transformed = min_max_scale.transform(self.table_instance)
    self.assertListEqual(
        self.get_table_instance_feature(fitted),
        self.get_table_instance_feature(transformed),
    )