예제 #1
0
    def test_fit1(self):
        """Compare MinMaxScale (feat_lower=1, feat_upper=2) with the MMS scaler.

        The fixture rows are clamped in place to [1, 2] and fed to the
        reference scaler. The test then checks the fitted features, the
        per-column min/max values, and that ``transform`` reproduces the
        output of ``fit``.
        """
        lower_bound, upper_bound = 1, 2

        param = self.get_scale_param()
        # NOTE(review): sibling tests configure ``scale_col_indexes``; confirm
        # that ``scale_column_idx`` is the attribute intended here.
        param.scale_column_idx = []
        param.feat_upper = upper_bound
        param.feat_lower = lower_bound

        min_max_scale = MinMaxScale(param)
        fitted = min_max_scale.fit(self.table_instance)
        fitted_min = min_max_scale.column_min_value
        fitted_max = min_max_scale.column_max_value

        # Clamp the raw fixture in place so the reference scaler is fitted on
        # values already limited to [lower_bound, upper_bound].
        for row in self.test_data:
            for col, cell in enumerate(row):
                if cell > upper_bound:
                    row[col] = upper_bound
                elif cell < lower_bound:
                    row[col] = lower_bound

        reference = MMS()
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 6).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted), expected)

        reference_min = list(reference.data_min_)
        reference_max = list(reference.data_max_)
        self.assertListEqual(fitted_min, reference_min)
        self.assertListEqual(fitted_max, reference_max)

        # Transforming the same table must give the same features as fit().
        transformed = min_max_scale.transform(self.table_instance)
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             self.get_table_instance_feature(transformed))
예제 #2
0
    def test_fit5(self):
        """Check "cap" mode scaling of a column subset against the MMS scaler.

        Columns are selected through both ``scale_names`` and
        ``scale_col_indexes``. The fixture is clamped in place with
        hand-written per-column bounds and scaled with the reference scaler,
        and every column outside ``scale_column_idx`` is reset to its raw
        value before comparing with the MinMaxScale output.
        """
        # Columns expected to be scaled.
        # NOTE(review): presumably the union of the columns matched by
        # ``scale_names`` and by ``scale_col_indexes`` below - confirm against
        # the fixture header.
        scale_column_idx = [1, 2, 4]
        scale_names = ['fid1', 'fid2', 'fid1000']
        scale_param = self.get_scale_param()
        scale_param.mode = "cap"
        scale_param.feat_upper = 0.8
        scale_param.feat_lower = 0.2
        scale_param.scale_names = scale_names
        scale_param.scale_col_indexes = [2, 4]

        scale_obj = MinMaxScale(scale_param)
        fit_instance = scale_obj.fit(self.table_instance)
        column_min_value = scale_obj.column_min_value
        column_max_value = scale_obj.column_max_value

        # Keep the untouched values: non-scaled columns are restored from here.
        raw_data = copy.deepcopy(self.test_data)
        # Hand-written per-column cap bounds.
        # NOTE(review): presumably the values at the 0.2 / 0.8 quantiles of
        # each fixture column - confirm against the fixture.
        gt_cap_lower_list = [0, 2, 2, 2, 3, 1]
        gt_cap_upper_list = [1, 8, 8, 8, 7, 8]

        # Clamp the fixture in place so the reference scaler sees capped data.
        for row in self.test_data:
            for j, value in enumerate(row):
                if value > gt_cap_upper_list[j]:
                    row[j] = gt_cap_upper_list[j]
                elif value < gt_cap_lower_list[j]:
                    row[j] = gt_cap_lower_list[j]

        scaler = MMS()
        scaler.fit(self.test_data)
        sklearn_transform_data = np.around(scaler.transform(self.test_data),
                                           6).tolist()
        # The reference scaler scales every column; reset the ones that are
        # not expected to be scaled back to their raw values.
        for i, row in enumerate(sklearn_transform_data):
            for j in range(len(row)):
                if j not in scale_column_idx:
                    row[j] = raw_data[i][j]

        fit_data = np.round(self.get_table_instance_feature(fit_instance),
                            6).tolist()
        self.assertListEqual(fit_data, sklearn_transform_data)

        data_min = list(scaler.data_min_)
        data_max = list(scaler.data_max_)
        self.assertListEqual(column_min_value, data_min)
        self.assertListEqual(column_max_value, data_max)

        # Transforming the same table must give the same features as fit().
        transform_data = scale_obj.transform(self.table_instance)
        self.assertListEqual(self.get_table_instance_feature(fit_instance),
                             self.get_table_instance_feature(transform_data))
예제 #3
0
    def test_fit4(self):
        """Check scaling of columns [1, 2, 4] with bounds 1 / 2 against MMS.

        Only the selected columns of the fixture are clamped in place to
        [1, 2]. The reference scaler is fitted on the result, and every
        non-selected column is reset to its raw value before comparing with
        the MinMaxScale output.
        """
        lower_bound, upper_bound = 1, 2
        scale_column_idx = [1, 2, 4]

        scale_param = self.get_scale_param()
        scale_param.feat_upper = upper_bound
        scale_param.feat_lower = lower_bound
        scale_param.scale_col_indexes = scale_column_idx

        scale_obj = MinMaxScale(scale_param)
        fit_instance = scale_obj.fit(self.table_instance)
        column_min_value = scale_obj.column_min_value
        column_max_value = scale_obj.column_max_value

        # Keep the untouched values: non-scaled columns are restored from here.
        raw_data = copy.deepcopy(self.test_data)
        # Clamp only the selected columns, in place.
        for row in self.test_data:
            for j, value in enumerate(row):
                if j in scale_column_idx:
                    if value > upper_bound:
                        row[j] = upper_bound
                    elif value < lower_bound:
                        row[j] = lower_bound

        scaler = MMS()
        scaler.fit(self.test_data)
        sklearn_transform_data = np.around(scaler.transform(self.test_data),
                                           6).tolist()
        # The reference scaler scales every column; reset the ones that are
        # not selected back to their raw values.
        for i, row in enumerate(sklearn_transform_data):
            for j in range(len(row)):
                if j not in scale_column_idx:
                    row[j] = raw_data[i][j]

        self.assertListEqual(self.get_table_instance_feature(fit_instance),
                             sklearn_transform_data)

        data_min = list(scaler.data_min_)
        data_max = list(scaler.data_max_)
        self.assertListEqual(column_min_value, data_min)
        self.assertListEqual(column_max_value, data_max)

        # Transforming the same table must give the same features as fit().
        transform_data = scale_obj.transform(self.table_instance)
        self.assertListEqual(self.get_table_instance_feature(fit_instance),
                             self.get_table_instance_feature(transform_data))
예제 #4
0
    def test_fit_instance_default(self):
        """Compare MinMaxScale with ``scale_col_indexes = -1`` against MMS.

        The reference scaler is fitted on the unmodified fixture. The test
        checks the fitted features, the per-column min/max values, and that
        ``transform`` reproduces the output of ``fit``.
        """
        param = self.get_scale_param()
        param.scale_col_indexes = -1
        min_max_scale = MinMaxScale(param)
        fitted = min_max_scale.fit(self.table_instance)
        fitted_min = min_max_scale.column_min_value
        fitted_max = min_max_scale.column_max_value

        reference = MMS()
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 6).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted), expected)

        reference_min = list(reference.data_min_)
        reference_max = list(reference.data_max_)
        self.assertListEqual(fitted_min, reference_min)
        self.assertListEqual(fitted_max, reference_max)

        # Transforming the same table must give the same features as fit().
        transformed = min_max_scale.transform(self.table_instance)
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             self.get_table_instance_feature(transformed))