Ejemplo n.º 1
0
    def test_cols_select_fit_and_transform(self):
        """Fit and transform with scaling restricted to columns 1, 2 and 4.

        The expected table comes from ``SSL(with_mean=True, with_std=True)``
        fitted on ``self.test_data`` and rounded to 4 decimals; every column
        that is not in the scaler's ``scale_column_idx`` is then reset to the
        raw fixture value. Both ``fit`` and a later ``transform`` of the same
        table must reproduce that expected table.
        """
        param = self.get_scale_param()
        param.scale_column_idx = [1, 2, 4]
        standard_scaler = StandardScale(param)
        fitted = standard_scaler.fit(self.table_instance)

        selected = standard_scaler.scale_column_idx

        reference = SSL(with_mean=True, with_std=True)
        reference.fit(self.test_data)
        scaled = reference.transform(self.test_data)
        expected = np.around(scaled, 4).tolist()

        # Columns outside the selection must pass through unscaled.
        for row_no, row in enumerate(expected):
            for col_no, _ in enumerate(row):
                if col_no in selected:
                    continue
                row[col_no] = self.test_data[row_no][col_no]

        fitted_features = self.get_table_instance_feature(fitted)
        self.assertListEqual(fitted_features, expected)

        transformed = standard_scaler.transform(self.table_instance)
        transformed_features = self.get_table_instance_feature(transformed)
        self.assertListEqual(transformed_features, expected)
Ejemplo n.º 2
0
    def test_cols_select_fit_and_transform_repeat(self):
        """Fit and transform when the requested column list has duplicates.

        ``fit`` returns the fitted table plus a configuration sequence whose
        first three entries are read as mean, std and the column indices.
        Those returned indices (not the requested list) decide which columns
        of the ``SSL`` reference output are kept; all other columns are reset
        to the raw fixture values before comparing.
        """
        requested_idx = [1, 1, 2, 2, 4, 5, 5]
        standard_scaler = StandardScaler(area='col',
                                         scale_column_idx=requested_idx,
                                         with_mean=True,
                                         with_std=True)
        fitted, scale_conf = standard_scaler.fit(self.table_instance)
        mean = scale_conf[0]
        std = scale_conf[1]
        fitted_idx = scale_conf[2]

        reference = SSL(with_mean=True, with_std=True)
        reference.fit(self.test_data)
        scaled = reference.transform(self.test_data)
        expected = np.around(scaled, 4).tolist()

        # Columns the scaler did not report as scaled keep their raw values.
        for row_no, row in enumerate(expected):
            for col_no, _ in enumerate(row):
                if col_no in fitted_idx:
                    continue
                row[col_no] = self.test_data[row_no][col_no]

        fitted_features = self.get_table_instance_feature(fitted)
        self.assertListEqual(fitted_features, expected)

        transformed = standard_scaler.transform(
            self.table_instance, mean, std, fitted_idx)
        transformed_features = self.get_table_instance_feature(transformed)
        self.assertListEqual(transformed_features, expected)
Ejemplo n.º 3
0
    def test_fit10(self):
        """Column-area scaling of columns 1, 2 and 4 in ``"cap"`` mode.

        The scaler is configured with ``feat_lower=0.2`` / ``feat_upper=0.8``.
        The expected result is built by clamping the selected columns of a
        copy of the fixture to the expected per-column bounds, running the
        ``SSL`` reference scaler on that copy, and restoring the raw values
        of every non-selected column. The test also checks the reported
        column bounds, mean and std, and that ``transform`` on the same
        table reproduces the ``fit`` output.
        """
        scale_column_idx = [1, 2, 4]

        scale_param = self.get_scale_param()
        scale_param.feat_upper = 0.8
        scale_param.feat_lower = 0.2
        scale_param.with_mean = True
        scale_param.with_std = True
        scale_param.mode = "cap"
        scale_param.scale_column_idx = scale_column_idx
        scale_param.area = "col"

        standard_scaler = StandardScale(scale_param)
        fit_instance = standard_scaler.fit(self.table_instance)
        mean = standard_scaler.mean
        std = standard_scaler.std
        column_max_value = standard_scaler.column_max_value
        column_min_value = standard_scaler.column_min_value

        # Expected per-column cap bounds for the fixture data.
        gt_cap_lower_list = [0, 2, 2, 2, 3, 1]
        gt_cap_upper_list = [1, 8, 8, 8, 7, 8]

        # Clamp a private copy so the shared fixture stays untouched and can
        # still serve as the source of raw values below.
        capped_data = copy.deepcopy(self.test_data)
        for row in capped_data:
            for j in scale_column_idx:
                if row[j] > gt_cap_upper_list[j]:
                    row[j] = gt_cap_upper_list[j]
                elif row[j] < gt_cap_lower_list[j]:
                    row[j] = gt_cap_lower_list[j]

        scaler = SSL(with_mean=True, with_std=True)
        scaler.fit(capped_data)
        transform_data = np.around(scaler.transform(capped_data),
                                   4).tolist()

        # Non-selected columns must come back neither capped nor scaled.
        for i, row in enumerate(transform_data):
            for j, _ in enumerate(row):
                if j not in scale_column_idx:
                    row[j] = self.test_data[i][j]

        self.assertListEqual(self.get_table_instance_feature(fit_instance),
                             transform_data)
        self.assertEqual(column_max_value, gt_cap_upper_list)
        self.assertEqual(column_min_value, gt_cap_lower_list)

        self.assertListEqual(list(np.around(mean, 6)),
                             list(np.around(scaler.mean_, 6)))
        self.assertListEqual(list(np.around(std, 6)),
                             list(np.around(scaler.scale_, 6)))

        # Transforming the table that was fitted must give the same output.
        raw_data_transform = standard_scaler.transform(self.table_instance)
        self.assertListEqual(
            self.get_table_instance_feature(fit_instance),
            self.get_table_instance_feature(raw_data_transform))
Ejemplo n.º 4
0
    def test_fit4(self):
        """Fit with both ``with_mean`` and ``with_std`` switched off.

        The fitted features must match the ``SSL`` reference output rounded
        to 4 decimals, and the returned mean / std must be all zeros / all
        ones, one entry per column of the first fixture row.
        """
        standard_scaler = StandardScaler(with_mean=False, with_std=False)
        fitted, mean, std = standard_scaler.fit(self.table_instance)

        reference = SSL(with_mean=False, with_std=False)
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             expected)

        n_features = len(self.test_data[0])
        self.assertEqual(mean, [0] * n_features)
        self.assertEqual(std, [1] * n_features)
Ejemplo n.º 5
0
    def test_fit3(self):
        """Fit with centering only (``with_mean=True``, ``with_std=False``).

        The fitted features and the returned mean are compared with the
        ``SSL(with_std=False)`` reference at 4 decimals; the returned std is
        expected to be all ones.
        """
        standard_scaler = StandardScaler(with_mean=True, with_std=False)
        fitted, mean, std = standard_scaler.fit(self.table_instance)

        reference = SSL(with_std=False)
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             expected)

        rounded_mean = list(np.around(mean, 4))
        self.assertListEqual(rounded_mean,
                             list(np.around(reference.mean_, 4)))

        rounded_std = list(np.around(std, 4))
        self.assertListEqual(rounded_std, [1] * len(std))
Ejemplo n.º 6
0
    def test_fit2(self):
        """Fit with std scaling only (``with_mean=False``, ``with_std=True``).

        ``fit`` returns the fitted table and a configuration sequence whose
        first two entries are read as mean and std. The mean is expected to
        be all zeros; features and std are compared with the
        ``SSL(with_mean=False)`` reference at 4 decimals.
        """
        standard_scaler = StandardScaler(with_mean=False, with_std=True)
        fitted, scale_conf = standard_scaler.fit(self.table_instance)
        mean = scale_conf[0]
        std = scale_conf[1]

        reference = SSL(with_mean=False)
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             expected)

        rounded_mean = list(np.around(mean, 4))
        self.assertListEqual(rounded_mean, [0] * len(mean))

        rounded_std = list(np.around(std, 4))
        self.assertListEqual(rounded_std,
                             list(np.around(reference.scale_, 4)))
Ejemplo n.º 7
0
    def test_fit9(self):
        """Column-area scaling of columns 1, 2 and 4 with list-valued bounds.

        ``feat_lower`` is 1 and ``feat_upper`` is 2 for every column. The
        expected result is built by clamping the selected columns of a copy
        of the fixture to ``[1, 2]``, running the ``SSL`` reference scaler on
        that copy, and restoring the raw values of every non-selected
        column. The test also checks mean, std and the reported column
        bounds, and that ``transform`` on the same table reproduces the
        ``fit`` output.
        """
        scale_column_idx = [1, 2, 4]
        cap_lower, cap_upper = 1, 2

        scale_param = self.get_scale_param()
        scale_param.feat_upper = [2, 2, 2, 2, 2, 2]
        scale_param.feat_lower = [1, 1, 1, 1, 1, 1]
        scale_param.with_mean = True
        scale_param.with_std = True
        scale_param.scale_column_idx = scale_column_idx
        scale_param.area = "col"

        standard_scaler = StandardScale(scale_param)
        fit_instance = standard_scaler.fit(self.table_instance)
        mean = standard_scaler.mean
        std = standard_scaler.std
        column_max_value = standard_scaler.column_max_value
        column_min_value = standard_scaler.column_min_value

        # Clamp a private copy so the shared fixture stays untouched and can
        # still serve as the source of raw values below.
        capped_data = copy.deepcopy(self.test_data)
        for row in capped_data:
            for j in scale_column_idx:
                if row[j] > cap_upper:
                    row[j] = cap_upper
                elif row[j] < cap_lower:
                    row[j] = cap_lower

        scaler = SSL(with_mean=True, with_std=True)
        scaler.fit(capped_data)
        transform_data = np.around(scaler.transform(capped_data),
                                   4).tolist()

        # Non-selected columns must come back neither capped nor scaled.
        for i, row in enumerate(transform_data):
            for j, _ in enumerate(row):
                if j not in scale_column_idx:
                    row[j] = self.test_data[i][j]

        self.assertListEqual(self.get_table_instance_feature(fit_instance),
                             transform_data)
        self.assertListEqual(list(np.around(mean, 6)),
                             list(np.around(scaler.mean_, 6)))
        self.assertListEqual(list(np.around(std, 6)),
                             list(np.around(scaler.scale_, 6)))
        self.assertEqual(column_max_value, [1, 2, 2, 10, 2, 10])
        self.assertEqual(column_min_value, [0, 1, 1, 2, 2, -100])

        # Transforming the table that was fitted must give the same output.
        raw_data_transform = standard_scaler.transform(self.table_instance)
        self.assertListEqual(
            self.get_table_instance_feature(fit_instance),
            self.get_table_instance_feature(raw_data_transform))
Ejemplo n.º 8
0
    def test_fit1(self):
        """Fit with the unmodified parameters from ``get_scale_param``.

        Features, mean and std of the fitted scaler are compared at
        4 decimals with a default-constructed ``SSL`` fitted on
        ``self.test_data``.
        """
        standard_scaler = StandardScale(self.get_scale_param())
        fitted = standard_scaler.fit(self.table_instance)
        mean, std = standard_scaler.mean, standard_scaler.std

        reference = SSL()
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             expected)

        rounded_mean = list(np.around(mean, 4))
        self.assertListEqual(rounded_mean,
                             list(np.around(reference.mean_, 4)))

        rounded_std = list(np.around(std, 4))
        self.assertListEqual(rounded_std,
                             list(np.around(reference.scale_, 4)))
Ejemplo n.º 9
0
    def test_fit6(self):
        """Fit with ``area='col'`` and no explicit column selection.

        ``fit`` returns the fitted table and a configuration sequence whose
        first three entries are read as mean, std and column indices (the
        indices are read but not asserted on). Features, mean and std are
        compared at 4 decimals with a default-constructed ``SSL``.
        """
        standard_scaler = StandardScaler(area='col',
                                         with_mean=True,
                                         with_std=True)
        fitted, scale_conf = standard_scaler.fit(self.table_instance)
        mean = scale_conf[0]
        std = scale_conf[1]
        _scale_column_idx = scale_conf[2]

        reference = SSL()
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             expected)

        rounded_mean = list(np.around(mean, 4))
        self.assertListEqual(rounded_mean,
                             list(np.around(reference.mean_, 4)))

        rounded_std = list(np.around(std, 4))
        self.assertListEqual(rounded_std,
                             list(np.around(reference.scale_, 4)))
Ejemplo n.º 10
0
    def test_fit6(self):
        """Fit with ``area="col"`` and an empty column selection.

        The fitted features are expected to equal the raw fixture rounded to
        4 decimals, while the scaler's mean and std are still compared at
        4 decimals with a default-constructed ``SSL`` fitted on the fixture.
        """
        param = self.get_scale_param()
        param.scale_column_idx = []
        param.area = "col"

        standard_scaler = StandardScale(param)
        fitted = standard_scaler.fit(self.table_instance)
        mean, std = standard_scaler.mean, standard_scaler.std

        reference = SSL()
        reference.fit(self.test_data)

        # With nothing selected the features are compared to the raw data.
        unscaled = np.around(self.test_data, 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             unscaled)

        rounded_mean = list(np.around(mean, 4))
        self.assertListEqual(rounded_mean,
                             list(np.around(reference.mean_, 4)))

        rounded_std = list(np.around(std, 4))
        self.assertListEqual(rounded_std,
                             list(np.around(reference.scale_, 4)))