Example #1
0
 def test_transform1(self):
     """Transforming with the fitted mean/std must reproduce the fit output.

     The scaler is built with both ``with_mean`` and ``with_std`` enabled and
     is fitted and applied on the same table instance.
     """
     scaler = StandardScaler(with_mean=True, with_std=True)
     fitted, mean, std = scaler.fit(self.table_instance)
     transformed = scaler.transform(self.table_instance, mean, std)

     actual_features = self.get_table_instance_feature(transformed)
     expected_features = self.get_table_instance_feature(fitted)
     self.assertListEqual(actual_features, expected_features)
    def test_cols_select_fit_and_transform_repeat(self):
        """Check column-wise scaling when the requested indices contain repeats.

        The expected rows come from the reference scaler ``SSL`` fitted on
        ``self.test_data`` and rounded to 4 decimals; every column whose index
        is not in the index list returned by ``fit`` is reset to its raw value.
        Both the ``fit`` output and a subsequent ``transform`` call are compared
        against those expected rows.
        """
        requested_idx = [1, 1, 2, 2, 4, 5, 5]
        standard_scaler = StandardScaler(area='col',
                                         scale_column_idx=requested_idx,
                                         with_mean=True,
                                         with_std=True)
        fit_data, scale_conf = standard_scaler.fit(self.table_instance)
        mean = scale_conf[0]
        std = scale_conf[1]
        # Use the index list reported by fit, not the one passed in.
        scale_column_idx = scale_conf[2]

        reference = SSL(with_mean=True, with_std=True)
        reference.fit(self.test_data)
        scaled_rows = np.around(reference.transform(self.test_data),
                                4).tolist()

        # Columns that were not selected for scaling keep their raw values.
        expected = [
            [value if col in scale_column_idx else self.test_data[row][col]
             for col, value in enumerate(scaled)]
            for row, scaled in enumerate(scaled_rows)
        ]

        fit_features = self.get_table_instance_feature(fit_data)
        self.assertListEqual(fit_features, expected)

        retransformed = standard_scaler.transform(
            self.table_instance, mean, std, scale_column_idx)
        retransformed_features = self.get_table_instance_feature(retransformed)
        self.assertListEqual(retransformed_features, expected)
Example #3
0
    def transform(self, data, fit_config):
        """
        Transform input data using scale with fit results
        Parameters
        ----------
        data: data_instance, input data
        fit_config: list, the fit results information of scale.
                    For the standard scale method it is indexed as
                    [mean, scale, scale_column_idx]; for the min-max method it
                    is passed to MinMaxScaler.transform unchanged.

        Returns
        ----------
        transform_data:data_instance, data after transform. The input data is
                       returned untouched when fit_config is empty.
        """
        LOGGER.info("Start scale data transform ...")
        self.header = data.schema.get('header')
        if len(fit_config) == 0:
            LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
            # Actually return here: falling through would index into the empty
            # fit_config (e.g. fit_config[0]) and raise IndexError.
            return data

        if self.scale_param.method == consts.MINMAXSCALE:
            min_max_scaler = MinMaxScaler()
            data = min_max_scaler.transform(data, fit_config)
        elif self.scale_param.method == consts.STANDARDSCALE:
            standard_scaler = StandardScaler()
            data = standard_scaler.transform(data, mean=fit_config[0], scale=fit_config[1],
                                             scale_column_idx=fit_config[2])
        else:
            LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

        # Re-attach the header captured before the scaler produced new data.
        data.schema['header'] = self.header
        LOGGER.info("End transform data ...")

        return data
    def test_transform4(self):
        """With ``with_mean`` and ``with_std`` both off, transform must match fit.

        The mean, std and column indices returned by ``fit`` are fed back into
        ``transform`` on the same table instance.
        """
        scaler = StandardScaler(with_mean=False, with_std=False)
        fitted, scale_conf = scaler.fit(self.table_instance)
        mean = scale_conf[0]
        std = scale_conf[1]
        scale_column_idx = scale_conf[2]
        transformed = scaler.transform(self.table_instance, mean, std,
                                       scale_column_idx)

        actual_features = self.get_table_instance_feature(transformed)
        expected_features = self.get_table_instance_feature(fitted)
        self.assertListEqual(actual_features, expected_features)
Example #5
0
    def transform(self, data, fit_config):
        """
        Transform input data using scale with fit results
        Parameters
        ----------
        data: data_instance, input data
        fit_config: list, the fit results information of scale.
                    The min-max method reads fit_config[0]; the standard scale
                    method reads fit_config[0] as mean and fit_config[1] as scale.

        Returns
        ----------
        transform_data:data_instance, data after transform. The input data is
                       returned untouched when fit_config is empty.
        """
        LOGGER.info("Start scale data transform ...")

        if len(fit_config) == 0:
            LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
            # Actually return here: both scaling branches index fit_config[0],
            # which would raise IndexError on an empty config.
            return data

        if self.scale_param.method == consts.MINMAXSCALE:
            min_max_scaler = MinMaxScaler()
            data = min_max_scaler.transform(data, fit_config[0])
        elif self.scale_param.method == consts.STANDARDSCALE:
            standard_scaler = StandardScaler()
            data = standard_scaler.transform(data, mean=fit_config[0], scale=fit_config[1])
        else:
            LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

        LOGGER.info("End transform data ...")

        return data