Esempio n. 1
0
    def test_fit1(self):
        """Compare MinMaxScale with sklearn when feature bounds are 1 and 2.

        The fixture data is clamped in place to the same bounds before the
        sklearn reference scaler is fitted, so both sides see equal inputs.
        Also checks the recorded per-column min/max and that ``transform``
        reproduces the output of ``fit``.
        """
        lower, upper = 1, 2

        param = self.get_scale_param()
        param.scale_column_idx = []
        param.feat_upper = upper
        param.feat_lower = lower

        min_max_scale = MinMaxScale(param)
        fitted = min_max_scale.fit(self.table_instance)
        fitted_min = min_max_scale.column_min_value
        fitted_max = min_max_scale.column_max_value

        # Clamp every value of the shared fixture into [lower, upper].
        for row_no, row in enumerate(self.test_data):
            for col_no, value in enumerate(row):
                self.test_data[row_no][col_no] = min(max(value, lower), upper)

        reference = MMS()
        reference.fit(self.test_data)
        actual = self.get_table_instance_feature(fitted)
        expected = np.around(reference.transform(self.test_data), 6).tolist()
        self.assertListEqual(actual, expected)

        self.assertListEqual(fitted_min, list(reference.data_min_))
        self.assertListEqual(fitted_max, list(reference.data_max_))

        # Transforming the same table must give the same result as fitting it.
        transformed = min_max_scale.transform(self.table_instance)
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             self.get_table_instance_feature(transformed))
Esempio n. 2
0
    def test_fit5(self):
        """Compare "cap" mode MinMaxScale on selected columns with sklearn.

        Columns are selected through both ``scale_names`` and
        ``scale_col_indexes``; the test expects columns 1, 2 and 4 to be
        scaled and every other column to keep its raw value. Also checks the
        recorded per-column min/max and that ``transform`` reproduces the
        output of ``fit``.
        """
        # Columns expected to be scaled.
        # NOTE(review): presumably the union of scale_names and
        # scale_col_indexes below - confirm against the fixture header.
        scale_column_idx = [1, 2, 4]
        scale_names = ['fid1', 'fid2', 'fid1000']
        scale_param = self.get_scale_param()
        scale_param.mode = "cap"
        scale_param.feat_upper = 0.8
        scale_param.feat_lower = 0.2
        scale_param.scale_names = scale_names
        scale_param.scale_col_indexes = [2, 4]

        scale_obj = MinMaxScale(scale_param)
        fit_instance = scale_obj.fit(self.table_instance)
        column_min_value = scale_obj.column_min_value
        column_max_value = scale_obj.column_max_value

        # Keep the untouched values; unscaled columns are restored from here.
        raw_data = copy.deepcopy(self.test_data)
        # Hard-coded per-column cap bounds.
        # NOTE(review): presumably the 0.2 / 0.8 caps of the fixture - confirm.
        gt_cap_lower_list = [0, 2, 2, 2, 3, 1]
        gt_cap_upper_list = [1, 8, 8, 8, 7, 8]

        # Clamp the shared fixture in place to the per-column bounds.
        for i, line in enumerate(self.test_data):
            for j, value in enumerate(line):
                if value > gt_cap_upper_list[j]:
                    self.test_data[i][j] = gt_cap_upper_list[j]
                elif value < gt_cap_lower_list[j]:
                    self.test_data[i][j] = gt_cap_lower_list[j]

        scaler = MMS()
        scaler.fit(self.test_data)
        sklearn_transform_data = np.around(scaler.transform(self.test_data),
                                           6).tolist()
        # sklearn scales every column; put the raw values back for the
        # columns that are not expected to be scaled.
        for i, line in enumerate(sklearn_transform_data):
            for j, _ in enumerate(line):
                if j not in scale_column_idx:
                    line[j] = raw_data[i][j]

        fit_data = np.round(self.get_table_instance_feature(fit_instance),
                            6).tolist()
        self.assertListEqual(fit_data, sklearn_transform_data)

        data_min = list(scaler.data_min_)
        data_max = list(scaler.data_max_)
        self.assertListEqual(column_min_value, data_min)
        self.assertListEqual(column_max_value, data_max)

        # Transforming the same table must give the same result as fitting it.
        transform_data = scale_obj.transform(self.table_instance)
        self.assertListEqual(self.get_table_instance_feature(fit_instance),
                             self.get_table_instance_feature(transform_data))
Esempio n. 3
0
    def fit(self, data):
        """
        Fit a scale object on the input data and return the scaled data

        The scale object is chosen by ``self.model_param.method``. For any
        other method a warning is logged and no new scale object is created.

        Parameters
        ----------
        data: data_instance, input data

        Returns
        ----------
        fit_data: data_instance, data after scale carrying the schema of the input,
                  or the input data itself when no scale object is available
        """
        LOGGER.info("Start scale data fit ...")

        param = self.model_param
        if param.method == consts.MINMAXSCALE:
            self.scale_obj = MinMaxScale(param)
        elif param.method == consts.STANDARDSCALE:
            self.scale_obj = StandardScale(param)
        else:
            LOGGER.warning("Scale method is {}, do nothing and return!".format(param.method))

        if not self.scale_obj:
            # Nothing to fit with: hand the input back untouched.
            fit_data = data
        else:
            fit_data = self.scale_obj.fit(data)
            fit_data.schema = data.schema

            # Report which scale method was used.
            scale_meta = MetricMeta(name="scale",
                                    metric_type="SCALE",
                                    extra_metas={"method": self.model_param.method})
            self.callback_meta(metric_name="scale",
                               metric_namespace="train",
                               metric_meta=scale_meta)

        LOGGER.info("End fit data ...")
        return fit_data
Esempio n. 4
0
    def test_fit4(self):
        """Compare MinMaxScale on columns 1, 2 and 4 with sklearn (bounds 1 and 2).

        Only the selected columns are clamped and expected to be scaled; every
        other column must keep its raw value. Also checks the recorded
        per-column min/max and that ``transform`` reproduces the output of
        ``fit``.
        """
        scale_column_idx = [1, 2, 4]
        scale_param = self.get_scale_param()
        scale_param.feat_upper = 2
        scale_param.feat_lower = 1
        scale_param.scale_col_indexes = scale_column_idx

        scale_obj = MinMaxScale(scale_param)
        fit_instance = scale_obj.fit(self.table_instance)
        column_min_value = scale_obj.column_min_value
        column_max_value = scale_obj.column_max_value

        # Keep the untouched values; unscaled columns are restored from here.
        raw_data = copy.deepcopy(self.test_data)
        # Clamp only the selected columns of the shared fixture, in place.
        for i, line in enumerate(self.test_data):
            for j, value in enumerate(line):
                if j in scale_column_idx:
                    if value > 2:
                        self.test_data[i][j] = 2
                    elif value < 1:
                        self.test_data[i][j] = 1

        scaler = MMS()
        scaler.fit(self.test_data)
        sklearn_transform_data = np.around(scaler.transform(self.test_data),
                                           6).tolist()
        # sklearn scales every column; put the raw values back for the
        # columns that are not expected to be scaled.
        for i, line in enumerate(sklearn_transform_data):
            for j, _ in enumerate(line):
                if j not in scale_column_idx:
                    line[j] = raw_data[i][j]

        self.assertListEqual(self.get_table_instance_feature(fit_instance),
                             sklearn_transform_data)

        data_min = list(scaler.data_min_)
        data_max = list(scaler.data_max_)
        self.assertListEqual(column_min_value, data_min)
        self.assertListEqual(column_max_value, data_max)

        # Transforming the same table must give the same result as fitting it.
        transform_data = scale_obj.transform(self.table_instance)
        self.assertListEqual(self.get_table_instance_feature(fit_instance),
                             self.get_table_instance_feature(transform_data))
Esempio n. 5
0
File: scale.py Progetto: zpskt/FATE
    def export_model(self):
        """
        Export the model held by the scale object

        When no scale object exists yet, one is created from the model
        parameters first: MinMaxScale for the min-max method, StandardScale
        for anything else.

        Returns
        ----------
        The result of the scale object's ``export_model(self.need_run)``
        """
        if not self.scale_obj:
            scale_cls = (MinMaxScale
                         if self.model_param.method == consts.MINMAXSCALE
                         else StandardScale)
            self.scale_obj = scale_cls(self.model_param)

        return self.scale_obj.export_model(self.need_run)
Esempio n. 6
0
    def test_fit_instance_default(self):
        """Compare MinMaxScale with sklearn when ``scale_col_indexes`` is -1.

        Checks the scaled features, the recorded per-column min/max, and that
        ``transform`` reproduces the output of ``fit``.
        """
        param = self.get_scale_param()
        param.scale_col_indexes = -1
        min_max_scale = MinMaxScale(param)
        fitted = min_max_scale.fit(self.table_instance)
        fitted_min = min_max_scale.column_min_value
        fitted_max = min_max_scale.column_max_value

        reference = MMS()
        reference.fit(self.test_data)
        actual = self.get_table_instance_feature(fitted)
        expected = np.around(reference.transform(self.test_data), 6).tolist()
        self.assertListEqual(actual, expected)

        self.assertListEqual(fitted_min, list(reference.data_min_))
        self.assertListEqual(fitted_max, list(reference.data_max_))

        # Transforming the same table must give the same result as fitting it.
        transformed = min_max_scale.transform(self.table_instance)
        self.assertListEqual(self.get_table_instance_feature(fitted),
                             self.get_table_instance_feature(transformed))
Esempio n. 7
0
File: scale.py Progetto: zpskt/FATE
    def transform(self, data, fit_config=None):
        """
        Scale input data using the fit results stored on this object

        A new scale object is built according to ``self.model_param.method``
        and given the header, scale column indexes and column range held on
        this instance (plus mean and std for standard scale) before it
        transforms the data.

        Parameters
        ----------
        data: data_instance, input data
        fit_config: list, the fit results information of scale; not read by this method

        Returns
        ----------
        transform_data: data_instance, data after transform carrying the schema of the input,
                        or the input data itself when no scale object is available
        """
        LOGGER.info("Start scale data transform ...")

        param = self.model_param
        if param.method == consts.MINMAXSCALE:
            self.scale_obj = MinMaxScale(param)
        elif param.method == consts.STANDARDSCALE:
            self.scale_obj = StandardScale(param)
            self.scale_obj.set_param(self.mean, self.std)
        else:
            LOGGER.info("DataTransform method is {}, do nothing and return!".format(param.method))

        if not self.scale_obj:
            # Nothing to transform with: hand the input back untouched.
            transform_data = data
        else:
            # Load the stored fit state into the freshly built scale object.
            scaler = self.scale_obj
            scaler.header = self.header
            scaler.scale_column_idx = self.scale_column_idx
            scaler.set_column_range(self.column_max_value, self.column_min_value)

            transform_data = scaler.transform(data)
            transform_data.schema = data.schema

            # Report which scale method was used.
            scale_meta = MetricMeta(name="scale",
                                    metric_type="SCALE",
                                    extra_metas={"method": self.model_param.method})
            self.callback_meta(metric_name="scale",
                               metric_namespace="train",
                               metric_meta=scale_meta)

        LOGGER.info("End transform data.")

        return transform_data