Ejemplo n.º 1
0
    def replace_outlier_value(self, input_data_features, mode="fit"):
        """Replace outlier values in the features through an ``Imputer``.

        Nothing happens unless ``self.outlier_replace`` is truthy.

        Args:
            input_data_features: Feature data handed to the imputer.
            mode: ``"fit"`` fits the imputer and stores the replacement
                values it returns in ``self.outlier_replace_value``; any
                other value applies the stored values via ``transform``.

        Returns:
            The data returned by the imputer, or ``input_data_features``
            unchanged when outlier replacement is disabled.
        """
        if not self.outlier_replace:
            return input_data_features

        # Function-scope import: only needed when replacement is enabled.
        from federatedml.feature.imputer import Imputer

        outlier_imputer = Imputer(self.outlier_impute)
        if mode != "fit":
            replaced = outlier_imputer.transform(
                input_data_features,
                transform_value=self.outlier_replace_value)
        else:
            replaced, self.outlier_replace_value = outlier_imputer.fit(
                input_data_features,
                replace_method=self.outlier_replace_method,
                replace_value=self.outlier_replace_value)

            # No outlier list was configured: adopt the one the imputer used.
            if self.outlier_impute is None:
                self.outlier_impute = outlier_imputer.get_missing_value_list()

        self.outlier_replace_rate = outlier_imputer.get_impute_rate(mode)
        # NOTE: reporting the replace rate and the outlier value list through
        # the tracker callback is currently disabled.

        return replaced
Ejemplo n.º 2
0
 def test_transform_float(self):
     """Check ``Imputer.transform`` with ``output_format="float"`` against the reference data."""
     # Values the imputer should treat as missing.
     missing_markers = ['', 'none', 'na', 'null', "10000", "-10000"]
     # One fill value per column, passed to transform().
     fill_values = [
         0.963102, 1.467675, 0.829202, 0.772457, 1.000835,
         0.962702, 1.077099, 1.053586, 2.996525, 0.961696,
     ]
     imputer = Imputer(missing_value_list=missing_markers)

     transformed = imputer.transform(
         self.table_instance, fill_values, output_format="float")
     expected_rows = self.fit_test_data_float(
         self.test_data, fill_values, missing_markers)

     self.assertListEqual(self.table_to_list(transformed), expected_rows)
Ejemplo n.º 3
0
    def transform(self, data):
        """Impute missing values in ``data`` using the stored fill values.

        Args:
            data: Input handed to ``Imputer.transform``.

        Returns:
            The imputed data returned by the imputer.

        Side effects:
            Sets ``self.missing_impute`` from the imputer when it was
            ``None`` and stores the transform-mode impute rate in
            ``self.missing_impute_rate``.
        """
        # Plain literal: the message has no placeholders, so no f-prefix.
        LOGGER.info("Enter Feature Imputation transform")
        imputer_processor = Imputer(self.missing_impute)
        imputed_data = imputer_processor.transform(
            data, transform_value=self.default_value, skip_cols=self.skip_cols)

        # No missing-value list was configured: adopt the one the imputer used.
        if self.missing_impute is None:
            self.missing_impute = imputer_processor.get_missing_value_list()

        self.missing_impute_rate = imputer_processor.get_impute_rate(
            "transform")
        return imputed_data
Ejemplo n.º 4
0
    def test_get_impute_rate(self):
        """Check the per-column impute rate reported after fit and after transform."""
        imputer_value = ['', 'none', 'na', 'null', "10000", "-10000"]
        imputer = Imputer(missing_value_list=imputer_value)
        _, _ = imputer.fit(self.table_instance, "median", output_format='str')
        cols_impute_rate_ground_true = [0, 0.3, 0.1, 0.1, 0.1, 0.1, 0, 0.1, 0, 0]
        cols_fit_impute_rate = imputer.get_impute_rate(mode="fit")
        self.assertListEqual(cols_fit_impute_rate, cols_impute_rate_ground_true)

        cols_transform_value_ground_true = [-0.606584, -0.193332, -0.620475, -0.591332, -0.327392, -0.519504, -0.610669,
                                            -0.768581, -0.28757, -0.247477]
        _ = imputer.transform(self.table_instance, cols_transform_value_ground_true)
        # Query the transform-mode rate here: asking for "fit" again would only
        # re-check the value asserted above and leave transform() untested.
        # The same table and missing-value list are used, so the same rates
        # are expected.
        cols_transform_impute_rate = imputer.get_impute_rate(mode="transform")
        self.assertListEqual(cols_transform_impute_rate, cols_impute_rate_ground_true)
Ejemplo n.º 5
0
    def replace_outlier_value(self, input_data_features, mode="fit"):
        """Replace outlier values in the features through an ``Imputer``.

        Nothing happens unless ``self.outlier_replace`` is truthy.

        Args:
            input_data_features: Feature data handed to the imputer.
            mode: ``"fit"`` fits the imputer and stores the replacement
                values it returns in ``self.outlier_replace_value``; any
                other value applies the stored values via ``transform``.

        Returns:
            The data returned by the imputer, or ``input_data_features``
            unchanged when outlier replacement is disabled.
        """
        if not self.outlier_replace:
            return input_data_features

        # Function-scope import: only needed when replacement is enabled.
        from federatedml.feature.imputer import Imputer

        outlier_imputer = Imputer(self.outlier_impute)
        if mode != "fit":
            return outlier_imputer.transform(
                input_data_features,
                replace_method=self.outlier_replace_method,
                transform_value=self.outlier_replace_value)

        replaced, self.outlier_replace_value = outlier_imputer.fit(
            input_data_features,
            replace_method=self.outlier_replace_method,
            replace_value=self.outlier_replace_value)

        # No outlier list was configured: adopt the one the imputer used.
        if self.outlier_impute is None:
            self.outlier_impute = outlier_imputer.get_imputer_value_list()

        return replaced
Ejemplo n.º 6
0
    def fill_missing_value(self, input_data_features, mode="fit"):
        """Fill missing values in the features through an ``Imputer``.

        Nothing happens unless ``self.missing_fill`` is truthy.

        Args:
            input_data_features: Feature data handed to the imputer.
            mode: ``"fit"`` fits the imputer and stores the fill values it
                returns in ``self.default_value``; any other value applies
                the stored values via ``transform``.

        Returns:
            The data returned by the imputer, or ``input_data_features``
            unchanged when missing-value filling is disabled.
        """
        if not self.missing_fill:
            return input_data_features

        # Function-scope import: only needed when filling is enabled.
        from federatedml.feature.imputer import Imputer

        imputer_processor = Imputer(self.missing_impute)
        if mode == "fit":
            input_data_features, self.default_value = imputer_processor.fit(
                input_data_features,
                replace_method=self.missing_fill_method,
                replace_value=self.default_value)
        else:
            input_data_features = imputer_processor.transform(
                input_data_features,
                replace_method=self.missing_fill_method,
                transform_value=self.default_value)

        # One check serves both modes: when no missing-value list was
        # configured, adopt the one the imputer used.
        if self.missing_impute is None:
            self.missing_impute = imputer_processor.get_imputer_value_list()

        return input_data_features
Ejemplo n.º 7
0
    def fill_missing_value(self, input_data, tags_dict, mode="fit"):
        """Fill missing values in tag-formatted data through an ``Imputer``.

        Each value is first mapped with ``change_tag_to_str``, the result is
        given a schema and imputed, and the imputed values are mapped back
        with ``change_str_to_tag``.

        Args:
            input_data: Table-like object exposing ``mapValues``.
            tags_dict: Passed through to both conversion helpers.
            mode: ``"fit"`` fits the imputer and stores the fill values it
                returns in ``self.default_value``; any other value applies
                the stored values via ``transform``.

        Returns:
            The imputed data, mapped back with ``change_str_to_tag``.
        """
        to_str = functools.partial(
            self.change_tag_to_str,
            tags_dict=tags_dict,
            delimitor=self.delimitor,
            with_label=self.with_label,
            tag_value_delimitor=self.tag_value_delimitor)
        str_table = input_data.mapValues(to_str)
        set_schema(
            str_table,
            make_schema(self.header, self.sid_name, self.label_name))

        from federatedml.feature.imputer import Imputer

        imputer = Imputer()
        if mode != "fit":
            filled = imputer.transform(
                str_table, transform_value=self.default_value)
        else:
            filled, self.default_value = imputer.fit(
                str_table,
                replace_method=self.missing_fill_method,
                replace_value=self.default_value)
            LOGGER.debug("self.default_value is {}".format(self.default_value))

        # No missing-value list was configured: adopt the one the imputer used.
        if self.missing_impute is None:
            self.missing_impute = imputer.get_missing_value_list()
        LOGGER.debug("self.missing_impute is {}".format(self.missing_impute))

        self.missing_impute_rate = imputer.get_impute_rate(mode)

        to_tag = functools.partial(
            self.change_str_to_tag,
            tags_dict=tags_dict,
            delimitor=self.delimitor,
            tag_value_delimitor=self.tag_value_delimitor)
        return filled.mapValues(to_tag)