Example #1
0
    def one_hot_encoder_fit_transform(self, data_instance):
        """Fit a one-hot encoder on ``data_instance`` and return the encoded data.

        The input is returned untouched when it is ``None`` or when
        ``self.workflow_param.need_one_hot`` is falsy. Otherwise a
        ``OneHotEncoder`` is built from the loaded and checked parameters,
        fitted and applied to the data, and saved under
        ``workflow_param.model_table`` / ``workflow_param.model_namespace``.
        Each ``(meta, param)`` pair returned by ``save_model`` is appended to
        ``self.pipeline.node_meta`` and ``self.pipeline.node_param``.

        :param data_instance: data to encode, or ``None``.
        :return: the encoded data, or ``data_instance`` itself when no
            encoding was performed.
        """
        # Guard: nothing to encode.
        if data_instance is None:
            return data_instance

        # Guard: one-hot encoding is switched off in the workflow parameters.
        if not self.workflow_param.need_one_hot:
            LOGGER.info("No need to do one-hot encode")
            return data_instance

        LOGGER.info("Start one-hot encode")
        default_param = param_generator.OneHotEncoderParam()
        encoder_param = self._load_param(default_param)
        param_checker.OneHotEncoderParamChecker.check_param(encoder_param)

        encoder = OneHotEncoder(encoder_param)
        encoded = encoder.fit_transform(data_instance)

        saved_buffers = encoder.save_model(self.workflow_param.model_table,
                                           self.workflow_param.model_namespace)
        # Record the saved model's meta/param entries in the pipeline.
        for meta_entry, param_entry in saved_buffers:
            self.pipeline.node_meta.append(meta_entry)
            self.pipeline.node_param.append(param_entry)

        LOGGER.info("Finish one-hot encode")
        return encoded
Example #2
0
    def test_instance(self):
        """Check the feature count of every instance returned by ``fit``.

        A ``OneHotEncoder`` is configured with ``self.cols`` (used both as
        ``cols`` and ``cols_index``), fitted on ``self.table``, and every
        collected instance must expose ``self.feature_num * 3`` features.
        """
        one_hot_encoder = OneHotEncoder()
        one_hot_encoder.cols = self.cols
        one_hot_encoder.cols_index = self.cols

        result = one_hot_encoder.fit(self.table)
        local_result = result.collect()

        # NOTE(review): the factor 3 presumably reflects the number of distinct
        # values per column in the fixture table - confirm against setUp.
        expected_len = self.feature_num * 3
        for _, instance in local_result:
            # assertEqual reports both lengths on failure, unlike
            # assertTrue(a == b), which only reports "False is not true".
            self.assertEqual(len(instance.features), expected_len)
Example #3
0
    def test_instance(self):
        """Fit and apply a one-hot encoder, printing the data before and after.

        The encoder is built from ``OneHotEncoderParam(cols=self.cols)``,
        fitted on ``self.table`` and then used to transform the same table.
        The features of every instance are printed for both the original and
        the transformed table; nothing is asserted.
        """
        def dump(title, table):
            # Collect first, then print the header, then one line per instance.
            rows = table.collect()
            print(title)
            for key, instance in rows:
                print(key, instance.features)

        encoder = OneHotEncoder(param=OneHotEncoderParam(cols=self.cols))
        encoder.fit(self.table)
        dump("original data:", self.table)

        encoded_table = encoder.transform(data_instances=self.table)
        dump("One-hot encoded data:", encoded_table)
Example #4
0
    def one_hot_encoder_transform(self, data_instance):
        """Apply a previously saved one-hot encoder to ``data_instance``.

        The input is returned untouched when it is ``None`` or when
        ``self.workflow_param.need_one_hot`` is falsy. Otherwise the encoder
        parameters are parsed from ``self.config_path`` and checked, the
        model is loaded from ``workflow_param.model_table`` /
        ``workflow_param.model_namespace``, and the data is transformed.

        :param data_instance: data to encode, or ``None``.
        :return: the transformed data, or ``data_instance`` itself when no
            encoding was performed.
        """
        # Guard: nothing to encode.
        if data_instance is None:
            return data_instance

        # Guard: one-hot encoding is switched off in the workflow parameters.
        if not self.workflow_param.need_one_hot:
            LOGGER.info("No need to do one-hot encode")
            return data_instance

        LOGGER.info("Start one-hot encode")
        default_param = param_generator.OneHotEncoderParam()
        encoder_param = ParamExtract.parse_param_from_config(default_param, self.config_path)
        param_checker.OneHotEncoderParamChecker.check_param(encoder_param)

        encoder = OneHotEncoder(encoder_param)
        # Restore the fitted state before transforming.
        encoder.load_model(self.workflow_param.model_table,
                           self.workflow_param.model_namespace)
        encoded = encoder.transform(data_instance)

        LOGGER.info("Finish one-hot encode")
        return encoded