def one_hot_encoder_fit_transform(self, data_instance):
    """Fit a one-hot encoder on ``data_instance`` and return the encoded data.

    The input is handed back untouched when it is ``None`` or when
    ``self.workflow_param.need_one_hot`` is falsy.  Otherwise a
    ``OneHotEncoder`` is built from the loaded and checked parameters,
    ``fit_transform`` is applied, the model is saved under the workflow's
    model table / namespace, and every (meta, param) pair yielded by the
    save is appended to ``self.pipeline.node_meta`` / ``node_param``.
    """
    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_one_hot:
        LOGGER.info("No need to do one-hot encode")
        return data_instance

    LOGGER.info("Start one-hot encode")

    # Build, populate and validate the encoder parameters.
    default_param = param_generator.OneHotEncoderParam()
    encoder_param = self._load_param(default_param)
    param_checker.OneHotEncoderParamChecker.check_param(encoder_param)

    encoder = OneHotEncoder(encoder_param)
    encoded = encoder.fit_transform(data_instance)

    saved_buffers = encoder.save_model(
        self.workflow_param.model_table,
        self.workflow_param.model_namespace,
    )

    # Record the saved model buffers in the pipeline.
    for meta_buffer, param_buffer in saved_buffers:
        self.pipeline.node_meta.append(meta_buffer)
        self.pipeline.node_param.append(param_buffer)

    LOGGER.info("Finish one-hot encode")
    return encoded
def test_instance(self):
    """Fit a ``OneHotEncoder`` on the fixture table and check feature counts.

    Every collected row's ``features`` must have exactly
    ``self.feature_num * 3`` entries.
    """
    one_hot_encoder = OneHotEncoder()
    one_hot_encoder.cols = self.cols
    # NOTE(review): cols_index is assigned the same object as cols --
    # confirm the fixture's self.cols is valid for both attributes.
    one_hot_encoder.cols_index = self.cols

    result = one_hot_encoder.fit(self.table)

    # NOTE(review): the factor 3 presumably reflects the number of distinct
    # values per encoded column in the fixture -- confirm against setUp.
    expected_length = self.feature_num * 3
    for _, instance in result.collect():
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(len(instance.features), expected_length)
def test_instance(self):
    """Fit the encoder on the fixture table and print data before and after.

    The original rows are printed first, then the rows produced by
    ``transform``; no assertions are made.
    """

    def show(title, table):
        # Collect first, then print the heading followed by each row.
        rows = table.collect()
        print(title)
        for key, instance in rows:
            print(key, instance.features)

    encoder_param = OneHotEncoderParam(cols=self.cols)
    encoder = OneHotEncoder(param=encoder_param)
    encoder.fit(self.table)

    show("original data:", self.table)
    show("One-hot encoded data:", encoder.transform(data_instances=self.table))
def one_hot_encoder_transform(self, data_instance):
    """Apply a previously saved one-hot encoder to ``data_instance``.

    The input is handed back untouched when it is ``None`` or when
    ``self.workflow_param.need_one_hot`` is falsy.  Otherwise the encoder
    parameters are parsed from ``self.config_path`` and checked, the model
    is loaded from the workflow's model table / namespace, and the
    transformed data is returned.
    """
    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_one_hot:
        LOGGER.info("No need to do one-hot encode")
        return data_instance

    LOGGER.info("Start one-hot encode")

    # Build, populate and validate the encoder parameters.
    default_param = param_generator.OneHotEncoderParam()
    encoder_param = ParamExtract.parse_param_from_config(default_param, self.config_path)
    param_checker.OneHotEncoderParamChecker.check_param(encoder_param)

    encoder = OneHotEncoder(encoder_param)
    encoder.load_model(
        self.workflow_param.model_table,
        self.workflow_param.model_namespace,
    )
    encoded = encoder.transform(data_instance)

    LOGGER.info("Finish one-hot encode")
    return encoded