def feature_selection_fit(self, data_instance, flow_id='sample_flowid', without_transform=False):
    """Fit the role-specific hetero feature selector and record its saved model.

    ``data_instance`` is returned untouched when the workflow mode is homo,
    when ``data_instance`` is None, when ``workflow_param.need_feature_selection``
    is falsy, or when this party is the arbiter.

    Args:
        data_instance: Input handed to the selector's ``fit`` / ``fit_transform``.
        flow_id: Identifier forwarded to the selector via ``set_flowid``.
        without_transform: When true, call ``fit`` instead of ``fit_transform``.

    Returns:
        Whatever the selector's ``fit`` / ``fit_transform`` returned, or the
        original ``data_instance`` on any of the early exits listed above.

    Raises:
        ValueError: If ``self.role`` is not HOST, GUEST or ARBITER.
    """
    # The mode constant is HOMO (sibling of consts.HETERO); the previous
    # spelling parsed as a power expression on an undefined name.
    if self.mode == consts.HOMO:
        # NOTE(review): message text kept byte-for-byte; its first word looks masked.
        LOGGER.info(
            "H**o feature selection is not supporting yet. Coming soon")
        return data_instance

    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_feature_selection:
        LOGGER.info("No need to do feature selection")
        return data_instance

    LOGGER.info("Start feature selection")
    feature_select_param = param_generator.FeatureSelectionParam()
    feature_select_param = ParamExtract.parse_param_from_config(
        feature_select_param, self.config_path)
    param_checker.FeatureSelectionParamChecker.check_param(
        feature_select_param)

    if self.role == consts.HOST:
        feature_selector = HeteroFeatureSelectionHost(feature_select_param)
    elif self.role == consts.GUEST:
        feature_selector = HeteroFeatureSelectionGuest(feature_select_param)
    elif self.role == consts.ARBITER:
        # The arbiter takes no part in feature selection.
        return data_instance
    else:
        raise ValueError("Unknown role of workflow")

    feature_selector.set_flowid(flow_id)

    # IV-based filters are handed a reference (table name + namespace) to a
    # binning model; presumably the IV values come from a prior binning
    # step -- confirm against the selector implementation.
    filter_methods = feature_select_param.filter_method
    previous_model = {}
    if 'iv_value_thres' in filter_methods or 'iv_percentile' in filter_methods:
        previous_model['binning_model'] = {
            'name': self.workflow_param.model_table,
            'namespace': self.workflow_param.model_namespace
        }
    feature_selector.init_previous_model(**previous_model)

    if without_transform:
        data_instance = feature_selector.fit(data_instance)
    else:
        data_instance = feature_selector.fit_transform(data_instance)

    save_result = feature_selector.save_model(
        self.workflow_param.model_table,
        self.workflow_param.model_namespace)
    # Save model result in pipeline
    for meta_buffer_type, param_buffer_type in save_result:
        self.pipeline.node_meta.append(meta_buffer_type)
        self.pipeline.node_param.append(param_buffer_type)

    LOGGER.info("Finish feature selection")
    return data_instance
def _initialize_model(self, runtime_conf_path):
    """Build the host-side feature selection model from the runtime config.

    Parses a ``FeatureSelectionParam`` out of ``runtime_conf_path``, validates
    it with ``FeatureSelectionParamChecker`` and stores both the parameters
    (``self.feature_param``) and the resulting ``HeteroFeatureSelectionHost``
    (``self.model``) on the instance.

    Args:
        runtime_conf_path: Config location handed to
            ``ParamExtract.parse_param_from_config``.
    """
    feature_param = FeatureSelectionParam()
    self.feature_param = ParamExtract.parse_param_from_config(
        feature_param, runtime_conf_path)
    FeatureSelectionParamChecker.check_param(self.feature_param)
    self.model = HeteroFeatureSelectionHost(self.feature_param)
    # The model built here is the host selector, so the log says "Host"
    # (it previously claimed a guest model had started).
    LOGGER.debug("Host model started")
def feature_selection_fit(self, data_instance, flow_id='sample_flowid'):
    """Fit-and-transform with the role-specific hetero feature selector.

    ``data_instance`` is returned untouched when the workflow mode is homo,
    when ``data_instance`` is None, when ``workflow_param.need_feature_selection``
    is falsy, or when this party is the arbiter.

    Args:
        data_instance: Input handed to the selector.
        flow_id: Identifier forwarded to the selector via ``set_flowid``.

    Returns:
        The result of ``fit_local_transform`` (when the parsed parameters set
        ``local_only``) or ``fit_transform``; otherwise the original
        ``data_instance`` on any of the early exits listed above.

    Raises:
        ValueError: If ``self.role`` is not HOST, GUEST or ARBITER.
    """
    # The mode constant is HOMO (sibling of consts.HETERO); the previous
    # spelling parsed as a power expression on an undefined name.
    if self.mode == consts.HOMO:
        # NOTE(review): message text kept byte-for-byte; its first word looks masked.
        LOGGER.info(
            "H**o feature selection is not supporting yet. Coming soon")
        return data_instance

    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_feature_selection:
        LOGGER.info("No need to do feature selection")
        return data_instance

    LOGGER.info("Start feature selection")
    feature_select_param = param_generator.FeatureSelectionParam()
    feature_select_param = ParamExtract.parse_param_from_config(
        feature_select_param, self.config_path)
    param_checker.FeatureSelectionParamChecker.check_param(
        feature_select_param)

    if self.role == consts.HOST:
        feature_selector = HeteroFeatureSelectionHost(feature_select_param)
    elif self.role == consts.GUEST:
        feature_selector = HeteroFeatureSelectionGuest(feature_select_param)
    elif self.role == consts.ARBITER:
        # The arbiter takes no part in feature selection.
        return data_instance
    else:
        raise ValueError("Unknown role of workflow")

    feature_selector.set_flowid(flow_id)

    # Decide whether do fit_local or fit
    if feature_select_param.local_only:
        data_instance = feature_selector.fit_local_transform(data_instance)
    else:
        data_instance = feature_selector.fit_transform(data_instance)

    # Persisting the model is identical for both fitting paths, so it is
    # done once here instead of being repeated in each branch.
    save_result = feature_selector.save_model(
        self.workflow_param.model_table,
        self.workflow_param.model_namespace)
    # Save model result in pipeline
    for meta_buffer_type, param_buffer_type in save_result:
        self.pipeline.node_meta.append(meta_buffer_type)
        self.pipeline.node_param.append(param_buffer_type)

    LOGGER.info("Finish feature selection")
    return data_instance
def feature_selection_transform(self, data_instance, flow_id='sample_flowid'):
    """Apply a previously saved hetero feature selection model to ``data_instance``.

    ``data_instance`` is returned untouched when the workflow mode is homo,
    when ``data_instance`` is None, when ``workflow_param.need_feature_selection``
    is falsy, or when this party is the arbiter.

    Args:
        data_instance: Input handed to the selector's ``transform``.
        flow_id: Identifier forwarded to the selector via ``set_flowid``.

    Returns:
        The result of the selector's ``transform``, or the original
        ``data_instance`` on any of the early exits listed above.

    Raises:
        ValueError: If ``self.role`` is not HOST, GUEST or ARBITER.
    """
    # The mode constant is HOMO (sibling of consts.HETERO); the previous
    # spelling parsed as a power expression on an undefined name.
    if self.mode == consts.HOMO:
        # NOTE(review): message text kept byte-for-byte; its first word looks masked.
        LOGGER.info(
            "H**o feature selection is not supporting yet. Coming soon")
        return data_instance

    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_feature_selection:
        LOGGER.info("No need to do feature selection")
        return data_instance

    LOGGER.info("Start feature selection transform")
    feature_select_param = param_generator.FeatureSelectionParam()
    feature_select_param = ParamExtract.parse_param_from_config(
        feature_select_param, self.config_path)
    param_checker.FeatureSelectionParamChecker.check_param(
        feature_select_param)

    if self.role == consts.HOST:
        feature_selector = HeteroFeatureSelectionHost(feature_select_param)
    elif self.role == consts.GUEST:
        feature_selector = HeteroFeatureSelectionGuest(feature_select_param)
    elif self.role == consts.ARBITER:
        # The arbiter takes no part in feature selection.
        return data_instance
    else:
        raise ValueError("Unknown role of workflow")

    feature_selector.set_flowid(flow_id)

    # Restore the selector state from the table/namespace named in the
    # workflow parameters before transforming.
    feature_selector.load_model(self.workflow_param.model_table,
                                self.workflow_param.model_namespace)
    LOGGER.debug(
        "Role: {}, in transform feature selector left_cols: {}".format(
            self.role, feature_selector.left_cols))

    data_instance = feature_selector.transform(data_instance)

    LOGGER.info("Finish feature selection")
    return data_instance
class HeteroFeatureSelectHostWorkflow(WorkFlow):
    """Workflow that runs hetero feature selection for the host party."""

    def _initialize(self, config_path):
        """Set role/mode, build the model, then load the workflow parameters."""
        self._initialize_role_and_mode()
        self._initialize_model(config_path)
        self._initialize_workflow_param(config_path)

    def _initialize_role_and_mode(self):
        """Pin this workflow to the HOST role in HETERO mode."""
        self.role = consts.HOST
        self.mode = consts.HETERO

    def _initialize_intersect(self, config):
        """Do nothing; this workflow sets up no intersection step."""
        pass

    def _initialize_model(self, runtime_conf_path):
        """Parse and validate the selection params, then build the host model.

        Stores the parsed parameters on ``self.feature_param`` and the
        ``HeteroFeatureSelectionHost`` built from them on ``self.model``.
        """
        feature_param = FeatureSelectionParam()
        self.feature_param = ParamExtract.parse_param_from_config(
            feature_param, runtime_conf_path)
        FeatureSelectionParamChecker.check_param(self.feature_param)
        self.model = HeteroFeatureSelectionHost(self.feature_param)
        # The model built here is the host selector, so the log says "Host"
        # (it previously claimed a guest model had started).
        LOGGER.debug("Host model started")

    @status_tracer_decorator.status_trace
    def run(self):
        """Run the action selected by ``self.feature_param.method``.

        * ``'fit'``: fit the model on the training table and save it.
        * ``'fit_transform'``: fit and transform, save the model, then save
          the transformed table as the predict result.
        * ``'transform'``: load the saved model, transform the table and save
          the result as the predict result.

        For the two fitting actions ``self.feature_param.local_only`` selects
        the ``fit_local*`` variant of the model call.

        Raises:
            TypeError: If ``self.workflow_param.method`` is not
                ``"feature_select"``.
        """
        self._init_argument()

        if self.workflow_param.method != "feature_select":
            raise TypeError("method %s is not support yet" %
                            (self.workflow_param.method))

        # NOTE(review): a feature_param.method other than the three handled
        # below falls through without doing any work -- confirm the param
        # checker rejects such values.
        if self.feature_param.method == 'fit':
            train_data_instance = self.gen_data_instance(
                self.workflow_param.train_input_table,
                self.workflow_param.train_input_namespace)
            if self.feature_param.local_only:
                self.model.fit_local(train_data_instance)
            else:
                self.model.fit(train_data_instance)
            self.model.save_model(self.workflow_param.model_table,
                                  self.workflow_param.model_namespace)

        elif self.feature_param.method == 'fit_transform':
            train_data_instance = self.gen_data_instance(
                self.workflow_param.train_input_table,
                self.workflow_param.train_input_namespace)
            if self.feature_param.local_only:
                result_table = self.model.fit_local_transform(
                    train_data_instance)
            else:
                result_table = self.model.fit_transform(train_data_instance)
            self.model.save_model(self.workflow_param.model_table,
                                  self.workflow_param.model_namespace)
            self.save_predict_result(result_table)
            LOGGER.info("Predict result saved, table: {},"
                        " namespace: {}".format(
                            self.workflow_param.predict_output_table,
                            self.workflow_param.predict_output_namespace))

        elif self.feature_param.method == 'transform':
            train_data_instance = self.gen_data_instance(
                self.workflow_param.train_input_table,
                self.workflow_param.train_input_namespace,
                mode='transform')
            self.load_model()
            result_table = self.model.transform(train_data_instance)
            self.save_predict_result(result_table)
            LOGGER.info("Predict result saved, table: {},"
                        " namespace: {}".format(
                            self.workflow_param.predict_output_table,
                            self.workflow_param.predict_output_namespace))

        LOGGER.info("Finish host party feature selection")
def test_feature_selection(self):
    """Run the host selector in 'fit' mode, then feed its exported model to a 'transform' run.

    Both runs print the features of every collected row; nothing is asserted.
    """
    # --- fit pass -------------------------------------------------------
    fit_selector = HeteroFeatureSelectionHost()
    fit_param = self._make_param_dict('fit')
    print("host params: {}".format(fit_param))
    fit_selector.run(fit_param, self.args)

    fit_rows = fit_selector.save_data().collect()
    print("data in fit")
    for key, instance in fit_rows:
        print("k: {}, v: {}".format(key, instance.features))

    # --- transform pass, fed with the model exported by the fit pass ----
    exported_model = {self.model_name: fit_selector.export_model()}
    transform_args = {
        'data': {self.model_name: {'data': self.table}},
        'model': exported_model,
    }

    transform_selector = HeteroFeatureSelectionHost()
    transform_param = self._make_param_dict('transform')
    transform_selector.run(transform_param, transform_args)

    transform_rows = transform_selector.save_data().collect()
    print("data in transform")
    for key, instance in transform_rows:
        print("k: {}, v: {}".format(key, instance.features))