def feature_selection_fit(self, data_instance, flow_id='sample_flowid', without_transform=False):
    """Fit a hetero feature selector on ``data_instance`` and record its model.

    The input is returned untouched when the workflow runs in HOMO mode,
    when ``data_instance`` is ``None``, when
    ``self.workflow_param.need_feature_selection`` is falsy, or when the
    current role is ARBITER.

    Args:
        data_instance: Data handed to the selector's ``fit`` /
            ``fit_transform``.
        flow_id: Identifier forwarded to ``feature_selector.set_flowid``.
        without_transform: When true the selector's ``fit`` is called
            instead of ``fit_transform``.

    Returns:
        Whatever the selector's ``fit`` / ``fit_transform`` returns, or the
        original ``data_instance`` on any of the pass-through paths above.

    Raises:
        ValueError: If ``self.role`` is not HOST, GUEST or ARBITER.

    Side effects:
        Saves the selector model under ``workflow_param.model_table`` /
        ``model_namespace`` and appends each (meta, param) pair of the save
        result to ``self.pipeline.node_meta`` / ``self.pipeline.node_param``.
    """
    # The constant and the message were masked as "H**O"/"H**o" in the
    # source, which parses as ``consts.H ** O`` and fails at runtime.
    if self.mode == consts.HOMO:
        LOGGER.info(
            "Homo feature selection is not supporting yet. Coming soon")
        return data_instance

    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_feature_selection:
        LOGGER.info("No need to do feature selection")
        return data_instance

    LOGGER.info("Start feature selection")
    feature_select_param = param_generator.FeatureSelectionParam()
    feature_select_param = ParamExtract.parse_param_from_config(
        feature_select_param, self.config_path)
    param_checker.FeatureSelectionParamChecker.check_param(
        feature_select_param)

    # Parameters are parsed and validated before the role dispatch, so an
    # invalid configuration is reported for every role, ARBITER included.
    if self.role == consts.HOST:
        feature_selector = HeteroFeatureSelectionHost(feature_select_param)
    elif self.role == consts.GUEST:
        feature_selector = HeteroFeatureSelectionGuest(feature_select_param)
    elif self.role == consts.ARBITER:
        return data_instance
    else:
        raise ValueError("Unknown role of workflow")

    feature_selector.set_flowid(flow_id)

    # The IV-based filters get a reference to the model table/namespace as
    # their 'binning_model'; other filters get no previous model at all.
    filter_methods = feature_select_param.filter_method
    previous_model = {}
    if 'iv_value_thres' in filter_methods or 'iv_percentile' in filter_methods:
        previous_model['binning_model'] = {
            'name': self.workflow_param.model_table,
            'namespace': self.workflow_param.model_namespace,
        }
    feature_selector.init_previous_model(**previous_model)

    if without_transform:
        data_instance = feature_selector.fit(data_instance)
    else:
        data_instance = feature_selector.fit_transform(data_instance)

    save_result = feature_selector.save_model(
        self.workflow_param.model_table,
        self.workflow_param.model_namespace)
    # Save model result in pipeline
    for meta_buffer_type, param_buffer_type in save_result:
        self.pipeline.node_meta.append(meta_buffer_type)
        self.pipeline.node_param.append(param_buffer_type)

    LOGGER.info("Finish feature selection")
    return data_instance
def feature_selection_fit(self, data_instance, flow_id='sample_flowid'):
    """Fit-and-transform ``data_instance`` with a hetero feature selector.

    The input is returned untouched when the workflow runs in HOMO mode,
    when ``data_instance`` is ``None``, when
    ``self.workflow_param.need_feature_selection`` is falsy, or when the
    current role is ARBITER.

    NOTE(review): another ``feature_selection_fit`` taking an extra
    ``without_transform`` parameter appears earlier in this source. If both
    belong to the same class this definition shadows it -- confirm which
    one is intended to survive.

    Args:
        data_instance: Data handed to the selector.
        flow_id: Identifier forwarded to ``feature_selector.set_flowid``.

    Returns:
        The result of ``fit_local_transform`` when the parsed parameter's
        ``local_only`` is truthy, otherwise the result of ``fit_transform``;
        or the original ``data_instance`` on any pass-through path above.

    Raises:
        ValueError: If ``self.role`` is not HOST, GUEST or ARBITER.

    Side effects:
        Saves the selector model under ``workflow_param.model_table`` /
        ``model_namespace`` and appends each (meta, param) pair of the save
        result to ``self.pipeline.node_meta`` / ``self.pipeline.node_param``.
    """
    # The constant and the message were masked as "H**O"/"H**o" in the
    # source, which parses as ``consts.H ** O`` and fails at runtime.
    if self.mode == consts.HOMO:
        LOGGER.info(
            "Homo feature selection is not supporting yet. Coming soon")
        return data_instance

    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_feature_selection:
        LOGGER.info("No need to do feature selection")
        return data_instance

    LOGGER.info("Start feature selection")
    feature_select_param = param_generator.FeatureSelectionParam()
    feature_select_param = ParamExtract.parse_param_from_config(
        feature_select_param, self.config_path)
    param_checker.FeatureSelectionParamChecker.check_param(
        feature_select_param)

    # Parameters are parsed and validated before the role dispatch, so an
    # invalid configuration is reported for every role, ARBITER included.
    if self.role == consts.HOST:
        feature_selector = HeteroFeatureSelectionHost(feature_select_param)
    elif self.role == consts.GUEST:
        feature_selector = HeteroFeatureSelectionGuest(feature_select_param)
    elif self.role == consts.ARBITER:
        return data_instance
    else:
        raise ValueError("Unknown role of workflow")

    feature_selector.set_flowid(flow_id)

    # Decide whether do fit_local or fit
    if feature_select_param.local_only:
        data_instance = feature_selector.fit_local_transform(data_instance)
    else:
        data_instance = feature_selector.fit_transform(data_instance)

    # Saving was duplicated verbatim in both branches above; it is the same
    # for either fitting path, so it is done once here.
    save_result = feature_selector.save_model(
        self.workflow_param.model_table,
        self.workflow_param.model_namespace)
    # Save model result in pipeline
    for meta_buffer_type, param_buffer_type in save_result:
        self.pipeline.node_meta.append(meta_buffer_type)
        self.pipeline.node_param.append(param_buffer_type)

    LOGGER.info("Finish feature selection")
    return data_instance
def feature_selection_transform(self, data_instance, flow_id='sample_flowid'):
    """Apply a previously saved hetero feature selection to ``data_instance``.

    The input is returned untouched when the workflow runs in HOMO mode,
    when ``data_instance`` is ``None``, when
    ``self.workflow_param.need_feature_selection`` is falsy, or when the
    current role is ARBITER.

    Args:
        data_instance: Data handed to the selector's ``transform``.
        flow_id: Identifier forwarded to ``feature_selector.set_flowid``.

    Returns:
        The result of ``feature_selector.transform(data_instance)``, or the
        original ``data_instance`` on any pass-through path above.

    Raises:
        ValueError: If ``self.role`` is not HOST, GUEST or ARBITER.
    """
    # The constant and the message were masked as "H**O"/"H**o" in the
    # source, which parses as ``consts.H ** O`` and fails at runtime.
    if self.mode == consts.HOMO:
        LOGGER.info(
            "Homo feature selection is not supporting yet. Coming soon")
        return data_instance

    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_feature_selection:
        LOGGER.info("No need to do feature selection")
        return data_instance

    LOGGER.info("Start feature selection transform")
    feature_select_param = param_generator.FeatureSelectionParam()
    feature_select_param = ParamExtract.parse_param_from_config(
        feature_select_param, self.config_path)
    param_checker.FeatureSelectionParamChecker.check_param(
        feature_select_param)

    # Parameters are parsed and validated before the role dispatch, so an
    # invalid configuration is reported for every role, ARBITER included.
    if self.role == consts.HOST:
        feature_selector = HeteroFeatureSelectionHost(feature_select_param)
    elif self.role == consts.GUEST:
        feature_selector = HeteroFeatureSelectionGuest(feature_select_param)
    elif self.role == consts.ARBITER:
        return data_instance
    else:
        raise ValueError("Unknown role of workflow")

    feature_selector.set_flowid(flow_id)

    # Load the model stored under the workflow's model table/namespace
    # before transforming.
    feature_selector.load_model(self.workflow_param.model_table,
                                self.workflow_param.model_namespace)
    LOGGER.debug(
        "Role: {}, in transform feature selector left_cols: {}".format(
            self.role, feature_selector.left_cols))

    data_instance = feature_selector.transform(data_instance)
    LOGGER.info("Finish feature selection")
    return data_instance