예제 #1
0
    def feature_selection_fit(self,
                              data_instance,
                              flow_id='sample_flowid',
                              without_transform=False):
        """Fit hetero feature selection on ``data_instance`` and save the model.

        The step is skipped, and ``data_instance`` is returned untouched, when:

        * the workflow runs in homo mode (not supported here),
        * ``data_instance`` is ``None``,
        * ``workflow_param.need_feature_selection`` is falsy, or
        * the current role is the arbiter.

        Otherwise a host or guest selector is built from the feature
        selection parameters parsed out of ``self.config_path``, fitted,
        saved under the workflow's model table/namespace, and every
        (meta, param) pair returned by ``save_model`` is appended to
        ``self.pipeline``.

        Args:
            data_instance: Input data handed to the selector; may be ``None``.
            flow_id: Flow identifier forwarded to ``set_flowid``.
            without_transform: When true, call ``fit`` only; otherwise call
                ``fit_transform``.

        Returns:
            ``data_instance`` unchanged when the step is skipped; otherwise
            whatever the selector's ``fit`` / ``fit_transform`` returned.

        Raises:
            ValueError: If ``self.role`` is not host, guest or arbiter.
        """
        # Restored identifier: the original text read ``consts.H**O``, which
        # Python parses as a power expression on an undefined name.
        if self.mode == consts.HOMO:
            LOGGER.info(
                "Homo feature selection is not supporting yet. Coming soon")
            return data_instance

        if data_instance is None:
            return data_instance

        if not self.workflow_param.need_feature_selection:
            LOGGER.info("No need to do feature selection")
            return data_instance

        LOGGER.info("Start feature selection")
        feature_select_param = param_generator.FeatureSelectionParam()
        feature_select_param = ParamExtract.parse_param_from_config(
            feature_select_param, self.config_path)
        param_checker.FeatureSelectionParamChecker.check_param(
            feature_select_param)

        if self.role == consts.HOST:
            feature_selector = HeteroFeatureSelectionHost(
                feature_select_param)
        elif self.role == consts.GUEST:
            feature_selector = HeteroFeatureSelectionGuest(
                feature_select_param)
        elif self.role == consts.ARBITER:
            # The arbiter holds no selector; pass the data through as-is.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        feature_selector.set_flowid(flow_id)

        # IV-based filters need the binning model, which is looked up by the
        # same table/namespace this workflow uses for its models.
        filter_methods = feature_select_param.filter_method
        previous_model = {}
        if ('iv_value_thres' in filter_methods
                or 'iv_percentile' in filter_methods):
            previous_model['binning_model'] = {
                'name': self.workflow_param.model_table,
                'namespace': self.workflow_param.model_namespace
            }
        feature_selector.init_previous_model(**previous_model)

        if without_transform:
            data_instance = feature_selector.fit(data_instance)
        else:
            data_instance = feature_selector.fit_transform(data_instance)

        save_result = feature_selector.save_model(
            self.workflow_param.model_table,
            self.workflow_param.model_namespace)
        # Save model result in pipeline
        for meta_buffer_type, param_buffer_type in save_result:
            self.pipeline.node_meta.append(meta_buffer_type)
            self.pipeline.node_param.append(param_buffer_type)

        LOGGER.info("Finish feature selection")
        return data_instance
예제 #2
0
class HeteroFeatureSelectHostWorkflow(WorkFlow):
    """Workflow that runs hetero feature selection for the host role."""

    def _initialize(self, config_path):
        """Set role/mode, build the model and load workflow parameters."""
        self._initialize_role_and_mode()
        self._initialize_model(config_path)
        self._initialize_workflow_param(config_path)

    def _initialize_role_and_mode(self):
        """Mark this workflow as the host side of a hetero job."""
        self.role = consts.HOST
        self.mode = consts.HETERO

    def _initialize_intersect(self, config):
        """No-op: this workflow performs no intersection setup."""
        pass

    def _initialize_model(self, runtime_conf_path):
        """Parse and check feature selection params, then build the host model.

        Args:
            runtime_conf_path: Path of the runtime config the parameters are
                parsed from.
        """
        feature_param = FeatureSelectionParam()
        self.feature_param = ParamExtract.parse_param_from_config(
            feature_param, runtime_conf_path)
        FeatureSelectionParamChecker.check_param(self.feature_param)
        self.model = HeteroFeatureSelectionHost(self.feature_param)
        # This is the host workflow; the message previously said "Guest".
        LOGGER.debug("Host model started")

    def _gen_train_input_instance(self, **gen_kwargs):
        """Load the training input table as a data instance.

        Extra keyword arguments are forwarded to ``gen_data_instance``.
        """
        return self.gen_data_instance(
            self.workflow_param.train_input_table,
            self.workflow_param.train_input_namespace,
            **gen_kwargs)

    def _save_model_to_workflow_table(self):
        """Save the model under the workflow's model table and namespace."""
        self.model.save_model(self.workflow_param.model_table,
                              self.workflow_param.model_namespace)

    def _save_result_and_log(self, result_table):
        """Save ``result_table`` as the predict result and log its location."""
        self.save_predict_result(result_table)
        LOGGER.info("Predict result saved, table: {},"
                    " namespace: {}".format(
                        self.workflow_param.predict_output_table,
                        self.workflow_param.predict_output_namespace))

    @status_tracer_decorator.status_trace
    def run(self):
        """Run the feature selection method chosen in the parameters.

        ``feature_param.method`` selects between ``'fit'``,
        ``'fit_transform'`` and ``'transform'``. Any other value falls
        through without doing any work.

        Raises:
            TypeError: If ``workflow_param.method`` is not
                ``"feature_select"``.
        """
        self._init_argument()

        if self.workflow_param.method != "feature_select":
            raise TypeError("method %s is not support yet" %
                            (self.workflow_param.method))

        method = self.feature_param.method
        if method == 'fit':
            train_data_instance = self._gen_train_input_instance()
            if self.feature_param.local_only:
                self.model.fit_local(train_data_instance)
            else:
                self.model.fit(train_data_instance)
            self._save_model_to_workflow_table()

        elif method == 'fit_transform':
            train_data_instance = self._gen_train_input_instance()
            if self.feature_param.local_only:
                result_table = self.model.fit_local_transform(
                    train_data_instance)
            else:
                result_table = self.model.fit_transform(
                    train_data_instance)
            self._save_model_to_workflow_table()
            self._save_result_and_log(result_table)

        elif method == 'transform':
            train_data_instance = self._gen_train_input_instance(
                mode='transform')
            # The model is loaded only after the input data is generated,
            # matching the original ordering.
            self.load_model()
            result_table = self.model.transform(train_data_instance)
            self._save_result_and_log(result_table)

        LOGGER.info("Finish host party feature selection")