コード例 #1
0
    def feature_selection_fit(self,
                              data_instance,
                              flow_id='sample_flowid',
                              without_transform=False):
        """Fit a hetero feature selector and record its model in the pipeline.

        The input is handed back untouched when the workflow runs in
        homogeneous mode, when ``data_instance`` is ``None``, when
        ``workflow_param.need_feature_selection`` is falsy, or when this
        party is the arbiter.

        Args:
            data_instance: Data to fit the selector on.
            flow_id: Identifier forwarded to ``feature_selector.set_flowid``.
            without_transform: When true, call ``fit`` instead of
                ``fit_transform`` on the selector.

        Returns:
            Whatever ``fit`` / ``fit_transform`` returns, or the original
            ``data_instance`` on any of the early-exit paths above.

        Raises:
            ValueError: If ``self.role`` is not HOST, GUEST or ARBITER.
        """
        if self.mode == consts.HOMO:
            LOGGER.info(
                "Homo feature selection is not supporting yet. Coming soon")
            return data_instance

        if data_instance is None:
            return data_instance

        if not self.workflow_param.need_feature_selection:
            LOGGER.info("No need to do feature selection")
            return data_instance

        LOGGER.info("Start feature selection")
        feature_select_param = ParamExtract.parse_param_from_config(
            param_generator.FeatureSelectionParam(), self.config_path)
        param_checker.FeatureSelectionParamChecker.check_param(
            feature_select_param)

        if self.role == consts.HOST:
            feature_selector = HeteroFeatureSelectionHost(
                feature_select_param)
        elif self.role == consts.GUEST:
            feature_selector = HeteroFeatureSelectionGuest(
                feature_select_param)
        elif self.role == consts.ARBITER:
            # The arbiter takes no part in feature selection.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        feature_selector.set_flowid(flow_id)

        # IV-based filters get a reference to the binning model stored under
        # the workflow's model table / namespace.
        filter_methods = feature_select_param.filter_method
        previous_model = {}
        if ('iv_value_thres' in filter_methods
                or 'iv_percentile' in filter_methods):
            previous_model['binning_model'] = {
                'name': self.workflow_param.model_table,
                'namespace': self.workflow_param.model_namespace
            }
        feature_selector.init_previous_model(**previous_model)

        if without_transform:
            data_instance = feature_selector.fit(data_instance)
        else:
            data_instance = feature_selector.fit_transform(data_instance)

        save_result = feature_selector.save_model(
            self.workflow_param.model_table,
            self.workflow_param.model_namespace)
        # Save model result in pipeline
        for meta_buffer_type, param_buffer_type in save_result:
            self.pipeline.node_meta.append(meta_buffer_type)
            self.pipeline.node_param.append(param_buffer_type)

        LOGGER.info("Finish feature selection")
        return data_instance
コード例 #2
0
 def _initialize_model(self, runtime_conf_path):
     """Build the host-side feature selection model from a runtime config.

     Parses a ``FeatureSelectionParam`` out of ``runtime_conf_path``,
     validates it with ``FeatureSelectionParamChecker`` and stores both the
     parsed parameters (``self.feature_param``) and the resulting
     ``HeteroFeatureSelectionHost`` (``self.model``) on the instance.
     """
     self.feature_param = ParamExtract.parse_param_from_config(
         FeatureSelectionParam(), runtime_conf_path)
     FeatureSelectionParamChecker.check_param(self.feature_param)
     self.model = HeteroFeatureSelectionHost(self.feature_param)
     # The model built here is the host one, so the log says so.
     LOGGER.debug("Host model started")
コード例 #3
0
    def feature_selection_fit(self, data_instance, flow_id='sample_flowid'):
        """Fit-and-transform with a hetero feature selector, then save its model.

        The input is handed back untouched when the workflow runs in
        homogeneous mode, when ``data_instance`` is ``None``, when
        ``workflow_param.need_feature_selection`` is falsy, or when this
        party is the arbiter.

        Args:
            data_instance: Data to fit the selector on.
            flow_id: Identifier forwarded to ``feature_selector.set_flowid``.

        Returns:
            The result of ``fit_local_transform`` (when the parsed parameter
            has ``local_only`` set) or ``fit_transform``; otherwise the
            original ``data_instance`` on any early-exit path.

        Raises:
            ValueError: If ``self.role`` is not HOST, GUEST or ARBITER.
        """
        if self.mode == consts.HOMO:
            LOGGER.info(
                "Homo feature selection is not supporting yet. Coming soon")
            return data_instance

        if data_instance is None:
            return data_instance

        if not self.workflow_param.need_feature_selection:
            LOGGER.info("No need to do feature selection")
            return data_instance

        LOGGER.info("Start feature selection")
        feature_select_param = ParamExtract.parse_param_from_config(
            param_generator.FeatureSelectionParam(), self.config_path)
        param_checker.FeatureSelectionParamChecker.check_param(
            feature_select_param)

        if self.role == consts.HOST:
            feature_selector = HeteroFeatureSelectionHost(
                feature_select_param)
        elif self.role == consts.GUEST:
            feature_selector = HeteroFeatureSelectionGuest(
                feature_select_param)
        elif self.role == consts.ARBITER:
            # The arbiter takes no part in feature selection.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        feature_selector.set_flowid(flow_id)

        # Decide whether do fit_local or fit
        if feature_select_param.local_only:
            data_instance = feature_selector.fit_local_transform(
                data_instance)
        else:
            data_instance = feature_selector.fit_transform(data_instance)

        # Persisting the model is identical for both fitting paths.
        save_result = feature_selector.save_model(
            self.workflow_param.model_table,
            self.workflow_param.model_namespace)
        # Save model result in pipeline
        for meta_buffer_type, param_buffer_type in save_result:
            self.pipeline.node_meta.append(meta_buffer_type)
            self.pipeline.node_param.append(param_buffer_type)

        LOGGER.info("Finish feature selection")
        return data_instance
コード例 #4
0
ファイル: workflow.py プロジェクト: yuxiaoqing22/FATE
    def feature_selection_transform(self,
                                    data_instance,
                                    flow_id='sample_flowid'):
        """Apply a previously saved hetero feature selection model.

        The input is handed back untouched when the workflow runs in
        homogeneous mode, when ``data_instance`` is ``None``, when
        ``workflow_param.need_feature_selection`` is falsy, or when this
        party is the arbiter.

        Args:
            data_instance: Data to transform.
            flow_id: Identifier forwarded to ``feature_selector.set_flowid``.

        Returns:
            The result of ``feature_selector.transform``, or the original
            ``data_instance`` on any early-exit path.

        Raises:
            ValueError: If ``self.role`` is not HOST, GUEST or ARBITER.
        """
        if self.mode == consts.HOMO:
            LOGGER.info(
                "Homo feature selection is not supporting yet. Coming soon")
            return data_instance

        if data_instance is None:
            return data_instance

        if not self.workflow_param.need_feature_selection:
            LOGGER.info("No need to do feature selection")
            return data_instance

        LOGGER.info("Start feature selection transform")
        feature_select_param = ParamExtract.parse_param_from_config(
            param_generator.FeatureSelectionParam(), self.config_path)
        param_checker.FeatureSelectionParamChecker.check_param(
            feature_select_param)

        if self.role == consts.HOST:
            feature_selector = HeteroFeatureSelectionHost(
                feature_select_param)
        elif self.role == consts.GUEST:
            feature_selector = HeteroFeatureSelectionGuest(
                feature_select_param)
        elif self.role == consts.ARBITER:
            # The arbiter takes no part in feature selection.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        feature_selector.set_flowid(flow_id)

        # Restore the selector state saved under the workflow's model
        # table / namespace before transforming.
        feature_selector.load_model(self.workflow_param.model_table,
                                    self.workflow_param.model_namespace)

        LOGGER.debug(
            "Role: {}, in transform feature selector left_cols: {}".format(
                self.role, feature_selector.left_cols))

        data_instance = feature_selector.transform(data_instance)

        LOGGER.info("Finish feature selection")
        return data_instance
コード例 #5
0
class HeteroFeatureSelectHostWorkflow(WorkFlow):
    """Workflow that drives hetero feature selection for the host party."""

    def _initialize(self, config_path):
        """Set role/mode, build the model and load workflow parameters."""
        self._initialize_role_and_mode()
        self._initialize_model(config_path)
        self._initialize_workflow_param(config_path)

    def _initialize_role_and_mode(self):
        """Pin this workflow to the HOST role in HETERO mode."""
        self.role = consts.HOST
        self.mode = consts.HETERO

    def _initialize_intersect(self, config):
        """Intentionally a no-op for this workflow."""
        pass

    def _initialize_model(self, runtime_conf_path):
        """Parse and validate the selection params, then build the host model."""
        self.feature_param = ParamExtract.parse_param_from_config(
            FeatureSelectionParam(), runtime_conf_path)
        FeatureSelectionParamChecker.check_param(self.feature_param)
        self.model = HeteroFeatureSelectionHost(self.feature_param)
        # This workflow builds the host model, so the log says so.
        LOGGER.debug("Host model started")

    def _load_selection_input(self, **kwargs):
        """Load the training input table; ``kwargs`` go to ``gen_data_instance``."""
        return self.gen_data_instance(
            self.workflow_param.train_input_table,
            self.workflow_param.train_input_namespace,
            **kwargs)

    def _save_selection_model(self):
        """Persist the model under the workflow's model table / namespace."""
        self.model.save_model(self.workflow_param.model_table,
                              self.workflow_param.model_namespace)

    def _save_selection_output(self, result_table):
        """Save ``result_table`` as the predict result and log where it went."""
        self.save_predict_result(result_table)
        LOGGER.info("Predict result saved, table: {},"
                    " namespace: {}".format(
                        self.workflow_param.predict_output_table,
                        self.workflow_param.predict_output_namespace))

    @status_tracer_decorator.status_trace
    def run(self):
        """Run the step selected by ``feature_param.method``.

        Supported values are ``'fit'``, ``'fit_transform'`` and
        ``'transform'``. Any other value performs no work and only logs a
        warning.

        Raises:
            TypeError: If ``workflow_param.method`` is not
                ``"feature_select"``.
        """
        self._init_argument()

        if self.workflow_param.method != "feature_select":
            raise TypeError("method %s is not support yet" %
                            (self.workflow_param.method))

        method = self.feature_param.method
        local_only = self.feature_param.local_only

        if method == 'fit':
            train_data_instance = self._load_selection_input()
            if local_only:
                self.model.fit_local(train_data_instance)
            else:
                self.model.fit(train_data_instance)
            self._save_selection_model()

        elif method == 'fit_transform':
            train_data_instance = self._load_selection_input()
            if local_only:
                result_table = self.model.fit_local_transform(
                    train_data_instance)
            else:
                result_table = self.model.fit_transform(
                    train_data_instance)
            self._save_selection_model()
            self._save_selection_output(result_table)

        elif method == 'transform':
            train_data_instance = self._load_selection_input(
                mode='transform')
            self.load_model()
            result_table = self.model.transform(train_data_instance)
            self._save_selection_output(result_table)

        else:
            # Previously this case fell through silently; keep it non-fatal
            # but make the skipped step visible in the logs.
            LOGGER.warning(
                "Unrecognised feature selection method: {}, "
                "nothing was run".format(method))

        LOGGER.info("Finish host party feature selection")
コード例 #6
0
    def test_feature_selection(self):
        """Run host feature selection in 'fit' mode, then feed the exported
        model to a fresh selector in 'transform' mode, printing the features
        of every output row after each phase."""

        def dump_features(title, table):
            # Collect first, then print the header, then one line per row.
            rows = table.collect()
            print(title)
            for key, instance in rows:
                print("k: {}, v: {}".format(key, instance.features))

        # --- fit phase ---
        fit_selector = HeteroFeatureSelectionHost()
        fit_param = self._make_param_dict('fit')
        print("host params: {}".format(fit_param))
        fit_selector.run(fit_param, self.args)
        dump_features("data in fit", fit_selector.save_data())

        # --- transform phase, driven by the model exported above ---
        exported_model = {self.model_name: fit_selector.export_model()}
        transform_args = {
            'data': {self.model_name: {'data': self.table}},
            'model': exported_model,
        }

        transform_selector = HeteroFeatureSelectionHost()
        transform_param = self._make_param_dict('transform')
        transform_selector.run(transform_param, transform_args)
        dump_features("data in transform", transform_selector.save_data())