Beispiel #1
0
    def test_feature_binning(self):
        """Run the guest binning component through a fit pass, then a transform pass.

        The fit pass runs against ``self.args``. The model exported by that
        component is then handed, together with ``self.table``, to a fresh
        component for the transform pass. After each pass the collected rows
        are printed as key / ``features`` pairs.
        """
        row_template = "k: {}, v: {}"

        # --- fit pass -------------------------------------------------------
        fit_component = HeteroFeatureBinningGuest()
        fit_component.run(self._make_param_dict('fit'), self.args)

        fit_rows = fit_component.save_data().collect()
        print("data in fit")
        for key, instance in fit_rows:
            print(row_template.format(key, instance.features))

        # The transform pass gets the fitted model and the input table keyed
        # under the same model name.
        exported_model = {self.model_name: fit_component.export_model()}
        transform_args = {
            'data': {self.model_name: {'data': self.table}},
            'model': exported_model,
        }

        # --- transform pass -------------------------------------------------
        transform_component = HeteroFeatureBinningGuest()
        transform_component.run(
            self._make_param_dict('transform'), transform_args)

        transform_rows = transform_component.save_data().collect()
        print("data in transform")
        for key, instance in transform_rows:
            print(row_template.format(key, instance.features))
 def _initialize_model(self, runtime_conf_path):
     """Build the guest binning model from a runtime configuration.

     A default ``FeatureBinningParam`` is filled in from
     ``runtime_conf_path``, stored on ``self.binning_param``, validated by
     ``FeatureBinningParamChecker`` and then used to construct the
     ``HeteroFeatureBinningGuest`` stored on ``self.model``.
     """
     parsed_param = ParamExtract.parse_param_from_config(
         FeatureBinningParam(), runtime_conf_path)
     self.binning_param = parsed_param

     # Validate before the parameters reach the model constructor.
     FeatureBinningParamChecker.check_param(parsed_param)
     self.model = HeteroFeatureBinningGuest(parsed_param)
     LOGGER.debug("Guest model started")
    def run_data(self, table_args, run_type='fit'):
        """Return the binning component, running it on first use.

        If ``self.binning_obj`` is already set it is returned unchanged and
        nothing is run. Otherwise a guest or host component is created
        according to ``self.role``, run with the parameter dict produced by
        ``self._make_param_dict(run_type)`` and ``table_args``, cached on
        ``self.binning_obj`` and returned.
        """
        cached = self.binning_obj
        if cached is not None:
            return cached

        component_cls = (HeteroFeatureBinningGuest if self.role == GUEST
                         else HeteroFeatureBinningHost)
        component = component_cls()
        component.run(self._make_param_dict(run_type), table_args)

        self.binning_obj = component
        return component
    def load_model(self, model_dict):
        """Restore selection state (and optionally a binning model) from ``model_dict``.

        Two keys are inspected:

        * ``'model'`` -- the first value stored under it supplies the meta and
          param objects. They are saved on ``self.model_output`` and used to
          rebuild the header, the current selection properties and the
          ``'conclusion'`` filter result.
        * ``'isometric_model'`` -- when present, a guest or host binning model
          (chosen by ``self.party_name``) is created and asked to load it.

        When ``'model'`` is present but ``self.need_run`` is falsy the method
        returns immediately, so ``'isometric_model'`` is not processed either.
        """
        if 'model' in model_dict:
            # self._parse_need_run(model_dict, MODEL_META_NAME)
            LOGGER.debug("Feature selection need run: {}".format(
                self.need_run))
            if not self.need_run:
                return

            # Only the first stored model entry is consulted.
            stored = list(model_dict['model'].values())[0]
            model_param = stored.get(MODEL_PARAM_NAME)
            model_meta = stored.get(MODEL_META_NAME)

            self.model_output = {
                MODEL_META_NAME: model_meta,
                MODEL_PARAM_NAME: model_param
            }

            column_names = list(model_param.header)
            # self.schema = {'header': column_names}
            self.header = column_names

            properties = self.curt_select_properties
            properties.set_header(column_names)
            self.completed_selection_result.set_header(column_names)
            # Every header position starts out as a "last left" column.
            properties.set_last_left_col_indexes(
                list(range(len(column_names))))
            properties.add_select_col_names(column_names)

            final_left_cols_names = dict(model_param.final_left_cols.left_cols)
            LOGGER.debug(
                "final_left_cols_names: {}".format(final_left_cols_names))
            # NOTE(review): every key is added regardless of its mapped value
            # -- confirm that is intended.
            for kept_name in final_left_cols_names:
                self.curt_select_properties.add_left_col_name(kept_name)

            self.completed_selection_result.add_filter_results(
                filter_name='conclusion',
                select_properties=self.curt_select_properties)
            self.update_curt_select_param()
            LOGGER.debug(
                "After load model, completed_selection_result.all_left_col_indexes: {}"
                .format(self.completed_selection_result.all_left_col_indexes))

        if 'isometric_model' in model_dict:
            LOGGER.debug(
                "Has isometric_model, model_dict: {}".format(model_dict))

            binning_cls = (HeteroFeatureBinningGuest
                           if self.party_name == consts.GUEST
                           else HeteroFeatureBinningHost)
            self.binning_model = binning_cls()

            # The binning component expects its payload under the 'model' key.
            self.binning_model.load_model(
                {'model': model_dict['isometric_model']})
Beispiel #5
0
    def init_previous_model(self, **models):
        """Load a previously saved binning model, if one was supplied.

        Only the ``binning_model`` keyword is recognised; without it the call
        does nothing. Its value is read with ``.get('name')`` and
        ``.get('namespace')``, and those two values are passed to the
        ``load_model`` of a freshly built guest or host binning component
        (chosen by ``self.party_name``), which is then stored on
        ``self.binning_model``.
        """
        if 'binning_model' not in models:
            return

        saved_location = models['binning_model']
        default_param = FeatureBinningParam()

        component_cls = (HeteroFeatureBinningGuest
                         if self.party_name == consts.GUEST
                         else HeteroFeatureBinningHost)
        component = component_cls(default_param)

        table_name = saved_location.get('name')
        table_namespace = saved_location.get('namespace')
        component.load_model(table_name, table_namespace)

        self.binning_model = component
    def run_data(self, table_args, run_type='fit'):
        """Return the binning component, running it on first use.

        A component already cached on ``self.binning_obj`` is returned as is.
        Otherwise a guest or host component is created according to
        ``self.role`` and run with ``self._make_param_dict(run_type)`` and
        ``table_args``. Its binning method and split points are printed, and
        the component is cached on ``self.binning_obj`` and returned.
        """
        cached = self.binning_obj
        if cached is not None:
            return cached

        component_cls = (HeteroFeatureBinningGuest if self.role == GUEST
                         else HeteroFeatureBinningHost)
        component = component_cls()

        run_param = self._make_param_dict(run_type)
        component.run(run_param, table_args)

        # Report what the run settled on.
        summary = "current binning method: {}, split_points: {}".format(
            component.model_param.method,
            component.binning_obj.split_points)
        print(summary)

        self.binning_obj = component
        return component
Beispiel #7
0
    def feature_binning(self, data_instances, flow_id='sample_flowid'):
        """Fit feature binning on ``data_instances`` and record the saved model.

        Args:
            data_instances: Input data table. ``None`` is returned unchanged.
            flow_id: Flow id passed to the binning component's ``set_flowid``.

        Returns:
            The result of the binning component's ``fit`` (or ``fit_local``
            when the parsed parameters have ``local_only`` set). The input is
            returned untouched when the workflow runs in homo mode, when the
            input is ``None``, or when this party is the arbiter.

        Raises:
            ValueError: If ``self.role`` is not host, guest or arbiter.
        """
        # The original condition read ``consts.H**O`` -- a mangled form of the
        # HOMO constant that parses as a power expression and fails at runtime.
        if self.mode == consts.HOMO:
            LOGGER.info(
                "Homo feature binning is not supported yet. Coming soon")
            return data_instances

        if data_instances is None:
            return data_instances

        LOGGER.info("Start feature binning")
        feature_binning_param = param_generator.FeatureBinningParam()
        feature_binning_param = ParamExtract.parse_param_from_config(
            feature_binning_param, self.config_path)
        param_checker.FeatureBinningParamChecker.check_param(
            feature_binning_param)

        if self.role == consts.HOST:
            feature_binning_obj = HeteroFeatureBinningHost(
                feature_binning_param)
        elif self.role == consts.GUEST:
            feature_binning_obj = HeteroFeatureBinningGuest(
                feature_binning_param)
        elif self.role == consts.ARBITER:
            # The arbiter does no binning; the data passes straight through.
            return data_instances
        else:
            raise ValueError("Unknown role of workflow")

        feature_binning_obj.set_flowid(flow_id)
        if feature_binning_param.local_only:
            data_instances = feature_binning_obj.fit_local(data_instances)
        else:
            data_instances = feature_binning_obj.fit(data_instances)

        save_result = feature_binning_obj.save_model(
            self.workflow_param.model_table,
            self.workflow_param.model_namespace)
        # Save model result in pipeline: each saved entry contributes one
        # meta buffer type and one param buffer type.
        for meta_buffer_type, param_buffer_type in save_result:
            self.pipeline.node_meta.append(meta_buffer_type)
            self.pipeline.node_param.append(param_buffer_type)

        LOGGER.info("Finish feature binning")
        return data_instances
Beispiel #8
0
    def _load_model(self, model_dict):

        if 'model' in model_dict:
            # self._parse_need_run(model_dict, MODEL_META_NAME)
            LOGGER.debug("Feature selection need run: {}".format(self.need_run))
            if not self.need_run:
                return
            model_param = list(model_dict.get('model').values())[0].get(MODEL_PARAM_NAME)
            model_meta = list(model_dict.get('model').values())[0].get(MODEL_META_NAME)

            self.model_output = {
                MODEL_META_NAME: model_meta,
                MODEL_PARAM_NAME: model_param
            }
            LOGGER.debug("Model output set, model_output is :{}".format(self.model_output))
            self.results = list(model_param.results)
            left_col_obj = model_param.final_left_cols

            original_headers = list(left_col_obj.original_cols)
            self.header = original_headers
            left_col_name_dict = dict(left_col_obj.left_cols)
            LOGGER.debug("In load model, left_col_name_dict: {}, original_headers: {}".format(left_col_name_dict,
                                                                                              original_headers))
            left_cols = {}
            for col_name, is_left in left_col_name_dict.items():
                left_cols[col_name] = is_left
            LOGGER.debug("Self.left_cols: {}".format(left_cols))
            self.filter_result = SelfFilterResult(header=original_headers, to_select_cols_all=list(left_cols.keys()))
            self.filter_result.set_left_cols(left_cols)

        if 'isometric_model' in model_dict:

            LOGGER.debug("Has isometric_model, model_dict: {}".format(model_dict))
            if self.party_name == consts.GUEST:
                self.binning_model = HeteroFeatureBinningGuest()
            else:
                self.binning_model = HeteroFeatureBinningHost()

            new_model_dict = {'model': model_dict['isometric_model']}
            self.binning_model._load_model(new_model_dict)
Beispiel #9
0
    def _calculates_iv_attrs(self, data_instances, flowid_postfix=''):
        if self.local_only and self.guest_iv_attrs is not None:
            return

        bin_flow_id = self.flowid + flowid_postfix
        self.bin_param.cols = self.left_cols
        if self.binning_model is None:
            self.binning_model = HeteroFeatureBinningGuest(self.bin_param)
            self.binning_model.set_flowid(bin_flow_id)
        else:
            self.binning_model.reset(self.bin_param, flowid=bin_flow_id)

        if self.local_only:
            if self.guest_iv_attrs is None:
                self.guest_iv_attrs = self.binning_model.fit_local(
                    data_instances=data_instances)
        else:
            iv_attrs = self.binning_model.fit(data_instances)
            self.guest_iv_attrs = iv_attrs.get('local')
            self.host_iv_attrs = iv_attrs.get('remote')
            self.host_left_cols = [i for i in range(len(self.host_iv_attrs))]
            LOGGER.debug("Host left cols: {}".format(self.host_left_cols))
        LOGGER.info("Finish federated binning with host.")