Ejemplo n.º 1
0
    def host_optimal_binning(self, data_instances, host_idx,
                             encrypted_bin_info, result_counts,
                             category_names):
        """Prepare an OptimalBinning object for one host and run the sync step.

        A deep copy of ``self.model_param`` is overridden with the values
        stored under ``encrypted_bin_info['optimal_params']``. The collected
        ``result_counts``, minus every column listed in ``category_names``,
        are then passed to ``self.optimal_binning_sync`` together with the
        row count and partition number of ``data_instances``.

        Returns:
            Whatever ``self.optimal_binning_sync`` returns.
        """
        received_params = encrypted_bin_info['optimal_params']

        # Override a copy so self.model_param itself is left untouched.
        params_for_host = copy.deepcopy(self.model_param)
        params_for_host.bin_num = received_params.get('bin_num')
        for key in ('metric_method', 'mixture', 'max_bin_pct', 'min_bin_pct'):
            setattr(params_for_host.optimal_binning_param, key,
                    received_params.get(key))

        event_total, non_event_total = self.get_histogram(data_instances)

        # Drop the columns named in category_names before optimal binning.
        non_category_counts = {}
        for col_name, col_counts in dict(result_counts.collect()).items():
            if col_name in category_names:
                continue
            non_category_counts[col_name] = col_counts

        binning = OptimalBinning(
            params=params_for_host,
            abnormal_list=self.binning_obj.abnormal_list)
        binning.event_total = event_total
        binning.non_event_total = non_event_total

        return self.optimal_binning_sync(binning,
                                         non_category_counts,
                                         data_instances.count(),
                                         data_instances.partitions,
                                         host_idx)
Ejemplo n.º 2
0
    def _init_model(self, params: FeatureBinningParam):
        """Store ``params`` and build the binning object and IV calculator.

        The binning class is selected by ``params.method``. For the optimal
        method on the host role, ``bin_num`` is first overwritten with
        ``optimal_binning_param.init_bin_nums`` and a ``QuantileBinning`` is
        created instead of an ``OptimalBinning``.

        Raises:
            ValueError: if the role is host and the transform type is
                ``"woe"``, or if ``params.method`` is not one of the
                quantile, bucket or optimal constants.
        """
        self.model_param = params
        param = self.model_param

        self.transform_type = param.transform_param.transform_type

        if self.role == consts.HOST and self.transform_type == "woe":
            raise ValueError("Host party do not support woe transform now.")

        method = param.method
        if method == consts.QUANTILE:
            binning = QuantileBinning(param)
        elif method == consts.BUCKET:
            binning = BucketBinning(param)
        elif method != consts.OPTIMAL:
            raise ValueError(
                "Binning method: {} is not supported yet".format(method))
        elif self.role == consts.HOST:
            # Host side of optimal binning starts from quantile bins.
            param.bin_num = param.optimal_binning_param.init_bin_nums
            binning = QuantileBinning(param)
        else:
            binning = OptimalBinning(param)
        self.binning_obj = binning

        self.iv_calculator = IvCalculator(
            param.adjustment_factor,
            role=self.role,
            party_id=self.component_properties.local_partyid)
Ejemplo n.º 3
0
    def _init_model(self, params: FeatureBinningParam):
        """Store ``params`` and create the binning object they select.

        The binning class is chosen from ``params.method``. For the optimal
        method on the host role, ``bin_num`` is overwritten with
        ``optimal_binning_param.init_bin_nums`` and a ``QuantileBinning`` is
        used; every other role gets an ``OptimalBinning``. The role and the
        local party id are then passed to the binning object through
        ``set_role_party``.

        Raises:
            ValueError: if the role is host and the transform type is
                ``"woe"``, or if ``params.method`` is not one of the
                quantile, bucket or optimal constants.
        """
        self.model_param = params

        self.transform_type = self.model_param.transform_param.transform_type

        if self.role == consts.HOST and self.transform_type == "woe":
            raise ValueError("Host party do not support woe transform now.")

        if self.model_param.method == consts.QUANTILE:
            self.binning_obj = QuantileBinning(self.model_param)
        elif self.model_param.method == consts.BUCKET:
            self.binning_obj = BucketBinning(self.model_param)
        elif self.model_param.method == consts.OPTIMAL:
            if self.role == consts.HOST:
                self.model_param.bin_num = self.model_param.optimal_binning_param.init_bin_nums
                self.binning_obj = QuantileBinning(self.model_param)
            else:
                self.binning_obj = OptimalBinning(self.model_param)
        else:
            raise ValueError("Binning method: {} is not supported yet".format(
                self.model_param.method))

        # Log the party id itself: the message is labelled "local_partyid",
        # and this is the same value handed to set_role_party below.
        local_partyid = self.component_properties.local_partyid
        LOGGER.debug("in _init_model, role: {}, local_partyid: {}".format(
            self.role, local_partyid))
        self.binning_obj.set_role_party(self.role, local_partyid)
Ejemplo n.º 4
0
 def optimal_binning_sync(self, result_counts, sample_count, partitions, host_idx, host_model_params):
     """Fit an OptimalBinning object for one host and send it the split points.

     A new ``OptimalBinning`` is built from ``host_model_params`` and takes
     its abnormal list and event / non-event totals from ``self.binning_obj``.
     ``result_counts`` is converted to a bucket table, the buckets are
     fitted, and the resulting split points are sent to the host at index
     ``host_idx`` through ``self.transfer_variable.bucket_idx``.

     Returns:
         The fitted ``OptimalBinning`` object.
     """
     binning = OptimalBinning(
         params=host_model_params,
         abnormal_list=self.binning_obj.abnormal_list,
     )
     for total_name in ("event_total", "non_event_total"):
         setattr(binning, total_name, getattr(self.binning_obj, total_name))

     LOGGER.debug("Start host party optimal binning train")
     buckets = binning.bin_sum_to_bucket_list(result_counts, partitions)
     binning.fit_buckets(buckets, sample_count)

     split_points = binning.bin_results.all_split_points
     self.transfer_variable.bucket_idx.remote(
         split_points,
         role=consts.HOST,
         idx=host_idx,
     )
     return binning
Ejemplo n.º 5
0
    def load_model(self, model_dict):
        """Restore the binning state from a saved model dictionary.

        The first value of ``model_dict['model']`` must map
        ``MODEL_PARAM_NAME`` and ``MODEL_META_NAME`` to the protobuf param
        and meta objects. Both are type-checked before any attribute of
        ``self`` is modified, so a malformed model does not leave the
        instance partially updated.

        Sets ``bin_inner_param``, ``labels``, ``header``, ``transform_type``,
        ``binning_obj`` and ``host_results``. ``bin_result`` is set only when
        the stored label list is non-empty, and ``header_anonymous`` only
        when the stored anonymous header is non-empty.

        Raises:
            AssertionError: if the meta or param object has the wrong type,
                or if, with a label count other than two, the number of
                stored host results is not a multiple of the label count.
        """
        # Materialise the first model entry once instead of once per lookup.
        first_model = list(model_dict.get('model').values())[0]
        model_param = first_model.get(MODEL_PARAM_NAME)
        model_meta = first_model.get(MODEL_META_NAME)

        # Validate before touching either object or mutating self.
        assert isinstance(model_meta,
                          feature_binning_meta_pb2.FeatureBinningMeta)
        assert isinstance(model_param,
                          feature_binning_param_pb2.FeatureBinningParam)

        self.bin_inner_param = BinInnerParam()
        multi_class_result = model_param.multi_class_result
        self.labels = list(multi_class_result.labels)
        if self.labels:
            self.bin_result = MultiClassBinResult.reconstruct(
                list(multi_class_result.results), self.labels)

        self.header = list(model_param.header)
        self.bin_inner_param.set_header(self.header)

        self.bin_inner_param.add_transform_bin_indexes(
            list(model_meta.transform_param.transform_cols))
        self.bin_inner_param.add_bin_names(list(model_meta.cols))
        self.transform_type = model_meta.transform_param.transform_type

        # Any method other than quantile or optimal falls back to bucket.
        bin_method = str(model_meta.method)
        if bin_method == consts.QUANTILE:
            self.binning_obj = QuantileBinning(params=model_meta)
        elif bin_method == consts.OPTIMAL:
            self.binning_obj = OptimalBinning(params=model_meta)
        else:
            self.binning_obj = BucketBinning(params=model_meta)

        self.binning_obj.set_bin_inner_param(self.bin_inner_param)

        split_results = dict(model_param.binning_result.binning_result)
        for col_name, sr_pb in split_results.items():
            self.binning_obj.bin_results.put_col_split_points(
                col_name, list(sr_pb.split_points))

        self.host_results = []
        host_pbs = list(multi_class_result.host_results)
        if host_pbs:
            label_count = len(self.labels)
            if label_count == 2:
                # With two labels each stored entry is reconstructed alone.
                for host_pb in host_pbs:
                    self.host_results.append(
                        MultiClassBinResult.reconstruct(host_pb, self.labels))
            else:
                # Otherwise entries are consumed in consecutive groups of
                # label_count, one group per reconstructed result.
                assert len(host_pbs) % label_count == 0
                for start in range(0, len(host_pbs), label_count):
                    self.host_results.append(
                        MultiClassBinResult.reconstruct(
                            host_pbs[start:start + label_count],
                            self.labels))

        header_anonymous = list(model_param.header_anonymous)
        if header_anonymous:
            self.header_anonymous = header_anonymous