def host_optimal_binning(self, data_instances, host_idx, encrypted_bin_info, result_counts, category_names):
    """Run optimal binning on behalf of one host party.

    A deep copy of ``self.model_param`` is overridden with the settings
    found under ``encrypted_bin_info['optimal_params']``, an
    ``OptimalBinning`` object is built from that copy, and the remaining
    work is delegated to ``self.optimal_binning_sync``.

    Args:
        data_instances: Data table; passed to ``self.get_histogram`` and
            queried for ``count()`` and ``partitions``.
        host_idx: Host index, forwarded to ``optimal_binning_sync``.
        encrypted_bin_info: Mapping with an ``'optimal_params'`` entry that
            is read via ``.get`` for ``bin_num``, ``metric_method``,
            ``mixture``, ``max_bin_pct`` and ``min_bin_pct``.
        result_counts: Object whose ``collect()`` yields key/value pairs
            keyed by column name.
        category_names: Column names excluded from optimal binning.

    Returns:
        Whatever ``self.optimal_binning_sync`` returns.
    """
    overrides = encrypted_bin_info['optimal_params']

    # Work on a copy so the component's own parameters stay untouched.
    params = copy.deepcopy(self.model_param)
    params.bin_num = overrides.get('bin_num')
    for field in ('metric_method', 'mixture', 'max_bin_pct', 'min_bin_pct'):
        value = overrides.get(field)
        setattr(params.optimal_binning_param, field, value)

    event_total, non_event_total = self.get_histogram(data_instances)

    collected = dict(result_counts.collect())
    cols_to_bin = {
        col: counts
        for col, counts in collected.items()
        if col not in category_names
    }

    binning = OptimalBinning(params=params,
                             abnormal_list=self.binning_obj.abnormal_list)
    binning.event_total = event_total
    binning.non_event_total = non_event_total

    return self.optimal_binning_sync(binning,
                                     cols_to_bin,
                                     data_instances.count(),
                                     data_instances.partitions,
                                     host_idx)
def _init_model(self, params: FeatureBinningParam):
    """Store the parameters and build the binning object and IV calculator.

    Sets ``self.model_param``, ``self.transform_type``, ``self.binning_obj``
    and ``self.iv_calculator``.

    For the optimal method on the host role, ``bin_num`` of the stored
    parameters is overwritten with ``optimal_binning_param.init_bin_nums``
    and a ``QuantileBinning`` object is created instead of an
    ``OptimalBinning`` one.

    Args:
        params: Feature binning parameters.

    Raises:
        ValueError: If the host role requests the "woe" transform, or if
            the binning method is not quantile, bucket or optimal.
    """
    self.model_param = params
    self.transform_type = self.model_param.transform_param.transform_type

    if self.role == consts.HOST and self.transform_type == "woe":
        raise ValueError("Host party do not support woe transform now.")

    param = self.model_param
    method = param.method
    if method == consts.QUANTILE:
        self.binning_obj = QuantileBinning(param)
    elif method == consts.BUCKET:
        self.binning_obj = BucketBinning(param)
    elif method == consts.OPTIMAL:
        if self.role != consts.HOST:
            self.binning_obj = OptimalBinning(param)
        else:
            param.bin_num = param.optimal_binning_param.init_bin_nums
            self.binning_obj = QuantileBinning(param)
    else:
        raise ValueError(
            "Binning method: {} is not supported yet".format(method))

    self.iv_calculator = IvCalculator(
        param.adjustment_factor,
        role=self.role,
        party_id=self.component_properties.local_partyid,
    )
def _init_model(self, params: FeatureBinningParam):
    """Store the parameters and build the binning object for this party.

    Sets ``self.model_param``, ``self.transform_type`` and
    ``self.binning_obj``, then registers the role and local party id on the
    binning object through ``set_role_party``.

    For the optimal method on the host role, ``bin_num`` of the stored
    parameters is overwritten with ``optimal_binning_param.init_bin_nums``
    and a ``QuantileBinning`` object is created instead of an
    ``OptimalBinning`` one.

    Args:
        params: Feature binning parameters.

    Raises:
        ValueError: If the host role requests the "woe" transform, or if
            the binning method is not quantile, bucket or optimal.
    """
    self.model_param = params
    self.transform_type = self.model_param.transform_param.transform_type

    if self.role == consts.HOST and self.transform_type == "woe":
        raise ValueError("Host party do not support woe transform now.")

    if self.model_param.method == consts.QUANTILE:
        self.binning_obj = QuantileBinning(self.model_param)
    elif self.model_param.method == consts.BUCKET:
        self.binning_obj = BucketBinning(self.model_param)
    elif self.model_param.method == consts.OPTIMAL:
        if self.role == consts.HOST:
            self.model_param.bin_num = self.model_param.optimal_binning_param.init_bin_nums
            self.binning_obj = QuantileBinning(self.model_param)
        else:
            self.binning_obj = OptimalBinning(self.model_param)
    else:
        raise ValueError("Binning method: {} is not supported yet".format(
            self.model_param.method))

    local_partyid = self.component_properties.local_partyid
    # Log the party id itself: the message labels this value "local_partyid",
    # so formatting the whole component_properties object was misleading.
    LOGGER.debug("in _init_model, role: {}, local_partyid: {}".format(
        self.role, local_partyid))
    self.binning_obj.set_role_party(self.role, local_partyid)
def optimal_binning_sync(self, result_counts, sample_count, partitions, host_idx, host_model_params):
    """Fit optimal binning for one host and send it the split points.

    Args:
        result_counts: Bin statistics handed to
            ``OptimalBinning.bin_sum_to_bucket_list``.
        sample_count: Sample count handed to ``fit_buckets``.
        partitions: Partition count handed to ``bin_sum_to_bucket_list``.
        host_idx: Index of the host that receives the split points.
        host_model_params: Parameters used to build the ``OptimalBinning``
            object.

    Returns:
        The ``OptimalBinning`` object after ``fit_buckets`` has run.
    """
    local_binning = self.binning_obj

    # The new object reuses the abnormal list and event totals of this
    # party's own binning object.
    binning = OptimalBinning(params=host_model_params,
                             abnormal_list=local_binning.abnormal_list)
    binning.event_total = local_binning.event_total
    binning.non_event_total = local_binning.non_event_total

    LOGGER.debug("Start host party optimal binning train")
    buckets = binning.bin_sum_to_bucket_list(result_counts, partitions)
    binning.fit_buckets(buckets, sample_count)

    self.transfer_variable.bucket_idx.remote(
        binning.bin_results.all_split_points,
        role=consts.HOST,
        idx=host_idx,
    )
    return binning
def load_model(self, model_dict):
    """Restore binning state from a saved model dictionary.

    Reads the first entry under ``model_dict['model']`` and takes its
    ``MODEL_PARAM_NAME`` and ``MODEL_META_NAME`` values. From them it
    rebuilds ``self.bin_inner_param``, ``self.labels``, ``self.bin_result``
    (only when labels are present), ``self.header``,
    ``self.transform_type``, ``self.binning_obj`` with its split points,
    ``self.host_results`` and, when non-empty, ``self.header_anonymous``.

    Args:
        model_dict: Mapping with a ``'model'`` entry whose first value
            holds the param and meta objects.

    Raises:
        AssertionError: If the meta or param object is not of the expected
            protobuf type, or if the number of host results is not a
            multiple of the number of labels (when there are not exactly
            two labels).
    """
    first_entry = list(model_dict.get('model').values())[0]
    model_param = first_entry.get(MODEL_PARAM_NAME)
    model_meta = first_entry.get(MODEL_META_NAME)

    self.bin_inner_param = BinInnerParam()

    multi_class_pb = model_param.multi_class_result
    self.labels = list(multi_class_pb.labels)
    if self.labels:
        self.bin_result = MultiClassBinResult.reconstruct(
            list(multi_class_pb.results), self.labels)

    assert isinstance(model_meta,
                      feature_binning_meta_pb2.FeatureBinningMeta)
    assert isinstance(model_param,
                      feature_binning_param_pb2.FeatureBinningParam)

    # Header and column selections.
    self.header = list(model_param.header)
    inner_param = self.bin_inner_param
    inner_param.set_header(self.header)
    inner_param.add_transform_bin_indexes(
        list(model_meta.transform_param.transform_cols))
    inner_param.add_bin_names(list(model_meta.cols))
    self.transform_type = model_meta.transform_param.transform_type

    # Any method other than quantile or optimal falls back to bucket binning.
    method_name = str(model_meta.method)
    if method_name == consts.QUANTILE:
        binning_cls = QuantileBinning
    elif method_name == consts.OPTIMAL:
        binning_cls = OptimalBinning
    else:
        binning_cls = BucketBinning
    self.binning_obj = binning_cls(params=model_meta)
    self.binning_obj.set_bin_inner_param(self.bin_inner_param)

    # Restore the split points of every column.
    per_column = dict(model_param.binning_result.binning_result)
    for col_name, col_pb in per_column.items():
        self.binning_obj.bin_results.put_col_split_points(
            col_name, list(col_pb.split_points))

    # Host results: with exactly two labels each entry is reconstructed on
    # its own; otherwise entries are consumed in groups of len(labels).
    self.host_results = []
    host_pbs = list(model_param.multi_class_result.host_results)
    if len(host_pbs):
        if len(self.labels) == 2:
            for single_pb in host_pbs:
                self.host_results.append(
                    MultiClassBinResult.reconstruct(single_pb, self.labels))
        else:
            assert len(host_pbs) % len(self.labels) == 0
            offset = 0
            while offset < len(host_pbs):
                group = host_pbs[offset:offset + len(self.labels)]
                self.host_results.append(
                    MultiClassBinResult.reconstruct(group, self.labels))
                offset += len(self.labels)

    anonymous_header = list(model_param.header_anonymous)
    if anonymous_header:
        self.header_anonymous = anonymous_header