Ejemplo n.º 1
0
    def host_optimal_binning(self, data_instances, host_idx,
                             encrypted_bin_info, result_counts,
                             category_names):
        """Configure an ``OptimalBinning`` object for one host and sync it.

        The binning parameters are taken from
        ``encrypted_bin_info['optimal_params']`` and written onto a deep copy
        of ``self.model_param``. Entries of ``result_counts`` whose key is in
        ``category_names`` are dropped before the remaining ones are handed
        to ``self.optimal_binning_sync``.

        Args:
            data_instances: Data table; this method calls ``.count()`` and
                reads ``.partitions`` on it, and passes it to
                ``self.get_histogram``.
            host_idx: Forwarded unchanged to ``self.optimal_binning_sync``.
            encrypted_bin_info: Mapping with an ``'optimal_params'`` entry
                that supports ``.get`` for the keys ``bin_num``,
                ``metric_method``, ``mixture``, ``max_bin_pct`` and
                ``min_bin_pct``. A missing key yields ``None``.
            result_counts: Object whose ``.collect()`` yields key/value
                pairs (it is fed straight into ``dict``).
            category_names: Container of keys to exclude (membership test
                only).

        Returns:
            Whatever ``self.optimal_binning_sync`` returns.
        """
        remote_params = encrypted_bin_info['optimal_params']

        # Work on a private deep copy so self.model_param is not modified.
        host_model_params = copy.deepcopy(self.model_param)
        host_model_params.bin_num = remote_params.get('bin_num')
        for field in ('metric_method', 'mixture', 'max_bin_pct',
                      'min_bin_pct'):
            value = remote_params.get(field)
            setattr(host_model_params.optimal_binning_param, field, value)

        event_total, non_event_total = self.get_histogram(data_instances)

        # Keep only the entries that are not listed in category_names.
        optimal_binning_cols = {}
        for col_key, col_counts in dict(result_counts.collect()).items():
            if col_key in category_names:
                continue
            optimal_binning_cols[col_key] = col_counts

        binner = OptimalBinning(
            params=host_model_params,
            abnormal_list=self.binning_obj.abnormal_list,
        )
        binner.event_total = event_total
        binner.non_event_total = non_event_total

        return self.optimal_binning_sync(
            binner,
            optimal_binning_cols,
            data_instances.count(),
            data_instances.partitions,
            host_idx,
        )
Ejemplo n.º 2
0
 def optimal_binning_sync(self, result_counts, sample_count, partitions, host_idx, host_model_params):
     """Fit an ``OptimalBinning`` object for a host and send its split points.

     A new ``OptimalBinning`` is created from ``host_model_params`` and
     ``self.binning_obj.abnormal_list``, and the event / non-event totals
     are copied from ``self.binning_obj``. The bucket table built from
     ``result_counts`` and ``partitions`` is then fitted, and the resulting
     ``bin_results.all_split_points`` are sent through
     ``self.transfer_variable.bucket_idx.remote`` with ``role=consts.HOST``
     and ``idx=host_idx``.

     Args:
         result_counts: Passed to ``bin_sum_to_bucket_list``.
         sample_count: Passed to ``fit_buckets``.
         partitions: Passed to ``bin_sum_to_bucket_list``.
         host_idx: Index used as ``idx`` for the remote call.
         host_model_params: Parameters for the new ``OptimalBinning``.

     Returns:
         The fitted ``OptimalBinning`` object.
     """
     binner = OptimalBinning(
         params=host_model_params,
         abnormal_list=self.binning_obj.abnormal_list,
     )
     # Carry both totals over from this party's own binning object.
     for total_attr in ("event_total", "non_event_total"):
         setattr(binner, total_attr, getattr(self.binning_obj, total_attr))

     LOGGER.debug("Start host party optimal binning train")
     binner.fit_buckets(
         binner.bin_sum_to_bucket_list(result_counts, partitions),
         sample_count,
     )

     split_points = binner.bin_results.all_split_points
     self.transfer_variable.bucket_idx.remote(
         split_points,
         role=consts.HOST,
         idx=host_idx,
     )
     return binner