def callback_info(self):
    class_weight = None
    classes = None
    if self.class_weight_dict:
        class_weight = {str(k): v for k, v in self.class_weight_dict.items()}
        classes = sorted([str(k) for k in self.class_weight_dict.keys()])
    # LOGGER.debug(f"callback class weight is: {class_weight}")

    metric_meta = MetricMeta(name='train',
                             metric_type=self.metric_type,
                             extra_metas={
                                 "weight_mode": self.weight_mode,
                                 "class_weight": class_weight,
                                 "classes": classes,
                                 "sample_weight_name": self.sample_weight_name
                             })

    self.callback_metric(metric_name=self.metric_name,
                         metric_namespace=self.metric_namespace,
                         metric_data=[Metric(self.metric_name, 0)])
    self.tracker.set_metric_meta(metric_namespace=self.metric_namespace,
                                 metric_name=self.metric_name,
                                 metric_meta=metric_meta)

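# ---------------------------------------------------------------------------
# The snippets in this file are FATE-style callbacks: they all lean on
# `Metric` / `MetricMeta` records and a `tracker` that persists them, plus a
# module-level LOGGER (in FATE, e.g. `from federatedml.util import LOGGER`).
# To exercise the snippets locally, a minimal sketch of those interfaces
# could look like the stand-ins below -- hypothetical stubs, not the real
# FATE classes:
import logging
from collections import defaultdict

import numpy as np

LOGGER = logging.getLogger(__name__)


class Metric:
    """A single (key, value) metric point."""

    def __init__(self, key, value):
        self.key = key
        self.value = value


class MetricMeta:
    """Metadata describing how a metric group should be interpreted."""

    def __init__(self, name, metric_type, extra_metas=None):
        self.name = name
        self.metric_type = metric_type
        self.extra_metas = extra_metas or {}


class DummyTracker:
    """Stand-in tracker that prints instead of persisting to a tracking DB."""

    def log_metric_data(self, metric_namespace, metric_name, metrics):
        print(metric_namespace, metric_name,
              [(m.key, m.value) for m in metrics])

    def set_metric_meta(self, metric_namespace, metric_name, metric_meta):
        print(metric_namespace, metric_name,
              metric_meta.metric_type, metric_meta.extra_metas)
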
def __save_curve_meta(self, metric_name, metric_namespace, metric_type,
                      unit_name=None, ordinate_name=None, curve_name=None,
                      best=None, pair_type=None, thresholds=None):
    extra_metas = {}
    metric_type = "_".join([metric_type, "EVALUATION"])

    key_list = ["unit_name", "ordinate_name", "curve_name", "best",
                "pair_type", "thresholds"]
    for key in key_list:
        value = locals()[key]
        if value:
            if key == "thresholds":
                value = np.round(value, self.round_num).tolist()
            extra_metas[key] = value

    self.tracker.set_metric_meta(
        metric_namespace, metric_name,
        MetricMeta(name=metric_name, metric_type=metric_type,
                   extra_metas=extra_metas))

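# Mini-demo of the locals() pattern used above, detached from the class
# (names and values are illustrative): only keyword arguments that were
# actually passed -- and are truthy -- end up in the collected dict, so a
# falsy value like best=0 would be silently skipped by the `if value:` test.
def collect_kwargs(unit_name=None, curve_name=None, thresholds=None):
    extra_metas = {}
    for key in ["unit_name", "curve_name", "thresholds"]:
        value = locals()[key]  # look the parameter up by its name
        if value:
            extra_metas[key] = value
    return extra_metas

assert collect_kwargs(unit_name="fpr", thresholds=[0.5]) == \
    {"unit_name": "fpr", "thresholds": [0.5]}
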
def _callback(self):
    self.tracker.set_metric_meta(
        metric_namespace="statistic",
        metric_name="correlation",
        metric_meta=MetricMeta(name="pearson",
                               metric_type="CORRELATION_GRAPH"),
    )

def _display_result(self, block_num=None):
    # both branches were identical except for the block number; fall back to
    # the stored value when none is given
    if block_num is None:
        block_num = self.block_num
    self.callback_metric(metric_name=self.metric_name,
                         metric_namespace=self.metric_namespace,
                         metric_data=[Metric("Coverage", self.coverage),
                                      Metric("Block number", block_num)])
    self.tracker.set_metric_meta(metric_namespace=self.metric_namespace,
                                 metric_name=self.metric_name,
                                 metric_meta=MetricMeta(self.metric_name,
                                                        metric_type="INTERSECTION"))

def _callback_leaf_id_mapping(self, mapping):
    metric_namespace = 'sbt_transformer'
    metric_name = 'leaf_mapping'
    self.tracker.set_metric_meta(metric_namespace, metric_name,
                                 MetricMeta(name=metric_name,
                                            metric_type=metric_name,
                                            extra_metas=mapping))

def record_step_best(self, step_best, host_mask, guest_mask, data_instances, model):
    metas = {"host_mask": host_mask.tolist(),
             "guest_mask": guest_mask.tolist(),
             "score_name": self.score_name}
    metas["number_in"] = int(sum(host_mask) + sum(guest_mask))
    metas["direction"] = self.direction
    metas["n_count"] = int(self.n_count)

    host_anonym = [
        anonymous_generator.generate_anonymous(fid=i, role='host', model=model)
        for i in range(len(host_mask))
    ]
    guest_anonym = [
        anonymous_generator.generate_anonymous(fid=i, role='guest', model=model)
        for i in range(len(guest_mask))
    ]
    metas["host_features_anonym"] = host_anonym
    metas["guest_features_anonym"] = guest_anonym

    model_info = self.models_trained[step_best]
    loss = model_info.get_loss()
    ic_val = model_info.get_score()
    metas["loss"] = loss
    metas["current_ic_val"] = ic_val
    metas["fit_intercept"] = model.fit_intercept

    model_key = model_info.get_key()
    model_dict = self._get_model(model_key)

    if self.role != consts.ARBITER:
        all_features = data_instances.schema.get('header')
        metas["all_features"] = all_features
        metas["to_enter"] = self.get_to_enter(host_mask, guest_mask, all_features)
        model_param = list(model_dict.get('model').values())[0].get(model.model_param_name)
        param_dict = MessageToDict(model_param)
        metas["intercept"] = param_dict.get("intercept", None)
        metas["weight"] = param_dict.get("weight", {})
        metas["header"] = param_dict.get("header", [])
        if self.n_step == 0 and self.direction == "forward":
            metas["intercept"] = self.intercept
        self.update_summary_client(model, host_mask, guest_mask, all_features,
                                   host_anonym, guest_anonym)
    else:
        self.update_summary_arbiter(model, loss, ic_val)

    metric_name = f"stepwise_{self.n_step}"
    metric = [Metric(metric_name, float(self.n_step))]
    model.callback_metric(metric_name=metric_name,
                          metric_namespace=self.metric_namespace,
                          metric_data=metric)
    model.tracker.set_metric_meta(metric_name=metric_name,
                                  metric_namespace=self.metric_namespace,
                                  metric_meta=MetricMeta(name=metric_name,
                                                         metric_type=self.metric_type,
                                                         extra_metas=metas))
    LOGGER.info(f"metric_name: {metric_name}, metas: {metas}")

def fit(self, data_inst, validate_data=None):
    # init binning obj
    self.aggregator = HomoBoostArbiterAggregator()
    self.binning_obj = HomoFeatureBinningServer()
    self.federated_binning()

    # initializing
    self.feature_num = self.sync_feature_num()

    if self.task_type == consts.CLASSIFICATION:
        label_mapping = HomoLabelEncoderArbiter().label_alignment()
        LOGGER.info('label mapping is {}'.format(label_mapping))
        self.booster_dim = len(label_mapping) if len(label_mapping) > 2 else 1

    if self.n_iter_no_change:
        self.check_convergence_func = converge_func_factory("diff", self.tol)

    # sync start round and end round
    self.sync_start_round_and_end_round()

    LOGGER.info('begin to fit a boosting tree')
    self.preprocess()
    for epoch_idx in range(self.start_round, self.boosting_round):
        LOGGER.info('cur epoch idx is {}'.format(epoch_idx))
        for class_idx in range(self.booster_dim):
            model = self.fit_a_learner(epoch_idx, class_idx)

        global_loss = self.aggregator.aggregate_loss(suffix=(epoch_idx,))
        self.history_loss.append(global_loss)
        LOGGER.debug('cur epoch global loss is {}'.format(global_loss))

        self.callback_metric("loss", "train", [Metric(epoch_idx, global_loss)])

        if self.n_iter_no_change:
            should_stop = self.aggregator.broadcast_converge_status(
                self.check_convergence, (global_loss,), suffix=(epoch_idx,))
            LOGGER.debug('stop flag sent')
            if should_stop:
                break

    self.callback_meta("loss", "train",
                       MetricMeta(name="train",
                                  metric_type="LOSS",
                                  extra_metas={"Best": min(self.history_loss)}))
    self.postprocess()
    self.callback_list.on_train_end()
    self.set_summary(self.generate_summary())

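# `converge_func_factory("diff", self.tol)` above configures a loss-diff
# early-stop rule. A minimal stand-in for such a checker (hypothetical, not
# FATE's actual implementation): converged once the absolute change in loss
# between consecutive rounds drops below `eps`.
class DiffConverge:
    def __init__(self, eps):
        self.eps = eps
        self.pre_loss = None

    def is_converge(self, loss):
        converged = (self.pre_loss is not None
                     and abs(self.pre_loss - loss) < self.eps)
        self.pre_loss = loss
        return converged
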
def callback_loss(self, iter_num, loss):
    metric_meta = MetricMeta(name='train',
                             metric_type="LOSS",
                             extra_metas={
                                 "unit_name": "iters",
                             })
    self.callback_meta(metric_name='loss',
                       metric_namespace='train',
                       metric_meta=metric_meta)
    self.callback_metric(metric_name='loss',
                         metric_namespace='train',
                         metric_data=[Metric(iter_num, loss)])

def callback(self, metas):
    metric = [Metric(self.metric_name, 0)]
    self.callback_metric(metric_name=self.metric_name,
                         metric_namespace=self.metric_namespace,
                         metric_data=metric)
    self.tracker.set_metric_meta(metric_name=self.metric_name,
                                 metric_namespace=self.metric_namespace,
                                 metric_meta=MetricMeta(name=self.metric_name,
                                                        metric_type=self.metric_type,
                                                        extra_metas=metas))

def callback_ovr_metric_data(self, eval_results):
    for model_name, eval_rs in eval_results.items():

        train_callback_meta = defaultdict(dict)
        validate_callback_meta = defaultdict(dict)
        split_list = model_name.split('_')
        label = split_list[-1]
        # drop the trailing 'class' and label-index tokens to recover the
        # original model name
        origin_model_name = '_'.join(split_list[:-2])

        for rs_dict in eval_rs:
            for metric_name, metric_rs in rs_dict.items():
                if metric_name == consts.KS:
                    # ks value only, curve data is not needed
                    metric_rs = [metric_rs[0], metric_rs[1][0]]
                metric_namespace = metric_rs[0]
                if metric_namespace == 'train':
                    callback_meta = train_callback_meta
                else:
                    callback_meta = validate_callback_meta
                callback_meta[label][metric_name] = metric_rs[1]

        self.tracker.set_metric_meta("train",
                                     model_name + '_' + 'ovr',
                                     MetricMeta(name=origin_model_name,
                                                metric_type='ovr',
                                                extra_metas=train_callback_meta))
        self.tracker.set_metric_meta("validate",
                                     model_name + '_' + 'ovr',
                                     MetricMeta(name=origin_model_name,
                                                metric_type='ovr',
                                                extra_metas=validate_callback_meta))

        LOGGER.debug('callback data {} {}'.format(train_callback_meta,
                                                  validate_callback_meta))

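# How the one-vs-rest name parsing above decomposes a per-class model name
# (the model name here is made up for illustration):
split_list = "hetero_lr_class_2".split('_')
assert split_list[-1] == '2'                      # class label
assert '_'.join(split_list[:-2]) == 'hetero_lr'   # original model name
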
def callback_info(self):
    metric_meta = MetricMeta(name='train',
                             metric_type=self.metric_type,
                             extra_metas={"label_encoder": self.label_encoder})
    self.callback_metric(metric_name=self.metric_name,
                         metric_namespace=self.metric_namespace,
                         metric_data=[Metric(self.metric_name, 0)])
    self.tracker.set_metric_meta(metric_namespace=self.metric_namespace,
                                 metric_name=self.metric_name,
                                 metric_meta=metric_meta)

def callback(tracker, method, callback_metrics, other_metrics=None, summary_dict=None):
    LOGGER.debug("callback: method is {}".format(method))
    if method == "random":
        tracker.log_metric_data("sample_count", "random", callback_metrics)
        tracker.set_metric_meta("sample_count", "random",
                                MetricMeta(name="sample_count",
                                           metric_type="SAMPLE_TEXT"))
        summary_dict["sample_count"] = callback_metrics[0].value
    else:
        LOGGER.debug("callback: name {}, namespace {}, metrics_data {}".format(
            "sample_count", "stratified", callback_metrics))
        tracker.log_metric_data("sample_count", "stratified", callback_metrics)
        tracker.set_metric_meta("sample_count", "stratified",
                                MetricMeta(name="sample_count",
                                           metric_type="SAMPLE_TABLE"))
        tracker.log_metric_data("original_count", "stratified", other_metrics)
        tracker.set_metric_meta("original_count", "stratified",
                                MetricMeta(name="original_count",
                                           metric_type="SAMPLE_TABLE"))
        summary_dict["sample_count"] = {}
        for sample_metric in callback_metrics:
            summary_dict["sample_count"][sample_metric.key] = sample_metric.value
        summary_dict["original_count"] = {}
        for sample_metric in other_metrics:
            summary_dict["original_count"][sample_metric.key] = sample_metric.value

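# Illustrative invocation of the sampler callback above, wired to the
# hypothetical DummyTracker / Metric stand-ins sketched near the top of the
# file (labels and counts are made up):
summary = {}
callback(DummyTracker(), "stratified",
         callback_metrics=[Metric("0", 120), Metric("1", 80)],
         other_metrics=[Metric("0", 200), Metric("1", 100)],
         summary_dict=summary)
assert summary == {"sample_count": {"0": 120, "1": 80},
                   "original_count": {"0": 200, "1": 100}}
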
def __save_f1_score_table(self, metric, f1_scores, thresholds, metric_name, metric_namespace):
    extra_metas = {
        'f1_scores': list(np.round(f1_scores, self.round_num)),
        'thresholds': list(np.round(thresholds, self.round_num))
    }
    self.tracker.set_metric_meta(
        metric_namespace, metric_name,
        MetricMeta(name=metric_name, metric_type=metric.upper(),
                   extra_metas=extra_metas))

def callback_cache_meta(self, intersect_meta):
    """
    self.callback_metric(f"{self.metric_name}_cache_meta",
                         f"{self.metric_namespace}_CACHE",
                         metric_data=[Metric("intersect_cache_meta", 0)])
    """
    metric_name = f"{self.metric_name}_cache_meta"
    self.tracker.set_metric_meta(metric_namespace=self.metric_namespace,
                                 metric_name=metric_name,
                                 metric_meta=MetricMeta(name=metric_name,
                                                        metric_type=self.metric_type,
                                                        extra_metas=intersect_meta))

def __save_single_value(self, result, metric_name, metric_namespace, eval_name):
    metric_type = 'EVALUATION_SUMMARY'
    if eval_name in consts.ALL_CLUSTER_METRICS:
        metric_type = 'CLUSTERING_EVALUATION_SUMMARY'
    self.tracker.log_metric_data(
        metric_namespace, metric_name,
        [Metric(eval_name, np.round(result, self.round_num))])
    self.tracker.set_metric_meta(
        metric_namespace, metric_name,
        MetricMeta(name=metric_name, metric_type=metric_type))

def callback_dbi(self, iter_num, dbi):
    metric_meta = MetricMeta(name='train',
                             metric_type="DBI",
                             extra_metas={
                                 "unit_name": "iters",
                             })
    self.callback_meta(metric_name='DBI',
                       metric_namespace='train',
                       metric_meta=metric_meta)
    self.callback_metric(metric_name='DBI',
                         metric_namespace='train',
                         metric_data=[Metric(iter_num, dbi)])

def callback(self):
    meta_info = {"intersect_method": self.model_param.intersect_method,
                 "join_method": self.model_param.join_method}
    self.callback_metric(metric_name=self.metric_name,
                         metric_namespace=self.metric_namespace,
                         metric_data=[Metric("intersect_count", self.intersect_num),
                                      Metric("intersect_rate", self.intersect_rate),
                                      Metric("unmatched_count", self.unmatched_num),
                                      Metric("unmatched_rate", self.unmatched_rate)])
    self.tracker.set_metric_meta(metric_namespace=self.metric_namespace,
                                 metric_name=self.metric_name,
                                 metric_meta=MetricMeta(name=self.metric_name,
                                                        metric_type=self.metric_type,
                                                        extra_metas=meta_info))

def __save_pr_table(self, metric, metric_res, metric_name, metric_namespace):
    p_scores, r_scores, score_threshold = metric_res
    extra_metas = {
        'p_scores': list(map(list, np.round(p_scores, self.round_num))),
        'r_scores': list(map(list, np.round(r_scores, self.round_num))),
        'thresholds': list(np.round(score_threshold, self.round_num))
    }
    self.tracker.set_metric_meta(
        metric_namespace, metric_name,
        MetricMeta(name=metric_name, metric_type=metric.upper(),
                   extra_metas=extra_metas))

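# Rounding behavior the table savers above rely on (illustrative numbers):
# np.round over the nested score arrays yields an ndarray, and map(list, ...)
# converts each row back to a plain list so it can be serialized.
rows = np.round([[0.123456, 0.654321]], 3)
assert list(map(list, rows)) == [[0.123, 0.654]]
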
def __save_confusion_mat_table(self, metric, confusion_mat, thresholds, metric_name, metric_namespace):
    extra_metas = {
        'tp': list(confusion_mat['tp']),
        'tn': list(confusion_mat['tn']),
        'fp': list(confusion_mat['fp']),
        'fn': list(confusion_mat['fn']),
        'thresholds': list(np.round(thresholds, self.round_num))
    }
    self.tracker.set_metric_meta(
        metric_namespace, metric_name,
        MetricMeta(name=metric_name, metric_type=metric.upper(),
                   extra_metas=extra_metas))

def callback_loss(self, iter_num, loss):
    # noinspection PyTypeChecker
    metric_meta = MetricMeta(
        name="train",
        metric_type="LOSS",
        extra_metas={
            "unit_name": "iters",
        },
    )
    self.callback_meta(
        metric_name="loss", metric_namespace="train", metric_meta=metric_meta
    )
    self.callback_metric(
        metric_name="loss",
        metric_namespace="train",
        metric_data=[Metric(iter_num, loss)],
    )

def __save_distance_measure(self, metric, metric_res: dict, metric_name, metric_namespace):
    # metric_res maps cluster index -> (radius, nearest cluster index)
    extra_metas = {}
    cluster_index = list(metric_res.keys())
    radius, nearest_idx = [], []
    for k in metric_res:
        radius.append(metric_res[k][0])
        nearest_idx.append(metric_res[k][1])

    extra_metas['cluster_index'] = cluster_index
    extra_metas['radius'] = radius
    extra_metas['nearest_idx'] = nearest_idx

    self.tracker.set_metric_meta(
        metric_namespace, metric_name,
        MetricMeta(name=metric_name, metric_type=metric.upper(),
                   extra_metas=extra_metas))

def server_callback_loss(self, iter_num, loss):
    # noinspection PyTypeChecker
    metric_meta = MetricMeta(
        name="train",
        metric_type="LOSS",
        extra_metas={
            "unit_name": "iters",
        },
    )
    self.callback_meta(metric_name="loss",
                       metric_namespace="train",
                       metric_meta=metric_meta)
    self.callback_metric(
        metric_name="loss",
        metric_namespace="train",
        metric_data=[Metric(iter_num, loss)],
    )
    self._summary["loss_history"].append(loss)

def __save_contingency_matrix(self, metric, metric_res, metric_name, metric_namespace):
    result_array, unique_predicted_label, unique_true_label = metric_res
    true_labels = list(map(int, unique_true_label))
    predicted_labels = list(map(int, unique_predicted_label))
    result_table = []
    for l_ in result_array:
        result_table.append(list(map(int, l_)))

    extra_metas = {
        'true_labels': true_labels,
        'predicted_labels': predicted_labels,
        'result_table': result_table
    }
    self.tracker.set_metric_meta(
        metric_namespace, metric_name,
        MetricMeta(name=metric_name, metric_type=metric.upper(),
                   extra_metas=extra_metas))

def transform(self, data, fit_config=None):
    """
    Transform input data using scale with fit results

    Parameters
    ----------
    data: data_instance, input data
    fit_config: list, the fit results information of scale

    Returns
    ----------
    transform_data: data_instance, data after transform
    """
    LOGGER.info("Start scale data transform ...")

    if self.model_param.method == consts.MINMAXSCALE:
        self.scale_obj = MinMaxScale(self.model_param)
    elif self.model_param.method == consts.STANDARDSCALE:
        self.scale_obj = StandardScale(self.model_param)
        self.scale_obj.set_param(self.mean, self.std)
    else:
        LOGGER.warning("Scale method is {}, do nothing and return!".format(self.model_param.method))

    if self.scale_obj:
        self.scale_obj.header = self.header
        self.scale_obj.scale_column_idx = self.scale_column_idx
        self.scale_obj.set_column_range(self.column_max_value, self.column_min_value)
        transform_data = self.scale_obj.transform(data)
        transform_data.schema = data.schema
        self.callback_meta(metric_name="scale",
                           metric_namespace="train",
                           metric_meta=MetricMeta(name="scale",
                                                  metric_type="SCALE",
                                                  extra_metas={"method": self.model_param.method}))
    else:
        transform_data = data

    LOGGER.info("End transform data.")
    return transform_data

def fit(self, data):
    """
    Apply scale for input data

    Parameters
    ----------
    data: data_instance, input data

    Returns
    ----------
    fit_data: data_instance, data after scale
    """
    LOGGER.info("Start scale data fit ...")

    if self.model_param.method == consts.MINMAXSCALE:
        self.scale_obj = MinMaxScale(self.model_param)
    elif self.model_param.method == consts.STANDARDSCALE:
        self.scale_obj = StandardScale(self.model_param)
    else:
        LOGGER.warning("Scale method is {}, do nothing and return!".format(self.model_param.method))

    if self.scale_obj:
        fit_data = self.scale_obj.fit(data)
        fit_data.schema = data.schema
        self.callback_meta(metric_name="scale",
                           metric_namespace="train",
                           metric_meta=MetricMeta(name="scale",
                                                  metric_type="SCALE",
                                                  extra_metas={"method": self.model_param.method}))
        LOGGER.info("start to get model summary ...")
        self.set_summary(self.scale_obj.get_model_summary())
        LOGGER.info("Finish getting model summary.")
    else:
        fit_data = data

    LOGGER.info("End fit data ...")
    return fit_data

def __save_psi_table(self, metric, metric_res, metric_name, metric_namespace):
    psi_scores, total_psi, expected_interval, expected_percentage, actual_interval, \
        actual_percentage, train_pos_perc, validate_pos_perc, intervals = metric_res[1]

    extra_metas = {
        'psi_scores': list(np.round(psi_scores, self.round_num)),
        'total_psi': round(total_psi, self.round_num),
        'expected_interval': list(expected_interval),
        'expected_percentage': list(expected_percentage),
        'actual_interval': list(actual_interval),
        'actual_percentage': list(actual_percentage),
        'intervals': list(intervals),
        'train_pos_perc': train_pos_perc,
        'validate_pos_perc': validate_pos_perc
    }

    self.tracker.set_metric_meta(
        metric_namespace, metric_name,
        MetricMeta(name=metric_name, metric_type=metric.upper(),
                   extra_metas=extra_metas))

def fit(self, data_inst, validate_data=None):
    LOGGER.debug('in training, partitions is {}'.format(data_inst.partitions))
    LOGGER.info('start to fit a ftl model, '
                'run mode is {}, '
                'communication efficient mode is {}'.format(self.mode, self.comm_eff))

    self.check_host_number()

    data_loader, self.x_shape, self.data_num, self.overlap_num = self.prepare_data(
        self.init_intersect_obj(), data_inst, guest_side=True)
    self.input_dim = self.x_shape[0]

    # cache data_loader for faster validation
    self.cache_dataloader[self.get_dataset_key(data_inst)] = data_loader

    self.partitions = data_inst.partitions
    LOGGER.debug('self partitions is {}'.format(self.partitions))

    self.initialize_nn(input_shape=self.x_shape)
    self.feat_dim = self.nn._model.output_shape[1]
    self.constant_k = 1 / self.feat_dim
    self.callback_list.on_train_begin(train_data=data_inst, validate_data=validate_data)

    self.callback_meta("loss", "train",
                       MetricMeta(name="train",
                                  metric_type="LOSS",
                                  extra_metas={"unit_name": "iters"}))

    # compute intermediate result of first epoch
    self.phi, self.phi_product, self.overlap_ua, self.send_components = self.batch_compute_components(
        data_loader)

    for epoch_idx in range(self.epochs):

        LOGGER.debug('fitting epoch {}'.format(epoch_idx))
        self.callback_list.on_epoch_begin(epoch_idx)

        host_components = self.exchange_components(self.send_components, epoch_idx=epoch_idx)

        loss = None

        for local_round_idx in range(self.local_round):

            if self.comm_eff:
                LOGGER.debug('running local iter {}'.format(local_round_idx))

            grads = self.compute_backward_gradients(host_components, data_loader,
                                                    epoch_idx=epoch_idx,
                                                    local_round=local_round_idx)
            self.update_nn_weights(grads, data_loader, epoch_idx, decay=self.comm_eff)

            if local_round_idx == 0:
                loss = self.compute_loss(host_components, epoch_idx,
                                         len(data_loader.get_overlap_indexes()))

            if local_round_idx + 1 != self.local_round:
                self.phi, self.overlap_ua = self.compute_phi_and_overlap_ua(data_loader)

        self.callback_metric("loss", "train", [Metric(epoch_idx, loss)])
        self.history_loss.append(loss)

        # updating variables for next epochs
        if epoch_idx + 1 == self.epochs:
            # only need to update phi in the last epoch
            self.phi, _ = self.compute_phi_and_overlap_ua(data_loader)
        else:
            # compute phi, phi_product, overlap_ua etc. for next epoch
            self.phi, self.phi_product, self.overlap_ua, self.send_components = self.batch_compute_components(
                data_loader)

        self.callback_list.on_epoch_end(epoch_idx)

        # check n_iter_no_change
        if self.n_iter_no_change is True:
            if self.check_convergence(loss):
                self.sync_stop_flag(epoch_idx, stop_flag=True)
                break
            else:
                self.sync_stop_flag(epoch_idx, stop_flag=False)

        LOGGER.debug('fitting epoch {} done, loss is {}'.format(epoch_idx, loss))

    self.callback_list.on_train_end()
    self.callback_meta("loss", "train",
                       MetricMeta(name="train",
                                  metric_type="LOSS",
                                  extra_metas={"Best": min(self.history_loss)}))

    self.set_summary(self.generate_summary())
    LOGGER.debug('fitting ftl model done')

def fit(self, data):
    # LOGGER.debug(f"fit receives data is {data}")
    if not isinstance(data, dict) or len(data) <= 1:
        raise ValueError("Union module must receive more than one table as input.")
    empty_count = 0
    combined_table = None
    combined_schema = None
    metrics = []

    for (key, local_table) in data.items():
        LOGGER.debug("table to combine name: {}".format(key))
        num_data = local_table.count()
        LOGGER.debug("table count: {}".format(num_data))
        metrics.append(Metric(key, num_data))
        self.add_summary(key, num_data)

        if num_data == 0:
            LOGGER.warning("Table {} is empty.".format(key))
            if combined_table is None:
                combined_table = local_table
                combined_schema = local_table.schema
            empty_count += 1
            continue

        local_is_data_instance = self.check_is_data_instance(local_table)
        if self.is_data_instance is None or combined_table is None:
            self.is_data_instance = local_is_data_instance
        LOGGER.debug(f"self.is_data_instance is {self.is_data_instance}, "
                     f"local_is_data_instance is {local_is_data_instance}")
        if self.is_data_instance != local_is_data_instance:
            raise ValueError("Cannot combine DataInstance and non-DataInstance object. Union aborted.")

        if self.is_data_instance:
            self.is_empty_feature = data_overview.is_empty_feature(local_table)
            if self.is_empty_feature:
                LOGGER.warning("Table {} has empty feature.".format(key))
            else:
                self.check_schema_content(local_table.schema)

        if combined_table is None or combined_table.count() == 0:
            # first non-empty table to combine
            combined_table = local_table
            combined_schema = local_table.schema
            if self.keep_duplicate:
                combined_table = combined_table.map(lambda k, v: (f"{k}_{key}", v))
                combined_table.schema = combined_schema
        else:
            self.check_id(local_table, combined_table)
            self.check_label_name(local_table, combined_table)
            self.check_header(local_table, combined_table)
            if self.keep_duplicate:
                local_table = local_table.map(lambda k, v: (f"{k}_{key}", v))

            combined_table = combined_table.union(local_table, self._keep_first)
            combined_table.schema = combined_schema

        # only check feature length if not empty
        if self.is_data_instance and not self.is_empty_feature:
            self.feature_count = len(combined_schema.get("header"))
            # LOGGER.debug(f"feature count: {self.feature_count}")
            combined_table.mapValues(self.check_feature_length)

    if combined_table is None:
        LOGGER.warning("All tables provided are empty or have empty features.")
        first_table = list(data.values())[0]
        combined_table = first_table.join(first_table)

    num_data = combined_table.count()
    metrics.append(Metric("Total", num_data))
    self.add_summary("Total", num_data)
    LOGGER.info(f"Result total data entry count: {num_data}")

    self.callback_metric(metric_name=self.metric_name,
                         metric_namespace=self.metric_namespace,
                         metric_data=metrics)
    self.tracker.set_metric_meta(metric_namespace=self.metric_namespace,
                                 metric_name=self.metric_name,
                                 metric_meta=MetricMeta(name=self.metric_name,
                                                        metric_type=self.metric_type))

    LOGGER.info("Union operation finished. Total {} empty tables encountered.".format(empty_count))
    return combined_table

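# The keep_duplicate branches above suffix every record key with its source
# table's name, so ids that collide across tables both survive the union.
# Plain-dict sketch of that key rewrite (table contents are made up):
def suffix_keys(table, table_name):
    return {f"{k}_{table_name}": v for k, v in table.items()}

assert suffix_keys({"id1": 1, "id2": 2}, "tableA") == \
    {"id1_tableA": 1, "id2_tableA": 2}
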
def _set_loss_callback_info(self):
    self.callback_meta("loss", "train",
                       MetricMeta(name="train",
                                  metric_type="LOSS",
                                  extra_metas={"unit_name": "iters"}))

def fit(self, data_inst, validate_data=None):
    LOGGER.info('begin to fit a hetero boosting model, model is {}'.format(self.model_name))
    self.start_round = 0
    self.on_training = True
    self.data_inst = data_inst
    self.data_bin, self.bin_split_points, self.bin_sparse_points = self.prepare_data(data_inst)
    self.y = self.get_label(self.data_bin)

    if not self.is_warm_start:
        self.feature_name_fid_mapping = self.gen_feature_fid_mapping(data_inst.schema)
        self.classes_, self.num_classes, self.booster_dim = self.check_label()
        self.loss = self.get_loss_function()
        self.y_hat, self.init_score = self.get_init_score(self.y, self.num_classes)
    else:
        classes_, num_classes, booster_dim = self.check_label()
        self.prepare_warm_start(data_inst, classes_)

    LOGGER.info('class index is {}'.format(self.classes_))

    self.sync_booster_dim()
    self.generate_encrypter()

    self.callback_list.on_train_begin(data_inst, validate_data)
    self.callback_meta("loss", "train",
                       MetricMeta(name="train",
                                  metric_type="LOSS",
                                  extra_metas={"unit_name": "iters"}))

    self.preprocess()

    for epoch_idx in range(self.start_round, self.boosting_round):

        LOGGER.info('cur epoch idx is {}'.format(epoch_idx))
        self.callback_list.on_epoch_begin(epoch_idx)

        for class_idx in range(self.booster_dim):

            # fit a booster
            model = self.fit_a_learner(epoch_idx, class_idx)

            booster_meta, booster_param = model.get_model()
            if booster_meta is not None and booster_param is not None:
                self.booster_meta = booster_meta
                self.boosting_model_list.append(booster_param)

            # update predict score
            cur_sample_weights = model.get_sample_weights()
            self.y_hat = self.get_new_predict_score(self.y_hat, cur_sample_weights, dim=class_idx)

        # compute loss
        loss = self.compute_loss(self.y_hat, self.y)
        self.history_loss.append(loss)
        LOGGER.info("round {} loss is {}".format(epoch_idx, loss))
        self.callback_metric("loss", "train", [Metric(epoch_idx, loss)])

        # check validation
        validation_strategy = self.callback_list.get_validation_strategy()
        if validation_strategy:
            validation_strategy.set_precomputed_train_scores(
                self.score_to_predict_result(data_inst, self.y_hat))

        self.callback_list.on_epoch_end(epoch_idx)

        should_stop = False
        if self.n_iter_no_change and self.check_convergence(loss):
            should_stop = True
            self.is_converged = True
        self.sync_stop_flag(self.is_converged, epoch_idx)
        if self.stop_training or should_stop:
            break

    self.postprocess()
    self.callback_list.on_train_end()
    self.callback_meta("loss", "train",
                       MetricMeta(name="train",
                                  metric_type="LOSS",
                                  extra_metas={"Best": min(self.history_loss)}))
    # get summary
    self.set_summary(self.generate_summary())