def fit_binary(self, data_instances, validate_data):
    self._abnormal_detection(data_instances)
    validation_strategy = self.init_validation_strategy(data_instances, validate_data)
    LOGGER.debug(f"MODEL_STEP Start fit_binary, data count: {data_instances.count()}")

    self.header = self.get_header(data_instances)
    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    self.batch_generator.initialize_batch_generator(data_instances)
    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.cipher_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)
    ]

    LOGGER.info("Start initialize model.")
    model_shape = self.get_features_shape(data_instances)
    # the host never holds the intercept; make sure the initializer skips it
    if self.init_param_obj.fit_intercept:
        self.init_param_obj.fit_intercept = False
    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    LOGGER.debug("model_shape: {}, w shape: {}, w: {}".format(model_shape, w.shape, w))
    self.model_weights = LinearModelWeights(w, fit_intercept=self.init_param_obj.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter: " + str(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data()
        batch_index = 0
        self.optimizer.set_iters(self.n_iter_)
        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data' into more representative features 'batch_feat_inst'
            batch_feat_inst = self.transform(batch_data)
            LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}")

            optim_host_gradient, fore_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.encrypted_calculator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)
            LOGGER.debug('optim_host_gradient: {}'.format(optim_host_gradient))

            training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
            self.update_local_model(fore_gradient, data_instances, self.model_weights.coef_, **training_info)

            self.gradient_loss_operator.compute_loss(
                self.model_weights, self.optimizer, self.n_iter_, batch_index, self.cipher_operator)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("Get is_converged flag from arbiter: {}".format(self.is_converged))

        validation_strategy.validate(self, self.n_iter_)
        self.n_iter_ += 1
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
        if self.is_converged:
            break

    LOGGER.debug("Final lr weights: {}".format(self.model_weights.unboxed))
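
# --- Editor's sketch (not FATE code) -----------------------------------------
# The loop above aggregates Paillier-encrypted forwards and gradients. A
# minimal, standalone demo of the additive homomorphism it relies on, using
# the third-party `phe` (python-paillier) package as a stand-in for the
# cipher_operator, whose implementation is not shown in this file:
def _paillier_additivity_sketch():
    from phe import paillier  # assumption: `pip install phe`
    public_key, private_key = paillier.generate_paillier_keypair(n_length=1024)
    c1 = public_key.encrypt(0.25)            # encrypted per-sample residual
    c2 = public_key.encrypt(-0.75)
    aggregated = (c1 + c2) * 0.5             # add and scale without decrypting
    return private_key.decrypt(aggregated)   # -0.25
# -----------------------------------------------------------------------------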
def fit(self, data_instances, validate_data=None):
    """
    Train poisson model of role guest
    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_poisson_guest fit")
    self._abnormal_detection(data_instances)
    self.header = copy.deepcopy(self.get_header(data_instances))
    validation_strategy = self.init_validation_strategy(data_instances, validate_data)

    self.exposure_index = self.get_exposure_index(self.header, self.exposure_colname)
    if self.exposure_index > -1:
        self.header.pop(self.exposure_index)
        LOGGER.info("exposure provided at Guest, colname is {}".format(self.exposure_colname))
    exposure = data_instances.mapValues(lambda v: self.load_exposure(v))
    data_instances = data_instances.mapValues(lambda v: self.load_instance(v))

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
    self.encrypted_calculator = [
        EncryptModeCalculator(self.cipher_operator,
                              self.encrypted_mode_calculator_param.mode,
                              self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept: {}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter: {}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data' into more representative features 'batch_feat_inst'
            batch_feat_inst = self.transform(batch_data)

            # compute offset of this batch
            batch_offset = exposure.join(batch_feat_inst, lambda ei, d: self.safe_log(ei))

            # Start gradient procedure
            optimized_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst,
                self.encrypted_calculator,
                self.model_weights,
                self.optimizer,
                self.n_iter_,
                batch_index,
                batch_offset)
            LOGGER.debug("iteration: {} Guest's gradient: {}".format(self.n_iter_, optimized_gradient))

            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(data_instances, self.model_weights,
                                                     self.n_iter_, batch_index, batch_offset, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optimized_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        validation_strategy.validate(self, self.n_iter_)
        self.n_iter_ += 1
        if self.is_converged:
            break
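
# --- Editor's sketch (not FATE code) -----------------------------------------
# batch_offset above is log(exposure), the standard offset term of a Poisson
# GLM: mu = exp(X.w + log(exposure)) = exposure * exp(X.w). A plain-numpy
# illustration of that relationship:
def _poisson_offset_sketch():
    import numpy as np
    X = np.array([[1.0, 2.0], [0.5, 1.5]])
    w = np.array([0.1, -0.2])
    exposure = np.array([10.0, 2.0])
    mu = np.exp(X @ w + np.log(exposure))    # offset enters the linear predictor
    assert np.allclose(mu, exposure * np.exp(X @ w))
    return mu
# -----------------------------------------------------------------------------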
def fit_binary(self, data_instances, validate_data=None):
    LOGGER.info("Enter hetero_fm_guest fit")
    self.header = self.get_header(data_instances)

    validation_strategy = self.init_validation_strategy(data_instances, validate_data)
    data_instances = data_instances.mapValues(HeteroFMGuest.load_data)
    LOGGER.debug(f"MODEL_STEP After load data, data count: {data_instances.count()}")

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
    self.encrypted_calculator = [
        EncryptModeCalculator(self.cipher_operator,
                              self.encrypted_mode_calculator_param.mode,
                              self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept: {}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)

    # intercept is initialized within FactorizationMachineWeights.
    # Skip initializer's intercept part.
    fit_intercept = False
    if self.init_param_obj.fit_intercept:
        fit_intercept = True
        self.init_param_obj.fit_intercept = False

    w_ = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    embed_ = np.random.normal(scale=1 / np.sqrt(self.init_param_obj.embed_size),
                              size=(model_shape, self.init_param_obj.embed_size))
    self.model_weights = \
        FactorizationMachineWeights(w_, embed_, fit_intercept=fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter: {}".format(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_data.count()}")

            # Start gradient procedure
            LOGGER.debug("iter: {}, before compute gradient, data count: {}".format(
                self.n_iter_, batch_data.count()))
            optim_guest_gradient, fore_gradient = self.gradient_loss_operator. \
                compute_gradient_procedure(
                    batch_data,
                    self.encrypted_calculator,
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index)
            LOGGER.debug('optim_guest_gradient: {}'.format(optim_guest_gradient))

            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(data_instances, self.n_iter_,
                                                     batch_index, loss_norm)

            # clip gradient to [-clip_gradient, clip_gradient] if configured
            if self.model_param.clip_gradient and self.model_param.clip_gradient > 0:
                optim_guest_gradient = np.maximum(optim_guest_gradient, -self.model_param.clip_gradient)
                optim_guest_gradient = np.minimum(optim_guest_gradient, self.model_param.clip_gradient)

            _model_weights = self.optimizer.update_model(self.model_weights, optim_guest_gradient)
            self.model_weights.update(_model_weights)
            batch_index += 1
            LOGGER.debug("fm_weight, iters: {}, update_model: {}".format(
                self.n_iter_, self.model_weights.unboxed))

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        validation_strategy.validate(self, self.n_iter_)
        self.n_iter_ += 1
        if self.is_converged:
            break

    LOGGER.debug("Final fm weights: {}".format(self.model_weights.unboxed))
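
# --- Editor's sketch (not FATE code) -----------------------------------------
# embed_ above holds one k-dimensional latent vector per feature. A
# factorization machine scores pairwise interactions with the identity
#   sum_{i<j} <v_i, v_j> x_i x_j
#     = 0.5 * sum_f [ (sum_i v_{i,f} x_i)^2 - sum_i v_{i,f}^2 x_i^2 ],
# which costs O(n*k) instead of O(n^2 * k). Plain-numpy version:
def _fm_score_sketch(x, w, V, b=0.0):
    import numpy as np
    linear = b + x @ w
    s = V.T @ x                      # (k,)  sum_i v_{i,f} * x_i
    s2 = (V ** 2).T @ (x ** 2)       # (k,)  sum_i v_{i,f}^2 * x_i^2
    return linear + 0.5 * np.sum(s * s - s2)
# -----------------------------------------------------------------------------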
def fit_binary(self, data_instances, validate_data=None):
    LOGGER.info("Enter hetero_lr_guest fit")
    self.header = self.get_header(data_instances)
    self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)
    data_instances = data_instances.mapValues(HeteroLRGuest.load_data)
    LOGGER.debug(f"MODEL_STEP After load data, data count: {data_instances.count()}")

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)
    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.cipher_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)
    ]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept: {}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter: {}".format(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # features were already transformed on load; use the batch as-is
            batch_feat_inst = batch_data

            # Start gradient procedure
            LOGGER.debug("iter: {}, before compute gradient, data count: {}".format(
                self.n_iter_, batch_feat_inst.count()))
            optim_guest_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.encrypted_calculator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)

            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(data_instances, self.model_weights,
                                                     self.n_iter_, batch_index, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_guest_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        if self.validation_strategy:
            LOGGER.debug('LR guest running validation')
            self.validation_strategy.validate(self, self.n_iter_)
            if self.validation_strategy.need_stop():
                LOGGER.debug('early stopping triggered')
                break

        self.n_iter_ += 1
        if self.is_converged:
            break

    if self.validation_strategy and self.validation_strategy.has_saved_best_model():
        self.load_model(self.validation_strategy.cur_best_model)

    self.set_summary(self.get_model_summary())
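
# --- Editor's sketch (not FATE code) -----------------------------------------
# For reference, the plaintext quantity the encrypted gradient procedure above
# corresponds to: the mini-batch gradient of logistic loss. In the hetero
# setting the linear predictor is the sum of guest-side and host-side partial
# wx values; here everything is local for clarity.
def _logistic_gradient_sketch(X, y, w):
    import numpy as np
    z = X @ w
    residual = 1.0 / (1.0 + np.exp(-z)) - y   # sigmoid(wx) - y, labels in {0, 1}
    return X.T @ residual / len(y)
# -----------------------------------------------------------------------------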
def fit(self, data_instances):
    """
    Train lr model of role guest
    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_lr_guest fit")
    self._abnormal_detection(data_instances)

    self.header = self.get_header(data_instances)
    data_instances = data_instances.mapValues(HeteroLRGuest.load_data)

    # get public key from arbiter
    public_key = federation.get(
        name=self.transfer_variable.paillier_pubkey.name,
        tag=self.transfer_variable.generate_transferid(self.transfer_variable.paillier_pubkey),
        idx=0)
    LOGGER.info("Get public_key from arbiter: {}".format(public_key))
    self.encrypt_operator.set_public_key(public_key)

    LOGGER.info("Generate mini-batch from input data")
    mini_batch_obj = MiniBatch(data_instances, batch_size=self.batch_size)
    batch_num = mini_batch_obj.batch_nums
    if self.batch_size == -1:
        LOGGER.info("batch size is -1, set it to the number of data in data_instances")
        self.batch_size = data_instances.count()

    batch_info = {"batch_size": self.batch_size, "batch_num": batch_num}
    LOGGER.info("batch_info: {}".format(batch_info))
    federation.remote(batch_info,
                      name=self.transfer_variable.batch_info.name,
                      tag=self.transfer_variable.generate_transferid(self.transfer_variable.batch_info),
                      role=consts.HOST,
                      idx=0)
    LOGGER.info("Remote batch_info to Host")
    federation.remote(batch_info,
                      name=self.transfer_variable.batch_info.name,
                      tag=self.transfer_variable.generate_transferid(self.transfer_variable.batch_info),
                      role=consts.ARBITER,
                      idx=0)
    LOGGER.info("Remote batch_info to Arbiter")

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.encrypt_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(batch_num)
    ]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept: {}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    weight = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    if self.init_param_obj.fit_intercept is True:
        self.coef_ = weight[:-1]
        self.intercept_ = weight[-1]
    else:
        self.coef_ = weight

    is_send_all_batch_index = False
    self.n_iter_ = 0
    index_data_inst_map = {}

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter: {}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = mini_batch_obj.mini_batch_data_generator(result='index')

        batch_index = 0
        for batch_data_index in batch_data_generator:
            LOGGER.info("batch: {}".format(batch_index))
            if not is_send_all_batch_index:
                LOGGER.info("remote mini-batch index to Host")
                federation.remote(batch_data_index,
                                  name=self.transfer_variable.batch_data_index.name,
                                  tag=self.transfer_variable.generate_transferid(
                                      self.transfer_variable.batch_data_index, self.n_iter_, batch_index),
                                  role=consts.HOST,
                                  idx=0)
                if batch_index >= mini_batch_obj.batch_nums - 1:
                    is_send_all_batch_index = True

            # Get mini-batch train data
            if len(index_data_inst_map) < batch_num:
                batch_data_inst = data_instances.join(batch_data_index, lambda data_inst, index: data_inst)
                index_data_inst_map[batch_index] = batch_data_inst
            else:
                batch_data_inst = index_data_inst_map[batch_index]

            # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
            batch_feat_inst = self.transform(batch_data_inst)

            # guest/host forward
            self.compute_forward(batch_feat_inst, self.coef_, self.intercept_, batch_index)
            host_forward = federation.get(
                name=self.transfer_variable.host_forward_dict.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_forward_dict, self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get host_forward from host")
            aggregate_forward_res = self.aggregate_forward(host_forward)
            en_aggregate_wx = aggregate_forward_res.mapValues(lambda v: v[0])
            en_aggregate_wx_square = aggregate_forward_res.mapValues(lambda v: v[1])

            # compute [[d]]
            if self.gradient_operator is None:
                self.gradient_operator = HeteroLogisticGradient(self.encrypt_operator)
            fore_gradient = self.gradient_operator.compute_fore_gradient(batch_feat_inst, en_aggregate_wx)
            federation.remote(fore_gradient,
                              name=self.transfer_variable.fore_gradient.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.fore_gradient, self.n_iter_, batch_index),
                              role=consts.HOST,
                              idx=0)
            LOGGER.info("Remote fore_gradient to Host")

            # compute guest gradient and loss
            guest_gradient, loss = self.gradient_operator.compute_gradient_and_loss(
                batch_feat_inst, fore_gradient, en_aggregate_wx, en_aggregate_wx_square, self.fit_intercept)

            # add loss regularization if necessary
            if self.updater is not None:
                guest_loss_regular = self.updater.loss_norm(self.coef_)
                loss += self.encrypt_operator.encrypt(guest_loss_regular)

            federation.remote(guest_gradient,
                              name=self.transfer_variable.guest_gradient.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.guest_gradient, self.n_iter_, batch_index),
                              role=consts.ARBITER,
                              idx=0)
            LOGGER.info("Remote guest_gradient to arbiter")

            optim_guest_gradient = federation.get(
                name=self.transfer_variable.guest_optim_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_optim_gradient, self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get optim_guest_gradient from arbiter")

            # update model
            LOGGER.info("update_model")
            self.update_model(optim_guest_gradient)

            # update local model that transforms features of raw input 'batch_data_inst'
            training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
            self.update_local_model(fore_gradient, batch_data_inst, self.coef_, **training_info)

            # Get loss regularization from Host if regularization is set
            if self.updater is not None:
                en_host_loss_regular = federation.get(
                    name=self.transfer_variable.host_loss_regular.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_loss_regular, self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get host_loss_regular from Host")
                loss += en_host_loss_regular

            federation.remote(loss,
                              name=self.transfer_variable.loss.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.loss, self.n_iter_, batch_index),
                              role=consts.ARBITER,
                              idx=0)
            LOGGER.info("Remote loss to arbiter")

            # arbiter checks convergence of the loss
            batch_index += 1

            # temporary resource recovery; will be removed in the future
            rubbish_list = [host_forward,
                            aggregate_forward_res,
                            en_aggregate_wx,
                            en_aggregate_wx_square,
                            fore_gradient,
                            self.guest_forward]
            rubbish_clear(rubbish_list)

        is_stopped = federation.get(
            name=self.transfer_variable.is_stopped.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.is_stopped, self.n_iter_, batch_index),
            idx=0)
        LOGGER.info("Get is_stopped flag from arbiter: {}".format(is_stopped))

        self.n_iter_ += 1
        if is_stopped:
            LOGGER.info("Get stop signal from arbiter, model is converged, iter: {}".format(self.n_iter_))
            break

    LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))
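
# --- Editor's sketch (not FATE code) -----------------------------------------
# compute_fore_gradient above produces the encrypted residual [[d]]. A sigmoid
# cannot be evaluated under Paillier encryption, so hetero-LR implementations
# commonly replace sigmoid(wx) - y with a low-order Taylor expansion that stays
# linear in wx. The exact form used by this operator is not shown in this file;
# the version below (labels mapped to {-1, +1}) is stated as an assumption:
def _taylor_fore_gradient_sketch(wx, y):
    # d ~= 0.25 * wx - 0.5 * y, the first-order expansion of the sigmoid residual
    return 0.25 * wx - 0.5 * y
# -----------------------------------------------------------------------------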
def fit(self, data_instances, node2id, local_instances=None, common_nodes=None):
    """
    Train node embedding for role guest
    Parameters
    ----------
    data_instances: DTable of target node and label, input data
    node2id: a dict mapping node name to id
    """
    LOGGER.info("samples number: {}".format(data_instances.count()))
    LOGGER.info("Enter network embedding procedure:")
    self.n_node = len(node2id)
    LOGGER.info("Bank A has {} nodes".format(self.n_node))

    data_instances = data_instances.mapValues(HeteroNEGuest.load_data)
    LOGGER.info("Transform input data to train instance")

    public_key = federation.get(
        name=self.transfer_variable.paillier_pubkey.name,
        tag=self.transfer_variable.generate_transferid(self.transfer_variable.paillier_pubkey),
        idx=0)
    LOGGER.info("Get public_key from arbiter: {}".format(public_key))
    self.encrypt_operator.set_public_key(public_key)

    # hetero network embedding
    LOGGER.info("Generate mini-batch from input data")
    mini_batch_obj = MiniBatch(data_instances, batch_size=self.batch_size)
    batch_num = mini_batch_obj.batch_nums
    LOGGER.info("samples number: {}".format(data_instances.count()))
    if self.batch_size == -1:
        LOGGER.info("batch size is -1, set it to the number of data in data_instances")
        self.batch_size = data_instances.count()

    ##############
    # horizontal federated learning
    LOGGER.info("Generate mini-batch for local instances in guest")
    mini_batch_obj_local = MiniBatch(local_instances, batch_size=self.batch_size)
    local_batch_num = mini_batch_obj_local.batch_nums

    common_node_instances = eggroll.parallelize(
        ((node, node) for node in common_nodes), include_key=True, name='common_nodes')
    ##############

    batch_info = {'batch_size': self.batch_size, "batch_num": batch_num}
    LOGGER.info("batch_info: {}".format(batch_info))
    federation.remote(batch_info,
                      name=self.transfer_variable.batch_info.name,
                      tag=self.transfer_variable.generate_transferid(self.transfer_variable.batch_info),
                      role=consts.HOST,
                      idx=0)
    LOGGER.info("Remote batch_info to Host")
    federation.remote(batch_info,
                      name=self.transfer_variable.batch_info.name,
                      tag=self.transfer_variable.generate_transferid(self.transfer_variable.batch_info),
                      role=consts.ARBITER,
                      idx=0)
    LOGGER.info("Remote batch_info to Arbiter")

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.encrypt_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(batch_num)
    ]

    LOGGER.info("Start initialize model.")
    self.embedding_ = self.initializer.init_model((self.n_node, self.dim), self.init_param_obj)
    LOGGER.info("Embedding shape={}".format(self.embedding_.shape))

    is_send_all_batch_index = False
    self.n_iter_ = 0
    index_data_inst_map = {}

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter: {}".format(self.n_iter_))

        #################
        local_batch_data_generator = mini_batch_obj_local.mini_batch_data_generator()
        total_loss = 0
        local_batch_num = 0
        LOGGER.info("Enter the horizontally federated learning procedure:")
        for local_batch_data in local_batch_data_generator:
            n = local_batch_data.count()
            # LOGGER.info("Local batch data count: {}".format(n))
            E_Y = self.compute_local_embedding(local_batch_data, self.embedding_, node2id)
            local_grads_e1, local_grads_e2, local_loss = self.local_gradient_operator.compute(E_Y, 'E_1')
            local_grads_e1 = local_grads_e1.mapValues(
                lambda g: self.local_optimizer.apply_gradients(g / n))
            local_grads_e2 = local_grads_e2.mapValues(
                lambda g: self.local_optimizer.apply_gradients(g / n))
            e1id_join_grads = local_batch_data.join(local_grads_e1, lambda v, g: (node2id[v[0]], g))
            e2id_join_grads = local_batch_data.join(local_grads_e2, lambda v, g: (node2id[v[1]], g))
            self.update_model(e1id_join_grads)
            self.update_model(e2id_join_grads)

            local_loss = local_loss / n
            local_batch_num += 1
            total_loss += local_loss
            # LOGGER.info("gradient count: {}".format(e1id_join_grads.count()))

        guest_common_embedding = common_node_instances.mapValues(
            lambda node: self.embedding_[node2id[node]])
        federation.remote(guest_common_embedding,
                          name=self.transfer_variable.guest_common_embedding.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.guest_common_embedding, self.n_iter_, 0),
                          role=consts.ARBITER,
                          idx=0)
        LOGGER.info("Remote the embedding of common node to arbiter!")

        common_embedding = federation.get(
            name=self.transfer_variable.common_embedding.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.common_embedding, self.n_iter_, 0),
            idx=0)
        LOGGER.info("Get the aggregated embedding of common node from arbiter!")
        self.update_common_nodes(common_embedding, common_nodes, node2id)

        total_loss /= local_batch_num
        LOGGER.info("Iter {}, horizontally federated learning loss: {}".format(self.n_iter_, total_loss))

        #################
        # vertically federated learning
        # each iter will get the same batch_data_generator
        LOGGER.info("Enter the vertically federated learning:")
        batch_data_generator = mini_batch_obj.mini_batch_data_generator(result='index')

        batch_index = 0
        for batch_data_index in batch_data_generator:
            LOGGER.info("batch: {}".format(batch_index))

            # batch indices only need to be sent once
            if not is_send_all_batch_index:
                LOGGER.info("remote mini-batch index to Host")
                federation.remote(batch_data_index,
                                  name=self.transfer_variable.batch_data_index.name,
                                  tag=self.transfer_variable.generate_transferid(
                                      self.transfer_variable.batch_data_index, self.n_iter_, batch_index),
                                  role=consts.HOST,
                                  idx=0)
                if batch_index >= mini_batch_obj.batch_nums - 1:
                    is_send_all_batch_index = True  # avoid re-sending in the next iteration

            # Get mini-batch train data; cache it to avoid joining in the next iteration
            if len(index_data_inst_map) < batch_num:
                batch_data_inst = data_instances.join(batch_data_index, lambda data_inst, index: data_inst)
                index_data_inst_map[batch_index] = batch_data_inst
            else:
                batch_data_inst = index_data_inst_map[batch_index]

            # For inductive learning: transform node attributes to node embedding
            # self.transform(batch_data_inst)

            self.guest_forward = self.compute_forward(batch_data_inst, self.embedding_, node2id, batch_index)
            host_forward = federation.get(
                name=self.transfer_variable.host_forward_dict.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_forward_dict, self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get host_forward from host")
            aggregate_forward_res = self.aggregate_forward(host_forward)
            en_aggregate_ee = aggregate_forward_res.mapValues(lambda v: v[0])
            en_aggregate_ee_square = aggregate_forward_res.mapValues(lambda v: v[1])

            # compute [[d]]
            if self.gradient_operator is None:
                self.gradient_operator = HeteroNetworkEmbeddingGradient(self.encrypt_operator)
            fore_gradient = self.gradient_operator.compute_fore_gradient(batch_data_inst, en_aggregate_ee)

            host_gradient = self.gradient_operator.compute_gradient(
                self.guest_forward.mapValues(lambda v: Instance(features=v[1])), fore_gradient)
            federation.remote(host_gradient,
                              name=self.transfer_variable.host_gradient.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.host_gradient, self.n_iter_, batch_index),
                              role=consts.ARBITER,
                              idx=0)
            LOGGER.info("Remote host_gradient to arbiter")

            composed_data_inst = host_forward.join(
                batch_data_inst, lambda hf, d: Instance(features=hf[1], label=d.label))
            guest_gradient, loss = self.gradient_operator.compute_gradient_and_loss(
                composed_data_inst, fore_gradient, en_aggregate_ee, en_aggregate_ee_square)
            federation.remote(guest_gradient,
                              name=self.transfer_variable.guest_gradient.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.guest_gradient, self.n_iter_, batch_index),
                              role=consts.ARBITER,
                              idx=0)
            LOGGER.info("Remote guest_gradient to arbiter")

            optim_guest_gradient = federation.get(
                name=self.transfer_variable.guest_optim_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_optim_gradient, self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get optim_guest_gradient from arbiter")

            # update node embedding
            LOGGER.info("Update node embedding")
            nodeid_join_gradient = batch_data_inst.join(
                optim_guest_gradient, lambda instance, gradient: (node2id[instance.features], gradient))
            self.update_model(nodeid_join_gradient)

            # update local model that transforms attributes to node embedding
            training_info = {'iteration': self.n_iter_, 'batch_index': batch_index}
            self.update_local_model(fore_gradient, batch_data_inst, self.embedding_, **training_info)

            # NOTE: loss needs to be encrypted
            federation.remote(loss,
                              name=self.transfer_variable.loss.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.loss, self.n_iter_, batch_index),
                              role=consts.ARBITER,
                              idx=0)
            LOGGER.info("Remote loss to arbiter")

            # arbiter checks convergence of the loss
            batch_index += 1

            # remove temporary resources
            rubbish_list = [host_forward,
                            aggregate_forward_res,
                            en_aggregate_ee,
                            en_aggregate_ee_square,
                            fore_gradient,
                            self.guest_forward]
            rubbish_clear(rubbish_list)

        ##########
        guest_common_embedding = common_node_instances.mapValues(
            lambda node: self.embedding_[node2id[node]])
        federation.remote(guest_common_embedding,
                          name=self.transfer_variable.guest_common_embedding.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.guest_common_embedding, self.n_iter_, 1),
                          role=consts.ARBITER,
                          idx=0)
        common_embedding = federation.get(
            name=self.transfer_variable.common_embedding.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.common_embedding, self.n_iter_, 1),
            idx=0)
        self.update_common_nodes(common_embedding, common_nodes, node2id)
        ##########

        is_stopped = federation.get(
            name=self.transfer_variable.is_stopped.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.is_stopped, self.n_iter_),
            idx=0)
        LOGGER.info("Get is_stopped flag from arbiter: {}".format(is_stopped))

        self.n_iter_ += 1
        if is_stopped:
            LOGGER.info("Get stop signal from arbiter, model is converged, iter: {}".format(self.n_iter_))
            break

    embedding_table = eggroll.table(name='guest', namespace='node_embedding', partition=10)
    id2node = dict(zip(node2id.values(), node2id.keys()))
    for id, embedding in enumerate(self.embedding_):
        embedding_table.put(id2node[id], embedding)
    embedding_table.save_as(name='guest', namespace='node_embedding', partition=10)

    LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))
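
# --- Editor's sketch (not FATE code) -----------------------------------------
# update_common_nodes above overwrites the local rows for shared nodes with
# the embedding returned by the arbiter. The arbiter's aggregation is not part
# of this file; assuming it simply averages the guest and host versions, the
# consensus step looks like this:
def _average_common_embedding_sketch(guest_rows, host_rows):
    import numpy as np
    # assumption: element-wise mean of both parties' embeddings of common nodes
    return (np.asarray(guest_rows) + np.asarray(host_rows)) / 2.0
# -----------------------------------------------------------------------------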
def fit(self, data_instances, validate_data=None):
    """
    Train linear regression model of role host
    Parameters
    ----------
    data_instances: Table of Instance, input data
    """
    LOGGER.info("Enter hetero_linR host")
    self._abnormal_detection(data_instances)
    self.header = self.get_header(data_instances)
    self.callback_list.on_train_begin(data_instances, validate_data)

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()
    if self.transfer_variable.use_async.get(idx=0):
        LOGGER.debug("set_use_async")
        self.gradient_loss_operator.set_use_async()

    self.batch_generator.initialize_batch_generator(data_instances)
    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

    self.encrypted_calculator = [
        EncryptModeCalculator(self.cipher_operator,
                              self.encrypted_mode_calculator_param.mode,
                              self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    model_shape = self.get_features_shape(data_instances)
    # the host never holds the intercept
    if self.init_param_obj.fit_intercept:
        self.init_param_obj.fit_intercept = False

    if not self.component_properties.is_warm_start:
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept,
                                                raise_overflow_error=False)
    else:
        self.callback_warm_start_init_iter(self.n_iter_)

    while self.n_iter_ < self.max_iter:
        self.callback_list.on_epoch_begin(self.n_iter_)
        LOGGER.info("iter: " + str(self.n_iter_))
        self.optimizer.set_iters(self.n_iter_)
        batch_data_generator = self.batch_generator.generate_batch_data()
        batch_index = 0
        for batch_data in batch_data_generator:
            optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_data, self.encrypted_calculator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)

            self.gradient_loss_operator.compute_loss(self.model_weights, self.optimizer,
                                                     self.n_iter_, batch_index, self.cipher_operator)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("Get is_converged flag from arbiter: {}".format(self.is_converged))

        self.callback_list.on_epoch_end(self.n_iter_)
        self.n_iter_ += 1
        if self.stop_training:
            break

        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
        if self.is_converged:
            break

    self.callback_list.on_train_end()
    self.set_summary(self.get_model_summary())
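
# --- Editor's sketch (not FATE code) -----------------------------------------
# The batch_generator used throughout these fit methods yields the same batch
# split every epoch, so guest and host stay aligned. A minimal index-based
# generator with that property (names here are illustrative, not FATE's):
def _mini_batch_indices_sketch(n_samples, batch_size, seed=42):
    import numpy as np
    rng = np.random.default_rng(seed)    # fixed seed => same split each epoch
    order = rng.permutation(n_samples)
    if batch_size == -1:                 # -1 means full batch, as in the code above
        batch_size = n_samples
    for start in range(0, n_samples, batch_size):
        yield order[start:start + batch_size]
# -----------------------------------------------------------------------------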
def fit(self, data_instances, node2id, local_instances=None, common_nodes=None):
    """
    Train ne model of role host
    Parameters
    ----------
    data_instances: DTable of anchor node, input data
    """
    LOGGER.info("Enter hetero_ne host")
    self.n_node = len(node2id)
    LOGGER.info("Host party has {} nodes".format(self.n_node))

    data_instances = data_instances.mapValues(HeteroNEHost.load_data)
    LOGGER.info("Transform input data to train instance")

    public_key = federation.get(
        name=self.transfer_variable.paillier_pubkey.name,
        tag=self.transfer_variable.generate_transferid(self.transfer_variable.paillier_pubkey),
        idx=0)
    LOGGER.info("Get public key from arbiter: {}".format(public_key))
    self.encrypt_operator.set_public_key(public_key)

    ##############
    # horizontal federated learning
    LOGGER.info("Generate mini-batch for local instances in host")
    mini_batch_obj_local = MiniBatch(local_instances, batch_size=self.batch_size)

    common_node_instances = eggroll.parallelize(
        ((node, node) for node in common_nodes), include_key=True, name='common_nodes')
    ##############

    batch_info = federation.get(
        name=self.transfer_variable.batch_info.name,
        tag=self.transfer_variable.generate_transferid(self.transfer_variable.batch_info),
        idx=0)
    LOGGER.info("Get batch_info from guest: {}".format(batch_info))
    self.batch_size = batch_info['batch_size']
    self.batch_num = batch_info['batch_num']
    if self.batch_size < consts.MIN_BATCH_SIZE and self.batch_size != -1:
        raise ValueError(
            "Batch size got from guest should not be less than {}, except -1; batch_size is {}".format(
                consts.MIN_BATCH_SIZE, self.batch_size))

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.encrypt_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_num)
    ]

    LOGGER.info("Start initialize model.")
    self.embedding_ = self.initializer.init_model((self.n_node, self.dim), self.init_param_obj)

    self.n_iter_ = 0
    index_data_inst_map = {}

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter: {}".format(self.n_iter_))

        #################
        local_batch_data_generator = mini_batch_obj_local.mini_batch_data_generator()
        total_loss = 0
        local_batch_num = 0
        LOGGER.info("Horizontally federated learning")
        for local_batch_data in local_batch_data_generator:
            n = local_batch_data.count()
            LOGGER.info("Local batch data count: {}".format(n))

            E_Y = self.compute_local_embedding(local_batch_data, self.embedding_, node2id)
            local_grads_e1, local_grads_e2, local_loss = self.local_gradient_operator.compute(E_Y, 'E_1')
            local_grads_e1 = local_grads_e1.mapValues(
                lambda g: self.local_optimizer.apply_gradients(g / n))
            local_grads_e2 = local_grads_e2.mapValues(
                lambda g: self.local_optimizer.apply_gradients(g / n))
            e1id_join_grads = local_batch_data.join(local_grads_e1, lambda v, g: (node2id[v[0]], g))
            e2id_join_grads = local_batch_data.join(local_grads_e2, lambda v, g: (node2id[v[1]], g))
            self.update_model(e1id_join_grads)
            self.update_model(e2id_join_grads)

            local_loss = local_loss / n
            local_batch_num += 1
            total_loss += local_loss
            LOGGER.info("gradient count: {}".format(e1id_join_grads.count()))

        host_common_embedding = common_node_instances.mapValues(
            lambda node: self.embedding_[node2id[node]])
        federation.remote(host_common_embedding,
                          name=self.transfer_variable.host_common_embedding.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.host_common_embedding, self.n_iter_, 0),
                          role=consts.ARBITER,
                          idx=0)
        common_embedding = federation.get(
            name=self.transfer_variable.common_embedding.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.common_embedding, self.n_iter_, 0),
            idx=0)
        self.update_common_nodes(common_embedding, common_nodes, node2id)

        total_loss /= local_batch_num
        LOGGER.info("Iter {}, local loss: {}".format(self.n_iter_, total_loss))

        batch_index = 0
        while batch_index < self.batch_num:
            LOGGER.info("batch: {}".format(batch_index))

            # set batch_data
            # cache the batch index to avoid communicating in the next iteration;
            # in the next iteration, the sequence of batches is the same
            if len(self.batch_index_list) < self.batch_num:
                batch_data_index = federation.get(
                    name=self.transfer_variable.batch_data_index.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.batch_data_index, self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get batch_index from Guest")
                self.batch_index_list.append(batch_data_index)
            else:
                batch_data_index = self.batch_index_list[batch_index]

            # Get mini-batch train data; cache it to avoid joining in the next iteration
            if len(index_data_inst_map) < self.batch_num:
                batch_data_inst = batch_data_index.join(data_instances, lambda g, d: d)
                index_data_inst_map[batch_index] = batch_data_inst
            else:
                batch_data_inst = index_data_inst_map[batch_index]

            LOGGER.info("batch_data_inst size: {}".format(batch_data_inst.count()))

            # self.transform(data_inst)

            # compute forward
            host_forward = self.compute_forward(batch_data_inst, self.embedding_, node2id, batch_index)
            federation.remote(host_forward,
                              name=self.transfer_variable.host_forward_dict.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.host_forward_dict, self.n_iter_, batch_index),
                              role=consts.GUEST,
                              idx=0)
            LOGGER.info("Remote host_forward to guest")

            # Get optimized host gradient and update model
            optim_host_gradient = federation.get(
                name=self.transfer_variable.host_optim_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_optim_gradient, self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get optim_host_gradient from arbiter")

            nodeid_join_gradient = batch_data_inst.join(
                optim_host_gradient, lambda instance, gradient: (node2id[instance.features], gradient))
            LOGGER.info("update_model")
            self.update_model(nodeid_join_gradient)

            # update local model
            # training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
            # self.update_local_model(fore_gradient, batch_data_inst, self.coef_, **training_info)

            batch_index += 1

            rubbish_list = [host_forward]
            rubbish_clear(rubbish_list)

        #######
        host_common_embedding = common_node_instances.mapValues(
            lambda node: self.embedding_[node2id[node]])
        federation.remote(host_common_embedding,
                          name=self.transfer_variable.host_common_embedding.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.host_common_embedding, self.n_iter_, 1),
                          role=consts.ARBITER,
                          idx=0)
        common_embedding = federation.get(
            name=self.transfer_variable.common_embedding.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.common_embedding, self.n_iter_, 1),
            idx=0)
        self.update_common_nodes(common_embedding, common_nodes, node2id)
        #######

        is_stopped = federation.get(
            name=self.transfer_variable.is_stopped.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.is_stopped, self.n_iter_),
            idx=0)
        LOGGER.info("Get is_stopped flag from arbiter: {}".format(is_stopped))

        self.n_iter_ += 1
        if is_stopped:
            break

    embedding_table = eggroll.table(name='host', namespace='node_embedding', partition=10)
    id2node = dict(zip(node2id.values(), node2id.keys()))
    for id, embedding in enumerate(self.embedding_):
        embedding_table.put(id2node[id], embedding)
    embedding_table.save_as(name='host', namespace='node_embedding', partition=10)

    LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))
def fit(self, data_instances, validate_data=None):
    """
    Train linR model of role guest
    Parameters
    ----------
    data_instances: Table of Instance, input data
    """
    LOGGER.info("Enter hetero_linR_guest fit")
    self._abnormal_detection(data_instances)
    self.header = self.get_header(data_instances)
    self.callback_list.on_train_begin(data_instances, validate_data)

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    use_async = False
    if with_weight(data_instances):
        if self.model_param.early_stop == "diff":
            LOGGER.warning("input data with weight, please use 'weight_diff' for 'early_stop'.")
        data_instances = scale_sample_weight(data_instances)
        self.gradient_loss_operator.set_use_sample_weight()
        LOGGER.debug("instance weight scaled; use weighted gradient loss operator")
    elif len(self.component_properties.host_party_idlist) == 1:
        LOGGER.debug("set_use_async")
        self.gradient_loss_operator.set_use_async()
        use_async = True
    self.transfer_variable.use_async.remote(use_async)

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

    self.encrypted_calculator = [
        EncryptModeCalculator(self.cipher_operator,
                              self.encrypted_mode_calculator_param.mode,
                              self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept: {}".format(self.init_param_obj.fit_intercept))

    model_shape = self.get_features_shape(data_instances)
    if not self.component_properties.is_warm_start:
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept,
                                                raise_overflow_error=False)
    else:
        self.callback_warm_start_init_iter(self.n_iter_)

    while self.n_iter_ < self.max_iter:
        self.callback_list.on_epoch_begin(self.n_iter_)
        LOGGER.info("iter: {}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # Start gradient procedure
            optim_guest_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_data,
                self.encrypted_calculator,
                self.model_weights,
                self.optimizer,
                self.n_iter_,
                batch_index)
            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(batch_data, self.n_iter_, batch_index, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_guest_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        self.callback_list.on_epoch_end(self.n_iter_)
        self.n_iter_ += 1

        if self.stop_training:
            break
        if self.is_converged:
            break

    self.callback_list.on_train_end()
    self.set_summary(self.get_model_summary())
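
# --- Editor's sketch (not FATE code) -----------------------------------------
# scale_sample_weight above rescales instance weights before weighted training.
# One common convention (an assumption here, since the helper's body is not
# shown in this file) is to normalize weights so they sum to the number of
# samples, keeping the effective learning rate comparable to unweighted runs:
def _scale_sample_weight_sketch(weights):
    import numpy as np
    weights = np.asarray(weights, dtype=float)
    return weights * (len(weights) / weights.sum())
# -----------------------------------------------------------------------------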
def fit(self, data_instances, node2id):
    """
    Train ne model of role host
    Parameters
    ----------
    data_instances: DTable of anchor node, input data
    """
    LOGGER.info("Enter hetero_ne host")
    self.n_node = len(node2id)
    LOGGER.info("Host party has {} nodes".format(self.n_node))

    data_instances = data_instances.mapValues(HeteroNEHost.load_data)
    LOGGER.info("Transform input data to train instance")

    public_key = federation.get(
        name=self.transfer_variable.paillier_pubkey.name,
        tag=self.transfer_variable.generate_transferid(self.transfer_variable.paillier_pubkey),
        idx=0)
    LOGGER.info("Get public key from arbiter: {}".format(public_key))
    self.encrypt_operator.set_public_key(public_key)

    batch_info = federation.get(
        name=self.transfer_variable.batch_info.name,
        tag=self.transfer_variable.generate_transferid(self.transfer_variable.batch_info),
        idx=0)
    LOGGER.info("Get batch_info from guest: {}".format(batch_info))
    self.batch_size = batch_info['batch_size']
    self.batch_num = batch_info['batch_num']
    if self.batch_size < consts.MIN_BATCH_SIZE and self.batch_size != -1:
        raise ValueError(
            "Batch size got from guest should not be less than {}, except -1; batch_size is {}".format(
                consts.MIN_BATCH_SIZE, self.batch_size))

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.encrypt_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_num)
    ]

    LOGGER.info("Start initialize model.")
    self.embedding_ = self.initializer.init_model((self.n_node, self.dim), self.init_param_obj)

    self.n_iter_ = 0
    index_data_inst_map = {}

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter: {}".format(self.n_iter_))
        batch_index = 0
        while batch_index < self.batch_num:
            LOGGER.info("batch: {}".format(batch_index))

            # set batch_data
            # cache the batch index to avoid communicating in the next iteration;
            # in the next iteration, the sequence of batches is the same
            if len(self.batch_index_list) < self.batch_num:
                batch_data_index = federation.get(
                    name=self.transfer_variable.batch_data_index.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.batch_data_index, self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get batch_index from Guest")
                self.batch_index_list.append(batch_data_index)
            else:
                batch_data_index = self.batch_index_list[batch_index]

            # Get mini-batch train data; cache it to avoid joining in the next iteration
            if len(index_data_inst_map) < self.batch_num:
                batch_data_inst = batch_data_index.join(data_instances, lambda g, d: d)
                index_data_inst_map[batch_index] = batch_data_inst
            else:
                batch_data_inst = index_data_inst_map[batch_index]

            LOGGER.info("batch_data_inst size: {}".format(batch_data_inst.count()))

            # self.transform(data_inst)

            # compute forward
            host_forward = self.compute_forward(batch_data_inst, self.embedding_, node2id, batch_index)
            federation.remote(host_forward,
                              name=self.transfer_variable.host_forward_dict.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.host_forward_dict, self.n_iter_, batch_index),
                              role=consts.GUEST,
                              idx=0)
            LOGGER.info("Remote host_forward to guest")

            # Get optimized host gradient and update model
            optim_host_gradient = federation.get(
                name=self.transfer_variable.host_optim_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_optim_gradient, self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get optim_host_gradient from arbiter")

            nodeid_join_gradient = batch_data_inst.join(
                optim_host_gradient, lambda instance, gradient: (node2id[instance.features], gradient))
            LOGGER.info("update_model")
            self.update_model(nodeid_join_gradient)

            # update local model
            # training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
            # self.update_local_model(fore_gradient, batch_data_inst, self.coef_, **training_info)

            batch_index += 1

            rubbish_list = [host_forward]
            rubbish_clear(rubbish_list)

        is_stopped = federation.get(
            name=self.transfer_variable.is_stopped.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.is_stopped, self.n_iter_),
            idx=0)
        LOGGER.info("Get is_stopped flag from arbiter: {}".format(is_stopped))

        self.n_iter_ += 1
        if is_stopped:
            break

    embedding_table = eggroll.table(name='host', namespace='node_embedding', partition=10)
    id2node = dict(zip(node2id.values(), node2id.keys()))
    for id, embedding in enumerate(self.embedding_):
        embedding_table.put(id2node[id], embedding)
    embedding_table.save_as(name='host', namespace='node_embedding', partition=10)

    LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))
def fit_binary(self, data_instances, validate_data):
    self._abnormal_detection(data_instances)
    validation_strategy = self.init_validation_strategy(data_instances, validate_data)
    LOGGER.debug(f"MODEL_STEP Start fit_binary, data count: {data_instances.count()}")

    self.header = self.get_header(data_instances)
    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    self.batch_generator.initialize_batch_generator(data_instances)

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.cipher_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)
    ]

    LOGGER.info("Start initialize model.")
    model_shape = self.get_features_shape(data_instances)

    # intercept is initialized within FactorizationMachineWeights.
    # Skip initializer's intercept part.
    fit_intercept = False
    if self.init_param_obj.fit_intercept:
        fit_intercept = True
        self.init_param_obj.fit_intercept = False

    w_ = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    embed_ = np.random.normal(scale=1 / np.sqrt(self.init_param_obj.embed_size),
                              size=(model_shape, self.init_param_obj.embed_size))
    self.model_weights = \
        FactorizationMachineWeights(w_, embed_, fit_intercept=fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter: " + str(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data()
        batch_index = 0
        self.optimizer.set_iters(self.n_iter_)
        for batch_data in batch_data_generator:
            LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_data.count()}")

            optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_data, self.model_weights, self.encrypted_calculator,
                self.optimizer, self.n_iter_, batch_index)
            LOGGER.debug('optim_host_gradient: {}'.format(optim_host_gradient))

            self.gradient_loss_operator.compute_loss(
                self.model_weights, self.optimizer, self.n_iter_, batch_index)

            # clip gradient to [-clip_gradient, clip_gradient] if configured
            if self.model_param.clip_gradient and self.model_param.clip_gradient > 0:
                optim_host_gradient = np.maximum(optim_host_gradient, -self.model_param.clip_gradient)
                optim_host_gradient = np.minimum(optim_host_gradient, self.model_param.clip_gradient)

            _model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
            self.model_weights.update(_model_weights)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("Get is_converged flag from arbiter: {}".format(self.is_converged))

        validation_strategy.validate(self, self.n_iter_)
        self.n_iter_ += 1
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
        if self.is_converged:
            break

    LOGGER.debug("Final fm weights: {}".format(self.model_weights.unboxed))
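
# --- Editor's sketch (not FATE code) -----------------------------------------
# The paired np.maximum / np.minimum calls above implement element-wise
# gradient clipping; np.clip expresses the same operation in a single call:
def _clip_gradient_sketch(gradient, clip_value):
    import numpy as np
    return np.clip(gradient, -clip_value, clip_value)
# -----------------------------------------------------------------------------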
def fit(self, data_instances, validate_data=None):
    """
    Train poisson model of role guest
    Parameters
    ----------
    data_instances: Table of Instance, input data
    """
    LOGGER.info("Enter hetero_poisson_guest fit")
    self._abnormal_detection(data_instances)
    self.header = copy.deepcopy(self.get_header(data_instances))
    self.callback_list.on_train_begin(data_instances, validate_data)

    if with_weight(data_instances):
        LOGGER.warning("input data with weight. Poisson regression does not support weighted training.")

    self.exposure_index = self.get_exposure_index(self.header, self.exposure_colname)
    exposure_index = self.exposure_index
    if exposure_index > -1:
        self.header.pop(exposure_index)
        LOGGER.info("Guest provides exposure value.")
    exposure = data_instances.mapValues(
        lambda v: HeteroPoissonBase.load_exposure(v, exposure_index))
    data_instances = data_instances.mapValues(
        lambda v: HeteroPoissonBase.load_instance(v, exposure_index))

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.cipher_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)
    ]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept: {}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    if not self.component_properties.is_warm_start:
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.fit_intercept, raise_overflow_error=False)
    else:
        self.callback_warm_start_init_iter(self.n_iter_)

    while self.n_iter_ < self.max_iter:
        self.callback_list.on_epoch_begin(self.n_iter_)
        LOGGER.info("iter: {}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # compute offset of this batch
            batch_offset = exposure.join(batch_data, lambda ei, d: HeteroPoissonBase.safe_log(ei))

            # Start gradient procedure
            optimized_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_data,
                self.encrypted_calculator,
                self.model_weights,
                self.optimizer,
                self.n_iter_,
                batch_index,
                batch_offset)
            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(batch_data, self.model_weights,
                                                     self.n_iter_, batch_index, batch_offset, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optimized_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        self.callback_list.on_epoch_end(self.n_iter_)
        self.n_iter_ += 1

        if self.stop_training:
            break
        if self.is_converged:
            break

    self.callback_list.on_train_end()
    self.set_summary(self.get_model_summary())
def fit_binary(self, data_instances, validate_data=None):
    LOGGER.info("Starting hetero_sshe_logistic_regression fit")
    self.callback_list.on_train_begin(data_instances, validate_data)

    model_shape = self.get_features_shape(data_instances)
    instances_count = data_instances.count()

    if not self.component_properties.is_warm_start:
        w = self._init_weights(model_shape)
        self.model_weights = LinearModelWeights(
            l=w, fit_intercept=self.model_param.init_param.fit_intercept)
        last_models = copy.deepcopy(self.model_weights)
    else:
        last_models = copy.deepcopy(self.model_weights)
        w = last_models.unboxed
        self.callback_warm_start_init_iter(self.n_iter_)

    self.batch_generator.initialize_batch_generator(data_instances, batch_size=self.batch_size)

    with SPDZ(
            "sshe_lr",
            local_party=self.local_party,
            all_parties=self.parties,
            q_field=self.q_field,
            use_mix_rand=self.model_param.use_mix_rand,
    ) as spdz:
        spdz.set_flowid(self.flowid)
        self.secure_matrix_obj.set_flowid(self.flowid)

        if self.role == consts.GUEST:
            self.labels = data_instances.mapValues(lambda x: np.array([x.label], dtype=int))

        w_self, w_remote = self.share_model(w, suffix="init")
        last_w_self, last_w_remote = w_self, w_remote
        LOGGER.debug(f"first_w_self shape: {w_self.shape}, w_remote_shape: {w_remote.shape}")

        batch_data_generator = self.batch_generator.generate_batch_data()

        self.cipher_tool = []
        encoded_batch_data = []
        for batch_data in batch_data_generator:
            if self.fit_intercept:
                batch_features = batch_data.mapValues(lambda x: np.hstack((x.features, 1.0)))
            else:
                batch_features = batch_data.mapValues(lambda x: x.features)
            self.batch_num.append(batch_data.count())

            encoded_batch_data.append(
                fixedpoint_table.FixedPointTensor(
                    self.fixedpoint_encoder.encode(batch_features),
                    q_field=self.fixedpoint_encoder.n,
                    endec=self.fixedpoint_encoder))

            self.cipher_tool.append(
                EncryptModeCalculator(self.cipher,
                                      self.encrypted_mode_calculator_param.mode,
                                      self.encrypted_mode_calculator_param.re_encrypted_rate))

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)
            LOGGER.info(f"start to n_iter: {self.n_iter_}")

            loss_list = []

            self.optimizer.set_iters(self.n_iter_)
            if not self.reveal_every_iter:
                self.self_optimizer.set_iters(self.n_iter_)
                self.remote_optimizer.set_iters(self.n_iter_)

            for batch_idx, batch_data in enumerate(encoded_batch_data):
                current_suffix = (str(self.n_iter_), str(batch_idx))

                if self.reveal_every_iter:
                    y = self.forward(weights=self.model_weights,
                                     features=batch_data,
                                     suffix=current_suffix,
                                     cipher=self.cipher_tool[batch_idx])
                else:
                    y = self.forward(weights=(w_self, w_remote),
                                     features=batch_data,
                                     suffix=current_suffix,
                                     cipher=self.cipher_tool[batch_idx])

                if self.role == consts.GUEST:
                    error = y - self.labels
                    self_g, remote_g = self.backward(error=error,
                                                     features=batch_data,
                                                     suffix=current_suffix,
                                                     cipher=self.cipher_tool[batch_idx])
                else:
                    self_g, remote_g = self.backward(error=y,
                                                     features=batch_data,
                                                     suffix=current_suffix,
                                                     cipher=self.cipher_tool[batch_idx])

                # loss computing
                suffix = ("loss",) + current_suffix
                if self.reveal_every_iter:
                    batch_loss = self.compute_loss(weights=self.model_weights,
                                                   suffix=suffix,
                                                   cipher=self.cipher_tool[batch_idx])
                else:
                    batch_loss = self.compute_loss(weights=(w_self, w_remote),
                                                   suffix=suffix,
                                                   cipher=self.cipher_tool[batch_idx])

                if batch_loss is not None:
                    batch_loss = batch_loss * self.batch_num[batch_idx]
                    loss_list.append(batch_loss)

                if self.reveal_every_iter:
                    new_g = self.reveal_models(self_g, remote_g, suffix=current_suffix)
                    if new_g is not None:
                        self.model_weights = self.optimizer.update_model(
                            self.model_weights, new_g, has_applied=False)
                    else:
                        self.model_weights = LinearModelWeights(
                            l=np.zeros(self_g.shape),
                            fit_intercept=self.model_param.init_param.fit_intercept)
                else:
                    if self.optimizer.penalty == consts.L2_PENALTY:
                        self_g = self_g + self.self_optimizer.alpha * w_self
                        remote_g = remote_g + self.remote_optimizer.alpha * w_remote

                    self_g = self.self_optimizer.apply_gradients(self_g)
                    remote_g = self.remote_optimizer.apply_gradients(remote_g)

                    w_self -= self_g
                    w_remote -= remote_g

                    LOGGER.debug(f"w_self shape: {w_self.shape}, w_remote_shape: {w_remote.shape}")

            if self.role == consts.GUEST:
                loss = np.sum(loss_list) / instances_count
                self.loss_history.append(loss)
                if self.need_call_back_loss:
                    self.callback_loss(self.n_iter_, loss)
            else:
                loss = None

            if self.converge_func_name in ["diff", "abs"]:
                self.is_converged = self.check_converge_by_loss(loss, suffix=(str(self.n_iter_),))
            elif self.converge_func_name == "weight_diff":
                if self.reveal_every_iter:
                    self.is_converged = self.check_converge_by_weights(
                        last_w=last_models.unboxed,
                        new_w=self.model_weights.unboxed,
                        suffix=(str(self.n_iter_),))
                    last_models = copy.deepcopy(self.model_weights)
                else:
                    self.is_converged = self.check_converge_by_weights(
                        last_w=(last_w_self, last_w_remote),
                        new_w=(w_self, w_remote),
                        suffix=(str(self.n_iter_),))
                    last_w_self, last_w_remote = copy.deepcopy(w_self), copy.deepcopy(w_remote)
            else:
                raise ValueError(f"Cannot recognize early_stop function: {self.converge_func_name}")

            LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1

            if self.stop_training:
                break
            if self.is_converged:
                break

        # finally, reconstruct the model weights if they were kept as shares
        if not self.reveal_every_iter:
            new_w = self.reveal_models(w_self, w_remote, suffix=("final",))
            if new_w is not None:
                self.model_weights = LinearModelWeights(
                    l=new_w,
                    fit_intercept=self.model_param.init_param.fit_intercept)

    LOGGER.debug(f"loss_history: {self.loss_history}")
    self.set_summary(self.get_model_summary())
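
# --- Editor's sketch (not FATE code) -----------------------------------------
# share_model / reveal_models above split the weight vector between the two
# parties and only reconstruct it at the end (or every iteration). The core
# idea is additive secret sharing; a local, unencrypted illustration over the
# reals (the real protocol works over a finite field inside the SPDZ context):
def _additive_share_sketch(w, rng=None):
    import numpy as np
    rng = rng or np.random.default_rng()
    w_self = rng.uniform(-1, 1, size=w.shape)   # random mask kept by one party
    w_remote = w - w_self                       # the other party's share
    assert np.allclose(w_self + w_remote, w)    # revealing = summing the shares
    return w_self, w_remote
# -----------------------------------------------------------------------------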
def fit(self, data_instances, validate_data=None):
    """
    Train linR model of role guest
    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_linR_guest fit")
    self._abnormal_detection(data_instances)
    self.header = self.get_header(data_instances)
    self.validation_strategy = self.init_validation_strategy(
        data_instances, validate_data)

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(
        data_instances, self.batch_size)
    self.gradient_loss_operator.set_total_batch_nums(
        self.batch_generator.batch_nums)

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.cipher_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)
    ]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    w = self.initializer.init_model(model_shape,
                                    init_params=self.init_param_obj)
    self.model_weights = LinearModelWeights(
        w, fit_intercept=self.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:{}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
            batch_feat_inst = self.transform(batch_data)

            # Start gradient procedure
            optim_guest_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst,
                self.encrypted_calculator,
                self.model_weights,
                self.optimizer,
                self.n_iter_,
                batch_index)

            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(
                data_instances, self.n_iter_, batch_index, loss_norm)

            self.model_weights = self.optimizer.update_model(
                self.model_weights, optim_guest_gradient)
            batch_index += 1
            # LOGGER.debug(
            #     "model_weights, iters: {}, update_model: {}".format(self.n_iter_, self.model_weights.unboxed))

        self.is_converged = self.converge_procedure.sync_converge_info(
            suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(
            self.n_iter_, self.is_converged))
        # LOGGER.debug("model weights is {}".format(self.model_weights.coef_))

        if self.validation_strategy:
            LOGGER.debug('LinR guest running validation')
            self.validation_strategy.validate(self, self.n_iter_)
            if self.validation_strategy.need_stop():
                LOGGER.debug('early stopping triggered')
                break

        self.n_iter_ += 1
        if self.is_converged:
            break

    if self.validation_strategy and self.validation_strategy.has_saved_best_model():
        self.load_model(self.validation_strategy.cur_best_model)
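
# --- Illustrative sketch (hypothetical ToySGDOptimizer): the kind of update
# rule optimizer.update_model() applies above -- SGD with a learning rate
# decayed per iteration via set_iters(), plus an L2 term folded into the
# gradient. The decay schedule shown is an assumption; FATE's optimizers
# also offer momentum/Adam variants and separate intercept handling.
import numpy as np


class ToySGDOptimizer:
    def __init__(self, learning_rate=0.15, alpha=0.01, decay=1.0):
        self.learning_rate = learning_rate
        self.alpha = alpha  # L2 regularization strength
        self.decay = decay
        self.iters = 0

    def set_iters(self, n_iter):
        self.iters = n_iter

    def update_model(self, weights, gradient):
        # lr_t = lr / (1 + decay * t): a common (assumed) decay schedule
        lr_t = self.learning_rate / (1 + self.decay * self.iters)
        return weights - lr_t * (gradient + self.alpha * weights)


opt = ToySGDOptimizer()
opt.set_iters(0)
w_new = opt.update_model(np.zeros(3), np.array([0.2, -0.1, 0.05]))  # one step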
def fit(self, data_instances):
    """
    Train lr model of role host
    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_lr host")
    self._abnormal_detection(data_instances)

    self.header = self.get_header(data_instances)
    public_key = federation.get(
        name=self.transfer_variable.paillier_pubkey.name,
        tag=self.transfer_variable.generate_transferid(
            self.transfer_variable.paillier_pubkey),
        idx=0)
    LOGGER.info("Get public_key from arbiter:{}".format(public_key))
    self.encrypt_operator.set_public_key(public_key)

    batch_info = federation.get(
        name=self.transfer_variable.batch_info.name,
        tag=self.transfer_variable.generate_transferid(
            self.transfer_variable.batch_info),
        idx=0)
    LOGGER.info("Get batch_info from guest:" + str(batch_info))
    self.batch_size = batch_info["batch_size"]
    self.batch_num = batch_info["batch_num"]
    if self.batch_size < consts.MIN_BATCH_SIZE and self.batch_size != -1:
        raise ValueError(
            "Batch size received from guest should not be less than {}, unless it is -1; got {}"
            .format(consts.MIN_BATCH_SIZE, self.batch_size))

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.encrypt_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_num)
    ]

    LOGGER.info("Start initialize model.")
    model_shape = self.get_features_shape(data_instances)
    if self.init_param_obj.fit_intercept:
        self.init_param_obj.fit_intercept = False
    if self.fit_intercept:
        self.fit_intercept = False
    self.coef_ = self.initializer.init_model(
        model_shape, init_params=self.init_param_obj)

    self.n_iter_ = 0
    index_data_inst_map = {}

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:" + str(self.n_iter_))
        batch_index = 0
        while batch_index < self.batch_num:
            LOGGER.info("batch:{}".format(batch_index))
            # set batch_data
            if len(self.batch_index_list) < self.batch_num:
                batch_data_index = federation.get(
                    name=self.transfer_variable.batch_data_index.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.batch_data_index,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get batch_index from Guest")
                self.batch_index_list.append(batch_data_index)
            else:
                batch_data_index = self.batch_index_list[batch_index]

            # Get mini-batch train data
            if len(index_data_inst_map) < self.batch_num:
                batch_data_inst = batch_data_index.join(
                    data_instances, lambda g, d: d)
                index_data_inst_map[batch_index] = batch_data_inst
            else:
                batch_data_inst = index_data_inst_map[batch_index]

            LOGGER.info("batch_data_inst size:{}".format(
                batch_data_inst.count()))

            # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
            batch_feat_inst = self.transform(batch_data_inst)

            # compute forward
            host_forward = self.compute_forward(batch_feat_inst, self.coef_,
                                                self.intercept_, batch_index)
            federation.remote(
                host_forward,
                name=self.transfer_variable.host_forward_dict.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_forward_dict,
                    self.n_iter_, batch_index),
                role=consts.GUEST,
                idx=0)
            LOGGER.info("Remote host_forward to guest")

            # compute host gradient
            fore_gradient = federation.get(
                name=self.transfer_variable.fore_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.fore_gradient,
                    self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get fore_gradient from guest")

            if self.gradient_operator is None:
                self.gradient_operator = HeteroLogisticGradient(
                    self.encrypt_operator)
            host_gradient = self.gradient_operator.compute_gradient(
                batch_feat_inst, fore_gradient, fit_intercept=False)

            # regularization if necessary
            if self.updater is not None:
                loss_regular = self.updater.loss_norm(self.coef_)
                en_loss_regular = self.encrypt_operator.encrypt(loss_regular)
                federation.remote(
                    en_loss_regular,
                    name=self.transfer_variable.host_loss_regular.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_loss_regular,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote host_loss_regular to guest")

            federation.remote(
                host_gradient,
                name=self.transfer_variable.host_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_gradient,
                    self.n_iter_, batch_index),
                role=consts.ARBITER,
                idx=0)
            LOGGER.info("Remote host_gradient to arbiter")

            # Get optimized host gradient and update model
            optim_host_gradient = federation.get(
                name=self.transfer_variable.host_optim_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_optim_gradient,
                    self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get optim_host_gradient from arbiter")

            LOGGER.info("update_model")
            self.update_model(optim_host_gradient)

            # update local model that transforms features of raw input 'batch_data_inst'
            training_info = {
                "iteration": self.n_iter_,
                "batch_index": batch_index
            }
            self.update_local_model(fore_gradient, batch_data_inst,
                                    self.coef_, **training_info)

            batch_index += 1

            # temporary resource recovery; will be removed in the future
            rubbish_list = [host_forward, fore_gradient]
            data_overview.rubbish_clear(rubbish_list)

        is_stopped = federation.get(
            name=self.transfer_variable.is_stopped.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.is_stopped,
                self.n_iter_, batch_index),
            idx=0)
        LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

        self.n_iter_ += 1
        if is_stopped:
            LOGGER.info(
                "Get stop signal from arbiter, model is converged, iter:{}"
                .format(self.n_iter_))
            break

    LOGGER.info("Reached max iter {}, training finished!".format(self.max_iter))
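
# --- Illustrative sketch: the Paillier additive homomorphism that lets the
# host remote an encrypted forward (w.x) to the guest, who aggregates it
# without seeing the plaintext, while only the key holder (the arbiter in
# hetero-LR) can decrypt. Uses the python-paillier ("phe") package; key
# size and values are arbitrary. FATE wraps the same primitive in its
# encrypt operators and EncryptModeCalculator.
import numpy as np
from phe import paillier

public_key, private_key = paillier.generate_paillier_keypair(n_length=1024)

w_host = np.array([0.5, -0.2])
x_host = np.array([1.0, 2.0])
host_forward = public_key.encrypt(float(np.dot(w_host, x_host)))  # E(0.1)

# Guest adds its own plaintext forward homomorphically:
# E(wx_host) + wx_guest == E(wx_host + wx_guest)
en_aggregate = host_forward + 0.7

# Only the private-key holder can recover the aggregated forward.
assert abs(private_key.decrypt(en_aggregate) - 0.8) < 1e-9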
def fit(self, data_instances, validate_data=None):
    """
    Train poisson regression model of role host
    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_poisson host")
    self._abnormal_detection(data_instances)
    self.validation_strategy = self.init_validation_strategy(
        data_instances, validate_data)

    self.header = self.get_header(data_instances)
    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    self.batch_generator.initialize_batch_generator(data_instances)

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.cipher_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)
    ]

    LOGGER.info("Start initialize model.")
    model_shape = self.get_features_shape(data_instances)
    if self.init_param_obj.fit_intercept:
        self.init_param_obj.fit_intercept = False
    w = self.initializer.init_model(model_shape,
                                    init_params=self.init_param_obj)
    self.model_weights = LinearModelWeights(
        w, fit_intercept=self.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:" + str(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            batch_feat_inst = self.transform(batch_data)

            optim_host_gradient, _ = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.encrypted_calculator,
                self.model_weights, self.optimizer,
                self.n_iter_, batch_index)

            self.gradient_loss_operator.compute_loss(
                batch_feat_inst, self.model_weights,
                self.encrypted_calculator, self.optimizer,
                self.n_iter_, batch_index, self.cipher_operator)

            self.model_weights = self.optimizer.update_model(
                self.model_weights, optim_host_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(
            suffix=(self.n_iter_,))
        LOGGER.info("Get is_converged flag from arbiter:{}".format(
            self.is_converged))

        if self.validation_strategy:
            LOGGER.debug('Poisson host running validation')
            self.validation_strategy.validate(self, self.n_iter_)
            if self.validation_strategy.need_stop():
                LOGGER.debug('early stopping triggered')
                break

        self.n_iter_ += 1
        LOGGER.info("iter: {}, is_converged: {}".format(
            self.n_iter_, self.is_converged))
        if self.is_converged:
            break

    if not self.is_converged:
        LOGGER.info("Reached max iter {}, training finished!".format(
            self.max_iter))

    if self.validation_strategy and self.validation_strategy.has_saved_best_model():
        self.load_model(self.validation_strategy.cur_best_model)

    self.set_summary(self.get_model_summary())
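
# --- Illustrative sketch (hypothetical ToyConvergeFunction): the "diff"/
# "abs" convergence criteria that converge_procedure.sync_converge_info()
# synchronizes from the arbiter in the trainers above. "diff" compares
# successive losses against eps; "abs" compares the loss value itself.
class ToyConvergeFunction:
    def __init__(self, eps=1e-4, criterion="diff"):
        self.eps = eps
        self.criterion = criterion
        self.pre_loss = None

    def is_converge(self, loss):
        if self.criterion == "abs":
            return abs(loss) < self.eps
        # "diff": converged once the loss moves less than eps between iters
        converged = (self.pre_loss is not None
                     and abs(self.pre_loss - loss) < self.eps)
        self.pre_loss = loss
        return converged


check = ToyConvergeFunction(eps=1e-4, criterion="diff")
assert check.is_converge(0.693) is False    # first iter: no baseline yet
assert check.is_converge(0.69295) is True   # |delta loss| < eps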
def fit_binary(self, data_instances, validate_data):
    self._abnormal_detection(data_instances)
    self.check_abnormal_values(data_instances)
    self.check_abnormal_values(validate_data)
    # self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)
    self.callback_list.on_train_begin(data_instances, validate_data)
    LOGGER.debug(
        f"MODEL_STEP Start fit_binary, data count: {data_instances.count()}")

    self.header = self.get_header(data_instances)
    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    if self.transfer_variable.use_async.get(idx=0):
        LOGGER.debug("set_use_async")
        self.gradient_loss_operator.set_use_async()

    self.batch_generator.initialize_batch_generator(data_instances)
    self.gradient_loss_operator.set_total_batch_nums(
        self.batch_generator.batch_nums)

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.cipher_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)
    ]

    LOGGER.info("Start initialize model.")
    model_shape = self.get_features_shape(data_instances)
    if self.init_param_obj.fit_intercept:
        self.init_param_obj.fit_intercept = False

    if not self.component_properties.is_warm_start:
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.init_param_obj.fit_intercept)
    else:
        self.callback_warm_start_init_iter(self.n_iter_)

    while self.n_iter_ < self.max_iter:
        self.callback_list.on_epoch_begin(self.n_iter_)
        LOGGER.info("iter:" + str(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data()
        batch_index = 0
        self.optimizer.set_iters(self.n_iter_)

        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
            batch_feat_inst = batch_data
            # LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}")

            optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.encrypted_calculator,
                self.model_weights, self.optimizer,
                self.n_iter_, batch_index)
            # LOGGER.debug('optim_host_gradient: {}'.format(optim_host_gradient))

            self.gradient_loss_operator.compute_loss(
                self.model_weights, self.optimizer,
                self.n_iter_, batch_index, self.cipher_operator)

            self.model_weights = self.optimizer.update_model(
                self.model_weights, optim_host_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(
            suffix=(self.n_iter_,))
        LOGGER.info("Get is_converged flag from arbiter:{}".format(
            self.is_converged))
        LOGGER.info("iter: {}, is_converged: {}".format(
            self.n_iter_, self.is_converged))
        LOGGER.debug(f"flowid: {self.flowid}, step_index: {self.n_iter_}")

        self.callback_list.on_epoch_end(self.n_iter_)
        self.n_iter_ += 1

        if self.stop_training:
            break
        if self.is_converged:
            break

    self.callback_list.on_train_end()
    self.set_summary(self.get_model_summary())
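
# --- Illustrative sketch (hypothetical ToyBatchGenerator): the mini-batch
# pattern shared by every fit() above -- fix the batches once at
# initialize_batch_generator() time, then re-yield the same batches each
# epoch so guest and host iterate over index-aligned data. FATE's real
# generator works on distributed DTables keyed by sample id.
class ToyBatchGenerator:
    def __init__(self, data, batch_size):
        self.batches = [data[i:i + batch_size]
                        for i in range(0, len(data), batch_size)]
        self.batch_nums = len(self.batches)

    def generate_batch_data(self):
        # each epoch re-yields identical batches, mirroring the comment
        # "each iter will get the same batch_data_generator" above
        yield from self.batches


gen = ToyBatchGenerator(list(range(10)), batch_size=4)
assert gen.batch_nums == 3
assert [len(b) for b in gen.generate_batch_data()] == [4, 4, 2]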