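# -- Imports (not shown in this fragment) -------------------------------------------------
# The classes below rely on the usual FATE-style imports. The module paths given here are
# assumptions added for readability; the actual code base may organize them differently.
import functools

import numpy as np

from arch.api import federation                                    # assumed path
from arch.api.utils import log_utils                                # assumed path
from federatedml.evaluation import Evaluation                       # assumed path
from federatedml.logistic_regression.base_logistic_regression import BaseLogisticRegression  # assumed path
from federatedml.model_selection import MiniBatch                   # assumed path
from federatedml.optim import activation, Initializer, Optimizer    # assumed path
from federatedml.optim.convergence import DiffConverge              # assumed path
from federatedml.optim.federated_aggregator import HomoFederatedAggregator       # assumed path
from federatedml.optim.gradient import LogisticGradient, TaylorLogisticGradient  # assumed path
from federatedml.param.param import LogisticParam                   # assumed path
from federatedml.secureprotol.encrypt import FakeEncrypt, PaillierEncrypt        # assumed path
from federatedml.statistic import data_overview                     # assumed path
from federatedml.util import consts                                 # assumed path
from federatedml.util.transfer_variable import HomoLRTransferVariable            # assumed path

LOGGER = log_utils.getLogger()                                       # assumed logger setup

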
class HomoLRArbiter(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRArbiter, self).__init__(params)
        self.re_encrypt_batches = params.re_encrypt_batches
        self.aggregator = HomoFederatedAggregator()
        if params.converge_func == 'diff':
            self.converge_func = DiffConverge(eps=self.eps)
        else:
            raise RuntimeWarning("Cannot recognize converge_func, must be 'diff'.")

        self.transfer_variable = HomoLRTransferVariable()
        # Assumed fix: the original assigned the whole params object here.
        self.predict_threshold = params.predict_param.threshold
        self.encrypt_param = params.encrypt_param
        self.classes_ = [0, 1]

        # To be initialized
        self.host_use_encryption = []
        self.re_encrypt_times = []  # Record the times needed for each host
        self.curt_re_encrypt_times = []
        self.host_encrypter = []
        self.party_weights = []  # The first is the guest weight, the rest are host weights
        self.has_synchronized_encryption = False
        self.loss_history = []
        self.is_converged = False
        self.header = []

    def fit(self, data=None):
        LOGGER.debug("self.has_synchronized_encryption: {}".format(self.has_synchronized_encryption))
        self.__init_parameters()
        LOGGER.debug("self.has_synchronized_encryption: {}".format(self.has_synchronized_encryption))
        LOGGER.info("Finish initializing parameters")

        for iter_num in range(self.max_iter):
            # Re-encrypt host models
            self.__re_encrypt(iter_num)

            # Part 3: Aggregate the models received from each party
            final_model = self.aggregator.aggregate_model(
                transfer_variable=self.transfer_variable,
                iter_num=iter_num,
                party_weights=self.party_weights,
                host_encrypter=self.host_encrypter)
            total_loss = self.aggregator.aggregate_loss(
                transfer_variable=self.transfer_variable,
                iter_num=iter_num,
                party_weights=self.party_weights,
                host_use_encryption=self.host_use_encryption)
            self.loss_history.append(total_loss)
            LOGGER.info("Iter: {}, loss: {}".format(iter_num, total_loss))

            # Send model
            final_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            federation.remote(final_model,
                              name=self.transfer_variable.final_model.name,
                              tag=final_model_id,
                              role=consts.GUEST,
                              idx=0)
            for idx, encrypter in enumerate(self.host_encrypter):
                encrypted_model = encrypter.encrypt_list(final_model)
                federation.remote(encrypted_model,
                                  name=self.transfer_variable.final_model.name,
                                  tag=final_model_id,
                                  role=consts.HOST,
                                  idx=idx)

            # Send converge flag
            converge_flag = self.converge_func.is_converge(total_loss)
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            federation.remote(converge_flag,
                              name=self.transfer_variable.converge_flag.name,
                              tag=converge_flag_id,
                              role=consts.GUEST,
                              idx=0)
            federation.remote(converge_flag,
                              name=self.transfer_variable.converge_flag.name,
                              tag=converge_flag_id,
                              role=consts.HOST,
                              idx=-1)
            self.set_coef_(final_model)
            self.n_iter_ = iter_num
            if converge_flag:
                self.is_converged = True
                break
        self._set_header()

    def predict(self, data=None, predict_param=None):
        # Synchronize encryption information
        if not self.has_synchronized_encryption:
            self.__synchronize_encryption()
            self.__send_host_mode()

        for idx, use_encrypt in enumerate(self.host_use_encryption):
            if use_encrypt:
                encrypter = self.host_encrypter[idx]
                predict_wx_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.predict_wx)
                predict_wx = federation.get(name=self.transfer_variable.predict_wx.name,
                                            tag=predict_wx_id,
                                            idx=idx)
                decrypted_wx = encrypter.distribute_decrypt(predict_wx)
                pred_prob = decrypted_wx.mapValues(lambda x: activation.sigmoid(x))
                pred_label = self.classified(pred_prob, predict_param.threshold)
                predict_result_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.predict_result)
                federation.remote(pred_label,
                                  name=self.transfer_variable.predict_result.name,
                                  tag=predict_result_id,
                                  role=consts.HOST,
                                  idx=idx)
        LOGGER.info("Finish predicting, result has been sent back")
        return

    def __init_parameters(self):
        """
        Synchronize the parameters from the guest and each host.
        """
        # 1. Receive the party weight of each party
        # LOGGER.debug("To receive guest party weight")
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        guest_weight = federation.get(name=self.transfer_variable.guest_party_weight.name,
                                      tag=party_weight_id,
                                      idx=0)
        # LOGGER.debug("Received guest_weight: {}".format(guest_weight))
        host_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.host_party_weight)
        host_weights = federation.get(name=self.transfer_variable.host_party_weight.name,
                                      tag=host_weight_id,
                                      idx=-1)
        weights = [guest_weight]
        weights.extend(host_weights)
        self.party_weights = [x / sum(weights) for x in weights]

        # 2. Synchronize encryption information
        self.__synchronize_encryption()

        # 3. Receive re-encrypt times
        self.re_encrypt_times = [0] * len(self.host_use_encryption)
        for idx, use_encryption in enumerate(self.host_use_encryption):
            if not use_encryption:
                self.re_encrypt_times[idx] = 0
                continue
            re_encrypt_times_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.re_encrypt_times)
            re_encrypt_times = federation.get(name=self.transfer_variable.re_encrypt_times.name,
                                              tag=re_encrypt_times_id,
                                              idx=idx)
            self.re_encrypt_times[idx] = re_encrypt_times
        LOGGER.info("re-encrypt times for all parties: {}".format(self.re_encrypt_times))

    def __synchronize_encryption(self):
        """
        Communicate with hosts: learn which hosts use encryption and send them public keys.
        """
        # 1. Learn which hosts use encryption
        host_use_encryption_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.use_encrypt)
        host_use_encryption = federation.get(name=self.transfer_variable.use_encrypt.name,
                                             tag=host_use_encryption_id,
                                             idx=-1)
        self.host_use_encryption = host_use_encryption
        LOGGER.info("host use encryption: {}".format(self.host_use_encryption))

        # 2. Send a public key to each host that uses encryption
        for idx, use_encryption in enumerate(self.host_use_encryption):
            if not use_encryption:
                encrypter = FakeEncrypt()
            else:
                encrypter = PaillierEncrypt()
                encrypter.generate_key(self.encrypt_param.key_length)
                pub_key = encrypter.get_public_key()
                pubkey_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.paillier_pubkey)
                federation.remote(pub_key,
                                  name=self.transfer_variable.paillier_pubkey.name,
                                  tag=pubkey_id,
                                  role=consts.HOST,
                                  idx=idx)
                # LOGGER.debug("send pubkey to host: {}".format(idx))
            self.host_encrypter.append(encrypter)
        self.has_synchronized_encryption = True

    def __send_host_mode(self):
        model = self.merge_model()
        final_model_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.final_model, "predict")
        for idx, use_encrypt in enumerate(self.host_use_encryption):
            if use_encrypt:
                encrypter = self.host_encrypter[idx]
                final_model = encrypter.encrypt_list(model)
            else:
                final_model = model
            federation.remote(final_model,
                              name=self.transfer_variable.final_model.name,
                              tag=final_model_id,
                              role=consts.HOST,
                              idx=idx)

    def __re_encrypt(self, iter_num):
        # If encryption is used, host model weights need to be re-encrypted every few batches.
        self.curt_re_encrypt_times = self.re_encrypt_times.copy()

        # Part 2: re-encrypt model weights from each host
        batch_num = 0
        while True:
            batch_num += self.re_encrypt_batches
            to_encrypt_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.to_encrypt_model, iter_num, batch_num)
            re_encrypted_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.re_encrypted_model, iter_num, batch_num)
            for idx, left_times in enumerate(self.curt_re_encrypt_times):
                if left_times <= 0:
                    continue
                re_encrypt_model = federation.get(
                    name=self.transfer_variable.to_encrypt_model.name,
                    tag=to_encrypt_model_id,
                    idx=idx)
                encrypter = self.host_encrypter[idx]
                decrypt_model = encrypter.decrypt_list(re_encrypt_model)
                re_encrypt_model = encrypter.encrypt_list(decrypt_model)
                federation.remote(re_encrypt_model,
                                  name=self.transfer_variable.re_encrypted_model.name,
                                  tag=re_encrypted_model_id,
                                  role=consts.HOST,
                                  idx=idx)
                left_times -= 1
                self.curt_re_encrypt_times[idx] = left_times
            if sum(self.curt_re_encrypt_times) == 0:
                break

    def _set_header(self):
        self.header = ['head_' + str(x) for x in range(len(self.coef_))]


class HomoLRHost(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRHost, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.batch_size = params.batch_size
        self.encrypt_params = params.encrypt_param

        if self.encrypt_params.method in [consts.PAILLIER]:
            self.use_encrypt = True
        else:
            self.use_encrypt = False

        if self.use_encrypt and params.penalty != consts.L2_PENALTY:
            raise RuntimeError("Encrypted homo-lr supports L2 penalty only")

        if self.use_encrypt:
            self.gradient_operator = TaylorLogisticGradient()
            self.re_encrypt_batches = params.re_encrypt_batches
        else:
            self.gradient_operator = LogisticGradient()

        self.aggregator = HomoFederatedAggregator()
        self.party_weight = params.party_weight
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.mini_batch_obj = None
        self.evaluator = Evaluation(classi_type=consts.BINARY)
        self.classes_ = [0, 1]
        self.has_synchronized_encryption = False

    def fit(self, data_instances):
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}, "
                    "batch_size: {}".format(self.alpha, self.eps, self.max_iter, self.batch_size))
        self.__init_parameters(data_instances)
        w = self.__init_model(data_instances)

        for iter_num in range(self.max_iter):
            # mini-batch
            LOGGER.debug("In iter: {}".format(iter_num))
            batch_data_generator = self.mini_batch_obj.mini_batch_data_generator()
            batch_num = 0
            total_loss = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = grad_loss.count()
                if not self.use_encrypt:
                    grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                    grad = np.array(grad)
                    grad /= n
                    loss /= n
                    # gradient_regular, loss_regular = self.updater.compute(w)
                    if self.updater is not None:
                        loss_norm = self.updater.loss_norm(self.coef_)
                        total_loss += loss + loss_norm
                    # LOGGER.debug("iter: {}, grad: {}, loss: {}".format(iter_num, grad, loss))
                else:
                    grad, _ = grad_loss.reduce(self.aggregator.aggregate_grad)
                    grad = np.array(grad)
                    grad /= n
                    # gradient_regular = self.updater.gradient_norm(w)
                    # grad += gradient_regular
                    # grad = np.array(grad)

                self.update_model(grad)
                w = self.merge_model()
                batch_num += 1

                # Ask the arbiter to re-encrypt the weights every re_encrypt_batches mini-batches
                if self.use_encrypt and batch_num % self.re_encrypt_batches == 0:
                    to_encrypt_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.to_encrypt_model, iter_num, batch_num)
                    federation.remote(w,
                                      name=self.transfer_variable.to_encrypt_model.name,
                                      tag=to_encrypt_model_id,
                                      role=consts.ARBITER,
                                      idx=0)
                    re_encrypted_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.re_encrypted_model, iter_num, batch_num)
                    LOGGER.debug("re_encrypted_model_id: {}".format(re_encrypted_model_id))
                    w = federation.get(name=self.transfer_variable.re_encrypted_model.name,
                                       tag=re_encrypted_model_id,
                                       idx=0)
                    w = np.array(w)
                    self.set_coef_(w)

            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.host_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.host_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            if not self.use_encrypt:
                loss_transfer_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_loss, iter_num)
                federation.remote(total_loss,
                                  name=self.transfer_variable.host_loss.name,
                                  tag=loss_transfer_id,
                                  role=consts.ARBITER,
                                  idx=0)
            LOGGER.debug("model and loss sent")

            final_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=final_model_id,
                               idx=0)
            w = np.array(w)
            # LOGGER.debug("Received model from arbiter, model: {}".format(w))
            self.set_coef_(w)

            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id,
                                           idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge_flag: {}".format(converge_flag))
            if converge_flag:
                break
                # self.save_model()

    def __init_parameters(self, data_instances):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.host_party_weight)
        # LOGGER.debug("party_weight_id: {}".format(party_weight_id))
        federation.remote(self.party_weight,
                          name=self.transfer_variable.host_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)
        self.__synchronize_encryption()

        # Send re-encrypt times
        self.mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        if self.use_encrypt:
            # LOGGER.debug("Use encryption, send re_encrypt_times")
            total_batch_num = self.mini_batch_obj.batch_nums
            re_encrypt_times = total_batch_num // self.re_encrypt_batches
            transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.re_encrypt_times)
            federation.remote(re_encrypt_times,
                              name=self.transfer_variable.re_encrypt_times.name,
                              tag=transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            LOGGER.info("sent re_encrypt_times: {}".format(re_encrypt_times))

    def __synchronize_encryption(self):
        """
        Communicate with the arbiter: report whether this host uses encryption and receive
        the public key if it does.
        """
        # Report whether this host uses encryption
        use_encryption_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.use_encrypt)
        federation.remote(self.use_encrypt,
                          name=self.transfer_variable.use_encrypt.name,
                          tag=use_encryption_id,
                          role=consts.ARBITER,
                          idx=0)

        # Set public key
        if self.use_encrypt:
            pubkey_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey)
            pubkey = federation.get(name=self.transfer_variable.paillier_pubkey.name,
                                    tag=pubkey_id,
                                    idx=0)
            self.encrypt_operator.set_public_key(pubkey)
        LOGGER.info("Finished synchronizing encryption")
        self.has_synchronized_encryption = True

    def predict(self, data_instances, predict_param):
        if not self.has_synchronized_encryption:
            self.__synchronize_encryption()
            self.__load_arbiter_model()
        else:
            LOGGER.info("in predict, encryption information already synchronized")

        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        if self.use_encrypt:
            encrypted_wx_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.predict_wx)
            # LOGGER.debug("predict_wx_id: {}".format(encrypted_wx_id))
            federation.remote(wx,
                              name=self.transfer_variable.predict_wx.name,
                              tag=encrypted_wx_id,
                              role=consts.ARBITER,
                              idx=0)
            predict_result_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.predict_result)
            # LOGGER.debug("predict_result_id: {}".format(predict_result_id))
            predict_result = federation.get(name=self.transfer_variable.predict_result.name,
                                            tag=predict_result_id,
                                            idx=0)
            # local_predict_table = predict_result.collect()
            predict_result_table = predict_result.join(data_instances,
                                                       lambda p, d: (d.label, None, p))
        else:
            pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
            pred_label = self.classified(pred_prob, predict_param.threshold)
            if predict_param.with_proba:
                predict_result = data_instances.mapValues(lambda x: x.label)
                predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
            else:
                predict_result = data_instances.mapValues(lambda x: (x.label, None))
            predict_result_table = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result_table

    def __init_model(self, data_instances):
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        w = self.encrypt_operator.encrypt_list(w)
        w = np.array(w)
        # LOGGER.debug("self use encryption: {}, w: {}, type of w: {}".format(self.use_encrypt, w, type(w)))
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Type of coef: {}".format(type(self.coef_)))
        return w

    def __load_arbiter_model(self):
        final_model_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.final_model, "predict")
        final_model = federation.get(name=self.transfer_variable.final_model.name,
                                     tag=final_model_id,
                                     idx=0)
        self.set_coef_(final_model)
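
# Worked example of the re-encryption cadence above (illustrative numbers only): with
# mini_batch_obj.batch_nums == 100 and re_encrypt_batches == 30, the host reports
# re_encrypt_times = 100 // 30 = 3, and HomoLRArbiter.__re_encrypt then serves exactly three
# to_encrypt_model / re_encrypted_model round trips per iteration for this host (at batches
# 30, 60 and 90).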


class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator()
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]
        self.evaluator = Evaluation()
        self.header = []
        self.penalty = params.penalty
        self.loss_history = []
        self.is_converged = False

    def fit(self, data_instances):
        self._abnormal_detection(data_instances)
        self.header = data_instances.schema.get('header')  # ['x1', 'x2', 'x3' ... ]
        self.__init_parameters()
        self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                n = batch_data.count()
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n
                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                delta_grad = self.optimizer.apply_gradients(grad)
                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            self.loss_history.append(total_loss)
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))

            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)
            w = np.array(w)
            self.set_coef_(w)
            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id,
                                           idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is: {}".format(converge_flag))
            if converge_flag:
                self.is_converged = True
                break

        self.show_meta()
        self.show_model()
        LOGGER.debug("in fit self coef: {}".format(self.coef_))
        return data_instances

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initializing parameters")

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)
        LOGGER.info("Initialized model shape is {}".format(model_shape))
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Initialized model")
        return w

    def predict(self, data_instances, predict_param):
        LOGGER.debug("coef: {}, intercept: {}".format(self.coef_, self.intercept_))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)
        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x: (x.label, None))
        predict_result = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)


class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        """
        :param penalty: l1 or l2
        :param alpha:
        :param lr:
        :param eps:
        :param max_iter:
        :param optim_method: must be in ['sgd', 'RMSProp', 'Adam', 'AdaGrad']
        :param batch_size: only works when optim_method is mini-batch; the mini-batch size
        """
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator()
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]
        self.evaluator = Evaluation()

    def fit(self, data_instances):
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}, "
                    "batch_size: {}".format(self.alpha, self.eps, self.max_iter, self.batch_size))
        self.__init_parameters()
        w = self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            # LOGGER.debug("Enter iter_num: {}".format(iter_num))
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = grad_loss.count()
                grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n
                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                # LOGGER.debug("before update: {}".format(grad))
                delta_grad = self.optimizer.apply_gradients(grad)
                # LOGGER.debug("after apply: {}".format(delta_grad))
                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))

            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)
            w = np.array(w)
            # LOGGER.debug("Received final model: {}".format(w))
            self.set_coef_(w)
            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id,
                                           idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is: {}".format(converge_flag))
            if converge_flag:
                # self.save_model(w)
                break

        # LOGGER.info("training finished, final coef: {}, final intercept: {}".format(
        #     self.coef_, self.intercept_))

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initializing parameters")

    def __init_model(self, data_instances):
        model_shape = self.get_features_shape(data_instances)
        LOGGER.info("Initialized model shape is {}".format(model_shape))
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Initialized model")
        return w

    def predict(self, data_instances, predict_param):
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)
        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x: (x.label, None))
        predict_result = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
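
# -- Usage sketch (assumed driver code; not part of this module) ---------------------------
# In a real FATE job the framework builds each role from the runtime configuration, so the
# lines below only illustrate the per-party entry points defined above. LogisticParam
# construction, the data tables, and the predict_param object are assumptions.
#
#   params = LogisticParam()                     # assumed default construction
#
#   # Guest party
#   guest = HomoLRGuest(params)
#   guest.set_flowid("train")
#   guest.fit(guest_data_instances)              # guest_data_instances: a table of Instance
#   guest.predict(guest_data_instances, predict_param)   # predict_param: has .threshold/.with_proba
#
#   # Host party (runs in its own process; federation.remote/get pair with guest and arbiter)
#   host = HomoLRHost(params)
#   host.fit(host_data_instances)
#
#   # Arbiter party (holds the Paillier private keys, aggregates models and losses)
#   arbiter = HomoLRArbiter(params)
#   arbiter.fit()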