def test_initializer(self):
    initializer = Initializer()
    data_shape = 10
    init_param_obj = InitParam(init_method=consts.RANDOM_NORMAL, init_const=20,
                               fit_intercept=False)
    model = initializer.init_model(model_shape=data_shape, init_params=init_param_obj)
    model_shape = model.shape
    self.assertTrue(model_shape == (10,))
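# For intuition, a stand-alone stand-in for what init_model is expected to return in the test
# above: with init_method=RANDOM_NORMAL and fit_intercept=False, a 1-D weight vector of length
# model_shape. np.random.normal is only an illustration; the real Initializer may scale or seed
# differently, and init_const presumably matters only for constant initialization.
import numpy as np

def random_normal_init(model_shape, fit_intercept=False):
    size = model_shape + 1 if fit_intercept else model_shape  # extra slot for the intercept term
    return np.random.normal(size=size)

assert random_normal_init(10).shape == (10,)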
class HomoLRHost(HomoLRBase):
    def __init__(self):
        super(HomoLRHost, self).__init__()
        self.aggregator = HomoFederatedAggregator()
        self.initializer = Initializer()
        self.mini_batch_obj = None
        self.classes_ = [0, 1]
        self.has_sychronized_encryption = False
        self.role = consts.HOST

    def _init_model(self, params):
        super(HomoLRHost, self)._init_model(params)
        encrypt_params = params.encrypt_param
        if encrypt_params.method in [consts.PAILLIER]:
            self.use_encrypt = True
        else:
            self.use_encrypt = False

        if self.use_encrypt and params.penalty == 'L1':
            raise RuntimeError("Encrypted homo-lr supports L2 penalty or 'none' only")

        if self.use_encrypt:
            self.gradient_operator = TaylorLogisticGradient()
            self.re_encrypt_batches = params.re_encrypt_batches
        else:
            self.gradient_operator = LogisticGradient()

    def fit(self, data_instances):
        if not self.need_run:
            return data_instances
        self.init_schema(data_instances)
        LOGGER.debug("Before training, self.header: {}".format(self.header))
        self._abnormal_detection(data_instances)
        self.__init_parameters(data_instances)
        w = self.__init_model(data_instances)

        for iter_num in range(self.max_iter):
            # mini-batch
            LOGGER.debug("In iter: {}".format(iter_num))
            batch_data_generator = self.mini_batch_obj.mini_batch_data_generator()
            batch_num = 0
            total_loss = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute, coef=self.coef_,
                                      intercept=self.intercept_, fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = batch_data.count()
                if not self.use_encrypt:
                    grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                    grad = np.array(grad)
                    grad /= n
                    loss /= n
                    if self.updater is not None:
                        loss_norm = self.updater.loss_norm(self.coef_)
                        total_loss += loss + loss_norm
                    # if not self.use_loss:
                    #     total_loss = np.linalg.norm(self.coef_)
                    if not self.need_one_vs_rest:
                        metric_meta = MetricMeta(name='train', metric_type="LOSS",
                                                 extra_metas={"unit_name": "iters"})
                        metric_name = self.get_metric_name('loss')
                        self.callback_meta(metric_name=metric_name, metric_namespace='train',
                                           metric_meta=metric_meta)
                        self.callback_metric(metric_name=metric_name, metric_namespace='train',
                                             metric_data=[Metric(iter_num, total_loss)])
                else:
                    grad, _ = grad_loss.reduce(self.aggregator.aggregate_grad)
                    grad = np.array(grad)
                    grad /= n

                self.update_model(grad)
                w = self.merge_model()
                batch_num += 1
                if self.use_encrypt and batch_num % self.re_encrypt_batches == 0:
                    to_encrypt_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.to_encrypt_model, iter_num, batch_num)
                    federation.remote(w, name=self.transfer_variable.to_encrypt_model.name,
                                      tag=to_encrypt_model_id, role=consts.ARBITER, idx=0)
                    re_encrypted_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.re_encrypted_model, iter_num, batch_num)
                    LOGGER.debug("re_encrypted_model_id: {}".format(re_encrypted_model_id))
                    w = federation.get(name=self.transfer_variable.re_encrypted_model.name,
                                       tag=re_encrypted_model_id, idx=0)
                    w = np.array(w)
                    self.set_coef_(w)

            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.host_model, iter_num)
            federation.remote(w, name=self.transfer_variable.host_model.name,
                              tag=model_transfer_id, role=consts.ARBITER, idx=0)
            if not self.use_encrypt:
                loss_transfer_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_loss, iter_num)
                federation.remote(total_loss, name=self.transfer_variable.host_loss.name,
                                  tag=loss_transfer_id, role=consts.ARBITER, idx=0)
            LOGGER.debug("model and loss sent")

            final_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=final_model_id, idx=0)
            w = np.array(w)
            self.set_coef_(w)

            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id, idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge_flag: {}".format(converge_flag))
            if converge_flag:
                break

        # self.save_model()

    def __init_parameters(self, data_instances):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.host_party_weight)
        LOGGER.debug("Start to remote party_weight: {}, transfer_id: {}".format(
            self.party_weight, party_weight_id))
        federation.remote(self.party_weight, name=self.transfer_variable.host_party_weight.name,
                          tag=party_weight_id, role=consts.ARBITER, idx=0)
        self.__synchronize_encryption()

        # Send re-encrypt times
        self.mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        if self.use_encrypt:
            # LOGGER.debug("Use encryption, send re_encrypt_times")
            total_batch_num = self.mini_batch_obj.batch_nums
            re_encrypt_times = total_batch_num // self.re_encrypt_batches
            transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.re_encrypt_times)
            LOGGER.debug("Start to remote re_encrypt_times: {}, transfer_id: {}".format(
                re_encrypt_times, transfer_id))
            federation.remote(re_encrypt_times, name=self.transfer_variable.re_encrypt_times.name,
                              tag=transfer_id, role=consts.ARBITER, idx=0)
            LOGGER.info("sent re_encrypt_times: {}".format(re_encrypt_times))

    def __synchronize_encryption(self, mode='train'):
        """
        Tell the arbiter whether this host uses encryption and, if so, receive the public key.
        """
        # Send whether this host uses encryption or not
        use_encryption_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.use_encrypt, mode)
        LOGGER.debug("Start to remote use_encrypt: {}, transfer_id: {}".format(
            self.use_encrypt, use_encryption_id))
        federation.remote(self.use_encrypt, name=self.transfer_variable.use_encrypt.name,
                          tag=use_encryption_id, role=consts.ARBITER, idx=0)

        # Set public key
        if self.use_encrypt:
            pubkey_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey, mode)
            pubkey = federation.get(name=self.transfer_variable.paillier_pubkey.name,
                                    tag=pubkey_id, idx=0)
            LOGGER.debug("Received pubkey")
            self.encrypt_operator.set_public_key(pubkey)
        LOGGER.info("Finish synchronizing encryption")
        self.has_sychronized_encryption = True

    def predict(self, data_instances):
        if not self.need_run:
            return data_instances

        if not self.has_sychronized_encryption:
            self.__synchronize_encryption(mode='predict')
            self.__load_arbiter_model()
        else:
            LOGGER.info("In predict, encryption already synchronized")

        feature_shape = get_features_shape(data_instances)
        LOGGER.debug("Shape of coef_: {}, feature shape: {}".format(len(self.coef_), feature_shape))
        local_data = data_instances.first()
        LOGGER.debug("One data, features: {}".format(local_data[1].features))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)

        if self.use_encrypt:
            encrypted_wx_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.predict_wx)
            LOGGER.debug("Host encrypted wx id: {}".format(encrypted_wx_id))
            LOGGER.debug("Start to remote wx: {}, transfer_id: {}".format(wx, encrypted_wx_id))
            federation.remote(wx, name=self.transfer_variable.predict_wx.name,
                              tag=encrypted_wx_id, role=consts.ARBITER, idx=0)
            predict_result_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.predict_result)
            LOGGER.debug("predict_result_id: {}".format(predict_result_id))
            predict_result = federation.get(name=self.transfer_variable.predict_result.name,
                                            tag=predict_result_id, idx=0)
            # local_predict_table = predict_result.collect()
            LOGGER.debug("predict_result count: {}, data_instances count: {}".format(
                predict_result.count(), data_instances.count()))
            predict_result_table = predict_result.join(
                data_instances, lambda p, d: [d.label, None, p, {"0": None, "1": None}])
        else:
            pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
            pred_label = self.classified(pred_prob, self.predict_param.threshold)
            if self.predict_param.with_proba:
                predict_result = data_instances.mapValues(lambda x: x.label)
                predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
            else:
                predict_result = data_instances.mapValues(lambda x: (x.label, None))
            predict_result_table = predict_result.join(
                pred_label, lambda x, y: [x[0], y, x[1], {"0": None, "1": None}])
        LOGGER.debug("Finish predict")
        LOGGER.debug("In host predict, predict_result_table is: {}".format(
            predict_result_table.first()))
        return predict_result_table

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        w = self.encrypt_operator.encrypt_list(w)
        w = np.array(w)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        return w

    def __load_arbiter_model(self):
        final_model_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.final_model, "predict")
        final_model = federation.get(name=self.transfer_variable.final_model.name,
                                     tag=final_model_id, idx=0)
        # LOGGER.info("Received arbiter's model")
        # LOGGER.debug("final_model: {}".format(final_model))
        self.set_coef_(final_model)

    def _get_param(self):
        if self.need_one_vs_rest:
            one_vs_rest_class = list(map(str, self.one_vs_rest_obj.classes))
            param_protobuf_obj = lr_model_param_pb2.LRModelParam(
                iters=self.n_iter_,
                loss_history=[],
                is_converged=self.is_converged,
                weight={},
                intercept=0,
                need_one_vs_rest=self.need_one_vs_rest,
                one_vs_rest_classes=one_vs_rest_class)
            return param_protobuf_obj

        header = self.header
        weight_dict = {}
        for idx, header_name in enumerate(header):
            coef_i = self.coef_[idx]
            weight_dict[header_name] = coef_i

        param_protobuf_obj = lr_model_param_pb2.LRModelParam(
            iters=self.n_iter_,
            loss_history=[],
            is_converged=self.is_converged,
            weight=weight_dict,
            intercept=self.intercept_,
            need_one_vs_rest=self.need_one_vs_rest,
            header=header)
        from google.protobuf import json_format
        json_result = json_format.MessageToJson(param_protobuf_obj)
        LOGGER.debug("json_result: {}".format(json_result))
        return param_protobuf_obj
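# For reference, a self-contained sketch of the per-sample logistic gradient and loss that the
# fit loops above average over each mini-batch (plain-text path only; the encrypted path relies
# on a Taylor approximation instead). Labels are assumed to lie in {-1, +1}, matching the unit
# test data below; this is an illustration, not FATE's LogisticGradient implementation.
import numpy as np

def logistic_grad_loss(X, y, coef, intercept):
    """Return (gradient, loss) summed over a batch for l(w) = log(1 + exp(-y * (Xw + b)))."""
    z = y * (X.dot(coef) + intercept)       # per-sample margin
    loss = np.sum(np.log(1 + np.exp(-z)))   # summed logistic loss
    d = -y / (1 + np.exp(z))                # dl/d(Xw + b) per sample
    grad_coef = X.T.dot(d)                  # gradient w.r.t. the coefficients
    grad_intercept = np.sum(d)              # gradient w.r.t. the intercept
    return np.append(grad_coef, grad_intercept), loss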
class TestHomoLR(unittest.TestCase):
    def setUp(self):
        self.guest_X = np.array([[1, 2, 3, 4, 5],
                                 [3, 2, 4, 5, 1],
                                 [2, 2, 3, 1, 1]]) / 10
        self.guest_Y = np.array([[1], [1], [-1]])
        self.values = []
        for idx, x in enumerate(self.guest_X):
            inst = Instance(inst_id=idx, features=x, label=self.guest_Y[idx])
            self.values.append((idx, inst))

        self.host_X = np.array([[1, 1.2, 3.1, 4, 5],
                                [2.3, 2, 4, 5.3, 1],
                                [2, 2.2, 1.3, 1, 1.6]]) / 10
        self.host_Y = np.array([[-1], [1], [-1]])
        self.host_values = []
        for idx, x in enumerate(self.host_X):
            inst = Instance(inst_id=idx, features=x, label=self.host_Y[idx])
            self.values.append((idx, inst))

        self.max_iter = 10
        self.alpha = 0.01
        self.learning_rate = 0.01
        optimizer = 'SGD'
        self.gradient_operator = LogisticGradient()
        self.initializer = Initializer()
        self.fit_intercept = True
        self.init_param_obj = InitParam(fit_intercept=self.fit_intercept)
        self.updater = L2Updater(self.alpha, self.learning_rate)
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=optimizer)
        self.__init_model()

    def __init_model(self):
        model_shape = self.guest_X.shape[1]
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        return w

    def __init_host_model(self):
        model_shape = self.host_X.shape[1]
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.host_coef_ = w[:-1]
            self.host_intercept_ = w[-1]
        else:
            self.host_coef_ = w
            self.host_intercept_ = 0
        return w

    def test_one_iter(self):
        w = self.__init_model()
        print("before training, coef: {}, intercept: {}".format(self.coef_, self.intercept_))
        self.assertEqual(self.coef_.shape[0], self.guest_X.shape[1])
        grad, loss = self.gradient_operator.compute(self.values, coef=self.coef_,
                                                    intercept=self.intercept_,
                                                    fit_intercept=self.fit_intercept)
        loss_norm = self.updater.loss_norm(self.coef_)
        loss = loss + loss_norm
        delta_grad = self.optimizer.apply_gradients(grad)
        self.update_model(delta_grad)
        print("After training, coef: {}, intercept: {}, loss: {}".format(
            self.coef_, self.intercept_, loss))

    def test_multi_iter(self):
        w = self.__init_model()
        loss_hist = [100]
        for iter_num in range(self.max_iter):
            grad, loss = self.gradient_operator.compute(self.values, coef=self.coef_,
                                                        intercept=self.intercept_,
                                                        fit_intercept=self.fit_intercept)
            loss_norm = self.updater.loss_norm(self.coef_)
            loss = loss + loss_norm
            delta_grad = self.optimizer.apply_gradients(grad)
            self.update_model(delta_grad)
            self.assertTrue(loss <= loss_hist[-1])
            loss_hist.append(loss)
        print(loss_hist)

    def test_host_iter(self):
        w = self.__init_host_model()
        print("before training, coef: {}, intercept: {}".format(self.coef_, self.intercept_))
        self.assertEqual(self.host_coef_.shape[0], self.host_X.shape[1])
        grad, loss = self.gradient_operator.compute(self.host_values, coef=self.host_coef_,
                                                    intercept=self.intercept_,
                                                    fit_intercept=self.fit_intercept)
        loss_norm = self.updater.loss_norm(self.coef_)
        # print("***********************************************")
        # print(loss, loss_norm)
        self.assertTrue(loss is None)

    def update_model(self, gradient):
        LOGGER.debug("In update_model function, shape of coef: {}, shape of gradient: {}".format(
            np.shape(self.coef_), np.shape(gradient)))
        if self.fit_intercept:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_, gradient[:-1])
            else:
                self.coef_ = self.coef_ - gradient[:-1]
            self.intercept_ -= gradient[-1]
        else:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_, gradient)
            else:
                self.coef_ = self.coef_ - gradient
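# The TestHomoLR suite above would normally be executed with the standard unittest entry point;
# a minimal sketch, assuming the class lives in a module run directly as a script:
if __name__ == '__main__':
    import unittest
    unittest.main()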
class HomoLRHost(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRHost, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.batch_size = params.batch_size
        self.encrypt_params = params.encrypt_param
        if self.encrypt_params.method in [consts.PAILLIER]:
            self.use_encrypt = True
        else:
            self.use_encrypt = False

        if self.use_encrypt and params.penalty != consts.L2_PENALTY:
            raise RuntimeError("Encrypted homo-lr supports L2 penalty only")

        if self.use_encrypt:
            self.gradient_operator = TaylorLogisticGradient()
            self.re_encrypt_batches = params.re_encrypt_batches
        else:
            self.gradient_operator = LogisticGradient()

        self.aggregator = HomoFederatedAggregator()
        self.party_weight = params.party_weight
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.mini_batch_obj = None
        self.evaluator = Evaluation(classi_type=consts.BINARY)
        self.classes_ = [0, 1]
        self.has_sychronized_encryption = False

    def fit(self, data_instances):
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}, batch_size: {}".format(
            self.alpha, self.eps, self.max_iter, self.batch_size))
        self.__init_parameters(data_instances)
        w = self.__init_model(data_instances)

        for iter_num in range(self.max_iter):
            # mini-batch
            LOGGER.debug("In iter: {}".format(iter_num))
            batch_data_generator = self.mini_batch_obj.mini_batch_data_generator()
            batch_num = 0
            total_loss = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute, coef=self.coef_,
                                      intercept=self.intercept_, fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = grad_loss.count()
                if not self.use_encrypt:
                    grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                    grad = np.array(grad)
                    grad /= n
                    loss /= n
                    # gradient_regular, loss_regular = self.updater.compute(w)
                    if self.updater is not None:
                        loss_norm = self.updater.loss_norm(self.coef_)
                        total_loss += loss + loss_norm
                    # LOGGER.debug("iter: {}, grad: {}, loss: {}".format(iter_num, grad, loss))
                else:
                    grad, _ = grad_loss.reduce(self.aggregator.aggregate_grad)
                    grad = np.array(grad)
                    grad /= n
                    # gradient_regular = self.updater.gradient_norm(w)
                    # grad += gradient_regular
                    # grad = np.array(grad)

                self.update_model(grad)
                w = self.merge_model()
                batch_num += 1
                if self.use_encrypt and batch_num % self.re_encrypt_batches == 0:
                    to_encrypt_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.to_encrypt_model, iter_num, batch_num)
                    federation.remote(w, name=self.transfer_variable.to_encrypt_model.name,
                                      tag=to_encrypt_model_id, role=consts.ARBITER, idx=0)
                    re_encrypted_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.re_encrypted_model, iter_num, batch_num)
                    LOGGER.debug("re_encrypted_model_id: {}".format(re_encrypted_model_id))
                    w = federation.get(name=self.transfer_variable.re_encrypted_model.name,
                                       tag=re_encrypted_model_id, idx=0)
                    w = np.array(w)
                    self.set_coef_(w)

            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.host_model, iter_num)
            federation.remote(w, name=self.transfer_variable.host_model.name,
                              tag=model_transfer_id, role=consts.ARBITER, idx=0)
            if not self.use_encrypt:
                loss_transfer_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_loss, iter_num)
                federation.remote(total_loss, name=self.transfer_variable.host_loss.name,
                                  tag=loss_transfer_id, role=consts.ARBITER, idx=0)
            LOGGER.debug("model and loss sent")

            final_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=final_model_id, idx=0)
            w = np.array(w)
            # LOGGER.debug("Received model from arbiter, model: {}".format(w))
            self.set_coef_(w)

            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id, idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge_flag: {}".format(converge_flag))
            if converge_flag:
                break

        # self.save_model()

    def __init_parameters(self, data_instances):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.host_party_weight)
        # LOGGER.debug("party_weight_id: {}".format(party_weight_id))
        federation.remote(self.party_weight, name=self.transfer_variable.host_party_weight.name,
                          tag=party_weight_id, role=consts.ARBITER, idx=0)
        self.__synchronize_encryption()

        # Send re-encrypt times
        self.mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        if self.use_encrypt:
            # LOGGER.debug("Use encryption, send re_encrypt_times")
            total_batch_num = self.mini_batch_obj.batch_nums
            re_encrypt_times = total_batch_num // self.re_encrypt_batches
            transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.re_encrypt_times)
            federation.remote(re_encrypt_times, name=self.transfer_variable.re_encrypt_times.name,
                              tag=transfer_id, role=consts.ARBITER, idx=0)
            LOGGER.info("sent re_encrypt_times: {}".format(re_encrypt_times))

    def __synchronize_encryption(self):
        """
        Tell the arbiter whether this host uses encryption and, if so, receive the public key.
        """
        # Send whether this host uses encryption or not
        use_encryption_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.use_encrypt)
        federation.remote(self.use_encrypt, name=self.transfer_variable.use_encrypt.name,
                          tag=use_encryption_id, role=consts.ARBITER, idx=0)

        # Set public key
        if self.use_encrypt:
            pubkey_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey)
            pubkey = federation.get(name=self.transfer_variable.paillier_pubkey.name,
                                    tag=pubkey_id, idx=0)
            self.encrypt_operator.set_public_key(pubkey)
        LOGGER.info("Finish synchronizing encryption")
        self.has_sychronized_encryption = True

    def predict(self, data_instances, predict_param):
        if not self.has_sychronized_encryption:
            self.__synchronize_encryption()
            self.__load_arbiter_model()
        else:
            LOGGER.info("In predict, encryption already synchronized")

        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)

        if self.use_encrypt:
            encrypted_wx_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.predict_wx)
            # LOGGER.debug("predict_wx_id: {}".format(encrypted_wx_id))
            federation.remote(wx, name=self.transfer_variable.predict_wx.name,
                              tag=encrypted_wx_id, role=consts.ARBITER, idx=0)
            predict_result_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.predict_result)
            # LOGGER.debug("predict_result_id: {}".format(predict_result_id))
            predict_result = federation.get(name=self.transfer_variable.predict_result.name,
                                            tag=predict_result_id, idx=0)
            # local_predict_table = predict_result.collect()
            predict_result_table = predict_result.join(data_instances,
                                                       lambda p, d: (d.label, None, p))
        else:
            pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
            pred_label = self.classified(pred_prob, predict_param.threshold)
            if predict_param.with_proba:
                predict_result = data_instances.mapValues(lambda x: x.label)
                predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
            else:
                predict_result = data_instances.mapValues(lambda x: (x.label, None))
            predict_result_table = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result_table

    def __init_model(self, data_instances):
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        w = self.encrypt_operator.encrypt_list(w)
        w = np.array(w)
        # LOGGER.debug("self use encryption: {}, w: {}, type of w: {}".format(self.use_encrypt, w, type(w)))
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Type of coef: {}".format(type(self.coef_)))
        return w

    def __load_arbiter_model(self):
        final_model_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.final_model, "predict")
        final_model = federation.get(name=self.transfer_variable.final_model.name,
                                     tag=final_model_id, idx=0)
        self.set_coef_(final_model)
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]
        self.evaluator = Evaluation()
        self.header = []
        self.penalty = params.penalty
        self.loss_history = []
        self.is_converged = False

    def fit(self, data_instances):
        self._abnormal_detection(data_instances)
        self.header = data_instances.schema.get('header')  # ['x1', 'x2', 'x3' ...]
        self.__init_parameters()
        self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                n = batch_data.count()
                f = functools.partial(self.gradient_operator.compute, coef=self.coef_,
                                      intercept=self.intercept_, fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n
                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                delta_grad = self.optimizer.apply_gradients(grad)
                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            self.loss_history.append(total_loss)
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))

            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w, name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id, role=consts.ARBITER, idx=0)

            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss, name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id, role=consts.ARBITER, idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id, idx=0)
            w = np.array(w)
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id, idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is: {}".format(converge_flag))
            if converge_flag:
                self.is_converged = True
                break

        self.show_meta()
        self.show_model()
        LOGGER.debug("in fit self coef: {}".format(self.coef_))
        return data_instances

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight, name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id, role=consts.ARBITER, idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initializing parameters")

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)
        LOGGER.info("Initialized model shape is {}".format(model_shape))
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Initialized model")
        return w

    def predict(self, data_instances, predict_param):
        LOGGER.debug("coef: {}, intercept: {}".format(self.coef_, self.intercept_))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)
        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x: (x.label, None))
        predict_result = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
class HomoLRGuest(HomoLRBase):
    def __init__(self):
        super(HomoLRGuest, self).__init__()
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.initializer = Initializer()
        self.classes_ = [0, 1]
        self.evaluator = Evaluation()
        self.loss_history = []
        self.is_converged = False
        self.role = consts.GUEST

    def fit(self, data_instances):
        if not self.need_run:
            return data_instances
        self._abnormal_detection(data_instances)
        self.init_schema(data_instances)
        self.__init_parameters()
        self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                n = batch_data.count()
                f = functools.partial(self.gradient_operator.compute, coef=self.coef_,
                                      intercept=self.intercept_, fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n
                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                delta_grad = self.optimizer.apply_gradients(grad)
                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            # if not self.use_loss:
            #     total_loss = np.linalg.norm(self.coef_)
            w = self.merge_model()

            if not self.need_one_vs_rest:
                metric_meta = MetricMeta(name='train', metric_type="LOSS",
                                         extra_metas={"unit_name": "iters"})
                # metric_name = self.get_metric_name('loss')
                self.callback_meta(metric_name='loss', metric_namespace='train',
                                   metric_meta=metric_meta)
                self.callback_metric(metric_name='loss', metric_namespace='train',
                                     metric_data=[Metric(iter_num, total_loss)])

            self.loss_history.append(total_loss)
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))

            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            LOGGER.debug("Start to remote model: {}, transfer_id: {}".format(w, model_transfer_id))
            federation.remote(w, name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id, role=consts.ARBITER, idx=0)

            # send loss
            # if self.use_loss:
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            LOGGER.debug("Start to remote total_loss: {}, transfer_id: {}".format(
                total_loss, loss_transfer_id))
            federation.remote(total_loss, name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id, role=consts.ARBITER, idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id, idx=0)
            w = np.array(w)
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id, idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is: {}".format(converge_flag))
            if converge_flag:
                self.is_converged = True
                break

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        LOGGER.debug("Start to remote party_weight: {}, transfer_id: {}".format(
            self.party_weight, party_weight_id))
        federation.remote(self.party_weight, name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id, role=consts.ARBITER, idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initializing parameters")

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)
        LOGGER.info("Initialized model shape is {}".format(model_shape))
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Initialized model")
        return w

    def predict(self, data_instances):
        if not self.need_run:
            return data_instances
        LOGGER.debug("homo_lr guest need run predict, coef: {}, intercept: {}".format(
            len(self.coef_), self.intercept_))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, self.predict_param.threshold)

        predict_result = data_instances.mapValues(lambda x: x.label)
        predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
        predict_result = predict_result.join(
            pred_label, lambda x, y: [x[0], y, x[1], {"1": x[1], "0": (1 - x[1])}])
        return predict_result
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        """
        :param penalty: l1 or l2
        :param alpha:
        :param lr:
        :param eps:
        :param max_iter:
        :param optim_method: must be in ['sgd', 'RMSProp', 'Adam', 'AdaGrad']
        :param batch_size: only works when optim_method is mini-batch; gives the mini-batch size
        """
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]
        self.evaluator = Evaluation()

    def fit(self, data_instances):
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}, batch_size: {}".format(
            self.alpha, self.eps, self.max_iter, self.batch_size))
        self.__init_parameters()
        w = self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            # LOGGER.debug("Enter iter_num: {}".format(iter_num))
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute, coef=self.coef_,
                                      intercept=self.intercept_, fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = grad_loss.count()
                grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n
                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                # LOGGER.debug("before update: {}".format(grad))
                delta_grad = self.optimizer.apply_gradients(grad)
                # LOGGER.debug("after apply: {}".format(delta_grad))
                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))

            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w, name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id, role=consts.ARBITER, idx=0)

            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss, name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id, role=consts.ARBITER, idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id, idx=0)
            w = np.array(w)
            # LOGGER.debug("Received final model: {}".format(w))
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id, idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is: {}".format(converge_flag))
            if converge_flag:
                # self.save_model(w)
                break

        # LOGGER.info("training finished, final coef: {}, final intercept: {}".format(
        #     self.coef_, self.intercept_))

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight, name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id, role=consts.ARBITER, idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initializing parameters")

    def __init_model(self, data_instances):
        model_shape = self.get_features_shape(data_instances)
        LOGGER.info("Initialized model shape is {}".format(model_shape))
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Initialized model")
        return w

    def predict(self, data_instances, predict_param):
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)
        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x: (x.label, None))
        predict_result = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
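# A small stand-alone illustration of the un-encrypted predict path shared by the guest and host
# classes above: apply the sigmoid to w.x + b, then threshold the probability. numpy arrays are
# used as a stand-in for the distributed mapValues/join calls, and threshold=0.5 is assumed as a
# typical default rather than read from predict_param.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def predict_labels(X, coef, intercept, threshold=0.5):
    pred_prob = sigmoid(X.dot(coef) + intercept)      # same role as wx.mapValues(sigmoid)
    pred_label = (pred_prob > threshold).astype(int)  # same role as self.classified(pred_prob, threshold)
    return pred_prob, pred_label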