def compute_gradient_procedure(self, cipher_operator, optimizer, n_iter_, batch_index):
    self.batch_index = batch_index
    self.n_iter = n_iter_
    # LOGGER.debug("In compute_gradient_procedure, n_iter: {}, batch_index: {}, iter_k: {}".format(
    #     self.n_iter, self.batch_index, self.iter_k
    # ))
    optimizer.set_hess_matrix(self.opt_Hess)
    delta_grad = self.gradient_computer.compute_gradient_procedure(
        cipher_operator, optimizer, n_iter_, batch_index)
    self._update_w_tilde(LinearModelWeights(delta_grad, fit_intercept=False))

    if self.iter_k % self.update_interval_L == 0:
        self.count_t += 1
        # LOGGER.debug("Before division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))
        self.this_w_tilde /= self.update_interval_L
        # LOGGER.debug("After division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))
        if self.count_t > 0:
            LOGGER.info("iter_k: {}, count_t: {}, start to update hessian".format(self.iter_k, self.count_t))
            self._update_hessian(cipher_operator)
        self.last_w_tilde = self.this_w_tilde
        self.this_w_tilde = LinearModelWeights(np.zeros_like(self.last_w_tilde.unboxed),
                                               self.last_w_tilde.fit_intercept)
    return delta_grad
def regularization_update(self, model_weights: LinearModelWeights, grad,
                          prev_round_weights: LinearModelWeights = None):
    # LOGGER.debug(f"In regularization_update, input model_weights: {model_weights.unboxed}")
    if self.penalty == consts.L1_PENALTY:
        model_weights = self._l1_updator(model_weights, grad)
    elif self.penalty == consts.L2_PENALTY:
        model_weights = self._l2_updator(model_weights, grad)
    else:
        new_vars = model_weights.unboxed - grad
        model_weights = LinearModelWeights(new_vars, model_weights.fit_intercept,
                                           model_weights.raise_overflow_error)

    if prev_round_weights is not None:  # additional proximal term
        coef_ = model_weights.unboxed
        if model_weights.fit_intercept:
            coef_without_intercept = coef_[: -1]
        else:
            coef_without_intercept = coef_

        coef_without_intercept -= self.mu * (model_weights.coef_ - prev_round_weights.coef_)

        if model_weights.fit_intercept:
            new_coef_ = np.append(coef_without_intercept, coef_[-1])
        else:
            new_coef_ = coef_without_intercept

        model_weights = LinearModelWeights(new_coef_, model_weights.fit_intercept,
                                           model_weights.raise_overflow_error)
    return model_weights
def hess_vector_norm(self, delta_s: LinearModelWeights):
    if self.penalty == consts.L1_PENALTY:
        return LinearModelWeights(np.zeros_like(delta_s.unboxed), fit_intercept=delta_s.fit_intercept)
    elif self.penalty == consts.L2_PENALTY:
        return LinearModelWeights(self.alpha * np.array(delta_s.unboxed), fit_intercept=delta_s.fit_intercept)
    else:
        return LinearModelWeights(np.zeros_like(delta_s.unboxed), fit_intercept=delta_s.fit_intercept)
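# Hedged worked example (not part of the library; names and values are illustrative):
# hess_vector_norm above returns the penalty's contribution to a Hessian-vector product.
# Since the L2 term (alpha / 2) * ||w||^2 has Hessian alpha * I, its product with a
# direction delta_s is simply alpha * delta_s; L1 and "no penalty" contribute nothing here.
import numpy as np

alpha = 0.1
delta_s = np.array([0.5, -2.0, 1.0])
hess_vec_l2 = alpha * delta_s           # -> [ 0.05 -0.2   0.1 ]
hess_vec_l1 = np.zeros_like(delta_s)    # L1 adds no curvature term in this scheme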
def test_optimizer(self):
    model_weights = LinearModelWeights(
        np.array([0.10145129, 0.39987222, -0.96630206, -0.41208423, -0.24609715, -0.70518652,
                  0.71478064, 0.57973894, 0.5703622, -0.45482125, 0.32676194, -0.00648212,
                  0.35542874, -0.26412695, -0.07964603, 1.2158522, -0.41255564, -0.01686044,
                  -0.99897542, 1.56407211, 0.52040711, 0.24568055, 0.4880494, 0.52269909,
                  -0.14431923, 0.03282471, 0.09437969, 0.21407206, -0.270922]), True)
    prev_model_weights = LinearModelWeights(
        np.array([0.10194331, 0.40062114, -0.96597859, -0.41202348, -0.24587005, -0.7047801,
                  0.71515712, 0.58045583, 0.57079086, -0.45473676, 0.32775863, -0.00633238,
                  0.35567219, -0.26343469, -0.07964763, 1.2165642, -0.41244749, -0.01589344,
                  -0.99862982, 1.56498698, 0.52058152, 0.24572171, 0.48809946, 0.52272993,
                  -0.14330367, 0.03283002, 0.09439601, 0.21433497, -0.27011673]), True)
    prev_model_weights_null = None
    eps = 0.00001

    # case 1: alpha = 0, so the L2 penalty contributes nothing to the loss norm
    learning_rate = 0.2
    alpha = 0
    penalty = "L2"
    decay = "0.2"
    decay_sqrt = "true"
    mu = 0.01
    init_params = [learning_rate, alpha, penalty, decay, decay_sqrt, mu]
    optimizer = _SgdOptimizer(*init_params)
    loss_norm = optimizer.loss_norm(model_weights, prev_model_weights_null)
    self.assertTrue(math.fabs(loss_norm) <= eps)  # == 0

    # case 2: alpha = 0.1, L2 penalty without previous-round weights
    alpha = 0.1
    init_params = [learning_rate, alpha, penalty, decay, decay_sqrt, mu]
    optimizer = _SgdOptimizer(*init_params)
    loss_norm = optimizer.loss_norm(model_weights, prev_model_weights_null)
    print("loss_norm = {}".format(loss_norm))
    self.assertTrue(math.fabs(loss_norm - 0.47661579875266186) <= eps)

    # case 3: L2 penalty with previous-round weights (proximal term included)
    loss_norm = optimizer.loss_norm(model_weights, prev_model_weights)
    print("loss_norm = {}".format(loss_norm))
    self.assertTrue(math.fabs(loss_norm - 0.47661583737200075) <= eps)
def compute_gradient_procedure(self, *args):
    data_instances = args[0]
    encrypted_calculator = args[1]
    model_weights = args[2]
    optimizer = args[3]
    self.batch_index = args[5]
    self.n_iter = args[4]
    cipher_operator = encrypted_calculator[0].encrypter

    # one_data = data_instances.first()
    # LOGGER.debug("data shape: {}, model weights shape: {}, model weights coef: {}, intercept: {}".format(
    #     one_data[1].features.shape, model_weights.unboxed.shape, model_weights.coef_, model_weights.intercept_
    # ))

    gradient_results = self.gradient_computer.compute_gradient_procedure(*args)
    self._update_w_tilde(model_weights)

    if self.iter_k % self.update_interval_L == 0:
        self.count_t += 1
        # LOGGER.debug("Before division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))
        self.this_w_tilde /= self.update_interval_L
        # LOGGER.debug("After division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))
        if self.count_t > 0:
            LOGGER.info("iter_k: {}, count_t: {}, start to update hessian".format(self.iter_k, self.count_t))
            self._update_hessian(data_instances, optimizer, cipher_operator)
        self.last_w_tilde = self.this_w_tilde
        self.this_w_tilde = LinearModelWeights(np.zeros_like(self.last_w_tilde.unboxed),
                                               self.last_w_tilde.fit_intercept)
        # LOGGER.debug("After replace, last_w_tilde: {}, this_w_tilde: {}".format(self.last_w_tilde.unboxed,
        #                                                                         self.this_w_tilde.unboxed))
    return gradient_results
def compute_gradient_procedure(self, *args, **kwargs):
    data_instances = args[0]
    cipher = args[1]
    model_weights = args[2]
    optimizer = args[3]
    self.batch_index = args[5]
    self.n_iter = args[4]

    gradient_results = self.gradient_computer.compute_gradient_procedure(*args)
    self._update_w_tilde(model_weights)

    if self.iter_k % self.update_interval_L == 0:
        self.count_t += 1
        # LOGGER.debug("Before division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))
        self.this_w_tilde /= self.update_interval_L
        # LOGGER.debug("After division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))
        if self.count_t > 0:
            LOGGER.info("iter_k: {}, count_t: {}, start to update hessian".format(self.iter_k, self.count_t))
            self._update_hessian(data_instances, optimizer, cipher)
        self.last_w_tilde = self.this_w_tilde
        self.this_w_tilde = LinearModelWeights(np.zeros_like(self.last_w_tilde.unboxed),
                                               self.last_w_tilde.fit_intercept,
                                               raise_overflow_error=self.raise_weight_overflow_error)
        # LOGGER.debug("After replace, last_w_tilde: {}, this_w_tilde: {}".format(self.last_w_tilde.unboxed,
        #                                                                         self.this_w_tilde.unboxed))
    return gradient_results
def init_model(self, model_shape, init_params, data_instance=None):
    init_method = init_params.init_method
    fit_intercept = init_params.fit_intercept
    random_seed = init_params.random_seed
    np.random.seed(random_seed)

    if fit_intercept:
        if isinstance(model_shape, int):
            model_shape += 1
        else:
            new_shape = []
            for ds in model_shape:
                new_shape.append(ds + 1)
            model_shape = tuple(new_shape)

    if init_method == 'random_normal':
        w = self.random_normal(model_shape)
    elif init_method == 'random_uniform':
        w = self.random_uniform(model_shape)
    elif init_method == 'ones':
        w = self.ones(model_shape)
    elif init_method == 'zeros':
        w = self.zeros(model_shape, fit_intercept, data_instance)
    elif init_method == 'const':
        init_const = init_params.init_const
        w = self.constant(model_shape, const=init_const)
    else:
        raise NotImplementedError("Initial method cannot be recognized: {}".format(init_method))
    LOGGER.debug("Initialized model: {}".format(w))
    lr_weights = LinearModelWeights(w, init_params.fit_intercept)
    return lr_weights
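# Hedged sketch (illustrative helper, not library code): with fit_intercept=True the shape
# handling in init_model above reserves one extra slot for the bias term, for both int and
# tuple shapes.
def expand_shape(model_shape, fit_intercept):
    if not fit_intercept:
        return model_shape
    if isinstance(model_shape, int):
        return model_shape + 1
    return tuple(ds + 1 for ds in model_shape)

assert expand_shape(30, True) == 31
assert expand_shape((4, 5), True) == (5, 6)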
def load_single_model(self, single_model_obj):
    LOGGER.info("start to load single model")
    if self.is_respectively_reveal:
        self.load_single_model_weight(single_model_obj)
    else:
        feature_shape = len(self.header)
        tmp_vars = [None] * feature_shape
        weight_dict = dict(single_model_obj.encrypted_weight)
        for idx, header_name in enumerate(self.header):
            cipher_weight = weight_dict.get(header_name)
            public_key = PaillierPublicKey(int(cipher_weight.public_key.n))
            cipher_text = int(cipher_weight.cipher_text)
            exponent = int(cipher_weight.exponent)
            is_obfuscator = cipher_weight.is_obfuscator
            coef_i = PaillierEncryptedNumber(public_key, cipher_text, exponent)
            if is_obfuscator:
                coef_i.apply_obfuscator()
            tmp_vars[idx] = coef_i
        self.model_weights = LinearModelWeights(tmp_vars, fit_intercept=self.fit_intercept)

    self.n_iter_ = single_model_obj.iters
    return self
def _init_model(self, params):
    super()._init_model(params)
    self.model_weights = LinearModelWeights([], fit_intercept=self.fit_intercept)
    self.one_vs_rest_obj = one_vs_rest_factory(self, role=self.role, mode=self.mode, has_arbiter=True)
def regularization_update(self, model_weights: LinearModelWeights, grad,
                          prev_round_weights: LinearModelWeights = None):
    LOGGER.debug(f"In regularization_update, input model_weights: {model_weights.unboxed}")
    if self.penalty == consts.L1_PENALTY:
        model_weights = self._l1_updator(model_weights, grad)
    elif self.penalty == consts.L2_PENALTY:
        model_weights = self._l2_updator(model_weights, grad)
    else:
        new_vars = model_weights.unboxed - grad
        model_weights = LinearModelWeights(new_vars, model_weights.fit_intercept)

    if prev_round_weights is not None:  # additional proximal term
        coef_ = model_weights.unboxed
        if model_weights.fit_intercept:
            coef_without_intercept = coef_[:-1]
        else:
            coef_without_intercept = coef_
        LOGGER.debug("before applying additional proximal terms, weights {}".format(coef_without_intercept))
        coef_without_intercept -= self.mu * (model_weights.coef_ - prev_round_weights.coef_)
        LOGGER.debug("after applying additional proximal terms, new weights {}, with difference {}".format(
            coef_without_intercept, model_weights.coef_ - prev_round_weights.coef_))

        if model_weights.fit_intercept:
            new_coef_ = np.append(coef_without_intercept, coef_[-1])
        else:
            new_coef_ = coef_without_intercept

        model_weights = LinearModelWeights(new_coef_, model_weights.fit_intercept)
    LOGGER.debug(f"In regularization_update, model_weights: {model_weights.unboxed},"
                 f" grad: {grad}")
    return model_weights
def fit(self, data_instances=None, validate_data=None):
    self._server_check_data()
    host_ciphers = self.cipher.paillier_keygen(
        key_length=self.model_param.encrypt_param.key_length,
        suffix=('fit',))
    host_has_no_cipher_ids = [idx for idx, cipher in host_ciphers.items() if cipher is None]
    self.re_encrypt_times = self.cipher.set_re_cipher_time(host_ciphers)
    max_iter = self.max_iter
    # validation_strategy = self.init_validation_strategy()

    while self.n_iter_ < max_iter + 1:
        suffix = (self.n_iter_,)

        if ((self.n_iter_ + 1) % self.aggregate_iters == 0) or self.n_iter_ == max_iter:
            merged_model = self.aggregator.aggregate_and_broadcast(ciphers_dict=host_ciphers,
                                                                   suffix=suffix)
            total_loss = self.aggregator.aggregate_loss(host_has_no_cipher_ids, suffix)
            self.callback_loss(self.n_iter_, total_loss)
            self.loss_history.append(total_loss)
            if self.use_loss:
                converge_var = total_loss
            else:
                converge_var = np.array(merged_model.unboxed)

            self.is_converged = self.aggregator.send_converge_status(self.converge_func.is_converge,
                                                                     (converge_var,),
                                                                     suffix=(self.n_iter_,))
            LOGGER.info("n_iters: {}, total_loss: {}, converge flag is :{}".format(
                self.n_iter_, total_loss, self.is_converged))

            self.model_weights = LogisticRegressionWeights(merged_model.unboxed,
                                                           self.model_param.init_param.fit_intercept)
            if self.header is None:
                self.header = ['x' + str(i) for i in range(len(self.model_weights.coef_))]

            if self.is_converged or self.n_iter_ == max_iter:
                break

        self.cipher.re_cipher(iter_num=self.n_iter_,
                              re_encrypt_times=self.re_encrypt_times,
                              host_ciphers_dict=host_ciphers,
                              re_encrypt_batches=self.re_encrypt_batches)

        # validation_strategy.validate(self, self.n_iter_)
        self.n_iter_ += 1

    LOGGER.info("Finish Training task, total iters: {}".format(self.n_iter_))
def _l2_updator(self, lr_weights: LinearModelWeights, gradient):
    """
    For L2 regularization, the regularization term has already been added to the gradient.
    """
    new_weights = lr_weights.unboxed - gradient
    new_param = LinearModelWeights(new_weights, lr_weights.fit_intercept)
    return new_param
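# Hedged numeric sketch (not library code; names and values are made up): for L2, the
# gradient passed into _l2_updator is assumed to already carry the penalty term, roughly
# gradient ~ learning_rate * (base_gradient + alpha * w), so the update itself is a plain
# subtraction.
import numpy as np

w = np.array([0.4, -0.2])
base_gradient = np.array([0.05, 0.01])
alpha, learning_rate = 0.1, 1.0
gradient = learning_rate * (base_gradient + alpha * w)
new_weights = w - gradient              # -> [ 0.31 -0.19]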
def _init_model_variables(self, data_instances):
    model_shape = data_overview.get_features_shape(data_instances)
    LOGGER.info("Initialized model shape is {}".format(model_shape))

    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj,
                                    data_instance=data_instances)
    model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)
    return model_weights
def regularization_update(self, model_weights: LinearModelWeights, grad):
    if self.penalty == consts.L1_PENALTY:
        model_weights = self._l1_updator(model_weights, grad)
    elif self.penalty == consts.L2_PENALTY:
        model_weights = self._l2_updator(model_weights, grad)
    else:
        new_vars = model_weights.unboxed - grad
        model_weights = LinearModelWeights(new_vars, model_weights.fit_intercept)
    return model_weights
def load_single_model_weight(self, single_model_obj):
    feature_shape = len(self.header)
    tmp_vars = np.zeros(feature_shape)
    weight_dict = dict(single_model_obj.weight)
    for idx, header_name in enumerate(self.header):
        tmp_vars[idx] = weight_dict.get(header_name)
    if self.fit_intercept:
        tmp_vars = np.append(tmp_vars, single_model_obj.intercept)
    self.model_weights = LinearModelWeights(tmp_vars, fit_intercept=self.fit_intercept)
def load_single_model(self, single_model_obj):
    LOGGER.info("It's a binary task, start to load single model")
    if self.role == consts.GUEST or self.is_respectively_reveal:
        feature_shape = len(self.header)
        tmp_vars = np.zeros(feature_shape)
        weight_dict = dict(single_model_obj.weight)
        for idx, header_name in enumerate(self.header):
            tmp_vars[idx] = weight_dict.get(header_name)
        if self.fit_intercept:
            tmp_vars = np.append(tmp_vars, single_model_obj.intercept)
        self.model_weights = LinearModelWeights(tmp_vars, fit_intercept=self.fit_intercept)

    self.n_iter_ = single_model_obj.iters
    return self
def _l1_updator(self, model_weights: LinearModelWeights, gradient):
    coef_ = model_weights.coef_
    if model_weights.fit_intercept:
        gradient_without_intercept = gradient[: -1]
    else:
        gradient_without_intercept = gradient

    new_weights = np.sign(coef_ - gradient_without_intercept) * np.maximum(
        0, np.abs(coef_ - gradient_without_intercept) - self.shrinkage_val)

    if model_weights.fit_intercept:
        new_weights = np.append(new_weights, model_weights.intercept_)
        new_weights[-1] -= gradient[-1]
    new_param = LinearModelWeights(new_weights, model_weights.fit_intercept,
                                   model_weights.raise_overflow_error)
    # LOGGER.debug("In _l1_updator, original weight: {}, new_weights: {}".format(
    #     model_weights.unboxed, new_weights
    # ))
    return new_param
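# Hedged numeric sketch (not library code; values are made up): the L1 update above is
# proximal soft-thresholding, prox(w - g) = sign(w - g) * max(0, |w - g| - shrinkage_val),
# which drives small coefficients exactly to zero.
import numpy as np

coef_ = np.array([0.30, -0.05, 0.00])
gradient = np.array([0.10, 0.02, -0.01])
shrinkage_val = 0.05

step = coef_ - gradient                 # -> [ 0.2  -0.07  0.01]
new_weights = np.sign(step) * np.maximum(0, np.abs(step) - shrinkage_val)
# -> [ 0.15 -0.02  0.  ]  (the last coefficient is shrunk to zero)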
def load_single_model(self, single_model_obj):
    super(HeteroLRHost, self).load_single_model(single_model_obj)
    if not self.is_respectively_reveal:
        feature_shape = len(self.header)
        tmp_vars = [None] * feature_shape
        weight_dict = dict(single_model_obj.encrypted_weight)
        for idx, header_name in enumerate(self.header):
            cipher_weight = weight_dict.get(header_name)
            public_key = PaillierPublicKey(int(cipher_weight.public_key.n))
            cipher_text = int(cipher_weight.cipher_text)
            exponent = int(cipher_weight.exponent)
            is_obfuscator = cipher_weight.is_obfuscator
            coef_i = PaillierEncryptedNumber(public_key, cipher_text, exponent)
            if is_obfuscator:
                coef_i.apply_obfuscator()
            tmp_vars[idx] = coef_i
        self.model_weights = LinearModelWeights(tmp_vars, fit_intercept=self.fit_intercept)
def test_compute_fore_gradient(self):
    # fore_gradient = self.hetero_lr_gradient.compute_and_aggregate_forwards(self.data_inst, self.wx)
    model_weights = LinearModelWeights(l=self.w, fit_intercept=False)

    class EncryptedCalculator(object):
        encrypter = self.paillier_encrypt

        def encrypt_row(self, row):
            return np.array([self.encrypter.encrypt(row)])

        def encrypt(self, input_data):
            return input_data.mapValues(self.encrypt_row)

    encrypted_calculator = [EncryptedCalculator()]
    batch_index = 0
    fore_gradient = self.hetero_lr_gradient.compute_and_aggregate_forwards(
        self.data_inst, model_weights, encrypted_calculator, batch_index)
    fore_gradient_local = [self.paillier_encrypt.decrypt(iterator[1]) for iterator in fore_gradient.collect()]
    self.assertListEqual(fore_gradient_local, self.fore_gradient_local)
def fit(self, data_instances, validate_data=None):
    """
    Train linR model of role guest
    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_linR_guest fit")
    self._abnormal_detection(data_instances)
    self.header = self.get_header(data_instances)
    self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

    self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                       self.encrypted_mode_calculator_param.mode,
                                                       self.encrypted_mode_calculator_param.re_encrypted_rate)
                                 for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:{}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
            batch_feat_inst = self.transform(batch_data)

            # Start gradient procedure
            optim_guest_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.encrypted_calculator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)

            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(data_instances, self.n_iter_, batch_index, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_guest_gradient)
            batch_index += 1
            # LOGGER.debug(
            #     "model_weights, iters: {}, update_model: {}".format(self.n_iter_, self.model_weights.unboxed))

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
        # LOGGER.debug("model weights is {}".format(self.model_weights.coef_))

        if self.validation_strategy:
            LOGGER.debug('LinR guest running validation')
            self.validation_strategy.validate(self, self.n_iter_)
            if self.validation_strategy.need_stop():
                LOGGER.debug('early stopping triggered')
                break

        self.n_iter_ += 1
        if self.is_converged:
            break

    if self.validation_strategy and self.validation_strategy.has_saved_best_model():
        self.load_model(self.validation_strategy.cur_best_model)
def fit(self, data_instances, validate_data=None):
    """
    Train poisson regression model of role host
    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_poisson host")
    self._abnormal_detection(data_instances)
    self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)
    self.header = self.get_header(data_instances)
    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    self.batch_generator.initialize_batch_generator(data_instances)

    self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                       self.encrypted_mode_calculator_param.mode,
                                                       self.encrypted_mode_calculator_param.re_encrypted_rate)
                                 for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    model_shape = self.get_features_shape(data_instances)
    if self.init_param_obj.fit_intercept:
        self.init_param_obj.fit_intercept = False
    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:" + str(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            batch_feat_inst = self.transform(batch_data)
            optim_host_gradient, _ = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.encrypted_calculator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)

            self.gradient_loss_operator.compute_loss(batch_feat_inst, self.model_weights,
                                                     self.encrypted_calculator, self.optimizer,
                                                     self.n_iter_, batch_index, self.cipher_operator)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("Get is_converged flag from arbiter:{}".format(self.is_converged))

        if self.validation_strategy:
            LOGGER.debug('Poisson host running validation')
            self.validation_strategy.validate(self, self.n_iter_)
            if self.validation_strategy.need_stop():
                LOGGER.debug('early stopping triggered')
                break

        self.n_iter_ += 1
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
        if self.is_converged:
            break

    if not self.is_converged:
        LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))

    if self.validation_strategy and self.validation_strategy.has_saved_best_model():
        self.load_model(self.validation_strategy.cur_best_model)
    self.set_summary(self.get_model_summary())
def fit(self, data_instances, validate_data=None):
    """
    Train poisson model of role guest
    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_poisson_guest fit")
    self._abnormal_detection(data_instances)
    self.header = copy.deepcopy(self.get_header(data_instances))
    validation_strategy = self.init_validation_strategy(data_instances, validate_data)

    self.exposure_index = self.get_exposure_index(self.header, self.exposure_colname)
    if self.exposure_index > -1:
        self.header.pop(self.exposure_index)
        LOGGER.info("exposure provided at Guest, colname is {}".format(self.exposure_colname))
    exposure = data_instances.mapValues(lambda v: self.load_exposure(v))
    data_instances = data_instances.mapValues(lambda v: self.load_instance(v))

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)

    self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                       self.encrypted_mode_calculator_param.mode,
                                                       self.encrypted_mode_calculator_param.re_encrypted_rate)
                                 for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:{}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
            batch_feat_inst = self.transform(batch_data)

            # compute offset of this batch
            batch_offset = exposure.join(batch_feat_inst, lambda ei, d: self.safe_log(ei))

            # Start gradient procedure
            optimized_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.encrypted_calculator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index, batch_offset)
            LOGGER.debug("iteration:{} Guest's gradient: {}".format(self.n_iter_, optimized_gradient))
            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(data_instances, self.model_weights, self.n_iter_,
                                                     batch_index, batch_offset, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optimized_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        validation_strategy.validate(self, self.n_iter_)

        self.n_iter_ += 1
        if self.is_converged:
            break
class HomoLRArbiter(HomoLRBase):
    def __init__(self):
        super(HomoLRArbiter, self).__init__()
        self.re_encrypt_times = []  # Record the times needed for each host
        self.loss_history = []
        self.is_converged = False
        self.role = consts.ARBITER
        self.aggregator = aggregator.Arbiter()
        self.model_weights = None
        self.cipher = paillier_cipher.Arbiter()
        self.host_predict_results = []

    def _init_model(self, params):
        super()._init_model(params)
        self.cipher.register_paillier_cipher(self.transfer_variable)

    def fit(self, data_instances=None, validate_data=None):
        host_ciphers = self.cipher.paillier_keygen(
            key_length=self.model_param.encrypt_param.key_length,
            suffix=('fit',))
        host_has_no_cipher_ids = [idx for idx, cipher in host_ciphers.items() if cipher is None]
        self.re_encrypt_times = self.cipher.set_re_cipher_time(host_ciphers)
        max_iter = self.max_iter
        # validation_strategy = self.init_validation_strategy()

        while self.n_iter_ < max_iter + 1:
            suffix = (self.n_iter_,)

            if ((self.n_iter_ + 1) % self.aggregate_iters == 0) or self.n_iter_ == max_iter:
                merged_model = self.aggregator.aggregate_and_broadcast(ciphers_dict=host_ciphers,
                                                                       suffix=suffix)
                total_loss = self.aggregator.aggregate_loss(host_has_no_cipher_ids, suffix)
                self.callback_loss(self.n_iter_, total_loss)
                self.loss_history.append(total_loss)
                if self.use_loss:
                    converge_var = total_loss
                else:
                    converge_var = np.array(merged_model.unboxed)

                self.is_converged = self.aggregator.send_converge_status(self.converge_func.is_converge,
                                                                         (converge_var,),
                                                                         suffix=(self.n_iter_,))
                LOGGER.info("n_iters: {}, total_loss: {}, converge flag is :{}".format(
                    self.n_iter_, total_loss, self.is_converged))

                if self.is_converged or self.n_iter_ == max_iter:
                    break

                self.model_weights = LogisticRegressionWeights(merged_model.unboxed,
                                                               self.model_param.init_param.fit_intercept)
                if self.header is None:
                    self.header = ['x' + str(i) for i in range(len(self.model_weights.coef_))]

            self.cipher.re_cipher(iter_num=self.n_iter_,
                                  re_encrypt_times=self.re_encrypt_times,
                                  host_ciphers_dict=host_ciphers,
                                  re_encrypt_batches=self.re_encrypt_batches)

            # validation_strategy.validate(self, self.n_iter_)
            self.n_iter_ += 1

        LOGGER.info("Finish Training task, total iters: {}".format(self.n_iter_))

    def predict(self, data_instantces=None):
        LOGGER.info(f'Start predict task')
        current_suffix = ('predict',)
        host_ciphers = self.cipher.paillier_keygen(
            key_length=self.model_param.encrypt_param.key_length,
            suffix=current_suffix)
        LOGGER.debug("Loaded arbiter model: {}".format(self.model_weights.unboxed))

        for idx, cipher in host_ciphers.items():
            if cipher is None:
                continue
            encrypted_model_weights = self.model_weights.encrypted(cipher, inplace=False)
            self.transfer_variable.aggregated_model.remote(obj=encrypted_model_weights.for_remote(),
                                                           role=consts.HOST,
                                                           idx=idx,
                                                           suffix=current_suffix)

        # Receive wx results
        for idx, cipher in host_ciphers.items():
            if cipher is None:
                continue
            encrypted_predict_wx = self.transfer_variable.predict_wx.get(idx=idx, suffix=current_suffix)
            predict_wx = cipher.distribute_decrypt(encrypted_predict_wx)
            prob_table = predict_wx.mapValues(lambda x: activation.sigmoid(x))
            predict_table = prob_table.mapValues(
                lambda x: 1 if x > self.model_param.predict_param.threshold else 0)
            self.transfer_variable.predict_result.remote(predict_table,
                                                         role=consts.HOST,
                                                         idx=idx,
                                                         suffix=current_suffix)
            self.host_predict_results.append((prob_table, predict_table))
def _renew_w_tilde(self):
    self.last_w_tilde = self.this_w_tilde
    self.this_w_tilde = LinearModelWeights(np.zeros_like(self.last_w_tilde.unboxed),
                                           self.last_w_tilde.fit_intercept)
def fit(self, data_instances, validate_data=None):
    """
    Train poisson model of role guest
    Parameters
    ----------
    data_instances: Table of Instance, input data
    """
    LOGGER.info("Enter hetero_poisson_guest fit")
    # self._abnormal_detection(data_instances)
    # self.header = copy.deepcopy(self.get_header(data_instances))
    self.prepare_fit(data_instances, validate_data)
    self.callback_list.on_train_begin(data_instances, validate_data)

    if with_weight(data_instances):
        LOGGER.warning("input data with weight. Poisson regression does not support weighted training.")

    self.exposure_index = self.get_exposure_index(self.header, self.exposure_colname)
    exposure_index = self.exposure_index
    if exposure_index > -1:
        self.header.pop(exposure_index)
        LOGGER.info("Guest provides exposure value.")
    exposure = data_instances.mapValues(lambda v: HeteroPoissonBase.load_exposure(v, exposure_index))
    data_instances = data_instances.mapValues(lambda v: HeteroPoissonBase.load_instance(v, exposure_index))

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    if not self.component_properties.is_warm_start:
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept,
                                                raise_overflow_error=False)
    else:
        self.callback_warm_start_init_iter(self.n_iter_)

    while self.n_iter_ < self.max_iter:
        self.callback_list.on_epoch_begin(self.n_iter_)
        LOGGER.info("iter:{}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # compute offset of this batch
            batch_offset = exposure.join(batch_data, lambda ei, d: HeteroPoissonBase.safe_log(ei))

            # Start gradient procedure
            optimized_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_data, self.cipher_operator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index, batch_offset)
            # LOGGER.debug("iteration:{} Guest's gradient: {}".format(self.n_iter_, optimized_gradient))
            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(batch_data, self.model_weights, self.n_iter_,
                                                     batch_index, batch_offset, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optimized_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        self.callback_list.on_epoch_end(self.n_iter_)
        self.n_iter_ += 1

        if self.stop_training:
            break
        if self.is_converged:
            break

    self.callback_list.on_train_end()
    self.set_summary(self.get_model_summary())
def fit(self, data_instances, validate_data=None):
    """
    Train linR model of role guest
    Parameters
    ----------
    data_instances: Table of Instance, input data
    """
    LOGGER.info("Enter hetero_linR_guest fit")
    self._abnormal_detection(data_instances)
    self.header = self.get_header(data_instances)
    self.callback_list.on_train_begin(data_instances, validate_data)
    # self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    use_async = False
    if with_weight(data_instances):
        if self.model_param.early_stop == "diff":
            LOGGER.warning("input data with weight, please use 'weight_diff' for 'early_stop'.")
        data_instances = scale_sample_weight(data_instances)
        self.gradient_loss_operator.set_use_sample_weight()
        LOGGER.debug(f"instance weight scaled; use weighted gradient loss operator")
        # LOGGER.debug(f"data_instances after scale: {[v[1].weight for v in list(data_instances.collect())]}")
    elif len(self.component_properties.host_party_idlist) == 1:
        LOGGER.debug(f"set_use_async")
        self.gradient_loss_operator.set_use_async()
        use_async = True
    self.transfer_variable.use_async.remote(use_async)

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

    self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                       self.encrypted_mode_calculator_param.mode,
                                                       self.encrypted_mode_calculator_param.re_encrypted_rate)
                                 for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    if not self.component_properties.is_warm_start:
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept,
                                                raise_overflow_error=False)
    else:
        self.callback_warm_start_init_iter(self.n_iter_)

    while self.n_iter_ < self.max_iter:
        self.callback_list.on_epoch_begin(self.n_iter_)
        LOGGER.info("iter:{}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # Start gradient procedure
            optim_guest_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_data, self.encrypted_calculator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)
            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(batch_data, self.n_iter_, batch_index, loss_norm)
            self.model_weights = self.optimizer.update_model(self.model_weights, optim_guest_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        self.callback_list.on_epoch_end(self.n_iter_)
        self.n_iter_ += 1

        if self.stop_training:
            break
        if self.is_converged:
            break

    self.callback_list.on_train_end()
    self.set_summary(self.get_model_summary())
def fit_binary(self, data_instances, validate_data):
    self._abnormal_detection(data_instances)
    validation_strategy = self.init_validation_strategy(data_instances, validate_data)
    LOGGER.debug(f"MODEL_STEP Start fit_binary, data count: {data_instances.count()}")

    self.header = self.get_header(data_instances)
    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    self.batch_generator.initialize_batch_generator(data_instances)
    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

    self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                       self.encrypted_mode_calculator_param.mode,
                                                       self.encrypted_mode_calculator_param.re_encrypted_rate)
                                 for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    model_shape = self.get_features_shape(data_instances)
    if self.init_param_obj.fit_intercept:
        self.init_param_obj.fit_intercept = False
    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    LOGGER.debug("model_shape: {}, w shape: {}, w: {}".format(model_shape, w.shape, w))
    self.model_weights = LinearModelWeights(w, fit_intercept=self.init_param_obj.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:" + str(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data()
        batch_index = 0
        self.optimizer.set_iters(self.n_iter_)
        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
            batch_feat_inst = self.transform(batch_data)
            LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}")

            optim_host_gradient, fore_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.encrypted_calculator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)
            LOGGER.debug('optim_host_gradient: {}'.format(optim_host_gradient))

            training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
            self.update_local_model(fore_gradient, data_instances, self.model_weights.coef_, **training_info)

            self.gradient_loss_operator.compute_loss(self.model_weights, self.optimizer,
                                                     self.n_iter_, batch_index, self.cipher_operator)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("Get is_converged flag from arbiter:{}".format(self.is_converged))

        validation_strategy.validate(self, self.n_iter_)

        self.n_iter_ += 1
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        if self.is_converged:
            break

    LOGGER.debug("Final lr weights: {}".format(self.model_weights.unboxed))
def fit_binary(self, data_instances, validate_data=None):
    LOGGER.info("Enter hetero_lr_guest fit")
    self.header = self.get_header(data_instances)
    self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)
    data_instances = data_instances.mapValues(HeteroLRGuest.load_data)
    LOGGER.debug(f"MODEL_STEP After load data, data count: {data_instances.count()}")

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

    self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                       self.encrypted_mode_calculator_param.mode,
                                                       self.encrypted_mode_calculator_param.re_encrypted_rate)
                                 for _ in range(self.batch_generator.batch_nums)]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:{}".format(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
            batch_feat_inst = batch_data
            # LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}")

            # Start gradient procedure
            LOGGER.debug("iter: {}, before compute gradient, data count: {}".format(
                self.n_iter_, batch_feat_inst.count()))
            optim_guest_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.encrypted_calculator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)
            # LOGGER.debug('optim_guest_gradient: {}'.format(optim_guest_gradient))
            # training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
            # self.update_local_model(fore_gradient, data_instances, self.model_weights.coef_, **training_info)

            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(data_instances, self.model_weights,
                                                     self.n_iter_, batch_index, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_guest_gradient)
            batch_index += 1
            # LOGGER.debug("lr_weight, iters: {}, update_model: {}".format(self.n_iter_, self.model_weights.unboxed))

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        if self.validation_strategy:
            LOGGER.debug('LR guest running validation')
            self.validation_strategy.validate(self, self.n_iter_)
            if self.validation_strategy.need_stop():
                LOGGER.debug('early stopping triggered')
                break

        self.n_iter_ += 1
        if self.is_converged:
            break

    if self.validation_strategy and self.validation_strategy.has_saved_best_model():
        self.load_model(self.validation_strategy.cur_best_model)
    self.set_summary(self.get_model_summary())
def fit_binary(self, data_instances, validate_data):
    # self._abnormal_detection(data_instances)
    # self.check_abnormal_values(data_instances)
    # self.check_abnormal_values(validate_data)
    # self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)
    self.callback_list.on_train_begin(data_instances, validate_data)

    LOGGER.debug(f"MODEL_STEP Start fit_binary, data count: {data_instances.count()}")

    self.header = self.get_header(data_instances)
    model_shape = self.get_features_shape(data_instances)
    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    self.batch_generator.initialize_batch_generator(data_instances, shuffle=self.shuffle)
    if self.batch_generator.batch_masked:
        self.batch_generator.verify_batch_legality(least_batch_size=model_shape)

    if self.transfer_variable.use_async.get(idx=0):
        LOGGER.debug(f"set_use_async")
        self.gradient_loss_operator.set_use_async()

    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

    LOGGER.info("Start initialize model.")
    # model_shape = self.get_features_shape(data_instances)
    if self.init_param_obj.fit_intercept:
        self.init_param_obj.fit_intercept = False

    if not self.component_properties.is_warm_start:
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(w, fit_intercept=self.init_param_obj.fit_intercept)
    else:
        self.callback_warm_start_init_iter(self.n_iter_)

    while self.n_iter_ < self.max_iter:
        self.callback_list.on_epoch_begin(self.n_iter_)
        LOGGER.info("iter: " + str(self.n_iter_))
        batch_data_generator = self.batch_generator.generate_batch_data(suffix=(self.n_iter_,))
        batch_index = 0
        self.optimizer.set_iters(self.n_iter_)
        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
            batch_feat_inst = batch_data
            # LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}")
            LOGGER.debug("iter: {}, batch: {}, before compute gradient, data count: {}".format(
                self.n_iter_, batch_index, batch_feat_inst.count()))

            optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst, self.cipher_operator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)
            # LOGGER.debug('optim_host_gradient: {}'.format(optim_host_gradient))

            self.gradient_loss_operator.compute_loss(self.model_weights, self.optimizer,
                                                     self.n_iter_, batch_index, self.cipher_operator,
                                                     batch_masked=self.batch_generator.batch_masked)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("Get is_converged flag from arbiter:{}".format(self.is_converged))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
        LOGGER.debug(f"flowid: {self.flowid}, step_index: {self.n_iter_}")

        self.callback_list.on_epoch_end(self.n_iter_)
        self.n_iter_ += 1

        if self.stop_training:
            break
        if self.is_converged:
            break

    self.callback_list.on_train_end()
    self.set_summary(self.get_model_summary())
def fit(self, data_instances, validate_data=None):
    """
    Train linear regression model of role host
    Parameters
    ----------
    data_instances: Table of Instance, input data
    """
    LOGGER.info("Enter hetero_linR host")
    # self._abnormal_detection(data_instances)
    # self.header = self.get_header(data_instances)
    self.prepare_fit(data_instances, validate_data)

    self.callback_list.on_train_begin(data_instances, validate_data)

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    if self.transfer_variable.use_async.get(idx=0):
        LOGGER.debug(f"set_use_async")
        self.gradient_loss_operator.set_use_async()

    self.batch_generator.initialize_batch_generator(data_instances)
    self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

    LOGGER.info("Start initialize model.")
    model_shape = self.get_features_shape(data_instances)
    if self.init_param_obj.fit_intercept:
        self.init_param_obj.fit_intercept = False

    if not self.component_properties.is_warm_start:
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept,
                                                raise_overflow_error=False)
    else:
        self.callback_warm_start_init_iter(self.n_iter_)

    while self.n_iter_ < self.max_iter:
        self.callback_list.on_epoch_begin(self.n_iter_)
        LOGGER.info("iter:" + str(self.n_iter_))
        self.optimizer.set_iters(self.n_iter_)
        batch_data_generator = self.batch_generator.generate_batch_data()
        batch_index = 0
        for batch_data in batch_data_generator:
            optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_data, self.cipher_operator, self.model_weights,
                self.optimizer, self.n_iter_, batch_index)

            self.gradient_loss_operator.compute_loss(self.model_weights, self.optimizer,
                                                     self.n_iter_, batch_index, self.cipher_operator)

            self.model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("Get is_converged flag from arbiter:{}".format(self.is_converged))

        self.callback_list.on_epoch_end(self.n_iter_)
        self.n_iter_ += 1

        if self.stop_training:
            break

        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
        if self.is_converged:
            break

    self.callback_list.on_train_end()
    self.set_summary(self.get_model_summary())