def predict(self, data_instances):
    """
    Prediction of Poisson

    Parameters
    ----------
    data_instances: DTable of Instance, input data

    Returns
    -------
    DTable, includes input data label and predict results
    """
    LOGGER.info("Start predict ...")
    header = data_instances.schema.get("header")
    self.exposure_index = self.get_exposure_index(header, self.exposure_colname)
    # bind to a local so the lambdas below close over a plain value
    exposure_index = self.exposure_index
    exposure = data_instances.mapValues(
        lambda v: HeteroPoissonBase.load_exposure(v, exposure_index))
    data_instances = data_instances.mapValues(
        lambda v: HeteroPoissonBase.load_instance(v, exposure_index))

    data_features = self.transform(data_instances)
    pred_guest = self.compute_mu(data_features, self.model_weights.coef_,
                                 self.model_weights.intercept_, exposure)
    pred_host = self.transfer_variable.host_partial_prediction.get(idx=0)

    LOGGER.info("Get prediction from Host")

    pred = pred_guest.join(pred_host, lambda g, h: g * h)
    predict_result = data_instances.join(pred, lambda d, p: [d.label, p, p, {"label": p}])
    return predict_result
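
# A minimal, self-contained sketch (not part of the federated pipeline) of why
# pred_guest and pred_host above are joined with a product: with the feature
# vector split across parties as x = (x_g, x_h) and a log link, the mean is
# mu = exp(w_g . x_g + offset) * exp(w_h . x_h). All names here (w_g, x_g,
# w_h, x_h, exposure) are illustrative assumptions, not FATE APIs.
import numpy as np

def combined_mu_sketch(w_g, x_g, w_h, x_h, exposure):
    offset = np.log(exposure)                       # log-exposure offset on the guest side
    pred_guest = np.exp(np.dot(w_g, x_g) + offset)  # guest's partial prediction
    pred_host = np.exp(np.dot(w_h, x_h))            # host's partial prediction
    return pred_guest * pred_host                   # exp of the full linear predictor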
def fit(self, data_instances, validate_data=None):
    """
    Train poisson model of role guest

    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_poisson_guest fit")
    self._abnormal_detection(data_instances)
    self.header = copy.deepcopy(self.get_header(data_instances))
    self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)

    self.exposure_index = self.get_exposure_index(self.header, self.exposure_colname)
    exposure_index = self.exposure_index
    if exposure_index > -1:
        self.header.pop(exposure_index)
        LOGGER.info("exposure provided at Guest, colname is {}".format(self.exposure_colname))
    exposure = data_instances.mapValues(
        lambda v: HeteroPoissonBase.load_exposure(v, exposure_index))
    data_instances = data_instances.mapValues(
        lambda v: HeteroPoissonBase.load_instance(v, exposure_index))

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.cipher_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)
    ]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
    self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)

    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:{}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # transforms features of raw input 'batch_data' into more representative features 'batch_feat_inst'
            batch_feat_inst = self.transform(batch_data)

            # compute offset of this batch
            batch_offset = exposure.join(
                batch_feat_inst, lambda ei, d: HeteroPoissonBase.safe_log(ei))

            # Start gradient procedure
            optimized_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                batch_feat_inst,
                self.encrypted_calculator,
                self.model_weights,
                self.optimizer,
                self.n_iter_,
                batch_index,
                batch_offset)
            LOGGER.debug("iteration:{} Guest's gradient: {}".format(self.n_iter_, optimized_gradient))
            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(
                data_instances, self.model_weights, self.n_iter_,
                batch_index, batch_offset, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optimized_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        if self.validation_strategy:
            LOGGER.debug('Poisson guest running validation')
            self.validation_strategy.validate(self, self.n_iter_)
            if self.validation_strategy.need_stop():
                LOGGER.debug('early stopping triggered')
                break

        self.n_iter_ += 1
        if self.is_converged:
            break

    if self.validation_strategy and self.validation_strategy.has_saved_best_model():
        self.load_model(self.validation_strategy.cur_best_model)
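
# Hedged, plaintext-only sketch of the update the encrypted loop above
# performs: one gradient step of Poisson regression with a log-exposure
# offset (the role played by batch_offset). X, y, exposure and lr are
# illustrative assumptions; the real computation runs under Paillier
# encryption via gradient_loss_operator.
import numpy as np

def poisson_gradient_step(w, b, X, y, exposure, lr=0.01):
    offset = np.log(exposure)        # same quantity as HeteroPoissonBase.safe_log(ei)
    mu = np.exp(X @ w + b + offset)  # predicted mean under the log link
    residual = mu - y                # gradient of the Poisson NLL w.r.t. the linear predictor
    grad_w = X.T @ residual / len(y)
    grad_b = residual.mean()
    return w - lr * grad_w, b - lr * grad_b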
def fit(self, data_instances, validate_data=None):
    """
    Train poisson model of role guest

    Parameters
    ----------
    data_instances: Table of Instance, input data
    """
    LOGGER.info("Enter hetero_poisson_guest fit")
    self._abnormal_detection(data_instances)
    self.header = copy.deepcopy(self.get_header(data_instances))
    self.callback_list.on_train_begin(data_instances, validate_data)
    # self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)

    if with_weight(data_instances):
        LOGGER.warning("input data with weight. Poisson regression does not support weighted training.")

    self.exposure_index = self.get_exposure_index(self.header, self.exposure_colname)
    exposure_index = self.exposure_index
    if exposure_index > -1:
        self.header.pop(exposure_index)
        LOGGER.info("Guest provides exposure value.")
    exposure = data_instances.mapValues(
        lambda v: HeteroPoissonBase.load_exposure(v, exposure_index))
    data_instances = data_instances.mapValues(
        lambda v: HeteroPoissonBase.load_instance(v, exposure_index))

    self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

    LOGGER.info("Generate mini-batch from input data")
    self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)

    self.encrypted_calculator = [
        EncryptModeCalculator(
            self.cipher_operator,
            self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        for _ in range(self.batch_generator.batch_nums)
    ]

    LOGGER.info("Start initialize model.")
    LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
    model_shape = self.get_features_shape(data_instances)
    if not self.component_properties.is_warm_start:
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.fit_intercept, raise_overflow_error=False)
    else:
        self.callback_warm_start_init_iter(self.n_iter_)

    while self.n_iter_ < self.max_iter:
        self.callback_list.on_epoch_begin(self.n_iter_)
        LOGGER.info("iter:{}".format(self.n_iter_))
        # each iter will get the same batch_data_generator
        batch_data_generator = self.batch_generator.generate_batch_data()
        self.optimizer.set_iters(self.n_iter_)

        batch_index = 0
        for batch_data in batch_data_generator:
            # compute offset of this batch
            batch_offset = exposure.join(
                batch_data, lambda ei, d: HeteroPoissonBase.safe_log(ei))

            # Start gradient procedure
            optimized_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                batch_data,
                self.encrypted_calculator,
                self.model_weights,
                self.optimizer,
                self.n_iter_,
                batch_index,
                batch_offset)
            # LOGGER.debug("iteration:{} Guest's gradient: {}".format(self.n_iter_, optimized_gradient))
            loss_norm = self.optimizer.loss_norm(self.model_weights)
            self.gradient_loss_operator.compute_loss(
                batch_data, self.model_weights, self.n_iter_,
                batch_index, batch_offset, loss_norm)

            self.model_weights = self.optimizer.update_model(self.model_weights, optimized_gradient)
            batch_index += 1

        self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
        LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

        self.callback_list.on_epoch_end(self.n_iter_)
        self.n_iter_ += 1

        if self.stop_training:
            break

        if self.is_converged:
            break

    self.callback_list.on_train_end()
    self.set_summary(self.get_model_summary())
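
# Illustrative sketch of the split that HeteroPoissonBase.load_exposure /
# load_instance appear to perform above: pull the exposure column out of a
# row's feature vector and keep the remaining features for training. The Row
# dataclass is a stand-in assumption for FATE's Instance, not the real class.
from dataclasses import dataclass
import numpy as np

@dataclass
class Row:
    features: np.ndarray
    label: float

def split_exposure(row, exposure_index):
    exposure = row.features[exposure_index]  # exposure value for this row
    trimmed = Row(np.delete(row.features, exposure_index), row.label)  # features without exposure
    return exposure, trimmed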