Ejemplo n.º 1
0
    def predict(self, data_instances):
        """
        Prediction of Poisson

        Computes the guest-side prediction with ``compute_mu``, multiplies it
        key-wise with the partial prediction fetched from the host, and joins
        the product back onto the instances.

        Parameters
        ----------
        data_instances: DTable of Instance, input data

        Returns
        -------
        DTable
            include input data label, predict results; each value is
            ``[label, prediction, prediction, {"label": prediction}]``
        """
        LOGGER.info("Start predict ...")

        schema_header = data_instances.schema.get("header")
        exposure_idx = self.get_exposure_index(schema_header, self.exposure_colname)
        self.exposure_index = exposure_idx

        # The helpers below only close over the local index, not over ``self``
        # (presumably to keep the functions handed to ``mapValues`` lightweight
        # -- confirm against the table backend).
        def _pick_exposure(instance):
            return HeteroPoissonBase.load_exposure(instance, exposure_idx)

        def _reload_instance(instance):
            return HeteroPoissonBase.load_instance(instance, exposure_idx)

        # Exposure is derived from the raw instances before they are re-mapped.
        exposure = data_instances.mapValues(_pick_exposure)
        data_instances = data_instances.mapValues(_reload_instance)

        features = self.transform(data_instances)

        weights = self.model_weights
        guest_part = self.compute_mu(features, weights.coef_, weights.intercept_, exposure)
        host_part = self.transfer_variable.host_partial_prediction.get(idx=0)

        LOGGER.info("Get prediction from Host")

        # Final prediction is the product of the guest and host parts per key.
        combined = guest_part.join(host_part, lambda guest_mu, host_mu: guest_mu * host_mu)
        return data_instances.join(
            combined,
            lambda inst, mu: [inst.label, mu, mu, {"label": mu}])
Ejemplo n.º 2
0
    def fit(self, data_instances, validate_data=None):
        """
        Train poisson model of role guest

        Parameters
        ----------
        data_instances: DTable of Instance, input data
        validate_data: optional, passed to ``init_validation_strategy``
            together with ``data_instances``; defaults to None
        """
        LOGGER.info("Enter hetero_poisson_guest fit")
        self._abnormal_detection(data_instances)
        self.header = copy.deepcopy(self.get_header(data_instances))

        self.validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)

        exposure_idx = self.get_exposure_index(self.header, self.exposure_colname)
        self.exposure_index = exposure_idx
        if exposure_idx > -1:
            # Remove the exposure column name from the (deep-copied) header.
            self.header.pop(exposure_idx)
            LOGGER.info("expsoure provided at Guest, colname is {}".format(
                self.exposure_colname))

        def _pick_exposure(instance):
            return HeteroPoissonBase.load_exposure(instance, exposure_idx)

        def _reload_instance(instance):
            return HeteroPoissonBase.load_instance(instance, exposure_idx)

        def _log_exposure(exposure_value, _instance):
            # Only the exposure side of the join is used; the joined instance
            # just restricts the result to the keys of the current batch.
            return HeteroPoissonBase.safe_log(exposure_value)

        # Exposure is derived from the raw instances before they are re-mapped.
        exposure = data_instances.mapValues(_pick_exposure)
        data_instances = data_instances.mapValues(_reload_instance)

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(data_instances,
                                                        self.batch_size)
        # One encrypted-mode calculator per mini-batch.
        calculators = []
        for _ in range(self.batch_generator.batch_nums):
            calculators.append(
                EncryptModeCalculator(
                    self.cipher_operator,
                    self.encrypted_mode_calculator_param.mode,
                    self.encrypted_mode_calculator_param.re_encrypted_rate))
        self.encrypted_calculator = calculators

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(
            self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        initial_weights = self.initializer.init_model(
            model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            initial_weights, fit_intercept=self.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batches = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            for batch_index, batch_data in enumerate(batches):
                # transforms features of raw input 'batch_data' into more
                # representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data)
                # compute offset of this batch: safe_log of each exposure value
                batch_offset = exposure.join(batch_feat_inst, _log_exposure)

                # Start gradient procedure; only the first returned item is used
                optimized_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst,
                    self.encrypted_calculator,
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index,
                    batch_offset)
                LOGGER.debug("iteration:{} Guest's gradient: {}".format(
                    self.n_iter_, optimized_gradient))

                # Loss is computed with the weights from before this batch's update.
                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(
                    data_instances,
                    self.model_weights,
                    self.n_iter_,
                    batch_index,
                    batch_offset,
                    loss_norm)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optimized_gradient)

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))
            LOGGER.info("iter: {},  is_converged: {}".format(
                self.n_iter_, self.is_converged))

            if self.validation_strategy:
                LOGGER.debug('Poisson guest running validation')
                self.validation_strategy.validate(self, self.n_iter_)
                if self.validation_strategy.need_stop():
                    # Leaves the loop without incrementing n_iter_.
                    LOGGER.debug('early stopping triggered')
                    break

            self.n_iter_ += 1
            if self.is_converged:
                break

        # Reload the best model recorded by the validation strategy, if any.
        if self.validation_strategy:
            if self.validation_strategy.has_saved_best_model():
                self.load_model(self.validation_strategy.cur_best_model)
Ejemplo n.º 3
0
    def fit(self, data_instances, validate_data=None):
        """
        Train poisson model of role guest

        Parameters
        ----------
        data_instances: Table of Instance, input data
        validate_data: optional, forwarded to ``callback_list.on_train_begin``
            together with ``data_instances``; defaults to None
        """
        LOGGER.info("Enter hetero_poisson_guest fit")
        self._abnormal_detection(data_instances)
        self.header = copy.deepcopy(self.get_header(data_instances))
        self.callback_list.on_train_begin(data_instances, validate_data)

        if with_weight(data_instances):
            LOGGER.warning(
                "input data with weight. Poisson regression does not support weighted training."
            )

        exposure_idx = self.get_exposure_index(self.header, self.exposure_colname)
        self.exposure_index = exposure_idx
        if exposure_idx > -1:
            # Remove the exposure column name from the (deep-copied) header.
            self.header.pop(exposure_idx)
            LOGGER.info("Guest provides exposure value.")

        def _pick_exposure(instance):
            return HeteroPoissonBase.load_exposure(instance, exposure_idx)

        def _reload_instance(instance):
            return HeteroPoissonBase.load_instance(instance, exposure_idx)

        def _log_exposure(exposure_value, _instance):
            # Only the exposure side of the join is used; the joined instance
            # just restricts the result to the keys of the current batch.
            return HeteroPoissonBase.safe_log(exposure_value)

        # Exposure is derived from the raw instances before they are re-mapped.
        exposure = data_instances.mapValues(_pick_exposure)
        data_instances = data_instances.mapValues(_reload_instance)

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(data_instances,
                                                        self.batch_size)
        # One encrypted-mode calculator per mini-batch.
        calculators = []
        for _ in range(self.batch_generator.batch_nums):
            calculators.append(
                EncryptModeCalculator(
                    self.cipher_operator,
                    self.encrypted_mode_calculator_param.mode,
                    self.encrypted_mode_calculator_param.re_encrypted_rate))
        self.encrypted_calculator = calculators

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(
            self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        if self.component_properties.is_warm_start:
            # Warm start: weights are not re-initialised here.
            self.callback_warm_start_init_iter(self.n_iter_)
        else:
            initial_weights = self.initializer.init_model(
                model_shape, init_params=self.init_param_obj)
            self.model_weights = LinearModelWeights(
                initial_weights,
                fit_intercept=self.fit_intercept,
                raise_overflow_error=False)

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batches = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            for batch_index, batch_data in enumerate(batches):
                # compute offset of this batch: safe_log of each exposure value
                batch_offset = exposure.join(batch_data, _log_exposure)

                # Start gradient procedure
                optimized_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_data,
                    self.encrypted_calculator,
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index,
                    batch_offset)

                # Loss is computed with the weights from before this batch's update.
                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(
                    batch_data,
                    self.model_weights,
                    self.n_iter_,
                    batch_index,
                    batch_offset,
                    loss_norm)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optimized_gradient)

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))
            LOGGER.info("iter: {},  is_converged: {}".format(
                self.n_iter_, self.is_converged))

            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1

            # Leave the loop on an external stop request or on convergence.
            if self.stop_training or self.is_converged:
                break

        self.callback_list.on_train_end()
        self.set_summary(self.get_model_summary())