Code example #1
    def compute_gradient_procedure(self, cipher_operator, optimizer, n_iter_,
                                   batch_index):
        self.batch_index = batch_index
        self.n_iter = n_iter_
        # LOGGER.debug("In compute_gradient_procedure, n_iter: {}, batch_index: {}, iter_k: {}".format(
        #     self.n_iter, self.batch_index, self.iter_k
        # ))

        optimizer.set_hess_matrix(self.opt_Hess)
        delta_grad = self.gradient_computer.compute_gradient_procedure(
            cipher_operator, optimizer, n_iter_, batch_index)
        self._update_w_tilde(
            LinearModelWeights(delta_grad, fit_intercept=False))
        if self.iter_k % self.update_interval_L == 0:
            self.count_t += 1
            # LOGGER.debug("Before division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))
            self.this_w_tilde /= self.update_interval_L
            # LOGGER.debug("After division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))

            if self.count_t > 0:
                LOGGER.info(
                    "iter_k: {}, count_t: {}, start to update hessian".format(
                        self.iter_k, self.count_t))
                self._update_hessian(cipher_operator)
            self.last_w_tilde = self.this_w_tilde
            self.this_w_tilde = LinearModelWeights(
                np.zeros_like(self.last_w_tilde.unboxed),
                self.last_w_tilde.fit_intercept)
        return delta_grad
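The SQN-style procedure above accumulates into this_w_tilde; every update_interval_L iterations the accumulator is divided by the interval to form a window average, the Hessian information is refreshed, and the accumulator is reset to zeros. A minimal, stand-alone sketch of that bookkeeping (plain NumPy with illustrative names and values, not FATE code):

    import numpy as np

    update_interval_L = 3                # hypothetical Hessian-refresh interval
    w_tilde_sum = np.zeros(4)            # plays the role of this_w_tilde

    for iter_k in range(1, 10):
        delta = np.random.rand(4)        # stand-in for the quantity being accumulated
        w_tilde_sum += delta             # corresponds to _update_w_tilde(...)
        if iter_k % update_interval_L == 0:
            w_tilde_avg = w_tilde_sum / update_interval_L  # average over the window
            # ... the Hessian would be refreshed from w_tilde_avg here ...
            w_tilde_sum = np.zeros_like(w_tilde_sum)       # reset for the next window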
Code example #2
    def regularization_update(self, model_weights: LinearModelWeights, grad,
                              prev_round_weights: LinearModelWeights = None):
        # LOGGER.debug(f"In regularization_update, input model_weights: {model_weights.unboxed}")

        if self.penalty == consts.L1_PENALTY:
            model_weights = self._l1_updator(model_weights, grad)
        elif self.penalty == consts.L2_PENALTY:
            model_weights = self._l2_updator(model_weights, grad)
        else:
            new_vars = model_weights.unboxed - grad
            model_weights = LinearModelWeights(new_vars,
                                               model_weights.fit_intercept,
                                               model_weights.raise_overflow_error)

        if prev_round_weights is not None:  # additional proximal term
            coef_ = model_weights.unboxed

            if model_weights.fit_intercept:
                coef_without_intercept = coef_[: -1]
            else:
                coef_without_intercept = coef_

            coef_without_intercept -= self.mu * (model_weights.coef_ - prev_round_weights.coef_)

            if model_weights.fit_intercept:
                new_coef_ = np.append(coef_without_intercept, coef_[-1])
            else:
                new_coef_ = coef_without_intercept

            model_weights = LinearModelWeights(new_coef_,
                                               model_weights.fit_intercept,
                                               model_weights.raise_overflow_error)
        return model_weights
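When prev_round_weights is supplied, the method above applies a proximal correction w ← w − mu · (w − w_prev) to the coefficients while leaving the intercept (the last entry, if present) untouched. A tiny NumPy illustration of just that correction (made-up values, not FATE code):

    import numpy as np

    mu = 0.01
    coef = np.array([0.5, -0.2, 0.8])       # current coefficients (no intercept)
    prev_coef = np.array([0.4, -0.1, 0.7])  # coefficients from the previous round

    coef -= mu * (coef - prev_coef)         # proximal pull toward the previous round
    print(coef)                             # -> [ 0.499 -0.199  0.799]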
Code example #3
File: optimizer.py  Project: UnreliableBuilder/Fate
    def hess_vector_norm(self, delta_s: LinearModelWeights):
        if self.penalty == consts.L1_PENALTY:
            return LinearModelWeights(np.zeros_like(delta_s.unboxed),
                                      fit_intercept=delta_s.fit_intercept)
        elif self.penalty == consts.L2_PENALTY:
            return LinearModelWeights(self.alpha * np.array(delta_s.unboxed),
                                      fit_intercept=delta_s.fit_intercept)
        else:
            return LinearModelWeights(np.zeros_like(delta_s.unboxed),
                                      fit_intercept=delta_s.fit_intercept)
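For an L2 penalty, then, the Hessian-vector correction is simply alpha * delta_s, while for L1 (or no penalty) it is a zero vector of the same shape. A quick hand check with plain NumPy (hypothetical alpha):

    import numpy as np

    alpha = 0.1
    delta_s = np.array([1.0, -2.0, 0.5])
    print(alpha * delta_s)           # L2 case         -> [ 0.1  -0.2   0.05]
    print(np.zeros_like(delta_s))    # L1 / no penalty -> [0. 0. 0.]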
Code example #4
File: optimizer_test.py  Project: zpskt/FATE
    def test_optimizer(self):
        model_weights = LinearModelWeights(
            np.array([
                0.10145129, 0.39987222, -0.96630206, -0.41208423, -0.24609715,
                -0.70518652, 0.71478064, 0.57973894, 0.5703622, -0.45482125,
                0.32676194, -0.00648212, 0.35542874, -0.26412695, -0.07964603,
                1.2158522, -0.41255564, -0.01686044, -0.99897542, 1.56407211,
                0.52040711, 0.24568055, 0.4880494, 0.52269909, -0.14431923,
                0.03282471, 0.09437969, 0.21407206, -0.270922
            ]), True)

        prev_model_weights = LinearModelWeights(
            np.array([
                0.10194331, 0.40062114, -0.96597859, -0.41202348, -0.24587005,
                -0.7047801, 0.71515712, 0.58045583, 0.57079086, -0.45473676,
                0.32775863, -0.00633238, 0.35567219, -0.26343469, -0.07964763,
                1.2165642, -0.41244749, -0.01589344, -0.99862982, 1.56498698,
                0.52058152, 0.24572171, 0.48809946, 0.52272993, -0.14330367,
                0.03283002, 0.09439601, 0.21433497, -0.27011673
            ]), True)

        prev_model_weights_null = None

        eps = 0.00001

        ## 1: alpha = 0, no regularization
        learning_rate = 0.2
        alpha = 0
        penalty = "L2"
        decay = "0.2"
        decay_sqrt = "true"
        mu = 0.01

        init_params = [learning_rate, alpha, penalty, decay, decay_sqrt, mu]
        optimizer = _SgdOptimizer(*init_params)
        loss_norm = optimizer.loss_norm(model_weights, prev_model_weights_null)
        self.assertTrue(math.fabs(loss_norm) <= eps)  # == 0

        ## 2
        alpha = 0.1
        init_params = [learning_rate, alpha, penalty, decay, decay_sqrt, mu]
        optimizer = _SgdOptimizer(*init_params)
        loss_norm = optimizer.loss_norm(model_weights, prev_model_weights_null)
        print("loss_norm = {}".format(loss_norm))
        self.assertTrue(math.fabs(loss_norm - 0.47661579875266186) <= eps)

        ## 3
        loss_norm = optimizer.loss_norm(model_weights, prev_model_weights)
        print("loss_norm = {}".format(loss_norm))
        self.assertTrue(math.fabs(loss_norm - 0.47661583737200075) <= eps)
Code example #5
File: hetero_sqn_gradient.py  Project: zpskt/FATE
    def compute_gradient_procedure(self, *args):
        data_instances = args[0]
        encrypted_calculator = args[1]
        model_weights = args[2]
        optimizer = args[3]
        self.batch_index = args[5]
        self.n_iter = args[4]
        cipher_operator = encrypted_calculator[0].encrypter
        # one_data = data_instances.first()
        # LOGGER.debug("data shape: {}, model weights shape: {}, model weights coef: {}, intercept: {}".format(
        #     one_data[1].features.shape, model_weights.unboxed.shape, model_weights.coef_, model_weights.intercept_
        # ))

        gradient_results = self.gradient_computer.compute_gradient_procedure(*args)
        self._update_w_tilde(model_weights)

        if self.iter_k % self.update_interval_L == 0:
            self.count_t += 1
            # LOGGER.debug("Before division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))
            self.this_w_tilde /= self.update_interval_L
            # LOGGER.debug("After division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))

            if self.count_t > 0:
                LOGGER.info("iter_k: {}, count_t: {}, start to update hessian".format(self.iter_k, self.count_t))
                self._update_hessian(data_instances, optimizer, cipher_operator)
            self.last_w_tilde = self.this_w_tilde
            self.this_w_tilde = LinearModelWeights(np.zeros_like(self.last_w_tilde.unboxed),
                                                   self.last_w_tilde.fit_intercept)
            # LOGGER.debug("After replace, last_w_tilde: {}, this_w_tilde: {}".format(self.last_w_tilde.unboxed,
            #                                                                         self.this_w_tilde.unboxed))

        return gradient_results
Code example #6
File: hetero_sqn_gradient.py  Project: yubo1993/FATE
    def compute_gradient_procedure(self, *args, **kwargs):
        data_instances = args[0]
        cipher = args[1]
        model_weights = args[2]
        optimizer = args[3]
        self.batch_index = args[5]
        self.n_iter = args[4]

        gradient_results = self.gradient_computer.compute_gradient_procedure(
            *args)
        self._update_w_tilde(model_weights)

        if self.iter_k % self.update_interval_L == 0:
            self.count_t += 1
            # LOGGER.debug("Before division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))
            self.this_w_tilde /= self.update_interval_L
            # LOGGER.debug("After division, this_w_tilde: {}".format(self.this_w_tilde.unboxed))

            if self.count_t > 0:
                LOGGER.info(
                    "iter_k: {}, count_t: {}, start to update hessian".format(
                        self.iter_k, self.count_t))
                self._update_hessian(data_instances, optimizer, cipher)
            self.last_w_tilde = self.this_w_tilde
            self.this_w_tilde = LinearModelWeights(
                np.zeros_like(self.last_w_tilde.unboxed),
                self.last_w_tilde.fit_intercept,
                raise_overflow_error=self.raise_weight_overflow_error)
            # LOGGER.debug("After replace, last_w_tilde: {}, this_w_tilde: {}".format(self.last_w_tilde.unboxed,
            #                                                                         self.this_w_tilde.unboxed))

        return gradient_results
Code example #7
    def init_model(self, model_shape, init_params, data_instance=None):
        init_method = init_params.init_method
        fit_intercept = init_params.fit_intercept

        random_seed = init_params.random_seed
        np.random.seed(random_seed)

        if fit_intercept:
            if isinstance(model_shape, int):
                model_shape += 1
            else:
                new_shape = []
                for ds in model_shape:
                    new_shape.append(ds + 1)
                model_shape = tuple(new_shape)

        if init_method == 'random_normal':
            w = self.random_normal(model_shape)
        elif init_method == 'random_uniform':
            w = self.random_uniform(model_shape)
        elif init_method == 'ones':
            w = self.ones(model_shape)
        elif init_method == 'zeros':
            w = self.zeros(model_shape, fit_intercept, data_instance)
        elif init_method == 'const':
            init_const = init_params.init_const
            w = self.constant(model_shape, const=init_const)
        else:
            raise NotImplementedError("Initial method cannot be recognized: {}".format(init_method))

        LOGGER.debug("Initialed model: {}".format(w))
        lr_weights = LinearModelWeights(w, init_params.fit_intercept)
        return lr_weights
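Note the intercept handling above: when fit_intercept is set, each dimension of model_shape is grown by one so the intercept gets its own slot next to the coefficients. A small stand-alone illustration of that shape adjustment (hypothetical helper, not part of FATE):

    def grow_shape_for_intercept(model_shape, fit_intercept):
        # Mirrors the shape handling above: add one slot for the intercept term.
        if not fit_intercept:
            return model_shape
        if isinstance(model_shape, int):
            return model_shape + 1
        return tuple(ds + 1 for ds in model_shape)

    print(grow_shape_for_intercept(30, True))     # -> 31
    print(grow_shape_for_intercept((30,), True))  # -> (31,)
    print(grow_shape_for_intercept(30, False))    # -> 30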
Code example #8
    def load_single_model(self, single_model_obj):
        LOGGER.info("start to load single model")

        if self.is_respectively_reveal:
            self.load_single_model_weight(single_model_obj)
        else:
            feature_shape = len(self.header)
            tmp_vars = [None] * feature_shape
            weight_dict = dict(single_model_obj.encrypted_weight)
            for idx, header_name in enumerate(self.header):
                cipher_weight = weight_dict.get(header_name)
                public_key = PaillierPublicKey(int(cipher_weight.public_key.n))
                cipher_text = int(cipher_weight.cipher_text)
                exponent = int(cipher_weight.exponent)
                is_obfuscator = cipher_weight.is_obfuscator
                coef_i = PaillierEncryptedNumber(public_key, cipher_text, exponent)
                if is_obfuscator:
                    coef_i.apply_obfuscator()

                tmp_vars[idx] = coef_i

            self.model_weights = LinearModelWeights(tmp_vars, fit_intercept=self.fit_intercept)

        self.n_iter_ = single_model_obj.iters

        return self
Code example #9
    def _init_model(self, params):
        super()._init_model(params)
        self.model_weights = LinearModelWeights(
            [], fit_intercept=self.fit_intercept)
        self.one_vs_rest_obj = one_vs_rest_factory(self,
                                                    role=self.role,
                                                    mode=self.mode,
                                                    has_arbiter=True)
Code example #10
    def regularization_update(self,
                              model_weights: LinearModelWeights,
                              grad,
                              prev_round_weights: LinearModelWeights = None):
        LOGGER.debug(
            f"In regularization_update, input model_weights: {model_weights.unboxed}"
        )

        if self.penalty == consts.L1_PENALTY:
            model_weights = self._l1_updator(model_weights, grad)
        elif self.penalty == consts.L2_PENALTY:
            model_weights = self._l2_updator(model_weights, grad)
        else:
            new_vars = model_weights.unboxed - grad
            model_weights = LinearModelWeights(new_vars,
                                               model_weights.fit_intercept)

        if prev_round_weights is not None:  # additional proximal term
            coef_ = model_weights.unboxed

            if model_weights.fit_intercept:
                coef_without_intercept = coef_[:-1]
            else:
                coef_without_intercept = coef_

            LOGGER.debug(
                "before applying additional proximal terms, weights {}".format(
                    coef_without_intercept))
            coef_without_intercept -= self.mu * (model_weights.coef_ -
                                                 prev_round_weights.coef_)
            LOGGER.debug(
                "after applying additional proximal terms, new weights {}, with difference {}"
                .format(coef_without_intercept,
                        model_weights.coef_ - prev_round_weights.coef_))

            if model_weights.fit_intercept:
                new_coef_ = np.append(coef_without_intercept, coef_[-1])
            else:
                new_coef_ = coef_without_intercept

            model_weights = LinearModelWeights(new_coef_,
                                               model_weights.fit_intercept)
        LOGGER.debug(
            f"In regularization_update, model_weights: {model_weights.unboxed},"
            f" grad: {grad}")
        return model_weights
Code example #11
File: homo_lr_arbiter.py  Project: zark7777/FATE
    def fit(self, data_instances=None, validate_data=None):
        self._server_check_data()

        host_ciphers = self.cipher.paillier_keygen(
            key_length=self.model_param.encrypt_param.key_length,
            suffix=('fit', ))
        host_has_no_cipher_ids = [
            idx for idx, cipher in host_ciphers.items() if cipher is None
        ]
        self.re_encrypt_times = self.cipher.set_re_cipher_time(host_ciphers)
        max_iter = self.max_iter
        # validation_strategy = self.init_validation_strategy()

        while self.n_iter_ < max_iter + 1:
            suffix = (self.n_iter_, )

            if ((self.n_iter_ + 1) % self.aggregate_iters
                    == 0) or self.n_iter_ == max_iter:
                merged_model = self.aggregator.aggregate_and_broadcast(
                    ciphers_dict=host_ciphers, suffix=suffix)
                total_loss = self.aggregator.aggregate_loss(
                    host_has_no_cipher_ids, suffix)
                self.callback_loss(self.n_iter_, total_loss)
                self.loss_history.append(total_loss)
                if self.use_loss:
                    converge_var = total_loss
                else:
                    converge_var = np.array(merged_model.unboxed)

                self.is_converged = self.aggregator.send_converge_status(
                    self.converge_func.is_converge, (converge_var, ),
                    suffix=(self.n_iter_, ))
                LOGGER.info(
                    "n_iters: {}, total_loss: {}, converge flag is :{}".format(
                        self.n_iter_, total_loss, self.is_converged))

                self.model_weights = LogisticRegressionWeights(
                    merged_model.unboxed,
                    self.model_param.init_param.fit_intercept)
                if self.header is None:
                    self.header = [
                        'x' + str(i)
                        for i in range(len(self.model_weights.coef_))
                    ]

                if self.is_converged or self.n_iter_ == max_iter:
                    break

            self.cipher.re_cipher(iter_num=self.n_iter_,
                                  re_encrypt_times=self.re_encrypt_times,
                                  host_ciphers_dict=host_ciphers,
                                  re_encrypt_batches=self.re_encrypt_batches)

            # validation_strategy.validate(self, self.n_iter_)
            self.n_iter_ += 1

        LOGGER.info("Finish Training task, total iters: {}".format(
            self.n_iter_))
Code example #12
File: optimizer.py  Project: anonyauth2020/batchcrypt
    def _l2_updator(self, lr_weights: LinearModelWeights, gradient):
        """
        For l2 regularization, the regular term has been added in gradients.
        """

        new_weights = lr_weights.unboxed - gradient
        new_param = LinearModelWeights(new_weights, lr_weights.fit_intercept)

        return new_param
Code example #13
    def _init_model_variables(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj,
                                        data_instance=data_instances)
        model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)
        return model_weights
Code example #14
File: optimizer.py  Project: anonyauth2020/batchcrypt
    def regularization_update(self, model_weights: LinearModelWeights, grad):
        if self.penalty == consts.L1_PENALTY:
            model_weights = self._l1_updator(model_weights, grad)
        elif self.penalty == consts.L2_PENALTY:
            model_weights = self._l2_updator(model_weights, grad)
        else:
            new_vars = model_weights.unboxed - grad
            model_weights = LinearModelWeights(new_vars,
                                               model_weights.fit_intercept)
        return model_weights
Code example #15
    def load_single_model_weight(self, single_model_obj):
        feature_shape = len(self.header)
        tmp_vars = np.zeros(feature_shape)
        weight_dict = dict(single_model_obj.weight)

        for idx, header_name in enumerate(self.header):
            tmp_vars[idx] = weight_dict.get(header_name)

        if self.fit_intercept:
            tmp_vars = np.append(tmp_vars, single_model_obj.intercept)
        self.model_weights = LinearModelWeights(tmp_vars, fit_intercept=self.fit_intercept)
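The loader above rebuilds the weight vector by looking up each header name in a dict and, when an intercept is used, appending it as the last entry. A minimal stand-alone illustration of that ordering (made-up names and values):

    import numpy as np

    header = ["x0", "x1", "x2"]
    weight_dict = {"x1": 0.2, "x0": 0.1, "x2": -0.3}
    intercept = 0.05

    tmp_vars = np.zeros(len(header))
    for idx, name in enumerate(header):
        tmp_vars[idx] = weight_dict.get(name)
    tmp_vars = np.append(tmp_vars, intercept)  # intercept stored as the last entry
    print(tmp_vars)                            # -> [ 0.1   0.2  -0.3   0.05]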
Code example #16
    def load_single_model(self, single_model_obj):
        LOGGER.info("It's a binary task, start to load single model")

        if self.role == consts.GUEST or self.is_respectively_reveal:
            feature_shape = len(self.header)
            tmp_vars = np.zeros(feature_shape)
            weight_dict = dict(single_model_obj.weight)

            for idx, header_name in enumerate(self.header):
                tmp_vars[idx] = weight_dict.get(header_name)

            if self.fit_intercept:
                tmp_vars = np.append(tmp_vars, single_model_obj.intercept)
            self.model_weights = LinearModelWeights(
                tmp_vars, fit_intercept=self.fit_intercept)

        self.n_iter_ = single_model_obj.iters
        return self
Code example #17
    def _l1_updator(self, model_weights: LinearModelWeights, gradient):
        coef_ = model_weights.coef_
        if model_weights.fit_intercept:
            gradient_without_intercept = gradient[: -1]
        else:
            gradient_without_intercept = gradient

        new_weights = np.sign(coef_ - gradient_without_intercept) * np.maximum(0, np.abs(
            coef_ - gradient_without_intercept) - self.shrinkage_val)

        if model_weights.fit_intercept:
            new_weights = np.append(new_weights, model_weights.intercept_)
            new_weights[-1] -= gradient[-1]
        new_param = LinearModelWeights(new_weights, model_weights.fit_intercept, model_weights.raise_overflow_error)
        # LOGGER.debug("In _l1_updator, original weight: {}, new_weights: {}".format(
        #     model_weights.unboxed, new_weights
        # ))
        return new_param
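The L1 branch above is the standard soft-thresholding (proximal) operator applied to coef_ - gradient, with the intercept updated afterwards by a plain gradient step. A minimal NumPy sketch of the thresholding itself (illustrative shrinkage value):

    import numpy as np

    def soft_threshold(z, shrinkage_val):
        # sign(z) * max(0, |z| - shrinkage): small entries shrink to exactly zero
        return np.sign(z) * np.maximum(0, np.abs(z) - shrinkage_val)

    z = np.array([0.30, -0.05, -0.40])  # coef_ - gradient_without_intercept
    print(soft_threshold(z, 0.1))       # -> [ 0.2 -0.  -0.3]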
Code example #18
File: hetero_lr_host.py  Project: FederatedAI/FATE
    def load_single_model(self, single_model_obj):
        super(HeteroLRHost, self).load_single_model(single_model_obj)
        if not self.is_respectively_reveal:
            feature_shape = len(self.header)
            tmp_vars = [None] * feature_shape
            weight_dict = dict(single_model_obj.encrypted_weight)
            for idx, header_name in enumerate(self.header):
                cipher_weight = weight_dict.get(header_name)
                public_key = PaillierPublicKey(int(cipher_weight.public_key.n))
                cipher_text = int(cipher_weight.cipher_text)
                exponent = int(cipher_weight.exponent)
                is_obfuscator = cipher_weight.is_obfuscator
                coef_i = PaillierEncryptedNumber(public_key, cipher_text,
                                                 exponent)
                if is_obfuscator:
                    coef_i.apply_obfuscator()

                tmp_vars[idx] = coef_i

            self.model_weights = LinearModelWeights(
                tmp_vars, fit_intercept=self.fit_intercept)
Code example #19
    def test_compute_fore_gradient(self):
        # fore_gradient = self.hetero_lr_gradient.compute_and_aggregate_forwards(self.data_inst, self.wx)
        model_weights = LinearModelWeights(l=self.w, fit_intercept=False)

        class EncryptedCalculator(object):
            encrypter = self.paillier_encrypt

            def encrypt_row(self, row):
                return np.array([self.encrypter.encrypt(row)])

            def encrypt(self, input_data):
                return input_data.mapValues(self.encrypt_row)

        encrypted_calculator = [EncryptedCalculator()]
        batch_index = 0
        fore_gradient = self.hetero_lr_gradient.compute_and_aggregate_forwards(
            self.data_inst, model_weights, encrypted_calculator, batch_index)

        fore_gradient_local = [
            self.paillier_encrypt.decrypt(iterator[1])
            for iterator in fore_gradient.collect()
        ]

        self.assertListEqual(fore_gradient_local, self.fore_gradient_local)
Code example #20
    def fit(self, data_instances, validate_data=None):
        """
        Train linR model of role guest
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_linR_guest fit")
        self._abnormal_detection(data_instances)
        self.header = self.get_header(data_instances)

        self.validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(
            data_instances, self.batch_size)
        self.gradient_loss_operator.set_total_batch_nums(
            self.batch_generator.batch_nums)

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(
            self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data)

                # Start gradient procedure
                optim_guest_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.encrypted_calculator,
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)

                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(
                    data_instances, self.n_iter_, batch_index, loss_norm)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optim_guest_gradient)
                batch_index += 1
                # LOGGER.debug(
                #     "model_weights, iters: {}, update_model: {}".format(self.n_iter_, self.model_weights.unboxed))

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))
            LOGGER.info("iter: {},  is_converged: {}".format(
                self.n_iter_, self.is_converged))

            # LOGGER.debug("model weights is {}".format(self.model_weights.coef_))

            if self.validation_strategy:
                LOGGER.debug('LinR guest running validation')
                self.validation_strategy.validate(self, self.n_iter_)
                if self.validation_strategy.need_stop():
                    LOGGER.debug('early stopping triggered')
                    break

            self.n_iter_ += 1
            if self.is_converged:
                break
        if self.validation_strategy and self.validation_strategy.has_saved_best_model(
        ):
            self.load_model(self.validation_strategy.cur_best_model)
Code example #21
File: hetero_poisson_host.py  Project: zpskt/FATE
    def fit(self, data_instances, validate_data=None):
        """
        Train poisson regression model of role host
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_poisson host")
        self._abnormal_detection(data_instances)

        self.validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)

        self.header = self.get_header(data_instances)
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        self.batch_generator.initialize_batch_generator(data_instances)

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:" + str(self.n_iter_))

            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)

            batch_index = 0
            for batch_data in batch_data_generator:
                batch_feat_inst = self.transform(batch_data)
                optim_host_gradient, _ = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.encrypted_calculator,
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)

                self.gradient_loss_operator.compute_loss(
                    batch_feat_inst, self.model_weights,
                    self.encrypted_calculator, self.optimizer, self.n_iter_,
                    batch_index, self.cipher_operator)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optim_host_gradient)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))

            LOGGER.info("Get is_converged flag from arbiter:{}".format(
                self.is_converged))

            if self.validation_strategy:
                LOGGER.debug('Poisson host running validation')
                self.validation_strategy.validate(self, self.n_iter_)
                if self.validation_strategy.need_stop():
                    LOGGER.debug('early stopping triggered')
                    break

            self.n_iter_ += 1
            LOGGER.info("iter: {}, is_converged: {}".format(
                self.n_iter_, self.is_converged))
            if self.is_converged:
                break

        if not self.is_converged:
            LOGGER.info("Reach max iter {}, train model finish!".format(
                self.max_iter))

        if self.validation_strategy and self.validation_strategy.has_saved_best_model(
        ):
            self.load_model(self.validation_strategy.cur_best_model)
        self.set_summary(self.get_model_summary())
Code example #22
    def fit(self, data_instances, validate_data=None):
        """
        Train poisson model of role guest
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_poisson_guest fit")
        self._abnormal_detection(data_instances)
        self.header = copy.deepcopy(self.get_header(data_instances))

        validation_strategy = self.init_validation_strategy(data_instances, validate_data)

        self.exposure_index = self.get_exposure_index(self.header, self.exposure_colname)
        if self.exposure_index > -1:
            self.header.pop(self.exposure_index)
            LOGGER.info("expsoure provided at Guest, colname is {}".format(self.exposure_colname))
        exposure = data_instances.mapValues(lambda v: self.load_exposure(v))
        data_instances = data_instances.mapValues(lambda v: self.load_instance(v))

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
        self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                           self.encrypted_mode_calculator_param.mode,
                                                           self.encrypted_mode_calculator_param.re_encrypted_rate) for _
                                     in range(self.batch_generator.batch_nums)]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data)
                # compute offset of this batch
                batch_offset = exposure.join(batch_feat_inst, lambda ei, d: self.safe_log(ei))

                # Start gradient procedure
                optimized_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst,
                    self.encrypted_calculator,
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index,
                    batch_offset
                )
                LOGGER.debug("iteration:{} Guest's gradient: {}".format(self.n_iter_, optimized_gradient))
                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(data_instances, self.model_weights, self.n_iter_,
                                                         batch_index, batch_offset, loss_norm)

                self.model_weights = self.optimizer.update_model(self.model_weights, optimized_gradient)

                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
            LOGGER.info("iter: {},  is_converged: {}".format(self.n_iter_, self.is_converged))

            validation_strategy.validate(self, self.n_iter_)
            self.n_iter_ += 1
            if self.is_converged:
                break
Code example #23
class HomoLRArbiter(HomoLRBase):
    def __init__(self):
        super(HomoLRArbiter, self).__init__()
        self.re_encrypt_times = []  # Record the times needed for each host

        self.loss_history = []
        self.is_converged = False
        self.role = consts.ARBITER
        self.aggregator = aggregator.Arbiter()
        self.model_weights = None
        self.cipher = paillier_cipher.Arbiter()
        self.host_predict_results = []

    def _init_model(self, params):
        super()._init_model(params)
        self.cipher.register_paillier_cipher(self.transfer_variable)

    def fit(self, data_instances=None, validate_data=None):
        host_ciphers = self.cipher.paillier_keygen(
            key_length=self.model_param.encrypt_param.key_length,
            suffix=('fit', ))
        host_has_no_cipher_ids = [
            idx for idx, cipher in host_ciphers.items() if cipher is None
        ]
        self.re_encrypt_times = self.cipher.set_re_cipher_time(host_ciphers)
        max_iter = self.max_iter
        # validation_strategy = self.init_validation_strategy()

        while self.n_iter_ < max_iter + 1:
            suffix = (self.n_iter_, )

            if ((self.n_iter_ + 1) % self.aggregate_iters
                    == 0) or self.n_iter_ == max_iter:
                merged_model = self.aggregator.aggregate_and_broadcast(
                    ciphers_dict=host_ciphers, suffix=suffix)
                total_loss = self.aggregator.aggregate_loss(
                    host_has_no_cipher_ids, suffix)
                self.callback_loss(self.n_iter_, total_loss)
                self.loss_history.append(total_loss)
                if self.use_loss:
                    converge_var = total_loss
                else:
                    converge_var = np.array(merged_model.unboxed)

                self.is_converged = self.aggregator.send_converge_status(
                    self.converge_func.is_converge, (converge_var, ),
                    suffix=(self.n_iter_, ))
                LOGGER.info(
                    "n_iters: {}, total_loss: {}, converge flag is :{}".format(
                        self.n_iter_, total_loss, self.is_converged))
                if self.is_converged or self.n_iter_ == max_iter:
                    break
                self.model_weights = LogisticRegressionWeights(
                    merged_model.unboxed,
                    self.model_param.init_param.fit_intercept)
                if self.header is None:
                    self.header = [
                        'x' + str(i)
                        for i in range(len(self.model_weights.coef_))
                    ]

            self.cipher.re_cipher(iter_num=self.n_iter_,
                                  re_encrypt_times=self.re_encrypt_times,
                                  host_ciphers_dict=host_ciphers,
                                  re_encrypt_batches=self.re_encrypt_batches)

            # validation_strategy.validate(self, self.n_iter_)
            self.n_iter_ += 1

        LOGGER.info("Finish Training task, total iters: {}".format(
            self.n_iter_))

    def predict(self, data_instantces=None):
        LOGGER.info(f'Start predict task')
        current_suffix = ('predict', )
        host_ciphers = self.cipher.paillier_keygen(
            key_length=self.model_param.encrypt_param.key_length,
            suffix=current_suffix)

        LOGGER.debug("Loaded arbiter model: {}".format(
            self.model_weights.unboxed))
        for idx, cipher in host_ciphers.items():
            if cipher is None:
                continue
            encrypted_model_weights = self.model_weights.encrypted(
                cipher, inplace=False)
            self.transfer_variable.aggregated_model.remote(
                obj=encrypted_model_weights.for_remote(),
                role=consts.HOST,
                idx=idx,
                suffix=current_suffix)

        # Receive wx results

        for idx, cipher in host_ciphers.items():
            if cipher is None:
                continue
            encrypted_predict_wx = self.transfer_variable.predict_wx.get(
                idx=idx, suffix=current_suffix)
            predict_wx = cipher.distribute_decrypt(encrypted_predict_wx)

            prob_table = predict_wx.mapValues(lambda x: activation.sigmoid(x))
            predict_table = prob_table.mapValues(
                lambda x: 1
                if x > self.model_param.predict_param.threshold else 0)

            self.transfer_variable.predict_result.remote(predict_table,
                                                         role=consts.HOST,
                                                         idx=idx,
                                                         suffix=current_suffix)
            self.host_predict_results.append((prob_table, predict_table))
Code example #24
File: hetero_sqn_gradient.py  Project: zpskt/FATE
    def _renew_w_tilde(self):
        self.last_w_tilde = self.this_w_tilde
        self.this_w_tilde = LinearModelWeights(np.zeros_like(self.last_w_tilde.unboxed),
                                               self.last_w_tilde.fit_intercept)
Code example #25
    def fit(self, data_instances, validate_data=None):
        """
        Train poisson model of role guest
        Parameters
        ----------
        data_instances: Table of Instance, input data
        """

        LOGGER.info("Enter hetero_poisson_guest fit")
        # self._abnormal_detection(data_instances)
        # self.header = copy.deepcopy(self.get_header(data_instances))
        self.prepare_fit(data_instances, validate_data)
        self.callback_list.on_train_begin(data_instances, validate_data)

        if with_weight(data_instances):
            LOGGER.warning(
                "input data with weight. Poisson regression does not support weighted training."
            )

        self.exposure_index = self.get_exposure_index(self.header,
                                                      self.exposure_colname)
        exposure_index = self.exposure_index
        if exposure_index > -1:
            self.header.pop(exposure_index)
            LOGGER.info("Guest provides exposure value.")
        exposure = data_instances.mapValues(
            lambda v: HeteroPoissonBase.load_exposure(v, exposure_index))
        data_instances = data_instances.mapValues(
            lambda v: HeteroPoissonBase.load_instance(v, exposure_index))

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(
            data_instances, self.batch_size)

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(
            self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        if not self.component_properties.is_warm_start:
            w = self.initializer.init_model(model_shape,
                                            init_params=self.init_param_obj)
            self.model_weights = LinearModelWeights(
                w,
                fit_intercept=self.fit_intercept,
                raise_overflow_error=False)
        else:
            self.callback_warm_start_init_iter(self.n_iter_)

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # compute offset of this batch
                batch_offset = exposure.join(
                    batch_data, lambda ei, d: HeteroPoissonBase.safe_log(ei))

                # Start gradient procedure
                optimized_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_data, self.cipher_operator, self.model_weights,
                    self.optimizer, self.n_iter_, batch_index, batch_offset)
                # LOGGER.debug("iteration:{} Guest's gradient: {}".format(self.n_iter_, optimized_gradient))
                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(
                    batch_data, self.model_weights, self.n_iter_, batch_index,
                    batch_offset, loss_norm)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optimized_gradient)

                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))
            LOGGER.info("iter: {},  is_converged: {}".format(
                self.n_iter_, self.is_converged))

            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1

            if self.stop_training:
                break

            if self.is_converged:
                break
        self.callback_list.on_train_end()
        self.set_summary(self.get_model_summary())
Code example #26
File: hetero_linr_guest.py  Project: FederatedAI/FATE
    def fit(self, data_instances, validate_data=None):
        """
        Train linR model of role guest
        Parameters
        ----------
        data_instances: Table of Instance, input data
        """

        LOGGER.info("Enter hetero_linR_guest fit")
        self._abnormal_detection(data_instances)
        self.header = self.get_header(data_instances)
        self.callback_list.on_train_begin(data_instances, validate_data)
        # self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        use_async = False
        if with_weight(data_instances):
            if self.model_param.early_stop == "diff":
                LOGGER.warning("input data with weight, please use 'weight_diff' for 'early_stop'.")
            data_instances = scale_sample_weight(data_instances)
            self.gradient_loss_operator.set_use_sample_weight()
            LOGGER.debug(f"instance weight scaled; use weighted gradient loss operator")
            # LOGGER.debug(f"data_instances after scale: {[v[1].weight for v in list(data_instances.collect())]}")
        elif len(self.component_properties.host_party_idlist) == 1:
            LOGGER.debug(f"set_use_async")
            self.gradient_loss_operator.set_use_async()
            use_async = True
        self.transfer_variable.use_async.remote(use_async)

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
        self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

        self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                           self.encrypted_mode_calculator_param.mode,
                                                           self.encrypted_mode_calculator_param.re_encrypted_rate) for _
                                     in range(self.batch_generator.batch_nums)]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        if not self.component_properties.is_warm_start:
            w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
            self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept, raise_overflow_error=False)
        else:
            self.callback_warm_start_init_iter(self.n_iter_)

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # Start gradient procedure
                optim_guest_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_data,
                    self.encrypted_calculator,
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index
                )

                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(batch_data, self.n_iter_, batch_index, loss_norm)

                self.model_weights = self.optimizer.update_model(self.model_weights, optim_guest_gradient)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
            LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1
            if self.stop_training:
                break

            if self.is_converged:
                break
        self.callback_list.on_train_end()

        self.set_summary(self.get_model_summary())
Code example #27
File: hetero_lr_host.py  Project: zhilangtaosha/FATE
    def fit_binary(self, data_instances, validate_data):
        self._abnormal_detection(data_instances)

        validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)
        LOGGER.debug(
            f"MODEL_STEP Start fin_binary, data count: {data_instances.count()}"
        )

        self.header = self.get_header(data_instances)
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        self.batch_generator.initialize_batch_generator(data_instances)
        self.gradient_loss_operator.set_total_batch_nums(
            self.batch_generator.batch_nums)

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        LOGGER.debug("model_shape: {}, w shape: {}, w: {}".format(
            model_shape, w.shape, w))
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.init_param_obj.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:" + str(self.n_iter_))
            batch_data_generator = self.batch_generator.generate_batch_data()
            batch_index = 0
            self.optimizer.set_iters(self.n_iter_)
            for batch_data in batch_data_generator:
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data)
                LOGGER.debug(
                    f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}"
                )

                optim_host_gradient, fore_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.encrypted_calculator,
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)
                LOGGER.debug(
                    'optim_host_gradient: {}'.format(optim_host_gradient))

                training_info = {
                    "iteration": self.n_iter_,
                    "batch_index": batch_index
                }
                self.update_local_model(fore_gradient, data_instances,
                                        self.model_weights.coef_,
                                        **training_info)

                self.gradient_loss_operator.compute_loss(
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index, self.cipher_operator)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optim_host_gradient)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))

            LOGGER.info("Get is_converged flag from arbiter:{}".format(
                self.is_converged))

            validation_strategy.validate(self, self.n_iter_)

            self.n_iter_ += 1
            LOGGER.info("iter: {}, is_converged: {}".format(
                self.n_iter_, self.is_converged))
            if self.is_converged:
                break

        LOGGER.debug("Final lr weights: {}".format(self.model_weights.unboxed))
Code example #28
    def fit_binary(self, data_instances, validate_data=None):
        LOGGER.info("Enter hetero_lr_guest fit")
        self.header = self.get_header(data_instances)

        self.validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)
        data_instances = data_instances.mapValues(HeteroLRGuest.load_data)
        LOGGER.debug(
            f"MODEL_STEP After load data, data count: {data_instances.count()}"
        )
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(
            data_instances, self.batch_size)
        self.gradient_loss_operator.set_total_batch_nums(
            self.batch_generator.batch_nums)

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(
            self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = batch_data
                # LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}")

                # Start gradient procedure
                LOGGER.debug(
                    "iter: {}, before compute gradient, data count: {}".format(
                        self.n_iter_, batch_feat_inst.count()))
                optim_guest_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.encrypted_calculator,
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)

                # LOGGER.debug('optim_guest_gradient: {}'.format(optim_guest_gradient))
                # training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
                # self.update_local_model(fore_gradient, data_instances, self.model_weights.coef_, **training_info)

                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(
                    data_instances, self.model_weights, self.n_iter_,
                    batch_index, loss_norm)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optim_guest_gradient)
                batch_index += 1
                # LOGGER.debug("lr_weight, iters: {}, update_model: {}".format(self.n_iter_, self.model_weights.unboxed))

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))
            LOGGER.info("iter: {},  is_converged: {}".format(
                self.n_iter_, self.is_converged))

            if self.validation_strategy:
                LOGGER.debug('LR guest running validation')
                self.validation_strategy.validate(self, self.n_iter_)
                if self.validation_strategy.need_stop():
                    LOGGER.debug('early stopping triggered')
                    break

            self.n_iter_ += 1

            if self.is_converged:
                break

        if self.validation_strategy and self.validation_strategy.has_saved_best_model(
        ):
            self.load_model(self.validation_strategy.cur_best_model)
        self.set_summary(self.get_model_summary())
Code example #29
    def fit_binary(self, data_instances, validate_data):
        # self._abnormal_detection(data_instances)
        # self.check_abnormal_values(data_instances)
        # self.check_abnormal_values(validate_data)
        # self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)
        self.callback_list.on_train_begin(data_instances, validate_data)

        LOGGER.debug(
            f"MODEL_STEP Start fin_binary, data count: {data_instances.count()}"
        )

        self.header = self.get_header(data_instances)
        model_shape = self.get_features_shape(data_instances)
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        self.batch_generator.initialize_batch_generator(data_instances,
                                                        shuffle=self.shuffle)
        if self.batch_generator.batch_masked:
            self.batch_generator.verify_batch_legality(
                least_batch_size=model_shape)

        if self.transfer_variable.use_async.get(idx=0):
            LOGGER.debug(f"set_use_async")
            self.gradient_loss_operator.set_use_async()

        self.gradient_loss_operator.set_total_batch_nums(
            self.batch_generator.batch_nums)

        LOGGER.info("Start initialize model.")
        # model_shape = self.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False

        if not self.component_properties.is_warm_start:
            w = self.initializer.init_model(model_shape,
                                            init_params=self.init_param_obj)
            self.model_weights = LinearModelWeights(
                w, fit_intercept=self.init_param_obj.fit_intercept)
        else:
            self.callback_warm_start_init_iter(self.n_iter_)

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)

            LOGGER.info("iter: " + str(self.n_iter_))
            batch_data_generator = self.batch_generator.generate_batch_data(
                suffix=(self.n_iter_, ))
            batch_index = 0
            self.optimizer.set_iters(self.n_iter_)
            for batch_data in batch_data_generator:
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = batch_data
                # LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}")

                LOGGER.debug(
                    "iter: {}, batch: {}, before compute gradient, data count: {}"
                    .format(self.n_iter_, batch_index,
                            batch_feat_inst.count()))
                optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.cipher_operator, self.model_weights,
                    self.optimizer, self.n_iter_, batch_index)
                # LOGGER.debug('optim_host_gradient: {}'.format(optim_host_gradient))

                self.gradient_loss_operator.compute_loss(
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index,
                    self.cipher_operator,
                    batch_masked=self.batch_generator.batch_masked)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optim_host_gradient)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))

            LOGGER.info("Get is_converged flag from arbiter:{}".format(
                self.is_converged))
            LOGGER.info("iter: {}, is_converged: {}".format(
                self.n_iter_, self.is_converged))
            LOGGER.debug(f"flowid: {self.flowid}, step_index: {self.n_iter_}")

            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1
            if self.stop_training:
                break

            if self.is_converged:
                break
        self.callback_list.on_train_end()
        self.set_summary(self.get_model_summary())
Code example #30
File: hetero_linr_host.py  Project: yubo1993/FATE
    def fit(self, data_instances, validate_data=None):
        """
        Train linear regression model of role host
        Parameters
        ----------
        data_instances: Table of Instance, input data
        """

        LOGGER.info("Enter hetero_linR host")
        # self._abnormal_detection(data_instances)
        # self.header = self.get_header(data_instances)
        self.prepare_fit(data_instances, validate_data)
        self.callback_list.on_train_begin(data_instances, validate_data)

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        if self.transfer_variable.use_async.get(idx=0):
            LOGGER.debug(f"set_use_async")
            self.gradient_loss_operator.set_use_async()

        self.batch_generator.initialize_batch_generator(data_instances)
        self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False

        if not self.component_properties.is_warm_start:
            w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
            self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept, raise_overflow_error=False)
        else:
            self.callback_warm_start_init_iter(self.n_iter_)

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)
            LOGGER.info("iter:" + str(self.n_iter_))
            self.optimizer.set_iters(self.n_iter_)
            batch_data_generator = self.batch_generator.generate_batch_data()
            batch_index = 0
            for batch_data in batch_data_generator:
                optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_data,
                    self.cipher_operator,
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index)

                self.gradient_loss_operator.compute_loss(self.model_weights, self.optimizer, self.n_iter_, batch_index,
                                                         self.cipher_operator)

                self.model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))

            LOGGER.info("Get is_converged flag from arbiter:{}".format(self.is_converged))

            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1
            if self.stop_training:
                break

            LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
            if self.is_converged:
                break
        self.callback_list.on_train_end()

        self.set_summary(self.get_model_summary())