Example #1
 def compute_gradient(self, data_instance, fore_gradient, fit_intercept):
     # pair each sample's feature vector with its fore_gradient by key
     feat_join_grad = data_instance.join(fore_gradient, lambda d, g:
                                         (d.features, g))
     f = functools.partial(self.__compute_gradient,
                           fit_intercept=fit_intercept)
     # compute a partial gradient per partition, then average across partitions
     gradient_partition = feat_join_grad.mapPartitions(f)
     gradient = HeteroFederatedAggregator.aggregate_mean(gradient_partition)
     return gradient
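
The pipeline above joins features with residuals, computes partial gradients per partition, and averages them. A minimal plain-NumPy sketch of what that amounts to for one batch (no DTable or federation; the helper name toy_compute_gradient and the intercept-last ordering are illustrative assumptions, since __compute_gradient itself is not shown):

    import numpy as np

    def toy_compute_gradient(features, fore_gradient, fit_intercept=True):
        # features:      (n_samples, n_features) matrix, i.e. the d.features values
        # fore_gradient: (n_samples,) vector of per-sample residuals g
        if fit_intercept:
            # a bias column of ones makes the last entry the intercept gradient
            features = np.hstack([features, np.ones((features.shape[0], 1))])
        # each sample contributes g_i * x_i; the mean mirrors aggregate_mean
        return features.T.dot(fore_gradient) / features.shape[0]

    X = np.array([[1.0, 2.0], [3.0, 4.0]])
    g = np.array([0.5, -0.5])
    print(toy_compute_gradient(X, g))    # [-0.5 -0.5  0. ]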
Example #2
    def test_aggregate_add_square(self):
        res = HeteroFederatedAggregator.aggregate_add_square(
            self.table_list_a, self.table_list_b, self.table_list_a_square,
            self.table_list_b_square).collect()
        res_to_list = []
        for iterator in res:
            res_to_list.append(iterator[1])

        res = list(np.sort(np.array(res_to_list)))
        self.assertListEqual(self.list_add_square_result, res)
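
The four arguments (a, b, a squared, b squared) suggest that aggregate_add_square assembles (a + b)^2 from the identity (a + b)^2 = a^2 + 2ab + b^2, the usual trick when one operand is an additively homomorphic ciphertext that cannot be squared directly. A plain-number check of that identity, with purely made-up values:

    # illustrative only: verify (a + b)^2 == a^2 + 2*a*b + b^2 element-wise
    a, b = [1, 2, 3], [4, 5, 6]
    lhs = [(x + y) ** 2 for x, y in zip(a, b)]
    rhs = [x * x + 2 * x * y + y * y for x, y in zip(a, b)]
    assert lhs == rhs == [25, 49, 81]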
Example #3
    def test_aggregate_add(self):
        table_add_res = HeteroFederatedAggregator.aggregate_add(
            self.table_a, self.table_b)

        res = []
        for iterator in table_add_res.collect():
            res.append(iterator[1])

        res = np.sort(np.array(res))
        self.assertListEqual(self.add_a_b, list(res))
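
The test only checks the sorted values, but it implies that aggregate_add joins two tables by key and adds the paired values. A dict-based stand-in with made-up data (the real tables are distributed DTables and the values may be encrypted):

    table_a = {0: 1.0, 1: 2.0, 2: 3.0}
    table_b = {0: 10.0, 1: 20.0, 2: 30.0}
    # key-wise addition, the behaviour the assertion above relies on
    table_add = {k: v + table_b[k] for k, v in table_a.items()}
    print(sorted(table_add.values()))    # [11.0, 22.0, 33.0]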
Example #4
    def compute_gradient_and_loss(self, data_instance, fore_gradient,
                                  encrypted_wx, en_sum_wx_square,
                                  fit_intercept):
        # compute gradient
        gradient = self.compute_gradient(data_instance, fore_gradient,
                                         fit_intercept)

        # compute loss
        half_ywx = encrypted_wx.join(data_instance,
                                     lambda wx, d: 0.5 * wx * int(d.label))
        half_ywx_join_en_sum_wx_square = half_ywx.join(en_sum_wx_square,
                                                       lambda yz, ez: (yz, ez))
        f = functools.partial(self.__compute_loss)
        loss_partition = half_ywx_join_en_sum_wx_square.mapPartitions(f)
        loss = HeteroFederatedAggregator.aggregate_mean(loss_partition)

        return gradient, loss
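
__compute_loss is not shown here. The half_ywx and en_sum_wx_square terms match the second-order Taylor expansion of the logistic loss, log(1 + e^(-y*wx)) ~ log 2 - y*wx/2 + (wx)^2/8, which is a standard way to evaluate the loss on additively encrypted values. A plain-NumPy sketch under that assumption (names and data are hypothetical):

    import numpy as np

    def toy_taylor_logloss(wx, y):
        # second-order Taylor expansion of log(1 + exp(-y * wx)) around wx = 0;
        # assumed to mirror what __compute_loss evaluates on ciphertexts
        return np.mean(np.log(2) - 0.5 * y * wx + 0.125 * wx ** 2)

    wx = np.array([0.2, -0.4, 1.0])
    y = np.array([1, -1, 1])
    print(toy_taylor_logloss(wx, y))               # approximate loss
    print(np.mean(np.log(1 + np.exp(-y * wx))))    # exact loss, for comparison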
Example #5
    def fit(self, data_instances=None):
        """
        Train the LR model for the arbiter role
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_lr_arbiter fit")
        if data_instances:
            # self.header = data_instance.schema.get('header')
            self.header = self.get_header(data_instances)
        else:
            self.header = []

        # Generate encrypt keys
        self.encrypt_operator.generate_key(self.key_length)
        public_key = self.encrypt_operator.get_public_key()
        LOGGER.info("public_key:{}".format(public_key))

        # remote is to send an object to other party
        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("remote public_key to host")

        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("remote public_key to guest")

        # get method will block until the remote object is fetched.
        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:{}".format(batch_info))
        self.batch_num = batch_info["batch_num"]

        is_stop = False
        self.n_iter_ = 0
        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            batch_index = 0
            iter_loss = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))
                host_gradient = federation.get(
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get host_gradient from Host")

                guest_gradient = federation.get(
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get guest_gradient from Guest")

                # aggregate gradient
                host_gradient, guest_gradient = np.array(
                    host_gradient), np.array(guest_gradient)
                gradient = np.hstack((host_gradient, guest_gradient))

                LOGGER.info("gradient shape={}".format(gradient.shape))

                # decrypt gradient
                for i in range(gradient.shape[0]):
                    gradient[i] = self.encrypt_operator.decrypt(gradient[i])

                # optimization
                optim_gradient = self.optimizer.apply_gradients(gradient)

                # split optim_gradient according to the gradient sizes of Host and Guest
                separate_optim_gradient = HeteroFederatedAggregator.separate(
                    optim_gradient,
                    [host_gradient.shape[0], guest_gradient.shape[0]])
                host_optim_gradient = separate_optim_gradient[0]
                guest_optim_gradient = separate_optim_gradient[1]

                LOGGER.info("host data feature dims:{}".format(
                    np.array(host_optim_gradient).shape[0]))
                LOGGER.info("guest data feature dims:{}".format(
                    np.array(guest_optim_gradient).shape[0]))

                federation.remote(
                    host_optim_gradient,
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.HOST,
                    idx=0)
                LOGGER.info("Remote host_optim_gradient to Host")

                federation.remote(
                    guest_optim_gradient,
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote guest_optim_gradient to Guest")

                training_info = {
                    "iteration": self.n_iter_,
                    "batch_index": batch_index
                }
                self.perform_subtasks(**training_info)

                loss = federation.get(
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_,
                        batch_index),
                    idx=0)

                de_loss = self.encrypt_operator.decrypt(loss)
                iter_loss += de_loss
                # LOGGER.info("Get loss from guest:{}".format(de_loss))

                batch_index += 1

            # check convergence
            loss = iter_loss / self.batch_num
            LOGGER.info("iter loss:{}".format(loss))
            if self.converge_func.is_converge(loss):
                is_stop = True

            federation.remote(is_stop,
                              name=self.transfer_variable.is_stopped.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.is_stopped,
                                  self.n_iter_, batch_index),
                              role=consts.HOST,
                              idx=0)
            LOGGER.info("Remote is_stop to host:{}".format(is_stop))

            federation.remote(is_stop,
                              name=self.transfer_variable.is_stopped.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.is_stopped,
                                  self.n_iter_, batch_index),
                              role=consts.GUEST,
                              idx=0)
            LOGGER.info("Remote is_stop to guest:{}".format(is_stop))

            self.n_iter_ += 1
            if is_stop:
                LOGGER.info("Model is converged, iter:{}".format(self.n_iter_))
                break

        LOGGER.info(
            "Reached max iter {} or converged; model training finished!".format(
                self.max_iter))
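
Stripped of federation calls and encryption, one pass of the arbiter's inner loop concatenates the two parties' gradients, applies the optimizer, and splits the result back by each party's feature count. A toy single step with made-up numbers and a plain learning-rate update standing in for self.optimizer.apply_gradients:

    import numpy as np

    host_gradient = np.array([0.1, -0.2])         # 2 host features
    guest_gradient = np.array([0.3, 0.0, -0.1])   # 3 guest features
    gradient = np.hstack((host_gradient, guest_gradient))

    learning_rate = 0.15                          # hypothetical optimizer step
    optim_gradient = learning_rate * gradient

    # split back by the original sizes, as the call to
    # HeteroFederatedAggregator.separate is used for above
    sizes = [host_gradient.shape[0], guest_gradient.shape[0]]
    host_optim_gradient = optim_gradient[:sizes[0]]
    guest_optim_gradient = optim_gradient[sizes[0]:]
    print(host_optim_gradient, guest_optim_gradient)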
Example #6
    def fit(self, data_instance=None):
        # Generate encrypt keys
        self.encrypt_operator.generate_key(self.key_length)
        public_key = self.encrypt_operator.get_public_key()
        LOGGER.info("public_key:{}".format(public_key))
        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("remote public_key to host")

        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("remote public_key to guest")

        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:{}".format(batch_info))
        self.batch_num = batch_info["batch_num"]

        is_stop = False
        self.n_iter_ = 0
        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            batch_index = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))
                host_gradient = federation.get(
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get host_gradient from Host")
                guest_gradient = federation.get(
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get guest_gradient from Guest")

                # aggregate gradient
                host_gradient, guest_gradient = np.array(
                    host_gradient), np.array(guest_gradient)
                gradient = np.hstack((host_gradient, guest_gradient))
                # decrypt gradient
                for i in range(gradient.shape[0]):
                    gradient[i] = self.encrypt_operator.decrypt(gradient[i])

                # optimization
                optim_gradient = self.optimizer.apply_gradients(gradient)
                # split optim_gradient according to the gradient sizes of Host and Guest
                separate_optim_gradient = HeteroFederatedAggregator.separate(
                    optim_gradient,
                    [host_gradient.shape[0], guest_gradient.shape[0]])
                host_optim_gradient = separate_optim_gradient[0]
                guest_optim_gradient = separate_optim_gradient[1]

                federation.remote(
                    host_optim_gradient,
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.HOST,
                    idx=0)
                LOGGER.info("Remote host_optim_gradient to Host")

                federation.remote(
                    guest_optim_gradient,
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote guest_optim_gradient to Guest")

                loss = federation.get(
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_,
                        batch_index),
                    idx=0)

                de_loss = self.encrypt_operator.decrypt(loss)
                LOGGER.info("Get loss from guest:{}".format(de_loss))
                # check convergence
                if self.converge_func.is_converge(de_loss):
                    is_stop = True

                federation.remote(
                    is_stop,
                    name=self.transfer_variable.is_stopped.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.is_stopped, self.n_iter_,
                        batch_index),
                    role=consts.HOST,
                    idx=0)
                LOGGER.info("Remote is_stop to guest:{}".format(is_stop))

                federation.remote(
                    is_stop,
                    name=self.transfer_variable.is_stopped.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.is_stopped, self.n_iter_,
                        batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote is_stop to guest:".format(is_stop))

                batch_index += 1
                if is_stop:
                    LOGGER.info("Model is converged, iter:{}".format(
                        self.n_iter_))
                    break

            self.n_iter_ += 1
            if is_stop:
                break

        LOGGER.info("Reach max iter {}, train model finish!".format(
            self.max_iter))
Example #7
 def test_separate(self):
     res = HeteroFederatedAggregator.separate(self.separate_data,
                                              self.separate_size_list)
     self.assertListEqual(res, self.separate_result)
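
The test only asserts the final list, but the calling convention in the fit() examples above (a flat gradient plus a list of per-party sizes) suggests that separate slices a flat sequence into consecutive chunks of the given sizes. A minimal stand-in, illustrative rather than the real implementation:

    def toy_separate(value, size_list):
        # slice value into consecutive chunks whose lengths follow size_list
        chunks, start = [], 0
        for size in size_list:
            chunks.append(value[start:start + size])
            start += size
        return chunks

    print(toy_separate([1, 2, 3, 4, 5], [2, 3]))    # [[1, 2], [3, 4, 5]]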
Example #8
 def test_aggreagte_mean(self):
     res = HeteroFederatedAggregator.aggregate_mean(self.table_a)
     self.assertEqual(res, self.reduce_a)
     res = HeteroFederatedAggregator.aggregate_mean(self.table_d_tuple)
     self.assertListEqual(list(res), self.reduce_d_tuple)
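
The two assertions exercise two behaviours: scalar values reduce to a scalar mean, and tuple values reduce to an element-wise mean. A dict-based sketch of that assumed behaviour (not the real DTable code):

    import numpy as np

    table_a = {0: 1.0, 1: 2.0, 2: 3.0}
    print(sum(table_a.values()) / len(table_a))                  # 2.0

    table_d_tuple = {0: (1.0, 10.0), 1: (3.0, 30.0)}
    print(list(np.mean(list(table_d_tuple.values()), axis=0)))   # [2.0, 20.0]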