Example #1
class TestHomoLR(unittest.TestCase):
    def setUp(self):

        self.guest_X = np.array([[1, 2, 3, 4, 5], [3, 2, 4, 5, 1],
                                 [2, 2, 3, 1, 1]]) / 10
        self.guest_Y = np.array([[1], [1], [-1]])

        self.values = []
        for idx, x in enumerate(self.guest_X):
            inst = Instance(inst_id=idx, features=x, label=self.guest_Y[idx])
            self.values.append((idx, inst))

        self.host_X = np.array([[1, 1.2, 3.1, 4, 5], [2.3, 2, 4, 5.3, 1],
                                [2, 2.2, 1.3, 1, 1.6]]) / 10
        self.host_Y = np.array([[-1], [1], [-1]])

        self.host_values = []
        for idx, x in enumerate(self.host_X):
            inst = Instance(inst_id=idx, features=x, label=self.host_Y[idx])
            self.host_values.append((idx, inst))

        self.max_iter = 10
        self.alpha = 0.01
        self.learning_rate = 0.01
        optimizer = 'SGD'
        self.gradient_operator = LogisticGradient()
        self.initializer = Initializer()
        self.fit_intercept = True
        self.init_param_obj = InitParam(fit_intercept=self.fit_intercept)
        self.updater = L2Updater(self.alpha, self.learning_rate)
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=optimizer)
        self.__init_model()

    def __init_model(self):
        model_shape = self.guest_X.shape[1]
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        return w

    def __init_host_model(self):
        model_shape = self.host_X.shape[1]
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.host_coef_ = w[:-1]
            self.host_intercept_ = w[-1]
        else:
            self.host_coef_ = w
            self.host_intercept_ = 0
        return w

    def test_one_iter(self):
        w = self.__init_model()
        print("before training, coef: {}, intercept: {}".format(
            self.coef_, self.intercept_))
        self.assertEqual(self.coef_.shape[0], self.guest_X.shape[1])
        grad, loss = self.gradient_operator.compute(
            self.values,
            coef=self.coef_,
            intercept=self.intercept_,
            fit_intercept=self.fit_intercept)
        loss_norm = self.updater.loss_norm(self.coef_)
        loss = loss + loss_norm
        delta_grad = self.optimizer.apply_gradients(grad)
        self.update_model(delta_grad)
        print("After training, coef: {}, intercept: {}, loss: {}".format(
            self.coef_, self.intercept_, loss))

    def test_multi_iter(self):
        w = self.__init_model()
        loss_hist = [100]
        for iter_num in range(self.max_iter):
            grad, loss = self.gradient_operator.compute(
                self.values,
                coef=self.coef_,
                intercept=self.intercept_,
                fit_intercept=self.fit_intercept)
            loss_norm = self.updater.loss_norm(self.coef_)
            loss = loss + loss_norm
            delta_grad = self.optimizer.apply_gradients(grad)
            self.update_model(delta_grad)
            self.assertTrue(loss <= loss_hist[-1])
            loss_hist.append(loss)
        print(loss_hist)

    def test_host_iter(self):
        w = self.__init_host_model()
        print("before training, coef: {}, intercept: {}".format(
            self.coef_, self.intercept_))
        self.assertEqual(self.host_coef_.shape[0], self.host_X.shape[1])
        grad, loss = self.gradient_operator.compute(
            self.host_values,
            coef=self.host_coef_,
            intercept=self.host_intercept_,
            fit_intercept=self.fit_intercept)
        loss_norm = self.updater.loss_norm(self.host_coef_)
        # print("***********************************************")
        # print(loss, loss_norm)
        self.assertTrue(loss is None)

    def update_model(self, gradient):
        LOGGER.debug(
            "In update_model function, shape of coef: {}, shape of gradient: {}"
            .format(np.shape(self.coef_), np.shape(gradient)))
        if self.fit_intercept:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_,
                                                      gradient[:-1])
            else:
                self.coef_ = self.coef_ - gradient[:-1]
            self.intercept_ -= gradient[-1]

        else:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_, gradient)
            else:
                self.coef_ = self.coef_ - gradient
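
The tests above exercise one plain logistic-regression step: compute the gradient and loss over a small batch of ±1-labelled rows, add the L2 loss norm, and apply the scaled gradient. Below is a self-contained numpy sketch of that arithmetic; the helper names and the L2/SGD behaviour are stand-ins assumed to mirror LogisticGradient, L2Updater and Optimizer, not the real classes.

import numpy as np

def logistic_grad_and_loss(X, y, w, b):
    # labels in {-1, +1}, as in the fixtures above
    z = y.ravel() * (X @ w + b)
    loss = np.mean(np.log(1 + np.exp(-z)))
    s = -y.ravel() / (1 + np.exp(z))      # d(loss)/d(w.x + b), per row
    return X.T @ s / len(y), np.mean(s), loss

X = np.array([[1, 2, 3, 4, 5], [3, 2, 4, 5, 1], [2, 2, 3, 1, 1]]) / 10
y = np.array([[1], [1], [-1]])
w, b = np.zeros(X.shape[1]), 0.0
alpha, lr = 0.01, 0.01                    # same hyper-parameters as setUp
for _ in range(10):
    gw, gb, loss = logistic_grad_and_loss(X, y, w, b)
    loss += 0.5 * alpha * w @ w           # L2 loss norm (assumed L2Updater behaviour)
    w -= lr * (gw + alpha * w)            # SGD step with L2 shrinkage
    b -= lr * gb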
Example #2
class HeteroLRArbiter(BaseLogisticRegression):
    def __init__(self, logistic_params):
        # LogisticParamChecker.check_param(logistic_params)
        super(HeteroLRArbiter, self).__init__(logistic_params)
        self.converge_func = DiffConverge(logistic_params.eps)

        # attribute
        self.pre_loss = None
        self.batch_num = None
        self.transfer_variable = HeteroLRTransferVariable()
        self.optimizer = Optimizer(logistic_params.learning_rate,
                                   logistic_params.optimizer)
        self.key_length = logistic_params.encrypt_param.key_length

    def perform_subtasks(self, **training_info):
        """
        Perform any subtasks that the arbiter is responsible for.

        This 'perform_subtasks' function serves as a handler for any task that the arbiter is responsible
        for. For example, the 'perform_subtasks' function of the 'HeteroDNNLRArbiter' class located in
        'hetero_dnn_lr_arbiter.py' performs work related to updating/training the local neural networks of
        the guest or host.

        For this particular class (i.e., 'HeteroLRArbiter'), which serves as the base arbiter class for the
        neural-networks-based hetero-logistic-regression model, the 'perform_subtasks' function does nothing.
        In other words, no subtask is performed by this arbiter.

        :param training_info: a dictionary holding training information
        """
        pass

    def fit(self, data_instances=None):
        """
        Train lr model of role arbiter
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_lr_arbiter fit")
        if data_instances:
            # self.header = data_instance.schema.get('header')
            self.header = self.get_header(data_instances)
        else:
            self.header = []

        # Generate encrypt keys
        self.encrypt_operator.generate_key(self.key_length)
        public_key = self.encrypt_operator.get_public_key()
        LOGGER.info("public_key:{}".format(public_key))

        # remote() sends an object to the other party
        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("remote public_key to host")

        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("remote public_key to guest")

        # get method will block until the remote object is fetched.
        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:{}".format(batch_info))
        self.batch_num = batch_info["batch_num"]

        is_stop = False
        self.n_iter_ = 0
        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            batch_index = 0
            iter_loss = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))
                host_gradient = federation.get(
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get host_gradient from Host")

                guest_gradient = federation.get(
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get guest_gradient from Guest")

                # aggregate gradient
                host_gradient, guest_gradient = np.array(
                    host_gradient), np.array(guest_gradient)
                gradient = np.hstack((host_gradient, guest_gradient))

                LOGGER.info("gradient shape={}".format(gradient.shape))

                # decrypt gradient
                for i in range(gradient.shape[0]):
                    gradient[i] = self.encrypt_operator.decrypt(gradient[i])

                # optimization
                optim_gradient = self.optimizer.apply_gradients(gradient)

                # separate optim_gradient according to the gradient sizes of Host and Guest
                separate_optim_gradient = HeteroFederatedAggregator.separate(
                    optim_gradient,
                    [host_gradient.shape[0], guest_gradient.shape[0]])
                host_optim_gradient = separate_optim_gradient[0]
                guest_optim_gradient = separate_optim_gradient[1]

                LOGGER.info("host data feature dims:{}".format(
                    np.array(host_optim_gradient).shape[0]))
                LOGGER.info("guest data feature dims:{}".format(
                    np.array(guest_optim_gradient).shape[0]))

                federation.remote(
                    host_optim_gradient,
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.HOST,
                    idx=0)
                LOGGER.info("Remote host_optim_gradient to Host")

                federation.remote(
                    guest_optim_gradient,
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote guest_optim_gradient to Guest")

                training_info = {
                    "iteration": self.n_iter_,
                    "batch_index": batch_index
                }
                self.perform_subtasks(**training_info)

                loss = federation.get(
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_,
                        batch_index),
                    idx=0)

                de_loss = self.encrypt_operator.decrypt(loss)
                iter_loss += de_loss
                # LOGGER.info("Get loss from guest:{}".format(de_loss))

                batch_index += 1

            # if converge
            loss = iter_loss / self.batch_num
            LOGGER.info("iter loss:{}".format(loss))
            if self.converge_func.is_converge(loss):
                is_stop = True

            federation.remote(is_stop,
                              name=self.transfer_variable.is_stopped.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.is_stopped,
                                  self.n_iter_, batch_index),
                              role=consts.HOST,
                              idx=0)
            LOGGER.info("Remote is_stop to host:{}".format(is_stop))

            federation.remote(is_stop,
                              name=self.transfer_variable.is_stopped.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.is_stopped,
                                  self.n_iter_, batch_index),
                              role=consts.GUEST,
                              idx=0)
            LOGGER.info("Remote is_stop to guest:{}".format(is_stop))

            self.n_iter_ += 1
            if is_stop:
                LOGGER.info("Model is converged, iter:{}".format(self.n_iter_))
                break

        LOGGER.info(
            "Reached max iter {} or converged; model training finished!".format(
                self.max_iter))
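
Most of the arbiter's per-batch work is array bookkeeping: stack the decrypted host and guest gradients, apply the optimizer, then split the result back by each party's feature count. The numpy sketch below illustrates that stack-and-separate step; the hand-written separate() only mimics the behaviour assumed of HeteroFederatedAggregator.separate, and the optimizer is replaced by a plain learning-rate scaling.

import numpy as np

def separate(vector, sizes):
    # split a flat vector into consecutive chunks of the given sizes,
    # mirroring how the arbiter hands gradients back to host and guest
    chunks, start = [], 0
    for size in sizes:
        chunks.append(vector[start:start + size])
        start += size
    return chunks

host_gradient = np.array([0.1, -0.2, 0.05])        # 3 host features
guest_gradient = np.array([0.3, -0.1])             # 2 guest features
gradient = np.hstack((host_gradient, guest_gradient))

optim_gradient = 0.01 * gradient                    # stand-in for optimizer.apply_gradients
host_optim, guest_optim = separate(
    optim_gradient, [host_gradient.shape[0], guest_gradient.shape[0]])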
Example #3
class HeteroNEGuest(BaseNetworkEmbeddig):
    def __init__(self, network_embedding_params):
        super(HeteroNEGuest, self).__init__(network_embedding_params)
        self.transfer_variable = HeteroNETransferVariable()
        self.data_batch_count = []

        self.encrypted_calculator = None

        self.guest_forward = None

        ######
        self.local_optimizer = Optimizer(
            network_embedding_params.learning_rate,
            network_embedding_params.optimizer)
        ######

    def aggregate_forward(self, host_forward):
        """
        Compute e_guest.dot(e_host)
        Parameters
        ----------
        host_forward: DTable. key, en_e(host)
        """
        aggregate_forward_res = self.guest_forward.join(
            host_forward, lambda e1, e2:
            (fate_operator.dot(e1[1], e2[1]),
             math.pow(fate_operator.dot(e1[1], e2[1]), 2)))
        return aggregate_forward_res

    @staticmethod
    def load_data(data_instance):
        """
        Transform a (node, label) pair into an Instance
        Parameters
        ----------
        data_instance: tuple (node, label)
        """
        return Instance(features=data_instance[0], label=data_instance[1])

    def fit(self,
            data_instances,
            node2id,
            local_instances=None,
            common_nodes=None):
        """
        Train node embedding for role guest
        Parameters
        ----------
        data_instances: DTable of target node and label, input data
        node2id: dict mapping node name to id
        local_instances: the guest's local training instances, used in the horizontally federated step
        common_nodes: nodes shared by guest and host, whose embeddings are aggregated by the arbiter
        """
        LOGGER.info("samples number:{}".format(data_instances.count()))
        LOGGER.info("Enter network embedding procedure:")
        self.n_node = len(node2id)
        LOGGER.info("Bank A has {} nodes".format(self.n_node))

        data_instances = data_instances.mapValues(HeteroNEGuest.load_data)
        LOGGER.info("Transform input data to train instance")

        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)
        LOGGER.info("Get public_key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        # hetero network embedding
        LOGGER.info("Generate mini-batch from input data")
        mini_batch_obj = MiniBatch(data_instances, batch_size=self.batch_size)
        batch_num = mini_batch_obj.batch_nums

        LOGGER.info("samples number:{}".format(data_instances.count()))
        if self.batch_size == -1:
            LOGGER.info(
                "batch size is -1, set it to the number of samples in data_instances"
            )
            self.batch_size = data_instances.count()

        ##############
        # horizontal federated learning
        LOGGER.info("Generate mini-batch for local instances in guest")
        mini_batch_obj_local = MiniBatch(local_instances,
                                         batch_size=self.batch_size)
        local_batch_num = mini_batch_obj_local.batch_nums
        common_node_instances = eggroll.parallelize(
            ((node, node) for node in common_nodes),
            include_key=True,
            name='common_nodes')
        ##############

        batch_info = {'batch_size': self.batch_size, "batch_num": batch_num}
        LOGGER.info("batch_info:{}".format(batch_info))
        federation.remote(batch_info,
                          name=self.transfer_variable.batch_info.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.batch_info),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Remote batch_info to Host")

        federation.remote(batch_info,
                          name=self.transfer_variable.batch_info.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.batch_info),
                          role=consts.ARBITER,
                          idx=0)
        LOGGER.info("Remote batch_info to Arbiter")

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.encrypt_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(batch_num)
        ]

        LOGGER.info("Start initialize model.")
        self.embedding_ = self.initializer.init_model((self.n_node, self.dim),
                                                      self.init_param_obj)
        LOGGER.info("Embedding shape={}".format(self.embedding_.shape))

        is_send_all_batch_index = False
        self.n_iter_ = 0
        index_data_inst_map = {}

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))

            #################
            local_batch_data_generator = mini_batch_obj_local.mini_batch_data_generator()
            total_loss = 0
            local_batch_num = 0
            LOGGER.info("Enter the horizontally federated learning procedure:")
            for local_batch_data in local_batch_data_generator:
                n = local_batch_data.count()
                #LOGGER.info("Local batch data count:{}".format(n))
                E_Y = self.compute_local_embedding(local_batch_data,
                                                   self.embedding_, node2id)
                local_grads_e1, local_grads_e2, local_loss = self.local_gradient_operator.compute(
                    E_Y, 'E_1')
                local_grads_e1 = local_grads_e1.mapValues(
                    lambda g: self.local_optimizer.apply_gradients(g / n))
                local_grads_e2 = local_grads_e2.mapValues(
                    lambda g: self.local_optimizer.apply_gradients(g / n))
                e1id_join_grads = local_batch_data.join(
                    local_grads_e1, lambda v, g: (node2id[v[0]], g))
                e2id_join_grads = local_batch_data.join(
                    local_grads_e2, lambda v, g: (node2id[v[1]], g))
                self.update_model(e1id_join_grads)
                self.update_model(e2id_join_grads)

                local_loss = local_loss / n
                local_batch_num += 1
                total_loss += local_loss
                #LOGGER.info("gradient count:{}".format(e1id_join_grads.count()))

            guest_common_embedding = common_node_instances.mapValues(
                lambda node: self.embedding_[node2id[node]])
            federation.remote(
                guest_common_embedding,
                name=self.transfer_variable.guest_common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_common_embedding,
                    self.n_iter_, 0),
                role=consts.ARBITER,
                idx=0)
            LOGGER.info("Remote the embedding of common node to arbiter!")

            common_embedding = federation.get(
                name=self.transfer_variable.common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.common_embedding, self.n_iter_, 0),
                idx=0)
            LOGGER.info(
                "Get the aggregated embedding of common node from arbiter!")

            self.update_common_nodes(common_embedding, common_nodes, node2id)

            total_loss /= local_batch_num
            LOGGER.info(
                "Iter {}, horizontally federated learning loss: {}".format(
                    self.n_iter_, total_loss))

            #################

            # vertically federated learning
            # each iter will get the same batch_data_generator
            LOGGER.info("Enter the vertically federated learning:")
            batch_data_generator = mini_batch_obj.mini_batch_data_generator(
                result='index')

            batch_index = 0
            for batch_data_index in batch_data_generator:
                LOGGER.info("batch:{}".format(batch_index))

                # only need to send once
                if not is_send_all_batch_index:
                    LOGGER.info("remote mini-batch index to Host")
                    federation.remote(
                        batch_data_index,
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        role=consts.HOST,
                        idx=0)
                    if batch_index >= mini_batch_obj.batch_nums - 1:
                        is_send_all_batch_index = True

                # in order to avoid joining in next iteration
                # Get mini-batch train data
                if len(index_data_inst_map) < batch_num:
                    batch_data_inst = data_instances.join(
                        batch_data_index, lambda data_inst, index: data_inst)
                    index_data_inst_map[batch_index] = batch_data_inst
                else:
                    batch_data_inst = index_data_inst_map[batch_index]

                # For inductive learning: transform node attributes to node embedding
                # self.transform(batch_data_inst)
                self.guest_forward = self.compute_forward(
                    batch_data_inst, self.embedding_, node2id, batch_index)

                host_forward = federation.get(
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get host_forward from host")
                aggregate_forward_res = self.aggregate_forward(host_forward)
                en_aggregate_ee = aggregate_forward_res.mapValues(
                    lambda v: v[0])
                en_aggregate_ee_square = aggregate_forward_res.mapValues(
                    lambda v: v[1])

                # compute [[d]]
                if self.gradient_operator is None:
                    self.gradient_operator = HeteroNetworkEmbeddingGradient(
                        self.encrypt_operator)
                fore_gradient = self.gradient_operator.compute_fore_gradient(
                    batch_data_inst, en_aggregate_ee)

                host_gradient = self.gradient_operator.compute_gradient(
                    self.guest_forward.mapValues(
                        lambda v: Instance(features=v[1])), fore_gradient)
                federation.remote(
                    host_gradient,
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote host_gradient to arbiter")

                composed_data_inst = host_forward.join(
                    batch_data_inst,
                    lambda hf, d: Instance(features=hf[1], label=d.label))
                guest_gradient, loss = self.gradient_operator.compute_gradient_and_loss(
                    composed_data_inst, fore_gradient, en_aggregate_ee,
                    en_aggregate_ee_square)
                federation.remote(
                    guest_gradient,
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote guest_gradient to arbiter")

                optim_guest_gradient = federation.get(
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_guest_gradient from arbiter")

                # update node embedding
                LOGGER.info("Update node embedding")
                nodeid_join_gradient = batch_data_inst.join(
                    optim_guest_gradient, lambda instance, gradient:
                    (node2id[instance.features], gradient))
                self.update_model(nodeid_join_gradient)

                # update local model that transform attribute to node embedding
                training_info = {
                    'iteration': self.n_iter_,
                    'batch_index': batch_index
                }
                self.update_local_model(fore_gradient, batch_data_inst,
                                        self.embedding_, **training_info)

                # loss needs to be encrypted!

                federation.remote(
                    loss,
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote loss to arbiter")

                # convergence of the loss is checked by the arbiter
                batch_index += 1

                # remove temporary resource
                rubbish_list = [
                    host_forward, aggregate_forward_res, en_aggregate_ee,
                    en_aggregate_ee_square, fore_gradient, self.guest_forward
                ]
                rubbish_clear(rubbish_list)

            ##########
            guest_common_embedding = common_node_instances.mapValues(
                lambda node: self.embedding_[node2id[node]])
            federation.remote(
                guest_common_embedding,
                name=self.transfer_variable.guest_common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_common_embedding,
                    self.n_iter_, 1),
                role=consts.ARBITER,
                idx=0)

            common_embedding = federation.get(
                name=self.transfer_variable.common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.common_embedding, self.n_iter_, 1),
                idx=0)

            self.update_common_nodes(common_embedding, common_nodes, node2id)
            ##########

            is_stopped = federation.get(
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped, self.n_iter_),
                idx=0)

            LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

            self.n_iter_ += 1
            if is_stopped:
                LOGGER.info(
                    "Get stop signal from arbiter, model is converged, iter:{}"
                    .format(self.n_iter_))
                break

        embedding_table = eggroll.table(name='guest',
                                        namespace='node_embedding',
                                        partition=10)
        id2node = dict(zip(node2id.values(), node2id.keys()))
        for id, embedding in enumerate(self.embedding_):
            embedding_table.put(id2node[id], embedding)
        embedding_table.save_as(name='guest',
                                namespace='node_embedding',
                                partition=10)
        LOGGER.info("Reach max iter {}, train model finish!".format(
            self.max_iter))
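
Stripped of encryption and DTable plumbing, aggregate_forward joins the two parties' embeddings by key and keeps the inner product together with its square for the later gradient and loss computation. The dictionary-based stand-in below (hypothetical data, plain numpy) shows that per-key arithmetic.

import numpy as np

# key -> (node, embedding), mimicking guest_forward / host_forward entries
guest_forward = {0: ("n0", np.array([0.1, 0.2])), 1: ("n1", np.array([0.3, 0.4]))}
host_forward = {0: ("n0", np.array([0.5, 0.1])), 1: ("n1", np.array([0.2, 0.2]))}

aggregate_forward_res = {}
for key, (_node, e_guest) in guest_forward.items():
    if key in host_forward:
        e_host = host_forward[key][1]
        ee = float(np.dot(e_guest, e_host))
        aggregate_forward_res[key] = (ee, ee ** 2)   # (e_g.e_h, (e_g.e_h)^2)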
Example #4
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()
        self.header = []
        self.penalty = params.penalty
        self.loss_history = []
        self.is_converged = False

    def fit(self, data_instances):
        self._abnormal_detection(data_instances)

        self.header = data_instances.schema.get(
            'header')  # ['x1', 'x2', 'x3' ... ]

        self.__init_parameters()

        self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances,
                                   batch_size=self.batch_size)

        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0

            for batch_data in batch_data_generator:
                n = batch_data.count()

                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)

                grad, loss = grad_loss.reduce(
                    self.aggregator.aggregate_grad_loss)

                grad /= n
                loss /= n

                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                delta_grad = self.optimizer.apply_gradients(grad)

                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            self.loss_history.append(total_loss)
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # send loss

            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)

            w = np.array(w)
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)

            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))

            if converge_flag:
                self.is_converged = True
                break

        self.show_meta()
        self.show_model()
        LOGGER.debug("in fit self coef: {}".format(self.coef_))
        return data_instances

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)

        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        # LOGGER.debug("Initialed model")
        return w

    def predict(self, data_instances, predict_param):
        LOGGER.debug("coef: {}, intercept: {}".format(self.coef_,
                                                      self.intercept_))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y:
                                                 (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x:
                                                      (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y:
                                             (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
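
Outside of DTables, the predict flow above reduces to sigmoid(w·x + b) followed by a hard threshold. A minimal numpy sketch of that pipeline; the helper name and data are hypothetical, and the 0.5 default stands in for predict_param.threshold.

import numpy as np

def predict_rows(X, coef, intercept, threshold=0.5):
    # probability via the logistic function, then a hard label by threshold
    prob = 1.0 / (1.0 + np.exp(-(X @ coef + intercept)))
    return prob, (prob > threshold).astype(int)

X = np.array([[0.1, 0.2, 0.3], [0.4, 0.1, 0.0]])
coef, intercept = np.array([0.5, -0.2, 0.1]), 0.05
prob, label = predict_rows(X, coef, intercept)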
Example #5
class HeteroLRArbiter(BaseLogisticRegression):
    def __init__(self, logistic_params):
        super(HeteroLRArbiter, self).__init__(logistic_params)
        self.converge_func = DiffConverge(logistic_params.eps)

        # attribute
        self.pre_loss = None
        self.batch_num = None
        self.transfer_variable = HeteroLRTransferVariable()
        self.optimizer = Optimizer(logistic_params.learning_rate,
                                   logistic_params.optimizer)
        self.key_length = logistic_params.encrypt_param.key_length

    def fit(self, data_instance=None):
        # Generate encrypt keys
        self.encrypt_operator.generate_key(self.key_length)
        public_key = self.encrypt_operator.get_public_key()
        LOGGER.info("public_key:{}".format(public_key))
        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("remote public_key to host")

        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("remote public_key to guest")

        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:{}".format(batch_info))
        self.batch_num = batch_info["batch_num"]

        is_stop = False
        self.n_iter_ = 0
        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            batch_index = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))
                host_gradient = federation.get(
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get host_gradient from Host")
                guest_gradient = federation.get(
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get guest_gradient from Guest")

                # aggregate gradient
                host_gradient, guest_gradient = np.array(
                    host_gradient), np.array(guest_gradient)
                gradient = np.hstack((host_gradient, guest_gradient))
                # decrypt gradient
                for i in range(gradient.shape[0]):
                    gradient[i] = self.encrypt_operator.decrypt(gradient[i])

                # optimization
                optim_gradient = self.optimizer.apply_gradients(gradient)
                # separate optim_gradient according to the gradient sizes of Host and Guest
                separate_optim_gradient = HeteroFederatedAggregator.separate(
                    optim_gradient,
                    [host_gradient.shape[0], guest_gradient.shape[0]])
                host_optim_gradient = separate_optim_gradient[0]
                guest_optim_gradient = separate_optim_gradient[1]

                federation.remote(
                    host_optim_gradient,
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.HOST,
                    idx=0)
                LOGGER.info("Remote host_optim_gradient to Host")

                federation.remote(
                    guest_optim_gradient,
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote guest_optim_gradient to Guest")

                loss = federation.get(
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_,
                        batch_index),
                    idx=0)

                de_loss = self.encrypt_operator.decrypt(loss)
                LOGGER.info("Get loss from guest:{}".format(de_loss))
                # if converge
                if self.converge_func.is_converge(de_loss):
                    is_stop = True

                federation.remote(
                    is_stop,
                    name=self.transfer_variable.is_stopped.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.is_stopped, self.n_iter_,
                        batch_index),
                    role=consts.HOST,
                    idx=0)
                LOGGER.info("Remote is_stop to guest:{}".format(is_stop))

                federation.remote(
                    is_stop,
                    name=self.transfer_variable.is_stopped.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.is_stopped, self.n_iter_,
                        batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote is_stop to guest:".format(is_stop))

                batch_index += 1
                if is_stop:
                    LOGGER.info("Model is converged, iter:{}".format(
                        self.n_iter_))
                    break

            self.n_iter_ += 1
            if is_stop:
                break

        LOGGER.info("Reach max iter {}, train model finish!".format(
            self.max_iter))
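
The stopping rule is delegated to DiffConverge(eps); its internals are not shown here, but the behaviour the arbiter relies on is presumably an absolute-difference test on consecutive losses. A minimal sketch under that assumption:

class DiffConverge:
    """Assumed behaviour: stop when |loss_t - loss_{t-1}| < eps."""

    def __init__(self, eps=1e-5):
        self.eps = eps
        self.pre_loss = None

    def is_converge(self, loss):
        if self.pre_loss is None:
            self.pre_loss = loss
            return False
        converged = abs(loss - self.pre_loss) < self.eps
        self.pre_loss = loss
        return converged

checker = DiffConverge(eps=1e-4)
for loss in [0.70, 0.52, 0.41, 0.409995]:
    if checker.is_converge(loss):
        break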
Example #6
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        """
        :param penalty: l1 or l2
        :param alpha:
        :param lr:
        :param eps:
        :param max_iter:
        :param optim_method: must be in ['sgd', 'RMSProp' ,'Adam', 'AdaGrad']
        :param batch_size: only work when otpim_method is mini-batch, represent for mini-batch's size
        """
        super(HomoLRGuest, self).__init__(params)

        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()

    def fit(self, data_instances):
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}"
                    "batch_size: {}".format(self.alpha, self.eps,
                                            self.max_iter, self.batch_size))
        self.__init_parameters()

        w = self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances,
                                   batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            # LOGGER.debug("Enter iter_num: {}".format(iter_num))
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = grad_loss.count()
                grad, loss = grad_loss.reduce(
                    self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n

                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                # LOGGER.debug("before update: {}".format(grad))
                delta_grad = self.optimizer.apply_gradients(grad)
                # LOGGER.debug("after apply: {}".format(delta_grad))

                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)

            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)
            w = np.array(w)
            # LOGGER.debug("Received final model: {}".format(w))
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))

            if converge_flag:
                # self.save_model(w)
                break
        # LOGGER.info("trainning finish, final coef: {}, final intercept: {}".format(
        #     self.coef_, self.intercept_))

    def __init_parameters(self):

        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        model_shape = self.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        # LOGGER.debug("Initialed model")
        return w

    def predict(self, data_instances, predict_param):
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y:
                                                 (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x:
                                                      (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y:
                                             (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
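
Each mini-batch step above computes per-partition (gradient, loss) sums via mapPartitions, reduces them with the aggregator, and averages by the batch size. The plain-Python sketch below shows that reduce-then-average pattern; aggregate_grad_loss is assumed to be an element-wise sum of (gradient, loss) pairs, and the partial sums are hypothetical.

import numpy as np
from functools import reduce

def aggregate_grad_loss(a, b):
    # assumed semantics: element-wise sum of (gradient, loss) pairs
    return a[0] + b[0], a[1] + b[1]

# partial sums from two partitions of one mini-batch (hypothetical values)
partials = [(np.array([0.2, -0.1]), 1.4),
            (np.array([0.1, 0.3]), 0.9)]
n = 5                                     # rows in the mini-batch

grad, loss = reduce(aggregate_grad_loss, partials)
grad, loss = grad / n, loss / n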