Example #1
 def test_initializer(self):
     initializer = Initializer()
     data_shape = 10
     init_param_obj = InitParam(init_method=consts.RANDOM_NORMAL,
                                init_const=20,
                                fit_intercept=False)
     model = initializer.init_model(model_shape=data_shape,
                                    init_params=init_param_obj)
     model_shape = model.shape
     self.assertTrue(model_shape == (10, ))
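Note: the test above only asserts the returned shape. As a mental model of what `init_model` does with `consts.RANDOM_NORMAL`, the sketch below is a plausible reading, not FATE's actual implementation: draw the weights from a normal distribution and append one extra slot for the intercept when `fit_intercept=True`.

import numpy as np

def init_model_sketch(model_shape, fit_intercept=False, init_const=20):
    # RANDOM_NORMAL: draw each weight from N(0, 1). Other init methods
    # (e.g. a constant fill that would use init_const) are omitted here.
    size = model_shape + 1 if fit_intercept else model_shape
    return np.random.randn(size)

# With fit_intercept=False and model_shape=10 the result has shape (10,),
# which is exactly what test_initializer asserts.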
Example #2
class HomoLRHost(HomoLRBase):
    def __init__(self):
        super(HomoLRHost, self).__init__()

        self.aggregator = HomoFederatedAggregator()

        self.initializer = Initializer()
        self.mini_batch_obj = None
        self.classes_ = [0, 1]
        self.has_sychronized_encryption = False
        self.role = consts.HOST

    def _init_model(self, params):
        super(HomoLRHost, self)._init_model(params)
        encrypt_params = params.encrypt_param
        if encrypt_params.method in [consts.PAILLIER]:
            self.use_encrypt = True
        else:
            self.use_encrypt = False

        if self.use_encrypt and params.penalty == 'L1':
            raise RuntimeError(
                "Encrypted h**o-lr supports L2 penalty or 'none' only")

        if self.use_encrypt:
            self.gradient_operator = TaylorLogisticGradient()
            self.re_encrypt_batches = params.re_encrypt_batches
        else:
            self.gradient_operator = LogisticGradient()

    def fit(self, data_instances):
        if not self.need_run:
            return data_instances

        self.init_schema(data_instances)
        LOGGER.debug("Before trainning, self.header: {}".format(self.header))
        self._abnormal_detection(data_instances)

        self.__init_parameters(data_instances)

        w = self.__init_model(data_instances)

        for iter_num in range(self.max_iter):
            # mini-batch
            LOGGER.debug("In iter: {}".format(iter_num))
            batch_data_generator = self.mini_batch_obj.mini_batch_data_generator()
            batch_num = 0
            total_loss = 0

            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)

                grad_loss = batch_data.mapPartitions(f)

                n = batch_data.count()
                if not self.use_encrypt:
                    grad, loss = grad_loss.reduce(
                        self.aggregator.aggregate_grad_loss)
                    grad = np.array(grad)
                    grad /= n
                    loss /= n
                    if self.updater is not None:
                        loss_norm = self.updater.loss_norm(self.coef_)
                        total_loss += loss + loss_norm

                    # if not self.use_loss:
                    #     total_loss = np.linalg.norm(self.coef_)

                    if not self.need_one_vs_rest:
                        metric_meta = MetricMeta(
                            name='train',
                            metric_type="LOSS",
                            extra_metas={"unit_name": "iters"})
                        metric_name = self.get_metric_name('loss')

                        self.callback_meta(metric_name=metric_name,
                                           metric_namespace='train',
                                           metric_meta=metric_meta)
                        self.callback_metric(
                            metric_name=metric_name,
                            metric_namespace='train',
                            metric_data=[Metric(iter_num, total_loss)])

                else:
                    grad, _ = grad_loss.reduce(self.aggregator.aggregate_grad)
                    grad = np.array(grad)
                    grad /= n

                self.update_model(grad)
                w = self.merge_model()

                batch_num += 1
                if self.use_encrypt and batch_num % self.re_encrypt_batches == 0:
                    to_encrypt_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.to_encrypt_model, iter_num,
                        batch_num)

                    federation.remote(
                        w,
                        name=self.transfer_variable.to_encrypt_model.name,
                        tag=to_encrypt_model_id,
                        role=consts.ARBITER,
                        idx=0)

                    re_encrypted_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.re_encrypted_model, iter_num,
                        batch_num)
                    LOGGER.debug("re_encrypted_model_id: {}".format(
                        re_encrypted_model_id))
                    w = federation.get(
                        name=self.transfer_variable.re_encrypted_model.name,
                        tag=re_encrypted_model_id,
                        idx=0)

                    w = np.array(w)
                    self.set_coef_(w)

            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.host_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.host_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            if not self.use_encrypt:
                loss_transfer_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_loss, iter_num)

                federation.remote(total_loss,
                                  name=self.transfer_variable.host_loss.name,
                                  tag=loss_transfer_id,
                                  role=consts.ARBITER,
                                  idx=0)

            LOGGER.debug("model and loss sent")

            final_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)

            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=final_model_id,
                               idx=0)

            w = np.array(w)
            self.set_coef_(w)

            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)

            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)

            self.n_iter_ = iter_num
            LOGGER.debug("converge_flag: {}".format(converge_flag))
            if converge_flag:
                break
                # self.save_model()

    def __init_parameters(self, data_instances):

        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.host_party_weight)
        LOGGER.debug(
            "Start to remote party_weight: {}, transfer_id: {}".format(
                self.party_weight, party_weight_id))

        federation.remote(self.party_weight,
                          name=self.transfer_variable.host_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)

        self.__synchronize_encryption()

        # Send re-encrypt times
        self.mini_batch_obj = MiniBatch(data_inst=data_instances,
                                        batch_size=self.batch_size)
        if self.use_encrypt:
            # LOGGER.debug("Use encryption, send re_encrypt_times")
            total_batch_num = self.mini_batch_obj.batch_nums
            re_encrypt_times = total_batch_num // self.re_encrypt_batches
            transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.re_encrypt_times)
            LOGGER.debug(
                "Start to remote re_encrypt_times: {}, transfer_id: {}".format(
                    re_encrypt_times, transfer_id))

            federation.remote(
                re_encrypt_times,
                name=self.transfer_variable.re_encrypt_times.name,
                tag=transfer_id,
                role=consts.ARBITER,
                idx=0)
            LOGGER.info("sent re_encrypt_times: {}".format(re_encrypt_times))

    def __synchronize_encryption(self, mode='train'):
        """
        Communicate with the arbiter: report whether this host uses encryption, and receive the public key if it does.
        """
        # Tell the arbiter whether this host uses encryption
        use_encryption_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.use_encrypt, mode)
        LOGGER.debug("Start to remote use_encrypt: {}, transfer_id: {}".format(
            self.use_encrypt, use_encryption_id))

        federation.remote(self.use_encrypt,
                          name=self.transfer_variable.use_encrypt.name,
                          tag=use_encryption_id,
                          role=consts.ARBITER,
                          idx=0)

        # Set public key
        if self.use_encrypt:
            pubkey_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey, mode)
            pubkey = federation.get(
                name=self.transfer_variable.paillier_pubkey.name,
                tag=pubkey_id,
                idx=0)
            LOGGER.debug("Received pubkey")
            self.encrypt_operator.set_public_key(pubkey)
        LOGGER.info("Finish synchronized ecryption")
        self.has_sychronized_encryption = True

    def predict(self, data_instances):
        if not self.need_run:
            return data_instances

        if not self.has_sychronized_encryption:
            self.__synchronize_encryption(mode='predict')
            self.__load_arbiter_model()
        else:
            LOGGER.info("in predict, has synchronize encryption information")

        feature_shape = get_features_shape(data_instances)
        LOGGER.debug("Shape of coef_ : {}, feature shape: {}".format(
            len(self.coef_), feature_shape))
        local_data = data_instances.first()
        LOGGER.debug("One data, features: {}".format(local_data[1].features))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)

        if self.use_encrypt:
            encrypted_wx_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.predict_wx)
            LOGGER.debug("Host encrypted wx id: {}".format(encrypted_wx_id))
            LOGGER.debug("Start to remote wx: {}, transfer_id: {}".format(
                wx, encrypted_wx_id))
            federation.remote(wx,
                              name=self.transfer_variable.predict_wx.name,
                              tag=encrypted_wx_id,
                              role=consts.ARBITER,
                              idx=0)
            predict_result_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.predict_result)
            LOGGER.debug("predict_result_id: {}".format(predict_result_id))

            predict_result = federation.get(
                name=self.transfer_variable.predict_result.name,
                tag=predict_result_id,
                idx=0)
            # local_predict_table = predict_result.collect()
            LOGGER.debug(
                "predict_result count: {}, data_instances count: {}".format(
                    predict_result.count(), data_instances.count()))

            predict_result_table = predict_result.join(
                data_instances,
                lambda p, d: [d.label, None, p, {"0": None, "1": None}])

        else:
            pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
            pred_label = self.classified(pred_prob,
                                         self.predict_param.threshold)
            if self.predict_param.with_proba:
                predict_result = data_instances.mapValues(lambda x: x.label)
                predict_result = predict_result.join(pred_prob, lambda x, y:
                                                     (x, y))
            else:
                predict_result = data_instances.mapValues(lambda x:
                                                          (x.label, None))
            predict_result_table = predict_result.join(
                pred_label,
                lambda x, y: [x[0], y, x[1], {"0": None, "1": None}])

        LOGGER.debug("Finish predict")

        LOGGER.debug("In host predict, predict_result_table is : {}".format(
            predict_result_table.first()))
        return predict_result_table

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)

        w = self.encrypt_operator.encrypt_list(w)
        w = np.array(w)

        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        return w

    def __load_arbiter_model(self):
        final_model_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.final_model, "predict")
        final_model = federation.get(
            name=self.transfer_variable.final_model.name,
            tag=final_model_id,
            idx=0)
        # LOGGER.info("Received arbiter's model")
        # LOGGER.debug("final_model: {}".format(final_model))
        self.set_coef_(final_model)

    def _get_param(self):
        if self.need_one_vs_rest:
            one_vs_rest_class = list(map(str, self.one_vs_rest_obj.classes))
            param_protobuf_obj = lr_model_param_pb2.LRModelParam(
                iters=self.n_iter_,
                loss_history=[],
                is_converged=self.is_converged,
                weight={},
                intercept=0,
                need_one_vs_rest=self.need_one_vs_rest,
                one_vs_rest_classes=one_vs_rest_class)
            return param_protobuf_obj

        header = self.header
        weight_dict = {}
        for idx, header_name in enumerate(header):
            coef_i = self.coef_[idx]
            weight_dict[header_name] = coef_i

        param_protobuf_obj = lr_model_param_pb2.LRModelParam(
            iters=self.n_iter_,
            loss_history=[],
            is_converged=self.is_converged,
            weight=weight_dict,
            intercept=self.intercept_,
            need_one_vs_rest=self.need_one_vs_rest,
            header=header)
        from google.protobuf import json_format
        json_result = json_format.MessageToJson(param_protobuf_obj)
        LOGGER.debug("json_result: {}".format(json_result))
        return param_protobuf_obj
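Note: under Paillier encryption the host swaps `LogisticGradient` for `TaylorLogisticGradient`. The reason is that the sigmoid cannot be evaluated on ciphertexts, while a first-order Taylor expansion needs only additions and scalar multiplications, which Paillier supports homomorphically. The sketch below illustrates that idea for labels in {-1, +1}; it is a simplified stand-in, not FATE's code, and it omits the intercept's gradient.

import numpy as np

def taylor_logistic_gradient_sketch(X, y, coef, intercept):
    # Around wx = 0, sigmoid(t) ~= 0.5 + 0.25 * t, so the per-sample factor
    # sigmoid(-y * wx) * (-y) becomes 0.25 * wx - 0.5 * y for y in {-1, +1}:
    # linear in wx, hence computable on encrypted wx.
    wx = X @ coef + intercept
    factor = 0.25 * wx - 0.5 * y
    return (factor[:, None] * X).mean(axis=0)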
Example #3
File: homo_lr_test.py  Project: zzzcq/FATE
class TestHomoLR(unittest.TestCase):
    def setUp(self):

        self.guest_X = np.array([[1, 2, 3, 4, 5], [3, 2, 4, 5, 1],
                                 [2, 2, 3, 1, 1]]) / 10
        self.guest_Y = np.array([[1], [1], [-1]])

        self.values = []
        for idx, x in enumerate(self.guest_X):
            inst = Instance(inst_id=idx, features=x, label=self.guest_Y[idx])
            self.values.append((idx, inst))

        self.host_X = np.array([[1, 1.2, 3.1, 4, 5], [2.3, 2, 4, 5.3, 1],
                                [2, 2.2, 1.3, 1, 1.6]]) / 10
        self.host_Y = np.array([[-1], [1], [-1]])

        self.host_values = []
        for idx, x in enumerate(self.host_X):
            inst = Instance(inst_id=idx, features=x, label=self.host_Y[idx])
            self.host_values.append((idx, inst))

        self.max_iter = 10
        self.alpha = 0.01
        self.learning_rate = 0.01
        optimizer = 'SGD'
        self.gradient_operator = LogisticGradient()
        self.initializer = Initializer()
        self.fit_intercept = True
        self.init_param_obj = InitParam(fit_intercept=self.fit_intercept)
        self.updater = L2Updater(self.alpha, self.learning_rate)
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=optimizer)
        self.__init_model()

    def __init_model(self):
        model_shape = self.guest_X.shape[1]
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        return w

    def __init_host_model(self):
        model_shape = self.host_X.shape[1]
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.host_coef_ = w[:-1]
            self.host_intercept_ = w[-1]
        else:
            self.host_coef_ = w
            self.host_intercept_ = 0
        return w

    def test_one_iter(self):
        w = self.__init_model()
        print("before training, coef: {}, intercept: {}".format(
            self.coef_, self.intercept_))
        self.assertEqual(self.coef_.shape[0], self.guest_X.shape[1])
        grad, loss = self.gradient_operator.compute(
            self.values,
            coef=self.coef_,
            intercept=self.intercept_,
            fit_intercept=self.fit_intercept)
        loss_norm = self.updater.loss_norm(self.coef_)
        loss = loss + loss_norm
        delta_grad = self.optimizer.apply_gradients(grad)
        self.update_model(delta_grad)
        print("After training, coef: {}, intercept: {}, loss: {}".format(
            self.coef_, self.intercept_, loss))

    def test_multi_iter(self):
        w = self.__init_model()
        loss_hist = [100]
        for iter_num in range(self.max_iter):
            grad, loss = self.gradient_operator.compute(
                self.values,
                coef=self.coef_,
                intercept=self.intercept_,
                fit_intercept=self.fit_intercept)
            loss_norm = self.updater.loss_norm(self.coef_)
            loss = loss + loss_norm
            delta_grad = self.optimizer.apply_gradients(grad)
            self.update_model(delta_grad)
            self.assertTrue(loss <= loss_hist[-1])
            loss_hist.append(loss)
        print(loss_hist)

    def test_host_iter(self):
        w = self.__init_host_model()
        print("before training, coef: {}, intercept: {}".format(
            self.host_coef_, self.host_intercept_))
        self.assertEqual(self.host_coef_.shape[0], self.host_X.shape[1])
        grad, loss = self.gradient_operator.compute(
            self.host_values,
            coef=self.host_coef_,
            intercept=self.host_intercept_,
            fit_intercept=self.fit_intercept)
        loss_norm = self.updater.loss_norm(self.host_coef_)
        # print("***********************************************")
        # print(loss, loss_norm)
        self.assertTrue(loss is None)

    def update_model(self, gradient):
        LOGGER.debug(
            "In update_model function, shape of coef: {}, shape of gradient: {}"
            .format(np.shape(self.coef_), np.shape(gradient)))
        if self.fit_intercept:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_,
                                                      gradient[:-1])
            else:
                self.coef_ = self.coef_ - gradient[:-1]
            self.intercept_ -= gradient[-1]

        else:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_, gradient)
            else:
                self.coef_ = self.coef_ - gradient
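Note: the tests lean on `L2Updater` for both the penalty added to the loss (`loss_norm`) and the coefficient update (`update_coef`). The sketch below captures the usual L2 semantics those calls suggest; the class is illustrative, not FATE's implementation.

import numpy as np

class L2UpdaterSketch:
    def __init__(self, alpha, learning_rate):
        self.alpha = alpha
        self.learning_rate = learning_rate

    def loss_norm(self, coef):
        # Penalty term added to the data loss: (alpha / 2) * ||w||^2
        return 0.5 * self.alpha * float(np.dot(coef, coef))

    def update_coef(self, coef, gradient):
        # One gradient step; alpha * w is the derivative of the L2 term.
        return coef - self.learning_rate * (gradient + self.alpha * coef)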
Example #4
class HomoLRHost(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRHost, self).__init__(params)

        self.learning_rate = params.learning_rate
        self.batch_size = params.batch_size
        self.encrypt_params = params.encrypt_param

        if self.encrypt_params.method in [consts.PAILLIER]:
            self.use_encrypt = True
        else:
            self.use_encrypt = False

        if self.use_encrypt and params.penalty != consts.L2_PENALTY:
            raise RuntimeError("Encrypted h**o-lr supports L2 penalty only")

        if self.use_encrypt:
            self.gradient_operator = TaylorLogisticGradient()
            self.re_encrypt_batches = params.re_encrypt_batches
        else:
            self.gradient_operator = LogisticGradient()

        self.aggregator = HomoFederatedAggregator()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate, opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.mini_batch_obj = None
        self.evaluator = Evaluation(classi_type=consts.BINARY)
        self.classes_ = [0, 1]
        self.has_sychronized_encryption = False

    def fit(self, data_instances):
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}"
                    "batch_size: {}".format(self.alpha,
                                            self.eps, self.max_iter, self.batch_size))
        self.__init_parameters(data_instances)

        w = self.__init_model(data_instances)

        for iter_num in range(self.max_iter):
            # mini-batch
            LOGGER.debug("In iter: {}".format(iter_num))
            batch_data_generator = self.mini_batch_obj.mini_batch_data_generator()
            batch_num = 0
            total_loss = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)

                n = grad_loss.count()
                if not self.use_encrypt:
                    grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                    grad = np.array(grad)
                    grad /= n
                    loss /= n
                    # gradient_regular, loss_regular = self.updater.compute(w)
                    if self.updater is not None:
                        loss_norm = self.updater.loss_norm(self.coef_)
                        total_loss += loss + loss_norm
                    # LOGGER.debug("iter: {}, grad: {}, loss: {}".format(iter_num, grad, loss))
                else:
                    grad, _ = grad_loss.reduce(self.aggregator.aggregate_grad)
                    grad = np.array(grad)
                    grad /= n
                    # gradient_regular = self.updater.gradient_norm(w)
                # grad += gradient_regular
                # grad = np.array(grad)
                self.update_model(grad)
                w = self.merge_model()

                batch_num += 1
                if self.use_encrypt and batch_num % self.re_encrypt_batches == 0:
                    to_encrypt_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.to_encrypt_model, iter_num, batch_num
                    )
                    federation.remote(w,
                                      name=self.transfer_variable.to_encrypt_model.name,
                                      tag=to_encrypt_model_id,
                                      role=consts.ARBITER,
                                      idx=0)

                    re_encrypted_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.re_encrypted_model, iter_num, batch_num
                    )
                    LOGGER.debug("re_encrypted_model_id: {}".format(re_encrypted_model_id))
                    w = federation.get(name=self.transfer_variable.re_encrypted_model.name,
                                       tag=re_encrypted_model_id,
                                       idx=0)
                    w = np.array(w)
                    self.set_coef_(w)

            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.host_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.host_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            if not self.use_encrypt:
                loss_transfer_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_loss, iter_num)
                federation.remote(total_loss,
                                  name=self.transfer_variable.host_loss.name,
                                  tag=loss_transfer_id,
                                  role=consts.ARBITER,
                                  idx=0)
            LOGGER.debug("model and loss sent")

            final_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)

            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=final_model_id,
                               idx=0)
            w = np.array(w)
            # LOGGER.debug("Recevide model from arbiter, model: {}".format(w))
            self.set_coef_(w)

            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id,
                                           idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge_flag: {}".format(converge_flag))
            if converge_flag:
                break
                # self.save_model()

    def __init_parameters(self, data_instances):

        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.host_party_weight
        )
        # LOGGER.debug("party_weight_id: {}".format(party_weight_id))
        federation.remote(self.party_weight,
                          name=self.transfer_variable.host_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)

        self.__synchronize_encryption()

        # Send re-encrypt times
        self.mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        if self.use_encrypt:
            # LOGGER.debug("Use encryption, send re_encrypt_times")
            total_batch_num = self.mini_batch_obj.batch_nums
            re_encrypt_times = total_batch_num // self.re_encrypt_batches
            transfer_id = self.transfer_variable.generate_transferid(self.transfer_variable.re_encrypt_times)
            federation.remote(re_encrypt_times,
                              name=self.transfer_variable.re_encrypt_times.name,
                              tag=transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            LOGGER.info("sent re_encrypt_times: {}".format(re_encrypt_times))

    def __synchronize_encryption(self):
        """
        Communicate with the arbiter: report whether this host uses encryption, and receive the public key if it does.
        """
        # Tell the arbiter whether this host uses encryption
        use_encryption_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.use_encrypt
        )
        federation.remote(self.use_encrypt,
                          name=self.transfer_variable.use_encrypt.name,
                          tag=use_encryption_id,
                          role=consts.ARBITER,
                          idx=0)

        # Set public key
        if self.use_encrypt:
            pubkey_id = self.transfer_variable.generate_transferid(self.transfer_variable.paillier_pubkey)
            pubkey = federation.get(name=self.transfer_variable.paillier_pubkey.name,
                                    tag=pubkey_id,
                                    idx=0)
            self.encrypt_operator.set_public_key(pubkey)
        LOGGER.info("Finish synchronized ecryption")
        self.has_sychronized_encryption = True

    def predict(self, data_instances, predict_param):
        if not self.has_sychronized_encryption:
            self.__synchronize_encryption()
            self.__load_arbiter_model()
        else:
            LOGGER.info("in predict, has synchronize encryption information")

        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)

        if self.use_encrypt:
            encrypted_wx_id = self.transfer_variable.generate_transferid(self.transfer_variable.predict_wx)
            # LOGGER.debug("predict_wd_id: {}".format(encrypted_wx_id))
            federation.remote(wx,
                              name=self.transfer_variable.predict_wx.name,
                              tag=encrypted_wx_id,
                              role=consts.ARBITER,
                              idx=0)
            predict_result_id = self.transfer_variable.generate_transferid(self.transfer_variable.predict_result)
            # LOGGER.debug("predict_result_id: {}".format(predict_result_id))
            predict_result = federation.get(name=self.transfer_variable.predict_result.name,
                                            tag=predict_result_id,
                                            idx=0)
            # local_predict_table = predict_result.collect()
            predict_result_table = predict_result.join(data_instances, lambda p, d: (d.label, None, p))
        else:
            pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
            pred_label = self.classified(pred_prob, predict_param.threshold)
            if predict_param.with_proba:
                predict_result = data_instances.mapValues(lambda x: x.label)
                predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
            else:
                predict_result = data_instances.mapValues(lambda x: (x.label, None))
            predict_result_table = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result_table

    def __init_model(self, data_instances):
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)

        w = self.encrypt_operator.encrypt_list(w)
        w = np.array(w)

        # LOGGER.debug("self use encryption: {}, w: {}, type of w: {}".format(self.use_encrypt, w, type(w)))
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Type of coef: {}".format(type(self.coef_)))
        return w

    def __load_arbiter_model(self):
        final_model_id = self.transfer_variable.generate_transferid(self.transfer_variable.final_model, "predict")
        final_model = federation.get(name=self.transfer_variable.final_model.name,
                                     tag=final_model_id,
                                     idx=0)
        self.set_coef_(final_model)
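Note: every `federation.remote` / `federation.get` pair in these examples is matched by a tag built from the transfer variable plus the iteration (and, for re-encryption, the batch) number, so each send pairs with exactly one receive on the other side. Below is a sketch of the tagging scheme this implies; the exact join format inside FATE's `generate_transferid` is an assumption.

def generate_transferid_sketch(transfer_var_name, *suffixes):
    # e.g. ("host_model", 3) -> "host_model.3"
    #      ("to_encrypt_model", 3, 7) -> "to_encrypt_model.3.7"
    return ".".join([transfer_var_name] + [str(s) for s in suffixes])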
Example #5
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()
        self.header = []
        self.penalty = params.penalty
        self.loss_history = []
        self.is_converged = False

    def fit(self, data_instances):
        self._abnormal_detection(data_instances)

        self.header = data_instances.schema.get(
            'header')  # ['x1', 'x2', 'x3' ... ]

        self.__init_parameters()

        self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances,
                                   batch_size=self.batch_size)

        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0

            for batch_data in batch_data_generator:
                n = batch_data.count()

                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)

                grad, loss = grad_loss.reduce(
                    self.aggregator.aggregate_grad_loss)

                grad /= n
                loss /= n

                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                delta_grad = self.optimizer.apply_gradients(grad)

                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            self.loss_history.append(total_loss)
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # send loss

            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)

            w = np.array(w)
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)

            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))

            if converge_flag:
                self.is_converged = True
                break

        self.show_meta()
        self.show_model()
        LOGGER.debug("in fit self coef: {}".format(self.coef_))
        return data_instances

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)

        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        # LOGGER.debug("Initialed model")
        return w

    def predict(self, data_instances, predict_param):
        LOGGER.debug("coef: {}, intercept: {}".format(self.coef_,
                                                      self.intercept_))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y:
                                                 (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x:
                                                      (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y:
                                             (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
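Note: the guest only sends its model and loss and then receives `final_model`; the aggregation itself runs on the arbiter, which is not shown in these examples. Given `party_weight`, the natural reading is a weighted federated average, sketched below as an assumption about the arbiter side.

import numpy as np

def aggregate_models_sketch(models, party_weights):
    # Weighted average of the party models, normalized by the total weight.
    total = float(sum(party_weights))
    return sum(w * np.asarray(m) for w, m in zip(party_weights, models)) / total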
Example #6
class HomoLRGuest(HomoLRBase):
    def __init__(self):
        super(HomoLRGuest, self).__init__()
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()

        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()
        self.loss_history = []
        self.is_converged = False
        self.role = consts.GUEST

    def fit(self, data_instances):
        if not self.need_run:
            return data_instances

        self._abnormal_detection(data_instances)
        self.init_schema(data_instances)
        self.__init_parameters()

        self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances,
                                   batch_size=self.batch_size)

        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0

            for batch_data in batch_data_generator:
                n = batch_data.count()

                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)

                grad, loss = grad_loss.reduce(
                    self.aggregator.aggregate_grad_loss)

                grad /= n
                loss /= n

                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                delta_grad = self.optimizer.apply_gradients(grad)

                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num

            # if not self.use_loss:
            #     total_loss = np.linalg.norm(self.coef_)

            w = self.merge_model()
            if not self.need_one_vs_rest:
                metric_meta = MetricMeta(name='train',
                                         metric_type="LOSS",
                                         extra_metas={
                                             "unit_name": "iters",
                                         })
                # metric_name = self.get_metric_name('loss')

                self.callback_meta(metric_name='loss',
                                   metric_namespace='train',
                                   metric_meta=metric_meta)
                self.callback_metric(
                    metric_name='loss',
                    metric_namespace='train',
                    metric_data=[Metric(iter_num, total_loss)])

            self.loss_history.append(total_loss)
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            LOGGER.debug("Start to remote model: {}, transfer_id: {}".format(
                w, model_transfer_id))

            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # send loss
            # if self.use_loss:
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            LOGGER.debug(
                "Start to remote total_loss: {}, transfer_id: {}".format(
                    total_loss, loss_transfer_id))
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)

            w = np.array(w)
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)

            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))

            if converge_flag:
                self.is_converged = True
                break

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        LOGGER.debug(
            "Start to remote party_weight: {}, transfer_id: {}".format(
                self.party_weight, party_weight_id))
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)

        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        # LOGGER.debug("Initialed model")
        return w

    def predict(self, data_instances):

        if not self.need_run:
            return data_instances
        LOGGER.debug(
            "homo_lr guest needs to run predict, coef length: {}, intercept: {}".format(
                len(self.coef_), self.intercept_))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, self.predict_param.threshold)

        predict_result = data_instances.mapValues(lambda x: x.label)
        predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
        predict_result = predict_result.join(
            pred_label,
            lambda x, y: [x[0], y, x[1], {"1": x[1], "0": (1 - x[1])}])
        return predict_result
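Note: the final join above emits rows with the fixed layout [true_label, predicted_label, predicted_probability, score_dict]. A small standalone illustration of building one such row from a probability, with thresholding as in `classified` (names here are illustrative):

def predict_row_sketch(true_label, prob, threshold=0.5):
    # Same layout as the join above: label, prediction, probability, scores.
    pred_label = 1 if prob > threshold else 0
    return [true_label, pred_label, prob, {"1": prob, "0": 1 - prob}]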
Example #7
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        """
        :param penalty: l1 or l2
        :param alpha:
        :param lr:
        :param eps:
        :param max_iter:
        :param optim_method: must be in ['sgd', 'RMSProp', 'Adam', 'AdaGrad']
        :param batch_size: only used when optim_method is mini-batch; the size of each mini-batch
        """
        super(HomoLRGuest, self).__init__(params)

        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()

    def fit(self, data_instances):
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}"
                    "batch_size: {}".format(self.alpha, self.eps,
                                            self.max_iter, self.batch_size))
        self.__init_parameters()

        w = self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances,
                                   batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            # LOGGER.debug("Enter iter_num: {}".format(iter_num))
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = grad_loss.count()
                grad, loss = grad_loss.reduce(
                    self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n

                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                # LOGGER.debug("before update: {}".format(grad))
                delta_grad = self.optimizer.apply_gradients(grad)
                # LOGGER.debug("after apply: {}".format(delta_grad))

                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)

            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)
            w = np.array(w)
            # LOGGER.debug("Received final model: {}".format(w))
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))

            if converge_flag:
                # self.save_model(w)
                break
        # LOGGER.info("trainning finish, final coef: {}, final intercept: {}".format(
        #     self.coef_, self.intercept_))

    def __init_parameters(self):

        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        model_shape = self.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        # LOGGER.debug("Initialed model")
        return w

    def predict(self, data_instances, predict_param):
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y:
                                                 (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x:
                                                      (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y:
                                             (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
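Note: for contrast with the Taylor variant used under encryption (Example #2), the exact logistic gradient that `LogisticGradient` presumably computes is sketched below for labels in {-1, +1}; this is a minimal NumPy stand-in, not FATE's implementation.

import numpy as np

def logistic_gradient_sketch(X, y, coef, intercept):
    # Exact per-batch gradient and loss of log(1 + exp(-y * wx)).
    wx = X @ coef + intercept
    d = -y / (1.0 + np.exp(y * wx))        # equals sigmoid(-y * wx) * (-y)
    grad = (d[:, None] * X).mean(axis=0)
    loss = np.mean(np.log1p(np.exp(-y * wx)))
    return grad, loss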