예제 #1
0
    def __init__(self, params: LogisticParam):
        """Set up the arbiter-side state of homogeneous logistic regression.

        :param params: parameter object; ``re_encrypt_batches``,
            ``converge_func`` and ``encrypt_param`` are read from it here and
            the whole object is forwarded to the base-class constructor.
        :raises RuntimeWarning: if ``params.converge_func`` is not ``'diff'``.
        """
        super(HomoLRArbiter, self).__init__(params)
        self.re_encrypt_batches = params.re_encrypt_batches
        self.aggregator = HomoFederatedAggregator()
        if params.converge_func == 'diff':
            # The attribute spelling ("convege") is kept as-is because it is
            # part of the object's attribute surface.
            # self.eps is presumably set by the base-class constructor.
            self.convege_func = DiffConverge(eps=self.eps)
        else:
            # 'diff' is the only accepted value, so the message names it
            # (it previously claimed the value had to be 'eps').
            raise RuntimeWarning(
                "Cannot recognize converge_func, must be 'diff'.")
        self.transfer_variable = HomoLRTransferVariable()

        # NOTE(review): this stores the whole params object, not a numeric
        # threshold - looks unintended; confirm which field was meant.
        self.predict_threshold = params
        self.encrypt_param = params.encrypt_param
        self.classes_ = [0, 1]

        # To be initialized
        self.host_use_encryption = []
        self.re_encrypt_times = []  # Record the times needed for each host
        self.curt_re_encrypt_times = []
        self.host_encrypter = []
        # The first one is guest weight, host weights for otherwise
        self.party_weights = []
        self.has_sychronized_encryption = False
        self.loss_history = []
        self.is_converged = False
        self.header = []
예제 #2
0
    def __init__(self, params: LogisticParam):
        """Set up the host-side state of homogeneous logistic regression.

        :param params: parameter object; ``learning_rate``, ``batch_size``,
            ``encrypt_param``, ``penalty``, ``re_encrypt_batches``,
            ``party_weight`` and ``optimizer`` are read from it here.
        :raises RuntimeError: if encryption is enabled and the penalty is not
            ``consts.L2_PENALTY``.
        """
        super(HomoLRHost, self).__init__(params)

        self.learning_rate = params.learning_rate
        self.batch_size = params.batch_size
        self.encrypt_params = params.encrypt_param

        # Encryption is switched on only for the Paillier method.
        self.use_encrypt = self.encrypt_params.method in [consts.PAILLIER]

        if self.use_encrypt:
            if params.penalty != consts.L2_PENALTY:
                # Message text is reproduced verbatim.
                raise RuntimeError("Encrypted h**o-lr supports L2 penalty only")
            self.gradient_operator = TaylorLogisticGradient()
            # Only defined on the encrypted path.
            self.re_encrypt_batches = params.re_encrypt_batches
        else:
            self.gradient_operator = LogisticGradient()

        self.aggregator = HomoFederatedAggregator()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(
            learning_rate=self.learning_rate,
            opt_method_name=params.optimizer,
        )
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.mini_batch_obj = None  # created later from the training data
        self.evaluator = Evaluation(classi_type=consts.BINARY)
        self.classes_ = [0, 1]
        self.has_sychronized_encryption = False
예제 #3
0
    def __init__(self, params: LogisticParam):
        """Set up the guest-side state of homogeneous logistic regression.

        :param params: parameter object; ``learning_rate``, ``party_weight``,
            ``optimizer`` and ``penalty`` are read from it here and the whole
            object is forwarded to the base-class constructor.
        """
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        # Instantiate the aggregator, as the arbiter and host classes do,
        # instead of binding the class object itself.
        self.aggregator = HomoFederatedAggregator()
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()
        self.header = []
        self.penalty = params.penalty
        self.loss_history = []
        self.is_converged = False
예제 #4
0
    def __init__(self, params: LogisticParam):
        """Set up the guest-side state of homogeneous logistic regression.

        :param params: parameter object; ``learning_rate``, ``party_weight``
            and ``optimizer`` are read from it here and the whole object is
            forwarded to the base-class constructor. Other settings
            (penalty, alpha, eps, max_iter, batch_size, ...) are presumably
            consumed by the base class - verify there.
        """
        super(HomoLRGuest, self).__init__(params)

        self.learning_rate = params.learning_rate
        # Instantiate the aggregator, as the arbiter and host classes do,
        # instead of binding the class object itself.
        self.aggregator = HomoFederatedAggregator()
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()
예제 #5
0
class HomoLRArbiter(BaseLogisticRegression):
    """Arbiter party of homogeneous logistic regression.

    In each training iteration the arbiter re-encrypts host models on
    request, aggregates the models and losses of all parties, sends the
    aggregated model and the convergence flag back to guest and hosts, and
    keeps one encrypter per host (a real Paillier one for hosts that use
    encryption, a fake one otherwise).
    """

    def __init__(self, params: LogisticParam):
        """Set up the arbiter-side state.

        :param params: parameter object; ``re_encrypt_batches``,
            ``converge_func`` and ``encrypt_param`` are read from it here and
            the whole object is forwarded to the base-class constructor.
        :raises RuntimeWarning: if ``params.converge_func`` is not ``'diff'``.
        """
        super(HomoLRArbiter, self).__init__(params)
        self.re_encrypt_batches = params.re_encrypt_batches
        self.aggregator = HomoFederatedAggregator()
        if params.converge_func == 'diff':
            # Attribute spelling ("convege") kept: fit() reads it by this name.
            self.convege_func = DiffConverge(eps=self.eps)
        else:
            # 'diff' is the only accepted value, so the message names it
            # (it previously claimed the value had to be 'eps').
            raise RuntimeWarning(
                "Cannot recognize converge_func, must be 'diff'.")
        self.transfer_variable = HomoLRTransferVariable()

        # NOTE(review): this stores the whole params object, not a numeric
        # threshold - looks unintended; confirm which field was meant.
        self.predict_threshold = params
        self.encrypt_param = params.encrypt_param
        self.classes_ = [0, 1]

        # To be initialized
        self.host_use_encryption = []
        self.re_encrypt_times = []  # Record the times needed for each host
        self.curt_re_encrypt_times = []
        self.host_encrypter = []
        # The first one is guest weight, host weights for otherwise
        self.party_weights = []
        self.has_sychronized_encryption = False
        self.loss_history = []
        self.is_converged = False
        self.header = []

    def fit(self, data=None):
        """Run the arbiter side of training for at most ``self.max_iter`` rounds.

        :param data: not used by the arbiter.
        """
        LOGGER.debug("self.has_sychronized_encryption: {}".format(
            self.has_sychronized_encryption))
        self.__init_parameters()
        LOGGER.debug("self.has_sychronized_encryption: {}".format(
            self.has_sychronized_encryption))

        LOGGER.info("Finish init parameters")

        for iter_num in range(self.max_iter):
            # re_encrypt host models
            self.__re_encrypt(iter_num)

            # Part3: Aggregate models receive from each party
            final_model = self.aggregator.aggregate_model(
                transfer_variable=self.transfer_variable,
                iter_num=iter_num,
                party_weights=self.party_weights,
                host_encrypter=self.host_encrypter)
            total_loss = self.aggregator.aggregate_loss(
                transfer_variable=self.transfer_variable,
                iter_num=iter_num,
                party_weights=self.party_weights,
                host_use_encryption=self.host_use_encryption)
            self.loss_history.append(total_loss)
            LOGGER.info("Iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            final_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            # The guest receives the aggregated model as-is.
            federation.remote(final_model,
                              name=self.transfer_variable.final_model.name,
                              tag=final_model_id,
                              role=consts.GUEST,
                              idx=0)
            # Each host receives the model passed through its own encrypter.
            for idx, encrypter in enumerate(self.host_encrypter):
                encrypted_model = encrypter.encrypt_list(final_model)

                federation.remote(encrypted_model,
                                  name=self.transfer_variable.final_model.name,
                                  tag=final_model_id,
                                  role=consts.HOST,
                                  idx=idx)

            # send converge flag
            converge_flag = self.convege_func.is_converge(total_loss)
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)

            federation.remote(converge_flag,
                              name=self.transfer_variable.converge_flag.name,
                              tag=converge_flag_id,
                              role=consts.GUEST,
                              idx=0)
            federation.remote(converge_flag,
                              name=self.transfer_variable.converge_flag.name,
                              tag=converge_flag_id,
                              role=consts.HOST,
                              idx=-1)
            self.set_coef_(final_model)
            self.n_iter_ = iter_num
            if converge_flag:
                self.is_converged = True
                break
        self._set_header()

    def predict(self, data=None, predict_param=None):
        """Serve prediction requests of the hosts that use encryption.

        For every such host the arbiter receives the host's ``predict_wx``
        table, decrypts it, applies the sigmoid, classifies it with
        ``predict_param.threshold`` and sends the labels back.

        :param data: not used by the arbiter.
        :param predict_param: its ``threshold`` attribute is read whenever at
            least one host uses encryption, so it must not be ``None`` then.
        :return: None
        """
        # synchronize encryption information
        if not self.has_sychronized_encryption:
            self.__synchronize_encryption()
            self.__send_host_mode()

        for idx, use_encrypt in enumerate(self.host_use_encryption):
            if use_encrypt:
                encrypter = self.host_encrypter[idx]
                predict_wx_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.predict_wx)
                predict_wx = federation.get(
                    name=self.transfer_variable.predict_wx.name,
                    tag=predict_wx_id,
                    idx=idx)
                decrypted_wx = encrypter.distribute_decrypt(predict_wx)
                pred_prob = decrypted_wx.mapValues(
                    lambda x: activation.sigmoid(x))
                pred_label = self.classified(pred_prob,
                                             predict_param.threshold)
                predict_result_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.predict_result)
                federation.remote(
                    pred_label,
                    name=self.transfer_variable.predict_result.name,
                    tag=predict_result_id,
                    role=consts.HOST,
                    idx=idx)
        LOGGER.info("Finish predicting, result has been sent back")
        return

    def __init_parameters(self):
        """
        Synchronize the start-up parameters with the guest and the hosts:
        party weights, encryption settings and per-host re-encrypt counts.
        :return:
        """
        # 1. Receive the party weight of each party
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        guest_weight = federation.get(
            name=self.transfer_variable.guest_party_weight.name,
            tag=party_weight_id,
            idx=0)

        host_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.host_party_weight)
        host_weights = federation.get(
            name=self.transfer_variable.host_party_weight.name,
            tag=host_weight_id,
            idx=-1)
        weights = [guest_weight]
        weights.extend(host_weights)

        # Normalize so the weights sum to one; the total is computed once
        # instead of once per element.
        total_weight = sum(weights)
        self.party_weights = [x / total_weight for x in weights]

        # 2. Synchronize encryption information
        self.__synchronize_encryption()

        # 3. Receive re-encrypt-times (stays 0 for hosts without encryption)
        self.re_encrypt_times = [0] * len(self.host_use_encryption)
        for idx, use_encryption in enumerate(self.host_use_encryption):
            if not use_encryption:
                continue
            re_encrypt_times_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.re_encrypt_times)
            re_encrypt_times = federation.get(
                name=self.transfer_variable.re_encrypt_times.name,
                tag=re_encrypt_times_id,
                idx=idx)
            self.re_encrypt_times[idx] = re_encrypt_times
        LOGGER.info("re encrypt times for all parties: {}".format(
            self.re_encrypt_times))

    def __synchronize_encryption(self):
        """
        Communicate with hosts. Specify whether use encryption or not and transfer the public keys.

        The encrypter list is rebuilt from scratch on every call, so that
        ``self.host_encrypter[idx]`` always belongs to host ``idx`` of the
        current synchronisation and never to a key pair from an earlier one.
        """
        # 1. Use Encrypt: Specify which host use encryption
        host_use_encryption_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.use_encrypt)
        host_use_encryption = federation.get(
            name=self.transfer_variable.use_encrypt.name,
            tag=host_use_encryption_id,
            idx=-1)
        self.host_use_encryption = host_use_encryption

        LOGGER.info("host use encryption: {}".format(self.host_use_encryption))
        # 2. Send pubkey to those use-encryption hosts
        encrypters = []
        for idx, use_encryption in enumerate(self.host_use_encryption):
            if not use_encryption:
                encrypter = FakeEncrypt()
            else:
                encrypter = PaillierEncrypt()
                encrypter.generate_key(self.encrypt_param.key_length)
                pub_key = encrypter.get_public_key()
                pubkey_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.paillier_pubkey)
                federation.remote(
                    pub_key,
                    name=self.transfer_variable.paillier_pubkey.name,
                    tag=pubkey_id,
                    role=consts.HOST,
                    idx=idx)

            encrypters.append(encrypter)
        self.host_encrypter = encrypters
        self.has_sychronized_encryption = True

    def __send_host_mode(self):
        """Send the merged model to every host for prediction, encrypted
        with the host's encrypter when that host uses encryption."""
        model = self.merge_model()
        final_model_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.final_model, "predict")
        for idx, use_encrypt in enumerate(self.host_use_encryption):
            if use_encrypt:
                encrypter = self.host_encrypter[idx]
                final_model = encrypter.encrypt_list(model)
            else:
                final_model = model
            federation.remote(final_model,
                              name=self.transfer_variable.final_model.name,
                              tag=final_model_id,
                              role=consts.HOST,
                              idx=idx)

    def __re_encrypt(self, iter_num):
        """Decrypt and re-encrypt host models for one training iteration.

        Every host with remaining re-encrypt rounds sends its model once per
        ``self.re_encrypt_batches`` batches; the loop ends when no host has
        rounds left.

        :param iter_num: current iteration, used to build the transfer tags.
        """
        # If use encrypt, model weight need to be re-encrypt every several batches.
        self.curt_re_encrypt_times = self.re_encrypt_times.copy()

        # Part2: re-encrypt model weight from each host
        batch_num = 0
        while True:
            batch_num += self.re_encrypt_batches

            to_encrypt_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.to_encrypt_model, iter_num, batch_num)
            re_encrypted_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.re_encrypted_model, iter_num, batch_num)
            for idx, left_times in enumerate(self.curt_re_encrypt_times):
                if left_times <= 0:
                    continue
                re_encrypt_model = federation.get(
                    name=self.transfer_variable.to_encrypt_model.name,
                    tag=to_encrypt_model_id,
                    idx=idx)
                encrypter = self.host_encrypter[idx]
                decrypt_model = encrypter.decrypt_list(re_encrypt_model)
                re_encrypt_model = encrypter.encrypt_list(decrypt_model)
                federation.remote(
                    re_encrypt_model,
                    name=self.transfer_variable.re_encrypted_model.name,
                    tag=re_encrypted_model_id,
                    role=consts.HOST,
                    idx=idx)

                self.curt_re_encrypt_times[idx] = left_times - 1

            if sum(self.curt_re_encrypt_times) == 0:
                break

    def _set_header(self):
        """Generate placeholder feature names, one per coefficient."""
        self.header = ['head_' + str(x) for x in range(len(self.coef_))]
예제 #6
0
class HomoLRHost(BaseLogisticRegression):
    """Host party of homogeneous logistic regression.

    A host trains on its local data in mini-batches, sends its model (and,
    when not encrypted, its loss) to the arbiter after every iteration and
    receives the aggregated model and the convergence flag back. With
    Paillier encryption enabled, the model is additionally sent to the
    arbiter for re-encryption every ``re_encrypt_batches`` batches.
    """

    def __init__(self, params: LogisticParam):
        """Set up the host-side state.

        :param params: parameter object; ``learning_rate``, ``batch_size``,
            ``encrypt_param``, ``penalty``, ``re_encrypt_batches``,
            ``party_weight`` and ``optimizer`` are read from it here.
        :raises RuntimeError: if encryption is enabled and the penalty is not
            ``consts.L2_PENALTY``.
        """
        super(HomoLRHost, self).__init__(params)

        self.learning_rate = params.learning_rate
        self.batch_size = params.batch_size
        self.encrypt_params = params.encrypt_param

        # Encryption is switched on only for the Paillier method.
        self.use_encrypt = self.encrypt_params.method in [consts.PAILLIER]

        if self.use_encrypt and params.penalty != consts.L2_PENALTY:
            raise RuntimeError("Encrypted h**o-lr supports L2 penalty only")

        if self.use_encrypt:
            self.gradient_operator = TaylorLogisticGradient()
            # Only defined on the encrypted path.
            self.re_encrypt_batches = params.re_encrypt_batches
        else:
            self.gradient_operator = LogisticGradient()

        self.aggregator = HomoFederatedAggregator()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate, opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.mini_batch_obj = None  # created in __init_parameters
        self.evaluator = Evaluation(classi_type=consts.BINARY)
        self.classes_ = [0, 1]
        self.has_sychronized_encryption = False

    def fit(self, data_instances):
        """Run the host side of training for at most ``self.max_iter`` rounds.

        :param data_instances: training data table; it is split into
            mini-batches of ``self.batch_size``.
        """
        # A separator is added before "batch_size" so the two adjacent
        # literals no longer run the values together.
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}, "
                    "batch_size: {}".format(self.alpha,
                                            self.eps, self.max_iter, self.batch_size))
        self.__init_parameters(data_instances)

        w = self.__init_model(data_instances)

        for iter_num in range(self.max_iter):
            # mini-batch
            LOGGER.debug("In iter: {}".format(iter_num))
            batch_data_generator = self.mini_batch_obj.mini_batch_data_generator()
            batch_num = 0
            total_loss = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)

                # NOTE(review): this counts the entries of the mapPartitions
                # result, while HomoLRGuest.fit divides by
                # batch_data.count(); confirm which one is intended.
                n = grad_loss.count()
                if not self.use_encrypt:
                    grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                    grad = np.array(grad)
                    grad /= n
                    loss /= n
                    # The data loss is always accumulated; the penalty term
                    # is added on top only when an updater exists.
                    # (Previously the whole batch loss was dropped when
                    # self.updater was None.)
                    batch_loss = loss
                    if self.updater is not None:
                        batch_loss = loss + self.updater.loss_norm(self.coef_)
                    total_loss += batch_loss
                else:
                    # Encrypted path: only the gradient is aggregated.
                    grad, _ = grad_loss.reduce(self.aggregator.aggregate_grad)
                    grad = np.array(grad)
                    grad /= n
                self.update_model(grad)
                w = self.merge_model()

                batch_num += 1
                if self.use_encrypt and batch_num % self.re_encrypt_batches == 0:
                    # Hand the model to the arbiter for re-encryption and
                    # continue with the model it returns.
                    to_encrypt_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.to_encrypt_model, iter_num, batch_num
                    )
                    federation.remote(w,
                                      name=self.transfer_variable.to_encrypt_model.name,
                                      tag=to_encrypt_model_id,
                                      role=consts.ARBITER,
                                      idx=0)

                    re_encrypted_model_id = self.transfer_variable.generate_transferid(
                        self.transfer_variable.re_encrypted_model, iter_num, batch_num
                    )
                    LOGGER.debug("re_encrypted_model_id: {}".format(re_encrypted_model_id))
                    w = federation.get(name=self.transfer_variable.re_encrypted_model.name,
                                       tag=re_encrypted_model_id,
                                       idx=0)
                    w = np.array(w)
                    self.set_coef_(w)

            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.host_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.host_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            if not self.use_encrypt:
                # NOTE(review): the loss is sent as a sum over batches here,
                # while HomoLRGuest.fit divides by the batch count first;
                # confirm the intended scale.
                loss_transfer_id = self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_loss, iter_num)
                federation.remote(total_loss,
                                  name=self.transfer_variable.host_loss.name,
                                  tag=loss_transfer_id,
                                  role=consts.ARBITER,
                                  idx=0)
            LOGGER.debug("model and loss sent")

            final_model_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)

            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=final_model_id,
                               idx=0)
            w = np.array(w)
            self.set_coef_(w)

            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(name=self.transfer_variable.converge_flag.name,
                                           tag=converge_flag_id,
                                           idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge_flag: {}".format(converge_flag))
            if converge_flag:
                break

    def __init_parameters(self, data_instances):
        """Send the start-up parameters to the arbiter.

        Sends the party weight, synchronizes the encryption settings, builds
        the mini-batch object and, when encryption is used, sends the number
        of re-encrypt rounds per iteration.

        :param data_instances: training data table used to build mini-batches.
        """
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.host_party_weight
        )
        federation.remote(self.party_weight,
                          name=self.transfer_variable.host_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)

        self.__synchronize_encryption()

        # Send re-encrypt times
        self.mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        if self.use_encrypt:
            total_batch_num = self.mini_batch_obj.batch_nums
            # One re-encrypt round per full group of re_encrypt_batches batches.
            re_encrypt_times = total_batch_num // self.re_encrypt_batches
            transfer_id = self.transfer_variable.generate_transferid(self.transfer_variable.re_encrypt_times)
            federation.remote(re_encrypt_times,
                              name=self.transfer_variable.re_encrypt_times.name,
                              tag=transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            LOGGER.info("sent re_encrypt_times: {}".format(re_encrypt_times))

    def __synchronize_encryption(self):
        """
        Communicate with the arbiter: report whether this host uses
        encryption and, if so, receive the public key and install it on
        ``self.encrypt_operator``.
        """
        # Send if this host use encryption or not
        use_encryption_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.use_encrypt
        )
        federation.remote(self.use_encrypt,
                          name=self.transfer_variable.use_encrypt.name,
                          tag=use_encryption_id,
                          role=consts.ARBITER,
                          idx=0)

        # Set public key
        if self.use_encrypt:
            pubkey_id = self.transfer_variable.generate_transferid(self.transfer_variable.paillier_pubkey)
            pubkey = federation.get(name=self.transfer_variable.paillier_pubkey.name,
                                    tag=pubkey_id,
                                    idx=0)
            # self.encrypt_operator is not assigned in this class;
            # presumably provided by the base class.
            self.encrypt_operator.set_public_key(pubkey)
        LOGGER.info("Finish synchronized ecryption")
        self.has_sychronized_encryption = True

    def predict(self, data_instances, predict_param):
        """Predict labels for ``data_instances``.

        With encryption, ``wx`` is sent to the arbiter and the labels it
        returns are joined with the true labels. Without encryption the
        prediction is computed locally.

        :param data_instances: data table whose values expose ``label``.
        :param predict_param: ``threshold`` and ``with_proba`` are read on
            the non-encrypted path.
        :return: table of ``(label, probability or None, predicted label)``.
        """
        if not self.has_sychronized_encryption:
            self.__synchronize_encryption()
            self.__load_arbiter_model()
        else:
            LOGGER.info("in predict, has synchronize encryption information")

        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)

        if self.use_encrypt:
            encrypted_wx_id = self.transfer_variable.generate_transferid(self.transfer_variable.predict_wx)
            federation.remote(wx,
                              name=self.transfer_variable.predict_wx.name,
                              tag=encrypted_wx_id,
                              role=consts.ARBITER,
                              idx=0)
            predict_result_id = self.transfer_variable.generate_transferid(self.transfer_variable.predict_result)
            predict_result = federation.get(name=self.transfer_variable.predict_result.name,
                                            tag=predict_result_id,
                                            idx=0)
            # No probability is available on the encrypted path.
            predict_result_table = predict_result.join(data_instances, lambda p, d: (d.label, None, p))
        else:
            pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
            pred_label = self.classified(pred_prob, predict_param.threshold)
            if predict_param.with_proba:
                predict_result = data_instances.mapValues(lambda x: x.label)
                predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
            else:
                predict_result = data_instances.mapValues(lambda x: (x.label, None))
            predict_result_table = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result_table

    def __init_model(self, data_instances):
        """Create the initial model, pass it through ``self.encrypt_operator``
        and store it in ``coef_`` / ``intercept_``.

        :param data_instances: data table used to derive the model shape.
        :return: the initial weights as a numpy array.
        """
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)

        w = self.encrypt_operator.encrypt_list(w)
        w = np.array(w)

        if self.fit_intercept:
            # The last entry is the intercept.
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        return w

    def __load_arbiter_model(self):
        """Receive the model the arbiter sends for prediction and install it."""
        final_model_id = self.transfer_variable.generate_transferid(self.transfer_variable.final_model, "predict")
        final_model = federation.get(name=self.transfer_variable.final_model.name,
                                     tag=final_model_id,
                                     idx=0)
        self.set_coef_(final_model)
예제 #7
0
class HomoLRGuest(BaseLogisticRegression):
    """Guest party of homogeneous logistic regression.

    The guest trains on its local data in mini-batches, sends its model and
    loss to the arbiter after every iteration and receives the aggregated
    model and the convergence flag back.
    """

    def __init__(self, params: LogisticParam):
        """Set up the guest-side state.

        :param params: parameter object; ``learning_rate``, ``party_weight``,
            ``optimizer`` and ``penalty`` are read from it here and the whole
            object is forwarded to the base-class constructor.
        """
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        # Instantiate the aggregator, as HomoLRArbiter and HomoLRHost do,
        # instead of binding the class object itself.
        self.aggregator = HomoFederatedAggregator()
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()
        self.header = []
        self.penalty = params.penalty
        self.loss_history = []
        self.is_converged = False

    def fit(self, data_instances):
        """Run the guest side of training for at most ``self.max_iter`` rounds.

        :param data_instances: training data table; its schema supplies the
            header and it is split into mini-batches of ``self.batch_size``.
        :return: ``data_instances`` unchanged.
        """
        self._abnormal_detection(data_instances)

        self.header = data_instances.schema.get(
            'header')  # ['x1', 'x2', 'x3' ... ]

        self.__init_parameters()

        self.__init_model(data_instances)

        # self.batch_size is not assigned in this class; presumably set by
        # the base-class constructor.
        mini_batch_obj = MiniBatch(data_inst=data_instances,
                                   batch_size=self.batch_size)

        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0

            for batch_data in batch_data_generator:
                n = batch_data.count()

                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)

                grad, loss = grad_loss.reduce(
                    self.aggregator.aggregate_grad_loss)

                grad /= n
                loss /= n

                # The data loss is always accumulated; the penalty term is
                # added on top only when an updater exists. (Previously the
                # whole batch loss was dropped when self.updater was None.)
                batch_loss = loss
                if self.updater is not None:
                    batch_loss = loss + self.updater.loss_norm(self.coef_)
                total_loss += batch_loss
                delta_grad = self.optimizer.apply_gradients(grad)

                self.update_model(delta_grad)
                batch_num += 1

            # Average over batches; skipped when the generator yielded none
            # so that an empty epoch does not raise ZeroDivisionError.
            if batch_num > 0:
                total_loss /= batch_num
            w = self.merge_model()
            self.loss_history.append(total_loss)
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)

            w = np.array(w)
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)

            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))

            if converge_flag:
                self.is_converged = True
                break

        self.show_meta()
        self.show_model()
        LOGGER.debug("in fit self coef: {}".format(self.coef_))
        return data_instances

    def __init_parameters(self):
        """Send this guest's party weight to the arbiter."""
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)

        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        """Create the initial model and store it in ``coef_`` / ``intercept_``.

        :param data_instances: data table used to derive the model shape.
        :return: the initial weights as produced by the initializer.
        """
        model_shape = data_overview.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            # The last entry is the intercept.
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        return w

    def predict(self, data_instances, predict_param):
        """Predict labels for ``data_instances`` with the local model.

        :param data_instances: data table whose values expose ``label``.
        :param predict_param: ``threshold`` and ``with_proba`` are read.
        :return: table of ``(label, probability or None, predicted label)``.
        """
        LOGGER.debug("coef: {}, intercept: {}".format(self.coef_,
                                                      self.intercept_))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y:
                                                 (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x:
                                                      (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y:
                                             (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        """Forward ``flowid`` to the transfer variable."""
        self.transfer_variable.set_flowid(flowid)
예제 #8
0
class HomoLRGuest(BaseLogisticRegression):
    """Guest-side trainer for homogeneous federated logistic regression.

    Every iteration runs local mini-batch gradient updates, sends the merged
    model and the averaged loss to the arbiter, passes the model received
    back to ``set_coef_`` and stops once the arbiter's converge flag is
    truthy.
    """

    def __init__(self, params: LogisticParam):
        """
        :param params: configuration object; it is passed to the base class,
            and ``learning_rate``, ``party_weight`` and ``optimizer`` are
            read from it here
        """
        super(HomoLRGuest, self).__init__(params)

        self.learning_rate = params.learning_rate
        # Hold an aggregator *instance* (not the class object) so that
        # ``aggregate_grad_loss`` can be handed to ``reduce`` as a
        # two-argument callable.
        self.aggregator = HomoFederatedAggregator()
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()

    def fit(self, data_instances):
        """Train on ``data_instances`` in cooperation with the arbiter.

        :param data_instances: training data; used to size the model and
            fed to ``MiniBatch``
        """
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}, "
                    "batch_size: {}".format(self.alpha, self.eps,
                                            self.max_iter, self.batch_size))
        self.__init_parameters()

        # Sets self.coef_ / self.intercept_; the returned weights are not
        # needed here because the model is re-merged after every iteration.
        self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances,
                                   batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = grad_loss.count()
                grad, loss = grad_loss.reduce(
                    self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n

                # The data loss is always accumulated; the regularisation
                # term is added only when an updater is configured.
                # Skipping the accumulation without an updater would report
                # a constant zero loss to the arbiter.
                batch_loss = loss
                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    batch_loss = loss + loss_norm
                total_loss += batch_loss

                delta_grad = self.optimizer.apply_gradients(grad)

                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)

            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)
            w = np.array(w)
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)
            # Zero-based index of the most recent iteration.
            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))

            if converge_flag:
                break

    def __init_parameters(self):
        """Send ``self.party_weight`` to the arbiter."""
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        """Create the initial weights and store them on the instance.

        With ``fit_intercept`` set, the last entry of the initializer's
        result becomes ``intercept_`` and the rest ``coef_``; otherwise the
        whole result is ``coef_`` and ``intercept_`` is 0.

        :param data_instances: data whose feature shape sizes the model
        :return: the weights exactly as returned by the initializer
        """
        model_shape = self.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        return w

    def predict(self, data_instances, predict_param):
        """Score ``data_instances`` with the current coefficients.

        :param data_instances: table whose values expose a ``label`` attribute
        :param predict_param: object providing ``threshold`` and ``with_proba``
        :return: table of ``(label, probability, predicted_label)`` tuples;
            ``probability`` is ``None`` when ``predict_param.with_proba`` is
            falsy
        """
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y:
                                                 (x, y))
        else:
            # Keep the tuple shape but leave the probability slot empty.
            predict_result = data_instances.mapValues(lambda x:
                                                      (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y:
                                             (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        """Forward ``flowid`` to ``self.transfer_variable.set_flowid``."""
        self.transfer_variable.set_flowid(flowid)