Example #1
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()
        self.header = []
        self.penalty = params.penalty
        self.loss_history = []
        self.is_converged = False

    def fit(self, data_instances):
        self._abnormal_detection(data_instances)

        self.header = data_instances.schema.get(
            'header')  # ['x1', 'x2', 'x3' ... ]

        self.__init_parameters()

        self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances,
                                   batch_size=self.batch_size)

        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0

            for batch_data in batch_data_generator:
                n = batch_data.count()

                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)

                grad, loss = grad_loss.reduce(
                    self.aggregator.aggregate_grad_loss)

                grad /= n
                loss /= n

                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                delta_grad = self.optimizer.apply_gradients(grad)

                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            self.loss_history.append(total_loss)
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # send loss

            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)

            w = np.array(w)
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)

            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))

            if converge_flag:
                self.is_converged = True
                break

        self.show_meta()
        self.show_model()
        LOGGER.debug("in fit self coef: {}".format(self.coef_))
        return data_instances

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)

        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        # LOGGER.debug("Initialed model")
        return w

    def predict(self, data_instances, predict_param):
        LOGGER.debug("coef: {}, intercept: {}".format(self.coef_,
                                                      self.intercept_))
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y:
                                                 (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x:
                                                      (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y:
                                             (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
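
The core numerical step in the fit loop above is the per-batch gradient and loss that LogisticGradient.compute and aggregate_grad_loss produce before the grad /= n, loss /= n averaging. As a rough stand-alone illustration only (this is not FATE's implementation; the function name batch_logistic_grad_loss and the 0/1 label encoding are assumptions), the math amounts to:

import numpy as np

def batch_logistic_grad_loss(X, y, coef, intercept, fit_intercept=True):
    # Linear scores and sigmoid probabilities for one mini-batch.
    z = X.dot(coef) + intercept
    prob = 1.0 / (1.0 + np.exp(-z))

    # Residual per sample; labels are assumed to be encoded as 0/1 here.
    err = prob - y

    # Gradient summed over the batch; the fit loop above divides by n
    # afterwards (`grad /= n`), so this stays a plain sum.
    grad_coef = X.T.dot(err)
    grad = np.append(grad_coef, err.sum()) if fit_intercept else grad_coef

    # Log-loss summed over the batch (averaged by the caller, like `loss /= n`).
    tiny = 1e-12
    loss = -np.sum(y * np.log(prob + tiny) + (1 - y) * np.log(1 - prob + tiny))
    return grad, loss

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X = rng.normal(size=(8, 3))
    y = (rng.random(8) > 0.5).astype(float)
    grad, loss = batch_logistic_grad_loss(X, y, coef=np.zeros(3), intercept=0.0)
    n = len(y)
    print(grad / n, loss / n)  # averaged, as in the fit loop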
Example #2
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        """
        :param penalty: l1 or l2
        :param alpha:
        :param lr:
        :param eps:
        :param max_iter:
        :param optim_method: must be in ['sgd', 'RMSProp', 'Adam', 'AdaGrad']
        :param batch_size: only used when optim_method runs on mini-batches; the size of each mini-batch
        """
        super(HomoLRGuest, self).__init__(params)

        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight

        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]

        self.evaluator = Evaluation()

    def fit(self, data_instances):
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}"
                    "batch_size: {}".format(self.alpha, self.eps,
                                            self.max_iter, self.batch_size))
        self.__init_parameters()

        w = self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances,
                                   batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            # LOGGER.debug("Enter iter_num: {}".format(iter_num))
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = grad_loss.count()
                grad, loss = grad_loss.reduce(
                    self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n

                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                # LOGGER.debug("before update: {}".format(grad))
                delta_grad = self.optimizer.apply_gradients(grad)
                # LOGGER.debug("after apply: {}".format(delta_grad))

                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))
            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)
            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)

            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)
            w = np.array(w)
            # LOGGER.debug("Received final model: {}".format(w))
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)
            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))

            if converge_flag:
                # self.save_model(w)
                break
        # LOGGER.info("trainning finish, final coef: {}, final intercept: {}".format(
        #     self.coef_, self.intercept_))

    def __init_parameters(self):

        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        model_shape = self.get_features_shape(data_instances)

        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        # LOGGER.debug("Initialed model")
        return w

    def predict(self, data_instances, predict_param):
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y:
                                                 (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x:
                                                      (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y:
                                             (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
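
Both examples drive the same per-iteration exchange with the arbiter: send the local model and loss, receive the aggregated final_model back, then receive a converge_flag. The snippet below is a minimal stand-alone sketch of what the arbiter side conceptually does with those messages; it is not FATE's HomoFederatedAggregator, and the weighted averaging plus the loss-delta convergence test (and the name simulate_aggregation_round) are assumptions made only to illustrate the protocol.

import numpy as np

def simulate_aggregation_round(party_models, party_losses, party_weights,
                               prev_loss, eps=1e-4):
    # Normalize per-party weights (cf. the guest_party_weight sent in
    # __init_parameters above).
    weights = np.asarray(party_weights, dtype=float)
    weights = weights / weights.sum()

    # Weighted average of the models each party sent; this plays the role of
    # the "final_model" the guest receives and applies via set_coef_.
    merged = sum(w * np.asarray(m) for w, m in zip(weights, party_models))

    # Mean loss across parties; declare convergence when it stops moving,
    # standing in for the converge_flag the arbiter broadcasts.
    mean_loss = float(np.dot(weights, np.asarray(party_losses, dtype=float)))
    converged = prev_loss is not None and abs(prev_loss - mean_loss) < eps
    return merged, mean_loss, converged

if __name__ == "__main__":
    merged, loss, flag = simulate_aggregation_round(
        party_models=[np.array([0.2, -0.1]), np.array([0.4, 0.0])],
        party_losses=[0.69, 0.71],
        party_weights=[1.0, 1.0],
        prev_loss=0.70)
    print(merged, loss, flag)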