Example #1
    def fit_binary(self, data_instances, validate_data):
        self._abnormal_detection(data_instances)

        validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)
        LOGGER.debug(
            f"MODEL_STEP Start fin_binary, data count: {data_instances.count()}"
        )

        self.header = self.get_header(data_instances)
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        self.batch_generator.initialize_batch_generator(data_instances)
        self.gradient_loss_operator.set_total_batch_nums(
            self.batch_generator.batch_nums)

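        # one EncryptModeCalculator per mini-batch; re_encrypted_rate controls how often ciphertexts are refreshed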
        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)
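        # host models carry no intercept term, so intercept fitting is disabled before weight initialization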
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        LOGGER.debug("model_shape: {}, w shape: {}, w: {}".format(
            model_shape, w.shape, w))
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.init_param_obj.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:" + str(self.n_iter_))
            batch_data_generator = self.batch_generator.generate_batch_data()
            batch_index = 0
            self.optimizer.set_iters(self.n_iter_)
            for batch_data in batch_data_generator:
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data)
                LOGGER.debug(
                    f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}"
                )

                optim_host_gradient, fore_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.encrypted_calculator,
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)
                LOGGER.debug(
                    'optim_host_gradient: {}'.format(optim_host_gradient))

                training_info = {
                    "iteration": self.n_iter_,
                    "batch_index": batch_index
                }
                self.update_local_model(fore_gradient, data_instances,
                                        self.model_weights.coef_,
                                        **training_info)

                self.gradient_loss_operator.compute_loss(
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index, self.cipher_operator)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optim_host_gradient)
                batch_index += 1

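            # ask the arbiter whether training has converged at this iteration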
            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))

            LOGGER.info("Get is_converged flag from arbiter:{}".format(
                self.is_converged))

            validation_strategy.validate(self, self.n_iter_)

            self.n_iter_ += 1
            LOGGER.info("iter: {}, is_converged: {}".format(
                self.n_iter_, self.is_converged))
            if self.is_converged:
                break

        LOGGER.debug("Final lr weights: {}".format(self.model_weights.unboxed))
Example #2
    def fit(self, data_instances, validate_data=None):
        """
        Train poisson model of role guest
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_poisson_guest fit")
        self._abnormal_detection(data_instances)
        self.header = copy.deepcopy(self.get_header(data_instances))

        validation_strategy = self.init_validation_strategy(data_instances, validate_data)

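        # the optional exposure column provides a per-sample offset (log of exposure) for the Poisson model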
        self.exposure_index = self.get_exposure_index(self.header, self.exposure_colname)
        if self.exposure_index > -1:
            self.header.pop(self.exposure_index)
            LOGGER.info("expsoure provided at Guest, colname is {}".format(self.exposure_colname))
        exposure = data_instances.mapValues(lambda v: self.load_exposure(v))
        data_instances = data_instances.mapValues(lambda v: self.load_instance(v))

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
        self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                           self.encrypted_mode_calculator_param.mode,
                                                           self.encrypted_mode_calculator_param.re_encrypted_rate) for _
                                     in range(self.batch_generator.batch_nums)]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data)
                # compute offset of this batch
                batch_offset = exposure.join(batch_feat_inst, lambda ei, d: self.safe_log(ei))

                # Start gradient procedure
                optimized_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst,
                    self.encrypted_calculator,
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index,
                    batch_offset
                )
                LOGGER.debug("iteration:{} Guest's gradient: {}".format(self.n_iter_, optimized_gradient))
                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(data_instances, self.model_weights, self.n_iter_,
                                                         batch_index, batch_offset, loss_norm)

                self.model_weights = self.optimizer.update_model(self.model_weights, optimized_gradient)

                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
            LOGGER.info("iter: {},  is_converged: {}".format(self.n_iter_, self.is_converged))

            validation_strategy.validate(self, self.n_iter_)
            self.n_iter_ += 1
            if self.is_converged:
                break
Example #3
    def fit_binary(self, data_instances, validate_data=None):
        LOGGER.info("Enter hetero_fm_guest fit")
        self.header = self.get_header(data_instances)

        validation_strategy = self.init_validation_strategy(data_instances, validate_data)
        data_instances = data_instances.mapValues(HeteroFMGuest.load_data)
        LOGGER.debug(f"MODEL_STEP After load data, data count: {data_instances.count()}")
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)

        self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                           self.encrypted_mode_calculator_param.mode,
                                                           self.encrypted_mode_calculator_param.re_encrypted_rate) for _
                                     in range(self.batch_generator.batch_nums)]
        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)

        # intercept is initialized within FactorizationMachineWeights.
        # Skip initializer's intercept part.
        fit_intercept = False
        if self.init_param_obj.fit_intercept:
            fit_intercept = True
            self.init_param_obj.fit_intercept = False

        w_ = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
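        # latent factor matrix for the FM pairwise interaction term, shape (n_features, embed_size)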
        embed_ = np.random.normal(scale=1 / np.sqrt(self.init_param_obj.embed_size),
                                  size=(model_shape, self.init_param_obj.embed_size))
        self.model_weights = \
            FactorizationMachineWeights(w_, embed_, fit_intercept=fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_data.count()}")
                # Start gradient procedure
                LOGGER.debug("iter: {}, before compute gradient, data count: {}".format(self.n_iter_,
                                                                                        batch_data.count()))
                # optim_guest_gradient, fore_gradient, host_forwards = self.gradient_loss_operator. \
                optim_guest_gradient, fore_gradient = self.gradient_loss_operator. \
                    compute_gradient_procedure(
                        batch_data,
                        self.encrypted_calculator,
                        self.model_weights,
                        self.optimizer,
                        self.n_iter_,
                        batch_index
                )
                LOGGER.debug('optim_guest_gradient: {}'.format(optim_guest_gradient))

                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(data_instances, self.n_iter_, batch_index, loss_norm)

                # clip gradient
                if self.model_param.clip_gradient and self.model_param.clip_gradient > 0:
                    optim_guest_gradient = np.maximum(optim_guest_gradient, -self.model_param.clip_gradient)
                    optim_guest_gradient = np.minimum(optim_guest_gradient, self.model_param.clip_gradient)

                _model_weights = self.optimizer.update_model(self.model_weights, optim_guest_gradient)
                self.model_weights.update(_model_weights)
                batch_index += 1
                LOGGER.debug("fm_weight, iters: {}, update_model: {}".format(self.n_iter_, self.model_weights.unboxed))

            self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
            LOGGER.info("iter: {},  is_converged: {}".format(self.n_iter_, self.is_converged))

            validation_strategy.validate(self, self.n_iter_)

            self.n_iter_ += 1
            if self.is_converged:
                break

        LOGGER.debug("Final fm weights: {}".format(self.model_weights.unboxed))
Example #4
    def fit_binary(self, data_instances, validate_data=None):
        LOGGER.info("Enter hetero_lr_guest fit")
        self.header = self.get_header(data_instances)

        self.validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)
        data_instances = data_instances.mapValues(HeteroLRGuest.load_data)
        LOGGER.debug(
            f"MODEL_STEP After load data, data count: {data_instances.count()}"
        )
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(
            data_instances, self.batch_size)
        self.gradient_loss_operator.set_total_batch_nums(
            self.batch_generator.batch_nums)

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(
            self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = batch_data
                # LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}")

                # Start gradient procedure
                LOGGER.debug(
                    "iter: {}, before compute gradient, data count: {}".format(
                        self.n_iter_, batch_feat_inst.count()))
                optim_guest_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.encrypted_calculator,
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)

                # LOGGER.debug('optim_guest_gradient: {}'.format(optim_guest_gradient))
                # training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
                # self.update_local_model(fore_gradient, data_instances, self.model_weights.coef_, **training_info)

                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(
                    data_instances, self.model_weights, self.n_iter_,
                    batch_index, loss_norm)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optim_guest_gradient)
                batch_index += 1
                # LOGGER.debug("lr_weight, iters: {}, update_model: {}".format(self.n_iter_, self.model_weights.unboxed))

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))
            LOGGER.info("iter: {},  is_converged: {}".format(
                self.n_iter_, self.is_converged))

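            # validation may trigger early stopping independently of the convergence flag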
            if self.validation_strategy:
                LOGGER.debug('LR guest running validation')
                self.validation_strategy.validate(self, self.n_iter_)
                if self.validation_strategy.need_stop():
                    LOGGER.debug('early stopping triggered')
                    break

            self.n_iter_ += 1

            if self.is_converged:
                break

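        # restore the best model saved during validation, if early stopping kept one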
        if self.validation_strategy and self.validation_strategy.has_saved_best_model():
            self.load_model(self.validation_strategy.cur_best_model)
        self.set_summary(self.get_model_summary())
Example #5
    def fit(self, data_instances):
        """
        Train lr model of role guest
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_lr_guest fit")
        self._abnormal_detection(data_instances)

        self.header = self.get_header(data_instances)
        data_instances = data_instances.mapValues(HeteroLRGuest.load_data)

        # obtain the Paillier public key from the arbiter
        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)
        LOGGER.info("Get public_key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        LOGGER.info("Generate mini-batch from input data")
        mini_batch_obj = MiniBatch(data_instances, batch_size=self.batch_size)
        batch_num = mini_batch_obj.batch_nums
        if self.batch_size == -1:
            LOGGER.info(
                "batch size is -1, set it to the number of data in data_instances"
            )
            self.batch_size = data_instances.count()

        batch_info = {"batch_size": self.batch_size, "batch_num": batch_num}
        LOGGER.info("batch_info:{}".format(batch_info))
        federation.remote(batch_info,
                          name=self.transfer_variable.batch_info.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.batch_info),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Remote batch_info to Host")
        federation.remote(batch_info,
                          name=self.transfer_variable.batch_info.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.batch_info),
                          role=consts.ARBITER,
                          idx=0)
        LOGGER.info("Remote batch_info to Arbiter")

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.encrypt_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(batch_num)
        ]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(
            self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        weight = self.initializer.init_model(model_shape,
                                             init_params=self.init_param_obj)
        if self.init_param_obj.fit_intercept is True:
            self.coef_ = weight[:-1]
            self.intercept_ = weight[-1]
        else:
            self.coef_ = weight

        is_send_all_batch_index = False
        self.n_iter_ = 0
        index_data_inst_map = {}

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batch_data_generator = mini_batch_obj.mini_batch_data_generator(
                result='index')

            batch_index = 0
            for batch_data_index in batch_data_generator:
                LOGGER.info("batch:{}".format(batch_index))
                if not is_send_all_batch_index:
                    LOGGER.info("remote mini-batch index to Host")
                    federation.remote(
                        batch_data_index,
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        role=consts.HOST,
                        idx=0)
                    if batch_index >= mini_batch_obj.batch_nums - 1:
                        is_send_all_batch_index = True

                # Get mini-batch train data
                if len(index_data_inst_map) < batch_num:
                    batch_data_inst = data_instances.join(
                        batch_data_index, lambda data_inst, index: data_inst)
                    index_data_inst_map[batch_index] = batch_data_inst
                else:
                    batch_data_inst = index_data_inst_map[batch_index]

                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data_inst)

                # guest/host forward
                self.compute_forward(batch_feat_inst, self.coef_,
                                     self.intercept_, batch_index)
                host_forward = federation.get(
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get host_forward from host")
                aggregate_forward_res = self.aggregate_forward(host_forward)
                en_aggregate_wx = aggregate_forward_res.mapValues(
                    lambda v: v[0])
                en_aggregate_wx_square = aggregate_forward_res.mapValues(
                    lambda v: v[1])

                # compute [[d]]: the encrypted fore gradient (residual)
                if self.gradient_operator is None:
                    self.gradient_operator = HeteroLogisticGradient(
                        self.encrypt_operator)
                fore_gradient = self.gradient_operator.compute_fore_gradient(
                    batch_feat_inst, en_aggregate_wx)
                federation.remote(
                    fore_gradient,
                    name=self.transfer_variable.fore_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.fore_gradient, self.n_iter_,
                        batch_index),
                    role=consts.HOST,
                    idx=0)

                LOGGER.info("Remote fore_gradient to Host")
                # compute guest gradient and loss
                guest_gradient, loss = self.gradient_operator.compute_gradient_and_loss(
                    batch_feat_inst, fore_gradient, en_aggregate_wx,
                    en_aggregate_wx_square, self.fit_intercept)

                # loss regulation if necessary
                if self.updater is not None:
                    guest_loss_regular = self.updater.loss_norm(self.coef_)
                    loss += self.encrypt_operator.encrypt(guest_loss_regular)

                federation.remote(
                    guest_gradient,
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote guest_gradient to arbiter")

                optim_guest_gradient = federation.get(
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_guest_gradient from arbiter")

                # update model
                LOGGER.info("update_model")
                self.update_model(optim_guest_gradient)

                # update local model that transforms features of raw input 'batch_data_inst'
                training_info = {
                    "iteration": self.n_iter_,
                    "batch_index": batch_index
                }
                self.update_local_model(fore_gradient, batch_data_inst,
                                        self.coef_, **training_info)

                # Get loss regulation from Host if regulation is set
                if self.updater is not None:
                    en_host_loss_regular = federation.get(
                        name=self.transfer_variable.host_loss_regular.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.host_loss_regular,
                            self.n_iter_, batch_index),
                        idx=0)
                    LOGGER.info("Get host_loss_regular from Host")
                    loss += en_host_loss_regular

                federation.remote(
                    loss,
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote loss to arbiter")

                # convergence of the loss is checked on the arbiter side
                batch_index += 1

                # temporary resource cleanup (to be removed in the future)
                rubbish_list = [
                    host_forward, aggregate_forward_res, en_aggregate_wx,
                    en_aggregate_wx_square, fore_gradient, self.guest_forward
                ]
                rubbish_clear(rubbish_list)

            is_stopped = federation.get(
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped, self.n_iter_,
                    batch_index),
                idx=0)
            LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

            self.n_iter_ += 1
            if is_stopped:
                LOGGER.info(
                    "Get stop signal from arbiter, model is converged, iter:{}"
                    .format(self.n_iter_))
                break

        LOGGER.info("Reach max iter {}, train model finish!".format(
            self.max_iter))
Example #6
    def fit(self,
            data_instances,
            node2id,
            local_instances=None,
            common_nodes=None):
        """
        Train node embedding for role guest
        Parameters
        ----------
        data_instances: DTable of target node and label, input data
        node2id: a dict which can map node name to id
        """
        LOGGER.info("samples number:{}".format(data_instances.count()))
        LOGGER.info("Enter network embedding procedure:")
        self.n_node = len(node2id)
        LOGGER.info("Bank A has {} nodes".format(self.n_node))

        data_instances = data_instances.mapValues(HeteroNEGuest.load_data)
        LOGGER.info("Transform input data to train instance")

        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)
        LOGGER.info("Get public_key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        # hetero network embedding
        LOGGER.info("Generate mini-batch from input data")
        mini_batch_obj = MiniBatch(data_instances, batch_size=self.batch_size)
        batch_num = mini_batch_obj.batch_nums

        LOGGER.info("samples number:{}".format(data_instances.count()))
        if self.batch_size == -1:
            LOGGER.info(
                "batch size is -1, set it to the number of data in data_instances"
            )
            self.batch_size = data_instances.count()

        ##############
        # horizontal federated learning
        LOGGER.info("Generate mini-batch for local instances in guest")
        mini_batch_obj_local = MiniBatch(local_instances,
                                         batch_size=self.batch_size)
        local_batch_num = mini_batch_obj_local.batch_nums
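        # nodes common to both parties are loaded as a (node, node) keyed table for embedding exchange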
        common_node_instances = eggroll.parallelize(
            ((node, node) for node in common_nodes),
            include_key=True,
            name='common_nodes')
        ##############

        batch_info = {'batch_size': self.batch_size, "batch_num": batch_num}
        LOGGER.info("batch_info:{}".format(batch_info))
        federation.remote(batch_info,
                          name=self.transfer_variable.batch_info.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.batch_info),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Remote batch_info to Host")

        federation.remote(batch_info,
                          name=self.transfer_variable.batch_info.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.batch_info),
                          role=consts.ARBITER,
                          idx=0)
        LOGGER.info("Remote batch_info to Arbiter")

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.encrypt_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(batch_num)
        ]

        LOGGER.info("Start initialize model.")
        self.embedding_ = self.initializer.init_model((self.n_node, self.dim),
                                                      self.init_param_obj)
        LOGGER.info("Embedding shape={}".format(self.embedding_.shape))

        is_send_all_batch_index = False
        self.n_iter_ = 0
        index_data_inst_map = {}

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))

            #################
            local_batch_data_generator = mini_batch_obj_local.mini_batch_data_generator(
            )
            total_loss = 0
            local_batch_num = 0
            LOGGER.info("Enter the horizontally federated learning procedure:")
            for local_batch_data in local_batch_data_generator:
                n = local_batch_data.count()
                #LOGGER.info("Local batch data count:{}".format(n))
                E_Y = self.compute_local_embedding(local_batch_data,
                                                   self.embedding_, node2id)
                local_grads_e1, local_grads_e2, local_loss = self.local_gradient_operator.compute(
                    E_Y, 'E_1')
                local_grads_e1 = local_grads_e1.mapValues(
                    lambda g: self.local_optimizer.apply_gradients(g / n))
                local_grads_e2 = local_grads_e2.mapValues(
                    lambda g: self.local_optimizer.apply_gradients(g / n))
                e1id_join_grads = local_batch_data.join(
                    local_grads_e1, lambda v, g: (node2id[v[0]], g))
                e2id_join_grads = local_batch_data.join(
                    local_grads_e2, lambda v, g: (node2id[v[1]], g))
                self.update_model(e1id_join_grads)
                self.update_model(e2id_join_grads)

                local_loss = local_loss / n
                local_batch_num += 1
                total_loss += local_loss
                #LOGGER.info("gradient count:{}".format(e1id_join_grads.count()))

            guest_common_embedding = common_node_instances.mapValues(
                lambda node: self.embedding_[node2id[node]])
            federation.remote(
                guest_common_embedding,
                name=self.transfer_variable.guest_common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_common_embedding,
                    self.n_iter_, 0),
                role=consts.ARBITER,
                idx=0)
            LOGGER.info("Remote the embedding of common node to arbiter!")

            common_embedding = federation.get(
                name=self.transfer_variable.common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.common_embedding, self.n_iter_, 0),
                idx=0)
            LOGGER.info(
                "Get the aggregated embedding of common node from arbiter!")

            self.update_common_nodes(common_embedding, common_nodes, node2id)

            total_loss /= local_batch_num
            LOGGER.info(
                "Iter {}, horizontally federated learning loss: {}".format(
                    self.n_iter_, total_loss))

            #################

            # vertically federated learning
            # each iter will get the same batch_data_generator
            LOGGER.info("Enter the vertically federated learning:")
            batch_data_generator = mini_batch_obj.mini_batch_data_generator(
                result='index')

            batch_index = 0
            for batch_data_index in batch_data_generator:
                LOGGER.info("batch:{}".format(batch_index))

                # only need to send one times
                if not is_send_all_batch_index:
                    LOGGER.info("remote mini-batch index to Host")
                    federation.remote(
                        batch_data_index,
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        role=consts.HOST,
                        idx=0)
                    if batch_index >= mini_batch_obj.batch_nums - 1:
                        is_send_all_batch_index = True

                # in order to avoid joining in next iteration
                # Get mini-batch train data
                if len(index_data_inst_map) < batch_num:
                    batch_data_inst = data_instances.join(
                        batch_data_index, lambda data_inst, index: data_inst)
                    index_data_inst_map[batch_index] = batch_data_inst
                else:
                    batch_data_inst = index_data_inst_map[batch_index]

                # For inductive learning: transform node attributes to node embedding
                # self.transform(batch_data_inst)
                self.guest_forward = self.compute_forward(
                    batch_data_inst, self.embedding_, node2id, batch_index)

                host_forward = federation.get(
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get host_forward from host")
                aggregate_forward_res = self.aggregate_forward(host_forward)
                en_aggregate_ee = aggregate_forward_res.mapValues(
                    lambda v: v[0])
                en_aggregate_ee_square = aggregate_forward_res.mapValues(
                    lambda v: v[1])

                # compute [[d]]: the encrypted fore gradient (residual)
                if self.gradient_operator is None:
                    self.gradient_operator = HeteroNetworkEmbeddingGradient(
                        self.encrypt_operator)
                fore_gradient = self.gradient_operator.compute_fore_gradient(
                    batch_data_inst, en_aggregate_ee)

                host_gradient = self.gradient_operator.compute_gradient(
                    self.guest_forward.mapValues(
                        lambda v: Instance(features=v[1])), fore_gradient)
                federation.remote(
                    host_gradient,
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote host_gradient to arbiter")

                composed_data_inst = host_forward.join(
                    batch_data_inst,
                    lambda hf, d: Instance(features=hf[1], label=d.label))
                guest_gradient, loss = self.gradient_operator.compute_gradient_and_loss(
                    composed_data_inst, fore_gradient, en_aggregate_ee,
                    en_aggregate_ee_square)
                federation.remote(
                    guest_gradient,
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote guest_gradient to arbiter")

                optim_guest_gradient = federation.get(
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_guest_gradient from arbiter")

                # update node embedding
                LOGGER.info("Update node embedding")
                nodeid_join_gradient = batch_data_inst.join(
                    optim_guest_gradient, lambda instance, gradient:
                    (node2id[instance.features], gradient))
                self.update_model(nodeid_join_gradient)

                # update local model that transform attribute to node embedding
                training_info = {
                    'iteration': self.n_iter_,
                    'batch_index': batch_index
                }
                self.update_local_model(fore_gradient, batch_data_inst,
                                        self.embedding_, **training_info)

                # NOTE: the loss still needs to be encrypted here

                federation.remote(
                    loss,
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote loss to arbiter")

                # convergence of the loss is checked on the arbiter side
                batch_index += 1

                # remove temporary resource
                rubbish_list = [
                    host_forward, aggregate_forward_res, en_aggregate_ee,
                    en_aggregate_ee_square, fore_gradient, self.guest_forward
                ]
                rubbish_clear(rubbish_list)

            ##########
            guest_common_embedding = common_node_instances.mapValues(
                lambda node: self.embedding_[node2id[node]])
            federation.remote(
                guest_common_embedding,
                name=self.transfer_variable.guest_common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_common_embedding,
                    self.n_iter_, 1),
                role=consts.ARBITER,
                idx=0)

            common_embedding = federation.get(
                name=self.transfer_variable.common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.common_embedding, self.n_iter_, 1),
                idx=0)

            self.update_common_nodes(common_embedding, common_nodes, node2id)
            ##########

            is_stopped = federation.get(
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped, self.n_iter_),
                idx=0)

            LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

            self.n_iter_ += 1
            if is_stopped:
                LOGGER.info(
                    "Get stop signal from arbiter, model is converged, iter:{}"
                    .format(self.n_iter_))
                break

        embedding_table = eggroll.table(name='guest',
                                        namespace='node_embedding',
                                        partition=10)
        id2node = dict(zip(node2id.values(), node2id.keys()))
        for id, embedding in enumerate(self.embedding_):
            embedding_table.put(id2node[id], embedding)
        embedding_table.save_as(name='guest',
                                namespace='node_embedding',
                                partition=10)
        LOGGER.info("Reach max iter {}, train model finish!".format(
            self.max_iter))
Example #7
    def fit(self, data_instances, validate_data=None):
        """
        Train linear regression model of role host
        Parameters
        ----------
        data_instances: Table of Instance, input data
        """

        LOGGER.info("Enter hetero_linR host")
        self._abnormal_detection(data_instances)
        self.header = self.get_header(data_instances)
        self.callback_list.on_train_begin(data_instances, validate_data)

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

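        # whether the asynchronous gradient path is used is decided by the guest and received here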
        if self.transfer_variable.use_async.get(idx=0):
            LOGGER.debug(f"set_use_async")
            self.gradient_loss_operator.set_use_async()

        self.batch_generator.initialize_batch_generator(data_instances)
        self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

        self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                           self.encrypted_mode_calculator_param.mode,
                                                           self.encrypted_mode_calculator_param.re_encrypted_rate) for _
                                     in range(self.batch_generator.batch_nums)]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False

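        # warm start: skip re-initialization and continue from the existing weights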
        if not self.component_properties.is_warm_start:
            w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
            self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept, raise_overflow_error=False)
        else:
            self.callback_warm_start_init_iter(self.n_iter_)

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)
            LOGGER.info("iter:" + str(self.n_iter_))
            self.optimizer.set_iters(self.n_iter_)
            batch_data_generator = self.batch_generator.generate_batch_data()
            batch_index = 0
            for batch_data in batch_data_generator:
                optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_data,
                    self.encrypted_calculator,
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index)

                self.gradient_loss_operator.compute_loss(self.model_weights, self.optimizer, self.n_iter_, batch_index,
                                                         self.cipher_operator)

                self.model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))

            LOGGER.info("Get is_converged flag from arbiter:{}".format(self.is_converged))

            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1
            if self.stop_training:
                break

            LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
            if self.is_converged:
                break
        self.callback_list.on_train_end()

        self.set_summary(self.get_model_summary())
Example #8
    def fit(self,
            data_instances,
            node2id,
            local_instances=None,
            common_nodes=None):
        """
        Train ne model of role host
        Parameters
        ----------
        data_instances: DTable of anchor node, input data
        """
        LOGGER.info("Enter hetero_ne host")
        self.n_node = len(node2id)
        LOGGER.info("Host party has {} nodes".format(self.n_node))

        data_instances = data_instances.mapValues(HeteroNEHost.load_data)
        LOGGER.info("Transform input data to train instance")

        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)
        LOGGER.info("Get Publick key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        ##############
        # horizontal federated learning
        LOGGER.info("Generate mini-batch for local instances in guest")
        mini_batch_obj_local = MiniBatch(local_instances,
                                         batch_size=self.batch_size)
        common_node_instances = eggroll.parallelize(
            ((node, node) for node in common_nodes),
            include_key=True,
            name='common_nodes')
        ##############

        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest: {}".format(batch_info))

        self.batch_size = batch_info['batch_size']
        self.batch_num = batch_info['batch_num']
        if self.batch_size < consts.MIN_BATCH_SIZE and self.batch_size != -1:
            raise ValueError(
                "Batch size got from guest should not be less than {} unless it is -1, but got {}"
                .format(consts.MIN_BATCH_SIZE, self.batch_size))

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.encrypt_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_num)
        ]

        LOGGER.info("Start initilize model.")
        self.embedding_ = self.initializer.init_model((self.n_node, self.dim),
                                                      self.init_param_obj)

        self.n_iter_ = 0
        index_data_inst_map = {}

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter: {}".format(self.n_iter_))

            #################
            local_batch_data_generator = mini_batch_obj_local.mini_batch_data_generator(
            )
            total_loss = 0
            local_batch_num = 0
            LOGGER.info("Horizontally learning")
            for local_batch_data in local_batch_data_generator:
                n = local_batch_data.count()
                LOGGER.info("Local batch data count:{}".format(n))
                E_Y = self.compute_local_embedding(local_batch_data,
                                                   self.embedding_, node2id)
                local_grads_e1, local_grads_e2, local_loss = self.local_gradient_operator.compute(
                    E_Y, 'E_1')
                local_grads_e1 = local_grads_e1.mapValues(
                    lambda g: self.local_optimizer.apply_gradients(g / n))
                local_grads_e2 = local_grads_e2.mapValues(
                    lambda g: self.local_optimizer.apply_gradients(g / n))
                e1id_join_grads = local_batch_data.join(
                    local_grads_e1, lambda v, g: (node2id[v[0]], g))
                e2id_join_grads = local_batch_data.join(
                    local_grads_e2, lambda v, g: (node2id[v[1]], g))
                self.update_model(e1id_join_grads)
                self.update_model(e2id_join_grads)

                local_loss = local_loss / n
                local_batch_num += 1
                total_loss += local_loss
                LOGGER.info("gradient count:{}".format(
                    e1id_join_grads.count()))

            host_common_embedding = common_node_instances.mapValues(
                lambda node: self.embedding_[node2id[node]])
            federation.remote(
                host_common_embedding,
                name=self.transfer_variable.host_common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_common_embedding, self.n_iter_,
                    0),
                role=consts.ARBITER,
                idx=0)

            common_embedding = federation.get(
                name=self.transfer_variable.common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.common_embedding, self.n_iter_, 0),
                idx=0)

            self.update_common_nodes(common_embedding, common_nodes, node2id)

            total_loss /= local_batch_num
            LOGGER.info("Iter {}, Local loss: {}".format(
                self.n_iter_, total_loss))

            batch_index = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))

                # set batch_data
                # in order to avoid communicating in next iteration
                # in next iteration, the sequence of batches is the same
                if len(self.batch_index_list) < self.batch_num:
                    batch_data_index = federation.get(
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        idx=0)
                    LOGGER.info("Get batch_index from Guest")
                    self.batch_index_list.append(batch_index)
                else:
                    batch_data_index = self.batch_index_list[batch_index]

                # Get mini-batch train_data
                # in order to avoid joining for next iteration
                if len(index_data_inst_map) < self.batch_num:
                    batch_data_inst = batch_data_index.join(
                        data_instances, lambda g, d: d)
                    index_data_inst_map[batch_index] = batch_data_inst
                else:
                    batch_data_inst = index_data_inst_map[batch_index]

                LOGGER.info("batch_data_inst size:{}".format(
                    batch_data_inst.count()))

                #self.transform(data_inst)

                # compute forward
                host_forward = self.compute_forward(batch_data_inst,
                                                    self.embedding_, node2id,
                                                    batch_index)
                federation.remote(
                    host_forward,
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict, self.n_iter_,
                        batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote host_forward to guest")

                # Get optimized host gradient and update model
                optim_host_gradient = federation.get(
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_host_gradient from arbiter")

                nodeid_join_gradient = batch_data_inst.join(
                    optim_host_gradient, lambda instance, gradient:
                    (node2id[instance.features], gradient))
                LOGGER.info("update_model")
                self.update_model(nodeid_join_gradient)

                # update local model
                #training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
                #self.update_local_model(fore_gradient, batch_data_inst, self.coef_, **training_info)

                batch_index += 1

                rubbish_list = [host_forward]
                rubbish_clear(rubbish_list)

            #######
            host_common_embedding = common_node_instances.mapValues(
                lambda node: self.embedding_[node2id[node]])
            federation.remote(
                host_common_embedding,
                name=self.transfer_variable.host_common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_common_embedding, self.n_iter_,
                    1),
                role=consts.ARBITER,
                idx=0)

            common_embedding = federation.get(
                name=self.transfer_variable.common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.common_embedding, self.n_iter_, 1),
                idx=0)

            self.update_common_nodes(common_embedding, common_nodes, node2id)
            #######

            is_stopped = federation.get(
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped,
                    self.n_iter_,
                ),
                idx=0)
            LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

            self.n_iter_ += 1
            if is_stopped:
                break

        LOGGER.info("Reach max iter {}, train mode finish!".format(
            self.max_iter))
        embedding_table = eggroll.table(name='host',
                                        namespace='node_embedding',
                                        partition=10)
        id2node = dict(zip(node2id.values(), node2id.keys()))
        for id, embedding in enumerate(self.embedding_):
            embedding_table.put(id2node[id], embedding)
        embedding_table.save_as(name='host',
                                namespace='node_embedding',
                                partition=10)
        LOGGER.info("Reach max iter {}, train model finish!".format(
            self.max_iter))
Example #9
    def fit(self, data_instances, validate_data=None):
        """
        Train linR model of role guest
        Parameters
        ----------
        data_instances: Table of Instance, input data
        """

        LOGGER.info("Enter hetero_linR_guest fit")
        self._abnormal_detection(data_instances)
        self.header = self.get_header(data_instances)
        self.callback_list.on_train_begin(data_instances, validate_data)
        # self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

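        # sample weighting and the async gradient path are mutually exclusive; async is only enabled for a single host without instance weights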
        use_async = False
        if with_weight(data_instances):
            if self.model_param.early_stop == "diff":
                LOGGER.warning("input data with weight, please use 'weight_diff' for 'early_stop'.")
            data_instances = scale_sample_weight(data_instances)
            self.gradient_loss_operator.set_use_sample_weight()
            LOGGER.debug(f"instance weight scaled; use weighted gradient loss operator")
            # LOGGER.debug(f"data_instances after scale: {[v[1].weight for v in list(data_instances.collect())]}")
        elif len(self.component_properties.host_party_idlist) == 1:
            LOGGER.debug(f"set_use_async")
            self.gradient_loss_operator.set_use_async()
            use_async = True
        self.transfer_variable.use_async.remote(use_async)

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(data_instances, self.batch_size)
        self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

        self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                           self.encrypted_mode_calculator_param.mode,
                                                           self.encrypted_mode_calculator_param.re_encrypted_rate) for _
                                     in range(self.batch_generator.batch_nums)]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        if not self.component_properties.is_warm_start:
            w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
            self.model_weights = LinearModelWeights(w, fit_intercept=self.fit_intercept, raise_overflow_error=False)
        else:
            self.callback_warm_start_init_iter(self.n_iter_)

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # Start gradient procedure
                optim_guest_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_data,
                    self.encrypted_calculator,
                    self.model_weights,
                    self.optimizer,
                    self.n_iter_,
                    batch_index
                )

                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(batch_data, self.n_iter_, batch_index, loss_norm)

                self.model_weights = self.optimizer.update_model(self.model_weights, optim_guest_gradient)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))
            LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))

            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1
            if self.stop_training:
                break

            if self.is_converged:
                break
        self.callback_list.on_train_end()

        self.set_summary(self.get_model_summary())
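For orientation, a rough non-FATE sketch of the two early-stop modes referenced above (assumed semantics, since the real convergence check runs on the arbiter): 'diff' watches the change in loss, while 'weight_diff' watches the size of the weight update, which is presumably why it is recommended once sample weights rescale the loss.

import numpy as np

def converged_by_loss_diff(prev_loss, curr_loss, eps=1e-4):
    # "diff": stop when the loss barely changes between iterations
    return abs(curr_loss - prev_loss) < eps

def converged_by_weight_diff(prev_w, curr_w, eps=1e-4):
    # "weight_diff": stop when the weight update is small in norm
    return np.linalg.norm(np.asarray(curr_w) - np.asarray(prev_w)) < eps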
Example #10
    def fit(self, data_instances, node2id):
        """
        Train NE model of role host
        Parameters
        ----------
        data_instances: DTable of anchor nodes, input data
        """
        LOGGER.info("Enter hetero_ne host")
        self.n_node = len(node2id)
        LOGGER.info("Host party has {} nodes".format(self.n_node))

        data_instances = data_instances.mapValues(HeteroNEHost.load_data)
        LOGGER.info("Transform input data to train instance")

        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)
        LOGGER.info("Get Publick key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest: {}".format(batch_info))

        self.batch_size = batch_info['batch_size']
        self.batch_num = batch_info['batch_num']
        if self.batch_size < consts.MIN_BATCH_SIZE and self.batch_size != -1:
            raise ValueError(
                "Batch size received from guest should not be less than {}, "
                "except -1; got batch_size {}".format(consts.MIN_BATCH_SIZE,
                                                      self.batch_size))

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.encrypt_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_num)
        ]

        LOGGER.info("Start initilize model.")
        self.embedding_ = self.initializer.init_model((self.n_node, self.dim),
                                                      self.init_param_obj)

        self.n_iter_ = 0
        index_data_inst_map = {}

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter: {}".format(self.n_iter_))
            batch_index = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))

                # set batch_data
                # in order to avoid communicating in next iteration
                # in next iteration, the sequence of batches is the same
                if len(self.batch_index_list) < self.batch_num:
                    batch_data_index = federation.get(
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        idx=0)
                    LOGGER.info("Get batch_index from Guest")
                    self.batch_index_list.append(batch_data_index)
                else:
                    batch_data_index = self.batch_index_list[batch_index]

                # Get mini-batch train_data
                # in order to avoid joining for next iteration
                if len(index_data_inst_map) < self.batch_num:
                    batch_data_inst = batch_data_index.join(
                        data_instances, lambda g, d: d)
                    index_data_inst_map[batch_index] = batch_data_inst
                else:
                    batch_data_inst = index_data_inst_map[batch_index]

                LOGGER.info("batch_data_inst size:{}".format(
                    batch_data_inst.count()))

                #self.transform(data_inst)

                # compute forward
                host_forward = self.compute_forward(batch_data_inst,
                                                    self.embedding_, node2id,
                                                    batch_index)
                federation.remote(
                    host_forward,
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict, self.n_iter_,
                        batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote host_forward to guest")

                # Get optimize host gradient and update model
                optim_host_gradient = federation.get(
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_host_gradient from arbiter")

                nodeid_join_gradient = batch_data_inst.join(
                    optim_host_gradient, lambda instance, gradient:
                    (node2id[instance.features], gradient))
                LOGGER.info("update_model")
                self.update_model(nodeid_join_gradient)

                # update local model
                #training_info = {"iteration": self.n_iter_, "batch_index": batch_index}
                #self.update_local_model(fore_gradient, batch_data_inst, self.coef_, **training_info)

                batch_index += 1

                rubbish_list = [host_forward]
                rubbish_clear(rubbish_list)

            is_stopped = federation.get(
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped,
                    self.n_iter_,
                ),
                idx=0)
            LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

            self.n_iter_ += 1
            if is_stopped:
                LOGGER.info("Get stop signal from arbiter, model is converged, iter: {}".format(
                    self.n_iter_))
                break

        embedding_table = eggroll.table(name='host',
                                        namespace='node_embedding',
                                        partition=10)
        id2node = dict(zip(node2id.values(), node2id.keys()))
        for id, embedding in enumerate(self.embedding_):
            embedding_table.put(id2node[id], embedding)
        embedding_table.save_as(name='host',
                                namespace='node_embedding',
                                partition=10)
        LOGGER.info("Reach max iter {}, train model finish!".format(
            self.max_iter))
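The batch_index_list / index_data_inst_map bookkeeping above memoises each mini-batch after the first epoch, so that neither the index transfer from guest nor the join against the full data set is repeated. A stripped-down sketch of the same idea (hypothetical helper, not FATE API):

def get_cached_batch(batch_index, fetch_index_fn, join_fn,
                     index_cache, data_cache):
    # first epoch: fetch the batch's index table and join it once
    if batch_index not in index_cache:
        index_cache[batch_index] = fetch_index_fn(batch_index)
        data_cache[batch_index] = join_fn(index_cache[batch_index])
    # later epochs: reuse the cached mini-batch, no communication or join
    return data_cache[batch_index]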
Example #11
    def fit_binary(self, data_instances, validate_data):
        self._abnormal_detection(data_instances)

        validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)
        LOGGER.debug(
            f"MODEL_STEP Start fin_binary, data count: {data_instances.count()}"
        )

        self.header = self.get_header(data_instances)
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        self.batch_generator.initialize_batch_generator(data_instances)

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)

        # intercept is initialized within FactorizationMachineWeights.
        # Skip initializer's intercept part.
        fit_intercept = False
        if self.init_param_obj.fit_intercept:
            fit_intercept = True
            self.init_param_obj.fit_intercept = False

        w_ = self.initializer.init_model(model_shape,
                                         init_params=self.init_param_obj)
        embed_ = np.random.normal(
            scale=1 / np.sqrt(self.init_param_obj.embed_size),
            size=(model_shape, self.init_param_obj.embed_size))
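        # embed_ plays the role of the FM latent-factor matrix V with shape
        # (n_features, embed_size); std 1/sqrt(embed_size) keeps the pairwise
        # interaction terms roughly unit-scale at initialization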

        self.model_weights = \
            FactorizationMachineWeights(w_, embed_, fit_intercept=fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:" + str(self.n_iter_))
            batch_data_generator = self.batch_generator.generate_batch_data()
            batch_index = 0
            self.optimizer.set_iters(self.n_iter_)
            for batch_data in batch_data_generator:
                LOGGER.debug(
                    f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_data.count()}"
                )

                optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_data, self.model_weights, self.encrypted_calculator,
                    self.optimizer, self.n_iter_, batch_index)
                LOGGER.debug(
                    'optim_host_gradient: {}'.format(optim_host_gradient))

                self.gradient_loss_operator.compute_loss(
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)

                # clip gradient
                if self.model_param.clip_gradient and self.model_param.clip_gradient > 0:
                    optim_host_gradient = np.maximum(
                        optim_host_gradient, -self.model_param.clip_gradient)
                    optim_host_gradient = np.minimum(
                        optim_host_gradient, self.model_param.clip_gradient)
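                # note: the maximum/minimum pair above is equivalent to
                # np.clip(optim_host_gradient, -self.model_param.clip_gradient,
                #         self.model_param.clip_gradient)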

                _model_weights = self.optimizer.update_model(
                    self.model_weights, optim_host_gradient)
                self.model_weights.update(_model_weights)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))

            LOGGER.info("Get is_converged flag from arbiter:{}".format(
                self.is_converged))

            validation_strategy.validate(self, self.n_iter_)

            self.n_iter_ += 1
            LOGGER.info("iter: {}, is_converged: {}".format(
                self.n_iter_, self.is_converged))
            if self.is_converged:
                break

        LOGGER.debug("Final fm weights: {}".format(self.model_weights.unboxed))
Example #12
    def fit(self, data_instances, validate_data=None):
        """
        Train poisson model of role guest
        Parameters
        ----------
        data_instances: Table of Instance, input data
        """

        LOGGER.info("Enter hetero_poisson_guest fit")
        self._abnormal_detection(data_instances)
        self.header = copy.deepcopy(self.get_header(data_instances))
        self.callback_list.on_train_begin(data_instances, validate_data)

        # self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)
        if with_weight(data_instances):
            LOGGER.warning(
                "input data has instance weights; Poisson regression does not support weighted training."
            )

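        # the exposure column (if provided) is removed from the feature header and
        # carried in a separate table; the remaining columns form the training features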
        self.exposure_index = self.get_exposure_index(self.header,
                                                      self.exposure_colname)
        exposure_index = self.exposure_index
        if exposure_index > -1:
            self.header.pop(exposure_index)
            LOGGER.info("Guest provides exposure value.")
        exposure = data_instances.mapValues(
            lambda v: HeteroPoissonBase.load_exposure(v, exposure_index))
        data_instances = data_instances.mapValues(
            lambda v: HeteroPoissonBase.load_instance(v, exposure_index))

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(
            data_instances, self.batch_size)
        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(
            self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        if not self.component_properties.is_warm_start:
            w = self.initializer.init_model(model_shape,
                                            init_params=self.init_param_obj)
            self.model_weights = LinearModelWeights(
                w,
                fit_intercept=self.fit_intercept,
                raise_overflow_error=False)
        else:
            self.callback_warm_start_init_iter(self.n_iter_)

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # compute offset of this batch
                batch_offset = exposure.join(
                    batch_data, lambda ei, d: HeteroPoissonBase.safe_log(ei))
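                # the join keeps only exposure values whose keys fall in this batch;
                # safe_log turns them into the Poisson log-offset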

                # Start gradient procedure
                optimized_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_data, self.encrypted_calculator, self.model_weights,
                    self.optimizer, self.n_iter_, batch_index, batch_offset)
                # LOGGER.debug("iteration:{} Guest's gradient: {}".format(self.n_iter_, optimized_gradient))
                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(
                    batch_data, self.model_weights, self.n_iter_, batch_index,
                    batch_offset, loss_norm)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optimized_gradient)

                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))
            LOGGER.info("iter: {},  is_converged: {}".format(
                self.n_iter_, self.is_converged))

            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1

            if self.stop_training:
                break

            if self.is_converged:
                break
        self.callback_list.on_train_end()
        self.set_summary(self.get_model_summary())
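For context, the exposure offset used above corresponds to the standard Poisson-with-offset model, where the expected count scales with exposure; a plain, non-federated sketch:

import numpy as np

def poisson_mean(features, weights, exposure):
    # log(mu) = X @ w + log(exposure)  <=>  mu = exposure * exp(X @ w)
    return exposure * np.exp(features @ weights)

# example: doubling the exposure doubles the expected count
w = np.array([0.1, -0.2])
x = np.array([1.0, 2.0])
print(poisson_mean(x, w, exposure=1.0), poisson_mean(x, w, exposure=2.0))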
Example #13
    def fit_binary(self, data_instances, validate_data=None):
        LOGGER.info("Starting to hetero_sshe_logistic_regression")
        self.callback_list.on_train_begin(data_instances, validate_data)

        model_shape = self.get_features_shape(data_instances)
        instances_count = data_instances.count()

        if not self.component_properties.is_warm_start:
            w = self._init_weights(model_shape)
            self.model_weights = LinearModelWeights(
                l=w, fit_intercept=self.model_param.init_param.fit_intercept)
            last_models = copy.deepcopy(self.model_weights)
        else:
            last_models = copy.deepcopy(self.model_weights)
            w = last_models.unboxed
            self.callback_warm_start_init_iter(self.n_iter_)

        self.batch_generator.initialize_batch_generator(
            data_instances, batch_size=self.batch_size)

        with SPDZ(
                "sshe_lr",
                local_party=self.local_party,
                all_parties=self.parties,
                q_field=self.q_field,
                use_mix_rand=self.model_param.use_mix_rand,
        ) as spdz:
            spdz.set_flowid(self.flowid)
            self.secure_matrix_obj.set_flowid(self.flowid)
            if self.role == consts.GUEST:
                self.labels = data_instances.mapValues(
                    lambda x: np.array([x.label], dtype=int))

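            # share_model exchanges additive secret shares of the weights: w_self is
            # this party's share of its own weights, w_remote its share of the peer's,
            # so neither side holds a complete model until reveal_models is called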
            w_self, w_remote = self.share_model(w, suffix="init")
            last_w_self, last_w_remote = w_self, w_remote
            LOGGER.debug(
                f"first_w_self shape: {w_self.shape}, w_remote_shape: {w_remote.shape}"
            )

            batch_data_generator = self.batch_generator.generate_batch_data()

            self.cipher_tool = []
            encoded_batch_data = []
            for batch_data in batch_data_generator:
                if self.fit_intercept:
                    batch_features = batch_data.mapValues(lambda x: np.hstack(
                        (x.features, 1.0)))
                else:
                    batch_features = batch_data.mapValues(lambda x: x.features)
                self.batch_num.append(batch_data.count())

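                # encode each mini-batch as a fixed-point tensor once, outside the
                # training loop, so every epoch can reuse it for SPDZ arithmetic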
                encoded_batch_data.append(
                    fixedpoint_table.FixedPointTensor(
                        self.fixedpoint_encoder.encode(batch_features),
                        q_field=self.fixedpoint_encoder.n,
                        endec=self.fixedpoint_encoder))

                self.cipher_tool.append(
                    EncryptModeCalculator(
                        self.cipher, self.encrypted_mode_calculator_param.mode,
                        self.encrypted_mode_calculator_param.re_encrypted_rate)
                )

            while self.n_iter_ < self.max_iter:
                self.callback_list.on_epoch_begin(self.n_iter_)
                LOGGER.info(f"start to n_iter: {self.n_iter_}")

                loss_list = []

                self.optimizer.set_iters(self.n_iter_)
                if not self.reveal_every_iter:
                    self.self_optimizer.set_iters(self.n_iter_)
                    self.remote_optimizer.set_iters(self.n_iter_)

                for batch_idx, batch_data in enumerate(encoded_batch_data):
                    current_suffix = (str(self.n_iter_), str(batch_idx))

                    if self.reveal_every_iter:
                        y = self.forward(weights=self.model_weights,
                                         features=batch_data,
                                         suffix=current_suffix,
                                         cipher=self.cipher_tool[batch_idx])
                    else:
                        y = self.forward(weights=(w_self, w_remote),
                                         features=batch_data,
                                         suffix=current_suffix,
                                         cipher=self.cipher_tool[batch_idx])

                    if self.role == consts.GUEST:
                        error = y - self.labels

                        self_g, remote_g = self.backward(
                            error=error,
                            features=batch_data,
                            suffix=current_suffix,
                            cipher=self.cipher_tool[batch_idx])
                    else:
                        self_g, remote_g = self.backward(
                            error=y,
                            features=batch_data,
                            suffix=current_suffix,
                            cipher=self.cipher_tool[batch_idx])

                    # loss computing;
                    suffix = ("loss", ) + current_suffix
                    if self.reveal_every_iter:
                        batch_loss = self.compute_loss(
                            weights=self.model_weights,
                            suffix=suffix,
                            cipher=self.cipher_tool[batch_idx])
                    else:
                        batch_loss = self.compute_loss(
                            weights=(w_self, w_remote),
                            suffix=suffix,
                            cipher=self.cipher_tool[batch_idx])

                    if batch_loss is not None:
                        batch_loss = batch_loss * self.batch_num[batch_idx]
                    loss_list.append(batch_loss)

                    if self.reveal_every_iter:
                        # LOGGER.debug(f"before reveal: self_g shape: {self_g.shape}, remote_g_shape: {remote_g},"
                        #              f"self_g: {self_g}")

                        new_g = self.reveal_models(self_g,
                                                   remote_g,
                                                   suffix=current_suffix)

                        # LOGGER.debug(f"after reveal: new_g shape: {new_g.shape}, new_g: {new_g}"
                        #              f"self.model_param.reveal_strategy: {self.model_param.reveal_strategy}")

                        if new_g is not None:
                            self.model_weights = self.optimizer.update_model(
                                self.model_weights, new_g, has_applied=False)

                        else:
                            self.model_weights = LinearModelWeights(
                                l=np.zeros(self_g.shape),
                                fit_intercept=self.model_param.init_param.
                                fit_intercept)
                    else:
                        if self.optimizer.penalty == consts.L2_PENALTY:
                            self_g = self_g + self.self_optimizer.alpha * w_self
                            remote_g = remote_g + self.remote_optimizer.alpha * w_remote

                        # LOGGER.debug(f"before optimizer: {self_g}, {remote_g}")

                        self_g = self.self_optimizer.apply_gradients(self_g)
                        remote_g = self.remote_optimizer.apply_gradients(
                            remote_g)

                        # LOGGER.debug(f"after optimizer: {self_g}, {remote_g}")
                        w_self -= self_g
                        w_remote -= remote_g

                    LOGGER.debug(
                        f"w_self shape: {w_self.shape}, w_remote_shape: {w_remote.shape}"
                    )

                if self.role == consts.GUEST:
                    loss = np.sum(loss_list) / instances_count
                    self.loss_history.append(loss)
                    if self.need_call_back_loss:
                        self.callback_loss(self.n_iter_, loss)
                else:
                    loss = None

                if self.converge_func_name in ["diff", "abs"]:
                    self.is_converged = self.check_converge_by_loss(
                        loss, suffix=(str(self.n_iter_), ))
                elif self.converge_func_name == "weight_diff":
                    if self.reveal_every_iter:
                        self.is_converged = self.check_converge_by_weights(
                            last_w=last_models.unboxed,
                            new_w=self.model_weights.unboxed,
                            suffix=(str(self.n_iter_), ))
                        last_models = copy.deepcopy(self.model_weights)
                    else:
                        self.is_converged = self.check_converge_by_weights(
                            last_w=(last_w_self, last_w_remote),
                            new_w=(w_self, w_remote),
                            suffix=(str(self.n_iter_), ))
                        last_w_self, last_w_remote = copy.deepcopy(
                            w_self), copy.deepcopy(w_remote)
                else:
                    raise ValueError(
                        f"Cannot recognize early_stop function: {self.converge_func_name}"
                    )

                LOGGER.info("iter: {},  is_converged: {}".format(
                    self.n_iter_, self.is_converged))
                self.callback_list.on_epoch_end(self.n_iter_)
                self.n_iter_ += 1

                if self.stop_training:
                    break

                if self.is_converged:
                    break

            # Finally reconstruct
            if not self.reveal_every_iter:
                new_w = self.reveal_models(w_self,
                                           w_remote,
                                           suffix=("final", ))
                if new_w is not None:
                    self.model_weights = LinearModelWeights(
                        l=new_w,
                        fit_intercept=self.model_param.init_param.fit_intercept
                    )

        LOGGER.debug(f"loss_history: {self.loss_history}")
        self.set_summary(self.get_model_summary())
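A minimal sketch of the additive secret sharing that share_model / reveal_models rely on (illustrative only; FATE works with fixed-point field arithmetic and SPDZ tensors, and Q here is a made-up modulus rather than the configured q_field):

import random

Q = 2 ** 61 - 1  # illustrative modulus

def share(secret):
    """Split an integer secret into two additive shares modulo Q."""
    r = random.randrange(Q)
    return r, (secret - r) % Q

def reveal(share_a, share_b):
    """Recombine the shares; either share alone is uniformly random."""
    return (share_a + share_b) % Q

w_self, w_remote = share(123456)
assert reveal(w_self, w_remote) == 123456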
    def fit(self, data_instances, validate_data=None):
        """
        Train linR model of role guest
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_linR_guest fit")
        self._abnormal_detection(data_instances)
        self.header = self.get_header(data_instances)

        self.validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)

        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        LOGGER.info("Generate mini-batch from input data")
        self.batch_generator.initialize_batch_generator(
            data_instances, self.batch_size)
        self.gradient_loss_operator.set_total_batch_nums(
            self.batch_generator.batch_nums)

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        LOGGER.info("fit_intercept:{}".format(
            self.init_param_obj.fit_intercept))
        model_shape = self.get_features_shape(data_instances)
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            # each iter will get the same batch_data_generator
            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)
            batch_index = 0
            for batch_data in batch_data_generator:
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data)

                # Start gradient procedure
                optim_guest_gradient, _, _ = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.encrypted_calculator,
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)

                loss_norm = self.optimizer.loss_norm(self.model_weights)
                self.gradient_loss_operator.compute_loss(
                    data_instances, self.n_iter_, batch_index, loss_norm)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optim_guest_gradient)
                batch_index += 1
                # LOGGER.debug(
                #     "model_weights, iters: {}, update_model: {}".format(self.n_iter_, self.model_weights.unboxed))

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))
            LOGGER.info("iter: {},  is_converged: {}".format(
                self.n_iter_, self.is_converged))

            # LOGGER.debug("model weights is {}".format(self.model_weights.coef_))

            if self.validation_strategy:
                LOGGER.debug('LinR guest running validation')
                self.validation_strategy.validate(self, self.n_iter_)
                if self.validation_strategy.need_stop():
                    LOGGER.debug('early stopping triggered')
                    break

            self.n_iter_ += 1
            if self.is_converged:
                break
        if self.validation_strategy and self.validation_strategy.has_saved_best_model(
        ):
            self.load_model(self.validation_strategy.cur_best_model)
Example #15
    def fit(self, data_instances):
        """
        Train lr model of role host
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_lr host")
        self._abnormal_detection(data_instances)

        self.header = self.get_header(data_instances)
        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)

        LOGGER.info("Get public_key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:" + str(batch_info))
        self.batch_size = batch_info["batch_size"]
        self.batch_num = batch_info["batch_num"]
        if self.batch_size < consts.MIN_BATCH_SIZE and self.batch_size != -1:
            raise ValueError(
                "Batch size received from guest should not be less than {}, "
                "except -1; got batch_size {}".format(consts.MIN_BATCH_SIZE,
                                                      self.batch_size))

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.encrypt_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_num)
        ]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)

        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False

        if self.fit_intercept:
            self.fit_intercept = False

        self.coef_ = self.initializer.init_model(
            model_shape, init_params=self.init_param_obj)

        self.n_iter_ = 0
        index_data_inst_map = {}

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:" + str(self.n_iter_))
            batch_index = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))
                # set batch_data
                if len(self.batch_index_list) < self.batch_num:
                    batch_data_index = federation.get(
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        idx=0)
                    LOGGER.info("Get batch_index from Guest")
                    self.batch_index_list.append(batch_data_index)
                else:
                    batch_data_index = self.batch_index_list[batch_index]

                # Get mini-batch train data
                if len(index_data_inst_map) < self.batch_num:
                    batch_data_inst = batch_data_index.join(
                        data_instances, lambda g, d: d)
                    index_data_inst_map[batch_index] = batch_data_inst
                else:
                    batch_data_inst = index_data_inst_map[batch_index]

                LOGGER.info("batch_data_inst size:{}".format(
                    batch_data_inst.count()))
                # transforms features of raw input 'batch_data_inst' into more representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data_inst)

                # compute forward
                host_forward = self.compute_forward(batch_feat_inst,
                                                    self.coef_,
                                                    self.intercept_,
                                                    batch_index)
                federation.remote(
                    host_forward,
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict, self.n_iter_,
                        batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote host_forward to guest")

                # compute host gradient
                fore_gradient = federation.get(
                    name=self.transfer_variable.fore_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.fore_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get fore_gradient from guest")
                if self.gradient_operator is None:
                    self.gradient_operator = HeteroLogisticGradient(
                        self.encrypt_operator)
                host_gradient = self.gradient_operator.compute_gradient(
                    batch_feat_inst, fore_gradient, fit_intercept=False)
                # regularization if necessary: send the encrypted penalty term to guest
                if self.updater is not None:
                    loss_regular = self.updater.loss_norm(self.coef_)
                    en_loss_regular = self.encrypt_operator.encrypt(
                        loss_regular)
                    federation.remote(
                        en_loss_regular,
                        name=self.transfer_variable.host_loss_regular.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.host_loss_regular,
                            self.n_iter_, batch_index),
                        role=consts.GUEST,
                        idx=0)
                    LOGGER.info("Remote host_loss_regular to guest")

                federation.remote(
                    host_gradient,
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote host_gradient to arbiter")

                # Get optimize host gradient and update model
                optim_host_gradient = federation.get(
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_host_gradient from arbiter")

                LOGGER.info("update_model")
                self.update_model(optim_host_gradient)

                # update local model that transforms features of raw input 'batch_data_inst'
                training_info = {
                    "iteration": self.n_iter_,
                    "batch_index": batch_index
                }
                self.update_local_model(fore_gradient, batch_data_inst,
                                        self.coef_, **training_info)

                batch_index += 1

                # temporary resource recovery and will be removed in the future
                rubbish_list = [host_forward, fore_gradient]
                data_overview.rubbish_clear(rubbish_list)

            is_stopped = federation.get(
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped, self.n_iter_,
                    batch_index),
                idx=0)
            LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

            self.n_iter_ += 1
            if is_stopped:
                LOGGER.info(
                    "Get stop signal from arbiter, model is converged, iter:{}"
                    .format(self.n_iter_))
                break

        LOGGER.info("Reach max iter {}, train model finish!".format(
            self.max_iter))
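Schematically, the host's compute_forward above contributes its share of the joint linear predictor. A plain NumPy sketch that ignores the Paillier encryption and any extra terms the real host_forward_dict may carry:

import numpy as np

def host_partial_forward(features, coef, intercept=0.0):
    # the host's piece of w.x; in the protocol above this contribution is
    # Paillier-encrypted before being remoted to the guest
    return features @ coef + intercept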
Example #16
    def fit(self, data_instances, validate_data=None):
        """
        Train poisson regression model of role host
        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """

        LOGGER.info("Enter hetero_poisson host")
        self._abnormal_detection(data_instances)

        self.validation_strategy = self.init_validation_strategy(
            data_instances, validate_data)

        self.header = self.get_header(data_instances)
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

        self.batch_generator.initialize_batch_generator(data_instances)

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.cipher_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_generator.batch_nums)
        ]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False
        w = self.initializer.init_model(model_shape,
                                        init_params=self.init_param_obj)
        self.model_weights = LinearModelWeights(
            w, fit_intercept=self.fit_intercept)

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:" + str(self.n_iter_))

            batch_data_generator = self.batch_generator.generate_batch_data()
            self.optimizer.set_iters(self.n_iter_)

            batch_index = 0
            for batch_data in batch_data_generator:
                batch_feat_inst = self.transform(batch_data)
                optim_host_gradient, _ = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.encrypted_calculator,
                    self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)

                self.gradient_loss_operator.compute_loss(
                    batch_feat_inst, self.model_weights,
                    self.encrypted_calculator, self.optimizer, self.n_iter_,
                    batch_index, self.cipher_operator)

                self.model_weights = self.optimizer.update_model(
                    self.model_weights, optim_host_gradient)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(
                suffix=(self.n_iter_, ))

            LOGGER.info("Get is_converged flag from arbiter:{}".format(
                self.is_converged))

            if self.validation_strategy:
                LOGGER.debug('Poisson host running validation')
                self.validation_strategy.validate(self, self.n_iter_)
                if self.validation_strategy.need_stop():
                    LOGGER.debug('early stopping triggered')
                    break

            self.n_iter_ += 1
            LOGGER.info("iter: {}, is_converged: {}".format(
                self.n_iter_, self.is_converged))
            if self.is_converged:
                break

        if not self.is_converged:
            LOGGER.info("Reach max iter {}, train model finish!".format(
                self.max_iter))

        if self.validation_strategy and self.validation_strategy.has_saved_best_model(
        ):
            self.load_model(self.validation_strategy.cur_best_model)
        self.set_summary(self.get_model_summary())
Example #17
    def fit_binary(self, data_instances, validate_data):
        self._abnormal_detection(data_instances)
        self.check_abnormal_values(data_instances)
        self.check_abnormal_values(validate_data)
        # self.validation_strategy = self.init_validation_strategy(data_instances, validate_data)
        self.callback_list.on_train_begin(data_instances, validate_data)

        LOGGER.debug(f"MODEL_STEP Start fin_binary, data count: {data_instances.count()}")

        self.header = self.get_header(data_instances)
        self.cipher_operator = self.cipher.gen_paillier_cipher_operator()

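        # the guest decides whether the asynchronous gradient protocol is used and
        # remotes the flag; the host simply follows it here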
        if self.transfer_variable.use_async.get(idx=0):
            LOGGER.debug(f"set_use_async")
            self.gradient_loss_operator.set_use_async()

        self.batch_generator.initialize_batch_generator(data_instances)
        self.gradient_loss_operator.set_total_batch_nums(self.batch_generator.batch_nums)

        self.encrypted_calculator = [EncryptModeCalculator(self.cipher_operator,
                                                           self.encrypted_mode_calculator_param.mode,
                                                           self.encrypted_mode_calculator_param.re_encrypted_rate) for _
                                     in range(self.batch_generator.batch_nums)]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False

        if not self.component_properties.is_warm_start:
            w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
            self.model_weights = LinearModelWeights(w, fit_intercept=self.init_param_obj.fit_intercept)
        else:
            self.callback_warm_start_init_iter(self.n_iter_)

        while self.n_iter_ < self.max_iter:
            self.callback_list.on_epoch_begin(self.n_iter_)

            LOGGER.info("iter:" + str(self.n_iter_))
            batch_data_generator = self.batch_generator.generate_batch_data()
            batch_index = 0
            self.optimizer.set_iters(self.n_iter_)
            for batch_data in batch_data_generator:
                # no feature transform is applied here; the raw batch is used directly
                batch_feat_inst = batch_data
                # LOGGER.debug(f"MODEL_STEP In Batch {batch_index}, batch data count: {batch_feat_inst.count()}")

                optim_host_gradient = self.gradient_loss_operator.compute_gradient_procedure(
                    batch_feat_inst, self.encrypted_calculator, self.model_weights, self.optimizer, self.n_iter_,
                    batch_index)
                # LOGGER.debug('optim_host_gradient: {}'.format(optim_host_gradient))

                self.gradient_loss_operator.compute_loss(self.model_weights, self.optimizer,
                                                         self.n_iter_, batch_index, self.cipher_operator)

                self.model_weights = self.optimizer.update_model(self.model_weights, optim_host_gradient)
                batch_index += 1

            self.is_converged = self.converge_procedure.sync_converge_info(suffix=(self.n_iter_,))

            LOGGER.info("Get is_converged flag from arbiter:{}".format(self.is_converged))
            LOGGER.info("iter: {}, is_converged: {}".format(self.n_iter_, self.is_converged))
            LOGGER.debug(f"flowid: {self.flowid}, step_index: {self.n_iter_}")

            self.callback_list.on_epoch_end(self.n_iter_)
            self.n_iter_ += 1
            if self.stop_training:
                break

            if self.is_converged:
                break
        self.callback_list.on_train_end()
        self.set_summary(self.get_model_summary())