Example #1
    def __init__(self, hetero_nn_param):
        super(HeteroNNKerasGuestModel, self).__init__()

        self.bottom_model = None
        self.interactive_model = None
        self.top_model = None
        self.bottom_nn_define = None
        self.top_nn_define = None
        self.interactive_layer_define = None
        self.config_type = None
        self.optimizer = None
        self.loss = None
        self.metrics = None
        self.hetero_nn_param = None
        self.transfer_variable = None
        self.model_builder = None
        self.bottom_model_input_shape = 0
        self.top_model_input_shape = None

        self.batch_size = None

        self.is_empty = False

        self.set_nn_meta(hetero_nn_param)
        self.model_builder = nn_model.get_nn_builder(
            config_type=self.config_type)
        self.data_converter = KerasSequenceDataConverter()

        self.selector = SelectorFactory.get_selector(
            hetero_nn_param.selector_param.method,
            hetero_nn_param.selector_param.selective_size,
            beta=hetero_nn_param.selector_param.beta,
            random_rate=hetero_nn_param.selector_param.random_state,
            min_prob=hetero_nn_param.selector_param.min_prob)
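For orientation, here is a minimal, self-contained numpy sketch of the bottom / interactive / top split that this guest model wires together. The layer sizes, the weight values, and the host_bottom_out input are illustrative assumptions, not FATE's actual API.

import numpy as np

# hypothetical guest-side flow: bottom model -> interactive layer -> top model
def bottom_model(x):                        # guest bottom model on local features
    return np.tanh(x @ np.full((4, 3), 0.1))

def interactive_layer(guest_out, host_out):  # merges guest and host bottom outputs
    return np.concatenate([guest_out, host_out], axis=1)

def top_model(z):                           # top model produces the final score
    return 1.0 / (1.0 + np.exp(-(z @ np.full((6, 1), 0.1))))

x_guest = np.random.rand(2, 4)              # guest features (2 samples, 4 features)
host_bottom_out = np.random.rand(2, 3)      # assumed to arrive from the host party
pred = top_model(interactive_layer(bottom_model(x_guest), host_bottom_out))
print(pred.shape)                           # (2, 1)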
Example #2
    def __init__(self, hetero_nn_param):
        super(HeteroNNKerasGuestModel, self).__init__()

        self.bottom_model = None
        self.interactive_model = None
        self.top_model = None
        self.bottom_nn_define = None
        self.top_nn_define = None
        self.interactive_layer_define = None
        self.config_type = None
        self.optimizer = None
        self.loss = None
        self.metrics = None
        self.hetero_nn_param = None
        self.transfer_variable = None
        self.model_builder = None
        self.bottom_model_input_shape = 0
        self.top_model_input_shape = None

        self.is_empty = False

        self.set_nn_meta(hetero_nn_param)
        self.model_builder = nn_model.get_nn_builder(
            config_type=self.config_type)
        self.data_converter = KerasSequenceDataConverter()
Example #3
    def __init__(self):
        super(FTL, self).__init__()

        # input parameters
        self.nn_define = None
        self.alpha = None
        self.tol = None
        self.learning_rate = None
        self.n_iter_no_change = None
        self.validation_freqs = None
        self.early_stopping_rounds = None
        self.use_first_metric_only = None
        self.optimizer = None
        self.intersect_param = None
        self.config_type = None
        self.comm_eff = None
        self.local_round = 1

        self.encrypted_mode_calculator_param = None

        # runtime variables
        self.verbose = False
        self.nn: KerasNNModel = None
        self.nn_builder = None
        self.model_param = FTLParam()
        self.x_shape = None
        self.input_dim = None
        self.data_num = 0
        self.overlap_num = 0
        self.transfer_variable = FTLTransferVariable()
        self.data_convertor = KerasSequenceDataConverter()
        self.mode = 'plain'
        self.encrypt_calculators = []
        self.encrypter = None
        self.partitions = 16
        self.batch_size = None
        self.epochs = None
        self.store_header = None  # header of input data table

        self.cache_dataloader = {}

        self.validation_strategy = None
Example #4
class FTL(ModelBase):
    def __init__(self):
        super(FTL, self).__init__()

        # input parameters
        self.nn_define = None
        self.alpha = None
        self.tol = None
        self.learning_rate = None
        self.n_iter_no_change = None
        self.validation_freqs = None
        self.early_stopping_rounds = None
        self.use_first_metric_only = None
        self.optimizer = None
        self.intersect_param = None
        self.config_type = None
        self.comm_eff = None
        self.local_round = 1

        self.encrypted_mode_calculator_param = None

        # runtime variables
        self.verbose = False
        self.nn: KerasNNModel = None
        self.nn_builder = None
        self.model_param = FTLParam()
        self.x_shape = None
        self.input_dim = None
        self.data_num = 0
        self.overlap_num = 0
        self.transfer_variable = FTLTransferVariable()
        self.data_convertor = KerasSequenceDataConverter()
        self.mode = 'plain'
        self.encrypt_calculators = []
        self.encrypter = None
        self.partitions = 16
        self.batch_size = None
        self.epochs = None
        self.store_header = None  # header of input data table

        self.cache_dataloader = {}

        self.validation_strategy = None

    def _init_model(self, param: FTLParam):

        self.nn_define = param.nn_define
        self.alpha = param.alpha
        self.tol = param.tol
        self.n_iter_no_change = param.n_iter_no_change
        self.validation_freqs = param.validation_freqs
        self.optimizer = param.optimizer
        self.intersect_param = param.intersect_param
        self.config_type = param.config_type
        self.batch_size = param.batch_size
        self.epochs = param.epochs
        self.mode = param.mode
        self.comm_eff = param.communication_efficient
        self.local_round = param.local_round

        assert 'learning_rate' in self.optimizer.kwargs, 'optimizer setting must contain learning_rate'
        self.learning_rate = self.optimizer.kwargs['learning_rate']

        if not self.comm_eff:
            self.local_round = 1
            LOGGER.debug(
                'communication-efficient mode is not enabled, local_round set to 1'
            )

        self.encrypted_mode_calculator_param = param.encrypted_mode_calculator_param
        self.encrypter = self.generate_encrypter(param)
        self.predict_param = param.predict_param
        self.rng_generator = random_number_generator.RandomNumberGenerator()

    @staticmethod
    def debug_data_inst(data_inst):
        collect_data = list(data_inst.collect())
        LOGGER.debug('showing DTable')
        for d in collect_data:
            LOGGER.debug('key {} id {}, features {} label {}'.format(
                d[0], d[1].inst_id, d[1].features, d[1].label))

    @staticmethod
    def reset_label(inst, mapping):
        new_inst = copy.deepcopy(inst)
        new_inst.label = mapping[new_inst.label]
        return new_inst

    @staticmethod
    def check_label(data_inst):
        """
        check label. FTL only supports binary classification, and labels should be 1 or -1
        """

        LOGGER.debug('checking label')
        label_checker = ClassifyLabelChecker()
        num_class, class_set = label_checker.validate_label(data_inst)
        if num_class != 2:
            raise ValueError(
                'FTL only supports binary classification, but {} labels were provided.'
                .format(num_class))

        if 1 in class_set and -1 in class_set:
            return data_inst
        else:
            sorted_class_set = sorted(list(class_set))
            new_label_mapping = {
                sorted_class_set[1]: 1,
                sorted_class_set[0]: -1
            }
            reset_label = functools.partial(FTL.reset_label,
                                            mapping=new_label_mapping)
            new_table = data_inst.mapValues(reset_label)
            new_table.schema = copy.deepcopy(data_inst.schema)
            return new_table
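A standalone sketch of the remapping check_label performs: with exactly two classes, the larger label maps to 1 and the smaller to -1. Plain Python lists are used here instead of a DTable, purely for illustration.

def remap_binary_labels(labels):
    classes = sorted(set(labels))
    assert len(classes) == 2, 'FTL only supports binary classification'
    mapping = {classes[1]: 1, classes[0]: -1}   # larger label -> 1, smaller -> -1
    return [mapping[label] for label in labels]

print(remap_binary_labels([0, 1, 1, 0]))  # [-1, 1, 1, -1]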

    def generate_encrypter(self, param) -> PaillierEncrypt:
        LOGGER.info("generate encrypter")
        if param.encrypt_param.method.lower() == consts.PAILLIER.lower():
            encrypter = PaillierEncrypt()
            encrypter.generate_key(param.encrypt_param.key_length)
        else:
            raise NotImplementedError("encrypt method not supported yet!!!")

        return encrypter

    def generated_encrypted_calculator(self):
        encrypted_calculator = EncryptModeCalculator(
            self.encrypter, self.encrypted_mode_calculator_param.mode,
            self.encrypted_mode_calculator_param.re_encrypted_rate)
        return encrypted_calculator

    def encrypt_tensor(self, components, return_dtable=True):
        """
        transform numpy array into Paillier tensor and encrypt
        """

        if len(self.encrypt_calculators) == 0:
            self.encrypt_calculators = [
                self.generated_encrypted_calculator() for _ in range(3)
            ]
        encrypted_tensors = []
        for comp, calculator in zip(components, self.encrypt_calculators):
            encrypted_tensor = PaillierTensor(ori_data=comp,
                                              partitions=self.partitions)
            if return_dtable:
                encrypted_tensors.append(
                    encrypted_tensor.encrypt(calculator).get_obj())
            else:
                encrypted_tensors.append(encrypted_tensor.encrypt(calculator))

        return encrypted_tensors

    def init_validation_strategy(self, train_data=None, validate_data=None):
        validation_strategy = ValidationStrategy(self.role,
                                                 consts.HETERO,
                                                 self.validation_freqs,
                                                 self.early_stopping_rounds,
                                                 self.use_first_metric_only,
                                                 arbiter_comm=False)
        validation_strategy.set_train_data(train_data)
        validation_strategy.set_validate_data(validate_data)
        return validation_strategy

    def learning_rate_decay(self, learning_rate, epoch):
        """
        learning_rate decay
        """
        return learning_rate * 1 / np.sqrt(epoch + 1)
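A quick check of the schedule above, which is the usual inverse-square-root rule lr / sqrt(epoch + 1):

import numpy as np

lr0 = 0.01
for epoch in range(4):
    print(epoch, lr0 / np.sqrt(epoch + 1))
# 0 0.01, 1 ~0.00707, 2 ~0.00577, 3 0.005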

    def sync_stop_flag(self, num_round, stop_flag=None):
        """
        stop flag for n_iter_no_change
        """
        LOGGER.info("sync stop flag, boosting round is {}".format(num_round))
        if self.role == consts.GUEST:
            self.transfer_variable.stop_flag.remote(stop_flag,
                                                    role=consts.HOST,
                                                    idx=-1,
                                                    suffix=(num_round, ))
        elif self.role == consts.HOST:
            return self.transfer_variable.stop_flag.get(idx=0,
                                                        suffix=(num_round, ))

    def prepare_data(self, intersect_obj, data_inst, guest_side=False):
        """
        find intersect ids and prepare dataloader
        """
        if guest_side:
            data_inst = self.check_label(data_inst)

        overlap_samples = intersect_obj.run_intersect(
            data_inst)  # find intersect ids
        non_overlap_samples = data_inst.subtractByKey(overlap_samples)

        LOGGER.debug('num of overlap/non-overlap samples: {}/{}'.format(
            overlap_samples.count(), non_overlap_samples.count()))

        if overlap_samples.count() == 0:
            raise ValueError('no overlap samples')

        if guest_side and non_overlap_samples.count() == 0:
            raise ValueError('non-overlapping samples are required on the guest side')

        self.store_header = data_inst.schema['header']
        LOGGER.debug('data inst header is {}'.format(self.store_header))

        LOGGER.debug('has {} overlap samples'.format(overlap_samples.count()))

        batch_size = self.batch_size
        if self.batch_size == -1:
            # -1 means full batch: use a batch size larger than the sample count
            batch_size = data_inst.count() + 1
        data_loader = FTLDataLoader(non_overlap_samples=non_overlap_samples,
                                    batch_size=batch_size,
                                    overlap_samples=overlap_samples,
                                    guest_side=guest_side)

        LOGGER.debug("data details are :{}".format(
            data_loader.data_basic_info()))

        return (data_loader, data_loader.x_shape, data_inst.count(),
                len(data_loader.get_overlap_indexes()))
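The batch_size == -1 branch above forces a single full batch. A minimal sketch of the resulting batch count; the ceiling-division split is an assumption about how a loader like FTLDataLoader would chunk the data, not taken from its source.

import math

def num_batches(sample_count, batch_size):
    if batch_size == -1:
        batch_size = sample_count + 1   # larger than the sample count -> one batch
    return math.ceil(sample_count / batch_size)

print(num_batches(1000, 128))  # 8
print(num_batches(1000, -1))   # 1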

    def initialize_nn(self, input_shape):
        """
        initializing nn weights
        """

        loss = "keep_predict_loss"
        self.nn_builder = get_nn_builder(config_type=self.config_type)
        self.nn: NNModel = self.nn_builder(loss=loss,
                                           nn_define=self.nn_define,
                                           optimizer=self.optimizer,
                                           metrics=None,
                                           input_shape=input_shape)

        LOGGER.debug('printing nn layers structure')
        for layer in self.nn._model.layers:
            LOGGER.debug('input shape {}, output shape {}'.format(
                layer.input_shape, layer.output_shape))

    def generate_mask(self, shape):
        """
        generate random number mask
        """
        return self.rng_generator.generate_random_number(shape)

    def _batch_gradient_update(self, X, grads):
        """
        compute and update gradients for all samples
        """
        data = self.data_convertor.convert_data(X, grads)
        self.nn.train(data)

    def _get_mini_batch_gradient(self, X_batch, backward_grads_batch):
        """
        compute gradient for a mini batch
        """
        grads = self.nn.get_weight_gradients(X_batch, backward_grads_batch)
        return grads

    def update_nn_weights(self,
                          backward_grads,
                          data_loader: FTLDataLoader,
                          epoch_idx,
                          decay=False):
        """
        updating bottom nn model weights using backward gradients
        """

        LOGGER.debug('updating grads at epoch {}'.format(epoch_idx))

        assert len(data_loader.x) == len(backward_grads)

        weight_grads = []
        for i in range(len(data_loader)):
            start, end = data_loader.get_batch_indexes(i)
            batch_x = data_loader.x[start:end]
            batch_grads = backward_grads[start:end]
            batch_weight_grads = self._get_mini_batch_gradient(
                batch_x, batch_grads)
            if len(weight_grads) == 0:
                weight_grads.extend(batch_weight_grads)
            else:
                for w, bw in zip(weight_grads, batch_weight_grads):
                    w += bw

        if decay:
            new_learning_rate = self.learning_rate_decay(
                self.learning_rate, epoch_idx)
            self.nn.set_learning_rate(new_learning_rate)
            LOGGER.debug('epoch {} optimizer details are {}'.format(
                epoch_idx, self.nn.export_optimizer_config()))

        self.nn.apply_gradients(weight_grads)
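update_nn_weights sums the per-batch weight gradients and applies them once at the end of the epoch. A self-contained numpy sketch of that accumulation pattern; the linear-layer gradient x_batch.T @ g_batch is an illustrative assumption.

import numpy as np

x = np.random.rand(10, 3)
backward_grads = np.random.rand(10, 3)     # stands in for gradients received from the other party
weight_grads = None
for start in range(0, len(x), 4):          # mini-batches of size 4
    x_batch = x[start:start + 4]
    g_batch = backward_grads[start:start + 4]
    batch_grad = x_batch.T @ g_batch       # per-batch weight gradient
    weight_grads = batch_grad if weight_grads is None else weight_grads + batch_grad
print(weight_grads.shape)                  # (3, 3), applied in a single update afterwards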

    def export_nn(self):
        return self.nn.export_model()

    @staticmethod
    def get_dataset_key(data_inst):
        return id(data_inst)

    def get_model_meta(self):

        model_meta = FTLModelMeta()
        model_meta.config_type = self.config_type
        model_meta.nn_define = json.dumps(self.nn_define)
        model_meta.batch_size = self.batch_size
        model_meta.epochs = self.epochs
        model_meta.tol = self.tol
        model_meta.input_dim = self.input_dim

        predict_param = FTLPredictParam()

        optimizer_param = FTLOptimizerParam()
        optimizer_param.optimizer = self.optimizer.optimizer
        optimizer_param.kwargs = json.dumps(self.optimizer.kwargs)

        model_meta.optimizer_param.CopyFrom(optimizer_param)
        model_meta.predict_param.CopyFrom(predict_param)

        return model_meta

    def get_model_param(self):

        model_param = FTLModelParam()
        model_bytes = self.nn.export_model()
        model_param.model_bytes = model_bytes
        model_param.header.extend(list(self.store_header))

        return model_param

    def set_model_meta(self, model_meta):

        self.config_type = model_meta.config_type
        self.nn_define = json.loads(model_meta.nn_define)
        self.batch_size = model_meta.batch_size
        self.epochs = model_meta.epochs
        self.tol = model_meta.tol
        self.optimizer = FTLParam()._parse_optimizer(FTLParam().optimizer)
        self.input_dim = model_meta.input_dim

        self.optimizer.optimizer = model_meta.optimizer_param.optimizer
        self.optimizer.kwargs = json.loads(model_meta.optimizer_param.kwargs)

        self.initialize_nn((self.input_dim, ))

    def set_model_param(self, model_param):

        self.nn.restore_model(model_param.model_bytes)
        self.store_header = list(model_param.header)
        LOGGER.debug('loaded stored header: {}'.format(self.store_header))