def __init__(
        self,
        penalty='L2',
        tol=1e-5,
        alpha=1.0,
        optimizer='sgd',
        batch_size=-1,
        learning_rate=0.01,
        init_param=InitParam(),
        max_iter=100,
        early_stop='diff',
        encrypt_param=EncryptParam(),
        encrypted_mode_calculator_param=EncryptedModeCalculatorParam(),
        predict_param=PredictParam(),
        cv_param=CrossValidationParam(),
        decay=1,
        decay_sqrt=True,
        multi_class='ovr',
        validation_freqs=None):
    """Initialise the hetero logistic-regression parameters.

    Every argument except ``encrypted_mode_calculator_param`` is forwarded
    by keyword, unchanged, to the parent initialiser.
    ``encrypted_mode_calculator_param`` is kept on this instance as a
    private deep copy.
    """
    # Function-scope import so the block does not rely on a module-level
    # ``import copy`` being present.
    import copy

    super(HeteroLogisticParam, self).__init__(
        penalty=penalty,
        tol=tol,
        alpha=alpha,
        optimizer=optimizer,
        batch_size=batch_size,
        learning_rate=learning_rate,
        encrypt_param=encrypt_param,
        init_param=init_param,
        max_iter=max_iter,
        early_stop=early_stop,
        predict_param=predict_param,
        cv_param=cv_param,
        decay=decay,
        decay_sqrt=decay_sqrt,
        multi_class=multi_class,
        validation_freqs=validation_freqs)

    # The default EncryptedModeCalculatorParam() is created once, when the
    # function is defined. Storing it by reference would make every
    # instance built with the default share (and mutate) the same object.
    self.encrypted_mode_calculator_param = copy.deepcopy(
        encrypted_mode_calculator_param)
Example #2
0
 def __init__(self, tree_param: DecisionTreeParam = DecisionTreeParam(), task_type=consts.CLASSIFICATION,
              objective_param=ObjectiveParam(),
              learning_rate=0.3, num_trees=5, subsample_feature_rate=1, n_iter_no_change=True,
              tol=0.0001, bin_num=32, predict_param=PredictParam(), cv_param=CrossValidationParam(),
              validation_freqs=None, use_missing=False, zero_as_missing=False, subsample_random_seed=None,
              binning_error=consts.DEFAULT_RELATIVE_ERROR
              ):
     """Initialise the homo secure-boost parameters.

     ``use_missing``, ``zero_as_missing`` and ``tree_param`` are stored on
     this instance; every other argument is forwarded by keyword to the
     parent initialiser. ``tree_param`` is stored as a deep copy.
     """
     # Function-scope import so the block does not rely on a module-level
     # ``import copy`` being present.
     import copy

     super(HomoSecureBoostParam, self).__init__(task_type=task_type,
                                                objective_param=objective_param,
                                                learning_rate=learning_rate,
                                                num_trees=num_trees,
                                                subsample_feature_rate=subsample_feature_rate,
                                                n_iter_no_change=n_iter_no_change,
                                                tol=tol,
                                                bin_num=bin_num,
                                                predict_param=predict_param,
                                                cv_param=cv_param,
                                                validation_freqs=validation_freqs,
                                                subsample_random_seed=subsample_random_seed,
                                                binning_error=binning_error
                                                )
     self.use_missing = use_missing
     self.zero_as_missing = zero_as_missing
     # The default DecisionTreeParam() is a single object created at
     # definition time; copy it so instances never share tree settings.
     self.tree_param = copy.deepcopy(tree_param)
Example #3
0
 def __init__(
         self,
         batch_size=-1,
         init_param=SVDInitParam(),
         max_iter=100,
         early_stop: typing.Union[str, dict,
                                  SimpleNamespace] = {"early_stop": "diff"},
         optimizer: typing.Union[str, dict, SimpleNamespace] = {
             "optimizer": "SGD",
             "learning_rate": 0.01
         },
         predict_param=PredictParam(),
         cv_param=CrossValidationParam(),
         aggregate_iters=1,
         validation_freqs=None):
     """Initialise the hetero SVD parameters.

     ``aggregate_iters`` is stored on this instance; all other arguments
     are forwarded by keyword to the parent initialiser. ``optimizer`` and
     ``early_stop`` are forwarded as deep copies.
     """
     # Function-scope import so the block does not rely on a module-level
     # ``import copy`` being present.
     import copy

     # The dict defaults above are created once at definition time. Handing
     # the parent a copy guarantees that nothing done to the stored value
     # can leak into the shared default (or into the caller's own dict).
     super(HeteroSVDParam, self).__init__(optimizer=copy.deepcopy(optimizer),
                                          batch_size=batch_size,
                                          init_param=init_param,
                                          max_iter=max_iter,
                                          early_stop=copy.deepcopy(early_stop),
                                          predict_param=predict_param,
                                          cv_param=cv_param,
                                          validation_freqs=validation_freqs)
     self.aggregate_iters = aggregate_iters
Example #4
0
    def __init__(
            self,
            task_type=consts.CLASSIFICATION,
            objective_param=ObjectiveParam(),
            learning_rate=0.3,
            num_trees=5,
            subsample_feature_rate=1,
            n_iter_no_change=True,
            tol=0.0001,
            bin_num=32,
            predict_param=PredictParam(),
            cv_param=CrossValidationParam(),
            validation_freqs=None,
            metrics=None,
            subsample_random_seed=None,
            binning_error=consts.DEFAULT_RELATIVE_ERROR):
        """Store the boosting hyper-parameters on the instance.

        Each argument becomes the attribute of the same name. The three
        parameter objects (``objective_param``, ``predict_param`` and
        ``cv_param``) are deep-copied; everything else is stored as given.
        """
        super(BoostingParam, self).__init__()

        # Task and objective.
        self.task_type = task_type
        self.objective_param = copy.deepcopy(objective_param)

        # Training controls.
        self.learning_rate = learning_rate
        self.num_trees = num_trees
        self.subsample_feature_rate = subsample_feature_rate
        self.n_iter_no_change = n_iter_no_change
        self.tol = tol
        self.bin_num = bin_num

        # Nested parameter objects, copied so the defaults stay pristine.
        self.predict_param = copy.deepcopy(predict_param)
        self.cv_param = copy.deepcopy(cv_param)

        # Validation, metrics and remaining settings.
        self.validation_freqs = validation_freqs
        self.metrics = metrics
        self.subsample_random_seed = subsample_random_seed
        self.binning_error = binning_error
Example #5
0
 def __init__(
         self,
         penalty='L2',
         tol=1e-5,
         alpha=1.0,
         optimizer='sgd',
         batch_size=-1,
         learning_rate=0.01,
         init_param=InitParam(),
         max_iter=100,
         early_stop='diff',
         predict_param=PredictParam(),
         encrypt_param=EncryptParam(),
         encrypted_mode_calculator_param=EncryptedModeCalculatorParam(),
         cv_param=CrossValidationParam(),
         decay=1,
         decay_sqrt=True,
         validation_freqs=None):
     """Store the linear-model hyper-parameters on the instance.

     Each argument becomes the attribute of the same name. All nested
     parameter objects (``init_param``, ``encrypt_param``,
     ``encrypted_mode_calculator_param``, ``cv_param`` and
     ``predict_param``) are deep-copied; the remaining values are stored
     as given.
     """
     super(LinearParam, self).__init__()
     self.penalty = penalty
     self.tol = tol
     self.alpha = alpha
     self.optimizer = optimizer
     self.batch_size = batch_size
     self.learning_rate = learning_rate
     self.init_param = copy.deepcopy(init_param)
     self.max_iter = max_iter
     self.early_stop = early_stop
     # Copied like the other parameter objects: the default EncryptParam()
     # is created once at definition time and must not be shared between
     # instances.
     self.encrypt_param = copy.deepcopy(encrypt_param)
     self.encrypted_mode_calculator_param = copy.deepcopy(
         encrypted_mode_calculator_param)
     self.cv_param = copy.deepcopy(cv_param)
     self.predict_param = copy.deepcopy(predict_param)
     self.decay = decay
     self.decay_sqrt = decay_sqrt
     self.validation_freqs = validation_freqs
Example #6
0
    def __init__(
            self,
            tree_param: DecisionTreeParam = DecisionTreeParam(),
            task_type=consts.CLASSIFICATION,
            objective_param=ObjectiveParam(),
            learning_rate=0.3,
            num_trees=5,
            subsample_feature_rate=1.0,
            n_iter_no_change=True,
            tol=0.0001,
            encrypt_param=EncryptParam(),
            bin_num=32,
            encrypted_mode_calculator_param=EncryptedModeCalculatorParam(),
            predict_param=PredictParam(),
            cv_param=CrossValidationParam(),
            validation_freqs=None,
            early_stopping_rounds=None,
            use_missing=False,
            zero_as_missing=False,
            complete_secure=False,
            metrics=None,
            use_first_metric_only=False,
            random_seed=100,
            binning_error=consts.DEFAULT_RELATIVE_ERROR,
            sparse_optimization=False,
            run_goss=False,
            top_rate=0.2,
            other_rate=0.1,
            cipher_compress_error=None,
            cipher_compress=True,
            new_ver=True,
            boosting_strategy=consts.STD_TREE,
            work_mode=None,
            tree_num_per_party=1,
            guest_depth=2,
            host_depth=3,
            callback_param=CallbackParam(),
            multi_mode=consts.SINGLE_OUTPUT,
            EINI_inference=False,
            EINI_random_mask=False,
            EINI_complexity_check=False):
        """Initialise the hetero secure-boost parameters.

        ``task_type`` through ``early_stopping_rounds`` (14 values) are
        passed to the parent initialiser positionally, in the order listed
        in the call below; ``metrics``, ``use_first_metric_only``,
        ``random_seed`` and ``binning_error`` are passed by keyword.

        All remaining arguments are stored on this instance under the same
        name. ``tree_param`` and ``callback_param`` are deep-copied; the
        others are stored as given.
        """

        # NOTE(review): the positional order must match the parent's
        # signature, which is not visible here - confirm before reordering.
        super(HeteroSecureBoostParam,
              self).__init__(task_type,
                             objective_param,
                             learning_rate,
                             num_trees,
                             subsample_feature_rate,
                             n_iter_no_change,
                             tol,
                             encrypt_param,
                             bin_num,
                             encrypted_mode_calculator_param,
                             predict_param,
                             cv_param,
                             validation_freqs,
                             early_stopping_rounds,
                             metrics=metrics,
                             use_first_metric_only=use_first_metric_only,
                             random_seed=random_seed,
                             binning_error=binning_error)

        # Attributes owned by this class rather than the parent.
        self.tree_param = copy.deepcopy(tree_param)
        self.zero_as_missing = zero_as_missing
        self.use_missing = use_missing
        self.complete_secure = complete_secure
        self.sparse_optimization = sparse_optimization
        self.run_goss = run_goss
        self.top_rate = top_rate
        self.other_rate = other_rate
        self.cipher_compress_error = cipher_compress_error
        self.cipher_compress = cipher_compress
        self.new_ver = new_ver
        self.EINI_inference = EINI_inference
        self.EINI_random_mask = EINI_random_mask
        self.EINI_complexity_check = EINI_complexity_check
        self.boosting_strategy = boosting_strategy
        self.work_mode = work_mode
        self.tree_num_per_party = tree_num_per_party
        self.guest_depth = guest_depth
        self.host_depth = host_depth
        self.callback_param = copy.deepcopy(callback_param)
        self.multi_mode = multi_mode
Example #7
0
    def __init__(
            self,
            tree_param: DecisionTreeParam = DecisionTreeParam(),
            task_type=consts.CLASSIFICATION,
            objective_param=ObjectiveParam(),
            learning_rate=0.3,
            num_trees=5,
            subsample_feature_rate=1,
            n_iter_no_change=True,
            tol=0.0001,
            encrypt_param=EncryptParam(),
            bin_num=32,
            encrypted_mode_calculator_param=EncryptedModeCalculatorParam(),
            predict_param=PredictParam(),
            cv_param=CrossValidationParam(),
            validation_freqs=None,
            early_stopping_rounds=None,
            use_missing=False,
            zero_as_missing=False,
            complete_secure=False,
            tree_num_per_party=1,
            guest_depth=1,
            host_depth=1,
            work_mode='mix',
            metrics=None,
            sparse_optimization=False,
            subsample_random_seed=None,
            binning_error=consts.DEFAULT_RELATIVE_ERROR):
        """
        work_mode:
            mix: alternate between guest and host features when building trees. For example, the first
                 'tree_num_per_party' trees use guest features, the next 'tree_num_per_party' trees use
                 host features, and so on.
            layered: only supports 2 parties. In layered mode the first 'host_depth' layers use host
                     features, and the next 'guest_depth' layers use only guest features.
        tree_num_per_party: each party in turn builds 'tree_num_per_party' trees until the maximum tree
            number is reached; only valid when work_mode is mix.
        guest_depth: the guest builds the last 'guest_depth' layers of a decision tree using guest
            features; only valid when work_mode is layered.
        host_depth: the host builds the first 'host_depth' layers of a decision tree using host
            features; only valid when work_mode is layered.

        Other params are the same as HeteroSecureBoost.
        """

        # tree_param through complete_secure are forwarded positionally, so
        # their order must match the parent's signature (not visible here).
        super(HeteroFastSecureBoostParam,
              self).__init__(tree_param,
                             task_type,
                             objective_param,
                             learning_rate,
                             num_trees,
                             subsample_feature_rate,
                             n_iter_no_change,
                             tol,
                             encrypt_param,
                             bin_num,
                             encrypted_mode_calculator_param,
                             predict_param,
                             cv_param,
                             validation_freqs,
                             early_stopping_rounds,
                             use_missing,
                             zero_as_missing,
                             complete_secure,
                             metrics=metrics,
                             subsample_random_seed=subsample_random_seed,
                             sparse_optimization=sparse_optimization,
                             binning_error=binning_error)

        # Settings specific to the fast (mix / layered) variant.
        self.tree_num_per_party = tree_num_per_party
        self.guest_depth = guest_depth
        self.host_depth = host_depth
        self.work_mode = work_mode
Example #8
0
    def __init__(self):
        """Set every attribute of the boosting model to its initial value.

        Takes no arguments: after calling the parent initialiser, each
        attribute is given a placeholder (mostly ``None``, an empty
        container or a fresh default parameter object).
        """

        super(Boosting, self).__init__()

        # input hyper parameter
        self.task_type = None
        self.learning_rate = None
        self.boosting_round = None
        self.n_iter_no_change = None
        self.tol = 0.0
        self.bin_num = None
        self.calculated_mode = None
        self.cv_param = None
        self.validation_freqs = None
        self.feature_name_fid_mapping = {}
        self.mode = None
        self.predict_param = PredictParam()
        self.objective_param = ObjectiveParam()
        self.model_param = BoostingParam()
        self.subsample_feature_rate = 1.0
        self.subsample_random_seed = None
        self.model_name = 'default'  # model name
        self.early_stopping_rounds = None
        self.use_first_metric_only = False
        self.binning_error = consts.DEFAULT_RELATIVE_ERROR

        # running variable

        # random seed
        self.random_seed = 100

        # data
        self.data_inst = None  # original input data
        self.binning_class = None  # class used for data binning
        self.binning_obj = None  # instance of self.binning_class
        self.data_bin = None  # data with transformed features
        self.bin_split_points = None  # feature split points
        self.bin_sparse_points = None  # feature sparse points
        self.use_missing = False  # should handle missing value or not
        self.zero_as_missing = False  # set missing value as value or not

        # booster
        self.booster_dim = 1  # booster dimension
        self.booster_meta = None  # booster's hyper parameters
        self.boosting_model_list = []  # list holds boosters

        # training
        self.feature_num = None  # feature number
        self.init_score = None  # init score
        self.num_classes = 1  # number of classes
        self.convergence = None  # function to check loss convergence
        self.classes_ = []  # list of class indices
        self.y = None  # label
        self.y_hat = None  # accumulated predict value
        self.loss = None  # loss func
        self.predict_y_hat = None  # accumulated predict value for predicting mode
        self.history_loss = []  # list holds loss history
        self.validation_strategy = None
        self.metrics = None
        self.is_converged = False

        # cache and header alignment
        self.predict_data_cache = PredictDataCache()
        self.data_alignment_map = {}

        # federation
        self.transfer_variable = None
Example #9
0
    def __init__(self,
                 alpha=1,
                 tol=0.000001,
                 n_iter_no_change=False,
                 validation_freqs=None,
                 optimizer={
                     'optimizer': 'Adam',
                     'learning_rate': 0.01
                 },
                 nn_define={},
                 epochs=1,
                 intersect_param=IntersectParam(consts.RSA),
                 config_type='keras',
                 batch_size=-1,
                 encrypte_param=EncryptParam(),
                 encrypted_mode_calculator_param=EncryptedModeCalculatorParam(
                     mode="confusion_opt"),
                 predict_param=PredictParam(),
                 mode='plain',
                 communication_efficient=False,
                 local_round=5,
                 callback_param=CallbackParam()):
        """
        Parameters
        ----------
        alpha : float
            a loss coefficient defined in paper, it defines the importance of alignment loss
        tol : float
            loss tolerance
        n_iter_no_change : bool
            check loss convergence or not
        validation_freqs : None or positive integer or container object in python
            Do validation in training process or Not.
            if equals None, will not do validation in train process;
            if equals positive integer, will validate data every validation_freqs epochs passes;
            if container object in python, will validate data if epochs belong to this container.
            e.g. validation_freqs = [10, 15], will validate data when epoch equals to 10 and 15.
            The default value is None, 1 is suggested. You can set it to a number larger than 1 in order to
            speed up training by skipping validation rounds. When it is larger than 1, a number which is
            divisible by "epochs" is recommended, otherwise, you will miss the validation scores
            of last training epoch.
        optimizer : str or dict
            optimizer method, accept following types:
            1. a string, one of "Adadelta", "Adagrad", "Adam", "Adamax", "Nadam", "RMSprop", "SGD"
            2. a dict, with a required key-value pair keyed by "optimizer",
                with optional key-value pairs such as learning rate.
            defaults to {'optimizer': 'Adam', 'learning_rate': 0.01}; stored as a deep copy
        nn_define : dict
            a dict represents the structure of neural network, it can be output by tf-keras;
            stored as a deep copy
        epochs : int
            epochs num
        intersect_param
            define the intersect method
        config_type : str
            config type, defaults to 'keras'
        batch_size : int
            batch size when computing transformed feature embedding, -1 use full data.
        encrypte_param
            encrypted param; stored on the instance as ``encrypt_param``
        encrypted_mode_calculator_param
            encrypted mode calculator param
        predict_param
            predict param
        mode: {"plain", "encrypted"}
            plain: will not use any encrypt algorithms, data exchanged in plaintext
            encrypted: use paillier to encrypt gradients
        communication_efficient: bool
            will use communication efficient or not. when communication efficient is enabled, FTL model will
            update gradients by several local rounds using intermediate data
        local_round: int
            local update round when using communication efficient
        callback_param
            callback param; stored as a deep copy
        """

        super(FTLParam, self).__init__()
        self.alpha = alpha
        self.tol = tol
        self.n_iter_no_change = n_iter_no_change
        self.validation_freqs = validation_freqs
        # The dict defaults for optimizer / nn_define are created once, at
        # definition time. Copy them so that editing one instance's settings
        # can never alter the defaults seen by later instances.
        self.optimizer = copy.deepcopy(optimizer)
        self.nn_define = copy.deepcopy(nn_define)
        self.epochs = epochs
        self.intersect_param = copy.deepcopy(intersect_param)
        self.config_type = config_type
        self.batch_size = batch_size
        self.encrypted_mode_calculator_param = copy.deepcopy(
            encrypted_mode_calculator_param)
        self.encrypt_param = copy.deepcopy(encrypte_param)
        self.predict_param = copy.deepcopy(predict_param)
        self.mode = mode
        self.communication_efficient = communication_efficient
        self.local_round = local_round
        self.callback_param = copy.deepcopy(callback_param)
    def check(self):
        """Validate the workflow parameters that matter for ``self.method``.

        ``self.method`` is first normalised through
        ``self.check_and_change_lower``; which attributes are examined
        afterwards depends on the resulting value. Type checks compare the
        exact type name, so subclasses (``bool`` for ``int``, for example)
        are rejected.

        Returns
        -------
        bool
            ``True`` when no check raises.

        Raises
        ------
        ValueError
            If an examined attribute has the wrong type, ``n_splits`` is
            not positive, or ``work_mode`` is neither 0 nor 1. Errors raised
            by the delegated ``check`` calls propagate unchanged.
        """
        descr = "workflow param's "

        def _expect(attr, type_name):
            # Exact type-name comparison shared by every attribute check.
            current = getattr(self, attr)
            if type(current).__name__ != type_name:
                raise ValueError(
                    "workflow param's {} {} not supported, should be {} type"
                    .format(attr, current, type_name))

        self.method = self.check_and_change_lower(self.method, [
            'train', 'predict', 'cross_validation', 'intersect', 'binning',
            'feature_select', 'one_vs_rest_train', "one_vs_rest_predict"
        ], descr)
        method = self.method
        uses_model = method in ("train", "predict", "cross_validation")

        # Training-style inputs.
        if method in ('train', 'binning', 'feature_select'):
            for attr in ("train_input_table", "train_input_namespace"):
                _expect(attr, "str")

        # Model storage location.
        if uses_model:
            for attr in ("model_table", "model_namespace"):
                _expect(attr, "str")

        # Prediction input / output tables.
        if method == 'predict':
            for attr in ("predict_input_table", "predict_input_namespace",
                         "predict_output_table", "predict_output_namespace"):
                _expect(attr, "str")

        # Result partitioning and evaluation output.
        if uses_model:
            _expect("predict_result_partition", "int")
            for attr in ("evaluation_output_table",
                         "evaluation_output_namespace"):
                _expect(attr, "str")

        # Cross-validation data and fold count.
        if method == 'cross_validation':
            for attr in ("data_input_table", "data_input_namespace"):
                _expect(attr, "str")
            _expect("n_splits", "int")
            if self.n_splits <= 0:
                raise ValueError(
                    "workflow param's n_splits must be greater or equal to 1")

        # Optional intersect outputs: only validated when provided.
        for attr in ("intersect_data_output_table",
                     "intersect_data_output_namespace"):
            if getattr(self, attr) is not None:
                _expect(attr, "str")

        DataIOParam.check(self.dataio_param)

        _expect("work_mode", "int")
        if self.work_mode not in [0, 1]:
            raise ValueError(
                "workflow param's work_mode must be 0 (represent to standalone mode) or 1 (represent to cluster mode)"
            )

        if uses_model:
            PredictParam.check(self.predict_param)
            EvaluateParam.check(self.evaluate_param)

        LOGGER.debug("Finish workerflow parameter check!")
        return True