def __init__(self, penalty='L2', tol=1e-5, alpha=1.0, optimizer='sgd',
             batch_size=-1, learning_rate=0.01, init_param=InitParam(),
             max_iter=100, early_stop='diff', encrypt_param=EncryptParam(),
             encrypted_mode_calculator_param=EncryptedModeCalculatorParam(),
             predict_param=PredictParam(), cv_param=CrossValidationParam(),
             decay=1, decay_sqrt=True, multi_class='ovr',
             validation_freqs=None):
    """Initialise the hetero logistic regression parameters.

    Every argument except ``encrypted_mode_calculator_param`` is forwarded
    by keyword to the parent constructor. ``encrypted_mode_calculator_param``
    is stored on this instance as a private deep copy.

    Note that the param-object defaults (``InitParam()``, ``EncryptParam()``
    and so on) are built once, when this function is defined, so the same
    default objects are handed to every call that omits them.
    """
    super(HeteroLogisticParam, self).__init__(
        penalty=penalty,
        tol=tol,
        alpha=alpha,
        optimizer=optimizer,
        batch_size=batch_size,
        learning_rate=learning_rate,
        encrypt_param=encrypt_param,
        init_param=init_param,
        max_iter=max_iter,
        early_stop=early_stop,
        predict_param=predict_param,
        cv_param=cv_param,
        decay=decay,
        decay_sqrt=decay_sqrt,
        multi_class=multi_class,
        validation_freqs=validation_freqs)

    # Deep-copy so that instances never share (and cannot mutate) the single
    # default EncryptedModeCalculatorParam() created at definition time, nor
    # an object the caller still holds.
    self.encrypted_mode_calculator_param = copy.deepcopy(
        encrypted_mode_calculator_param)
def __init__(self, tree_param: DecisionTreeParam = DecisionTreeParam(),
             task_type=consts.CLASSIFICATION,
             objective_param=ObjectiveParam(),
             learning_rate=0.3, num_trees=5, subsample_feature_rate=1,
             n_iter_no_change=True, tol=0.0001, bin_num=32,
             predict_param=PredictParam(), cv_param=CrossValidationParam(),
             validation_freqs=None, use_missing=False, zero_as_missing=False,
             subsample_random_seed=None,
             binning_error=consts.DEFAULT_RELATIVE_ERROR):
    """Initialise the homo secure-boost parameters.

    ``tree_param``, ``use_missing`` and ``zero_as_missing`` are kept on this
    instance; every other argument is forwarded by keyword to the parent
    constructor.

    ``tree_param`` is stored as a deep copy. Its default
    ``DecisionTreeParam()`` is built once at definition time, so storing the
    reference would make all instances share one tree configuration.
    """
    super(HomoSecureBoostParam, self).__init__(
        task_type=task_type,
        objective_param=objective_param,
        learning_rate=learning_rate,
        num_trees=num_trees,
        subsample_feature_rate=subsample_feature_rate,
        n_iter_no_change=n_iter_no_change,
        tol=tol,
        bin_num=bin_num,
        predict_param=predict_param,
        cv_param=cv_param,
        validation_freqs=validation_freqs,
        subsample_random_seed=subsample_random_seed,
        binning_error=binning_error)

    self.use_missing = use_missing
    self.zero_as_missing = zero_as_missing
    # Private copy: protects the shared default object and the caller's own.
    self.tree_param = copy.deepcopy(tree_param)
def __init__(self, batch_size=-1, init_param=SVDInitParam(), max_iter=100,
             early_stop: typing.Union[str, dict, SimpleNamespace] = {"early_stop": "diff"},
             optimizer: typing.Union[str, dict, SimpleNamespace] = {
                 "optimizer": "SGD",
                 "learning_rate": 0.01
             },
             predict_param=PredictParam(), cv_param=CrossValidationParam(),
             aggregate_iters=1, validation_freqs=None):
    """Initialise the hetero SVD parameters.

    ``aggregate_iters`` is stored on this instance; all remaining arguments
    are handed to the parent constructor by keyword.
    """
    # NOTE(review): the ``early_stop`` and ``optimizer`` defaults are dict
    # literals, created once at definition time and forwarded as-is. Whether
    # that is safe depends on the parent constructor not mutating them -
    # confirm against the parent class.
    parent_kwargs = {
        "optimizer": optimizer,
        "batch_size": batch_size,
        "init_param": init_param,
        "max_iter": max_iter,
        "early_stop": early_stop,
        "predict_param": predict_param,
        "cv_param": cv_param,
        "validation_freqs": validation_freqs,
    }
    super(HeteroSVDParam, self).__init__(**parent_kwargs)

    self.aggregate_iters = aggregate_iters
def __init__(self, task_type=consts.CLASSIFICATION,
             objective_param=ObjectiveParam(),
             learning_rate=0.3, num_trees=5, subsample_feature_rate=1,
             n_iter_no_change=True, tol=0.0001, bin_num=32,
             predict_param=PredictParam(), cv_param=CrossValidationParam(),
             validation_freqs=None, metrics=None,
             subsample_random_seed=None,
             binning_error=consts.DEFAULT_RELATIVE_ERROR):
    """Initialise the common boosting parameters.

    Each argument is stored on the instance under the same name. The three
    param objects (``objective_param``, ``predict_param``, ``cv_param``) are
    deep-copied; every other value is stored as passed.
    """
    super(BoostingParam, self).__init__()

    # Short alias for the copies below; keeps instances from sharing the
    # default param objects built at definition time.
    clone = copy.deepcopy

    self.task_type = task_type
    self.objective_param = clone(objective_param)
    self.learning_rate = learning_rate
    self.num_trees = num_trees
    self.subsample_feature_rate = subsample_feature_rate
    self.n_iter_no_change = n_iter_no_change
    self.tol = tol
    self.bin_num = bin_num
    self.predict_param = clone(predict_param)
    self.cv_param = clone(cv_param)
    self.validation_freqs = validation_freqs
    self.metrics = metrics
    self.subsample_random_seed = subsample_random_seed
    self.binning_error = binning_error
def __init__(self, penalty='L2', tol=1e-5, alpha=1.0, optimizer='sgd',
             batch_size=-1, learning_rate=0.01, init_param=InitParam(),
             max_iter=100, early_stop='diff', predict_param=PredictParam(),
             encrypt_param=EncryptParam(),
             encrypted_mode_calculator_param=EncryptedModeCalculatorParam(),
             cv_param=CrossValidationParam(), decay=1, decay_sqrt=True,
             validation_freqs=None):
    """Initialise the linear model parameters.

    Each argument is stored on the instance under the same name. All param
    objects (``init_param``, ``encrypt_param``,
    ``encrypted_mode_calculator_param``, ``cv_param``, ``predict_param``)
    are stored as deep copies, because their defaults are built once at
    definition time and would otherwise be shared by every instance.
    """
    super(LinearParam, self).__init__()
    self.penalty = penalty
    self.tol = tol
    self.alpha = alpha
    self.optimizer = optimizer
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.init_param = copy.deepcopy(init_param)
    self.max_iter = max_iter
    self.early_stop = early_stop
    # Previously stored by reference, unlike the other param objects, which
    # let instances share (and mutate) the single default EncryptParam().
    self.encrypt_param = copy.deepcopy(encrypt_param)
    self.encrypted_mode_calculator_param = copy.deepcopy(
        encrypted_mode_calculator_param)
    self.cv_param = copy.deepcopy(cv_param)
    self.predict_param = copy.deepcopy(predict_param)
    self.decay = decay
    self.decay_sqrt = decay_sqrt
    self.validation_freqs = validation_freqs
def __init__(self, tree_param: DecisionTreeParam = DecisionTreeParam(),
             task_type=consts.CLASSIFICATION,
             objective_param=ObjectiveParam(),
             learning_rate=0.3, num_trees=5, subsample_feature_rate=1.0,
             n_iter_no_change=True, tol=0.0001,
             encrypt_param=EncryptParam(), bin_num=32,
             encrypted_mode_calculator_param=EncryptedModeCalculatorParam(),
             predict_param=PredictParam(), cv_param=CrossValidationParam(),
             validation_freqs=None, early_stopping_rounds=None,
             use_missing=False, zero_as_missing=False, complete_secure=False,
             metrics=None, use_first_metric_only=False, random_seed=100,
             binning_error=consts.DEFAULT_RELATIVE_ERROR,
             sparse_optimization=False, run_goss=False, top_rate=0.2,
             other_rate=0.1, cipher_compress_error=None, cipher_compress=True,
             new_ver=True, boosting_strategy=consts.STD_TREE, work_mode=None,
             tree_num_per_party=1, guest_depth=2, host_depth=3,
             callback_param=CallbackParam(),
             multi_mode=consts.SINGLE_OUTPUT,
             EINI_inference=False, EINI_random_mask=False,
             EINI_complexity_check=False):
    """Initialise the hetero secure-boost parameters.

    The first group of arguments is passed to the parent constructor (the
    leading ones positionally, the rest by keyword). Everything else is
    stored on this instance under the same name, with ``tree_param`` and
    ``callback_param`` stored as deep copies.
    """
    # Positional part of the parent call; order must match the parent's
    # signature, which is not visible from this method.
    parent_args = (
        task_type, objective_param, learning_rate, num_trees,
        subsample_feature_rate, n_iter_no_change, tol, encrypt_param,
        bin_num, encrypted_mode_calculator_param, predict_param, cv_param,
        validation_freqs, early_stopping_rounds,
    )
    parent_kwargs = {
        "metrics": metrics,
        "use_first_metric_only": use_first_metric_only,
        "random_seed": random_seed,
        "binning_error": binning_error,
    }
    super(HeteroSecureBoostParam, self).__init__(*parent_args, **parent_kwargs)

    # Settings held by this class itself.
    self.tree_param = copy.deepcopy(tree_param)
    self.zero_as_missing = zero_as_missing
    self.use_missing = use_missing
    self.complete_secure = complete_secure
    self.sparse_optimization = sparse_optimization
    self.run_goss = run_goss
    self.top_rate = top_rate
    self.other_rate = other_rate
    self.cipher_compress_error = cipher_compress_error
    self.cipher_compress = cipher_compress
    self.new_ver = new_ver
    self.EINI_inference = EINI_inference
    self.EINI_random_mask = EINI_random_mask
    self.EINI_complexity_check = EINI_complexity_check
    self.boosting_strategy = boosting_strategy
    self.work_mode = work_mode
    self.tree_num_per_party = tree_num_per_party
    self.guest_depth = guest_depth
    self.host_depth = host_depth
    self.callback_param = copy.deepcopy(callback_param)
    self.multi_mode = multi_mode
def __init__(self, tree_param: DecisionTreeParam = DecisionTreeParam(),
             task_type=consts.CLASSIFICATION,
             objective_param=ObjectiveParam(),
             learning_rate=0.3, num_trees=5, subsample_feature_rate=1,
             n_iter_no_change=True, tol=0.0001,
             encrypt_param=EncryptParam(), bin_num=32,
             encrypted_mode_calculator_param=EncryptedModeCalculatorParam(),
             predict_param=PredictParam(), cv_param=CrossValidationParam(),
             validation_freqs=None, early_stopping_rounds=None,
             use_missing=False, zero_as_missing=False, complete_secure=False,
             tree_num_per_party=1, guest_depth=1, host_depth=1,
             work_mode='mix', metrics=None, sparse_optimization=False,
             subsample_random_seed=None,
             binning_error=consts.DEFAULT_RELATIVE_ERROR):
    """Initialise the fast hetero secure-boost parameters.

    work_mode:
        mix: guest and host features are used alternately to build trees.
            For example, the first 'tree_num_per_party' trees use guest
            features, the next 'tree_num_per_party' trees use host
            features, and so on.
        layered: only supports 2 parties. The first 'host_depth' layers of
            a tree use host features, and the next 'guest_depth' layers
            use only guest features.
    tree_num_per_party: each party in turn builds 'tree_num_per_party'
        trees until the maximum tree number is reached. Only valid when
        work_mode is mix.
    guest_depth: guest builds the last 'guest_depth' layers of a decision
        tree using guest features. Only valid when work_mode is layered.
    host_depth: host builds the first 'host_depth' layers of a decision
        tree using host features. Only valid when work_mode is layered.

    All other params are the same as HeteroSecureBoost and are passed on to
    the parent constructor.
    """
    # NOTE(review): the parent constructor must accept the
    # `subsample_random_seed` and `sparse_optimization` keywords - confirm
    # against the parent class signature.
    parent_args = (
        tree_param, task_type, objective_param, learning_rate, num_trees,
        subsample_feature_rate, n_iter_no_change, tol, encrypt_param,
        bin_num, encrypted_mode_calculator_param, predict_param, cv_param,
        validation_freqs, early_stopping_rounds, use_missing,
        zero_as_missing, complete_secure,
    )
    parent_kwargs = {
        "metrics": metrics,
        "subsample_random_seed": subsample_random_seed,
        "sparse_optimization": sparse_optimization,
        "binning_error": binning_error,
    }
    super(HeteroFastSecureBoostParam, self).__init__(*parent_args,
                                                     **parent_kwargs)

    # Settings specific to the fast variant.
    self.tree_num_per_party = tree_num_per_party
    self.guest_depth = guest_depth
    self.host_depth = host_depth
    self.work_mode = work_mode
def __init__(self):
    """Create a boosting model with every attribute set to its initial value.

    Nothing is configured here: hyper parameters start as ``None`` or a
    fixed default, and the running state starts empty.
    """
    super(Boosting, self).__init__()

    # ---- input hyper parameters ----
    self.task_type = None
    self.learning_rate = None
    self.boosting_round = None
    self.n_iter_no_change = None
    self.tol = 0.0
    self.bin_num = None
    self.calculated_mode = None
    self.cv_param = None
    self.validation_freqs = None
    self.feature_name_fid_mapping = {}
    self.mode = None
    self.predict_param = PredictParam()
    self.objective_param = ObjectiveParam()
    self.model_param = BoostingParam()
    self.subsample_feature_rate = 1.0
    self.subsample_random_seed = None
    # model name
    self.model_name = 'default'
    self.early_stopping_rounds = None
    self.use_first_metric_only = False
    self.binning_error = consts.DEFAULT_RELATIVE_ERROR

    # ---- running variables ----

    # random seed
    self.random_seed = 100

    # data
    # original input data
    self.data_inst = None
    # class used for data binning
    self.binning_class = None
    # instance of self.binning_class
    self.binning_obj = None
    # data with transformed features
    self.data_bin = None
    # feature split points
    self.bin_split_points = None
    # feature sparse points
    self.bin_sparse_points = None
    # whether missing values should be handled
    self.use_missing = False
    # presumably: whether zero values are treated as missing - confirm
    self.zero_as_missing = False

    # booster
    # booster dimension
    self.booster_dim = 1
    # booster's hyper parameters
    self.booster_meta = None
    # list that holds the boosters
    self.boosting_model_list = []

    # training
    # feature number
    self.feature_num = None
    # init score
    self.init_score = None
    # number of classes
    self.num_classes = 1
    # function to check loss convergence
    self.convergence = None
    # list of class indices
    self.classes_ = []
    # label
    self.y = None
    # accumulated predict value
    self.y_hat = None
    # loss func
    self.loss = None
    # accumulated predict value for predicting mode
    self.predict_y_hat = None
    # list that holds the loss history
    self.history_loss = []
    self.validation_strategy = None
    self.metrics = None
    self.is_converged = False

    # cache and header alignment
    self.predict_data_cache = PredictDataCache()
    self.data_alignment_map = {}

    # federation
    self.transfer_variable = None
def __init__(self, alpha=1, tol=0.000001,
             n_iter_no_change=False, validation_freqs=None,
             optimizer={'optimizer': 'Adam', 'learning_rate': 0.01},
             nn_define={}, epochs=1,
             intersect_param=IntersectParam(consts.RSA),
             config_type='keras', batch_size=-1,
             encrypte_param=EncryptParam(),
             encrypted_mode_calculator_param=EncryptedModeCalculatorParam(
                 mode="confusion_opt"),
             predict_param=PredictParam(), mode='plain',
             communication_efficient=False, local_round=5,
             callback_param=CallbackParam()):
    """
    Parameters
    ----------
    alpha : float
        a loss coefficient defined in paper, it defines the importance of
        alignment loss
    tol : float
        loss tolerance
    n_iter_no_change : bool
        check loss convergence or not
    validation_freqs : None or positive integer or container object in python
        Do validation in training process or not.
        If None, no validation is done during training;
        if a positive integer, data is validated every validation_freqs
        epochs;
        if a container object, data is validated when the epoch belongs to
        the container, e.g. validation_freqs = [10, 15] validates at
        epochs 10 and 15.
        The default value is None, 1 is suggested. A number larger than 1
        speeds up training by skipping validation rounds; in that case a
        value that evenly divides "epochs" is recommended, otherwise the
        validation scores of the last training epoch are missed.
    optimizer : str or dict
        optimizer method, accepts the following types:
        1. a string, one of "Adadelta", "Adagrad", "Adam", "Adamax",
           "Nadam", "RMSprop", "SGD"
        2. a dict, with a required key-value pair keyed by "optimizer",
           and optional key-value pairs such as learning rate.
        The signature default is
        {'optimizer': 'Adam', 'learning_rate': 0.01}. Stored as a deep copy.
    nn_define : dict
        a dict that represents the structure of the neural network, it can
        be output by tf-keras. Stored as a deep copy.
    epochs : int
        epochs num
    intersect_param
        define the intersect method
    config_type : str
        config type; the signature default is 'keras' (earlier
        documentation listed 'tf-keras' - confirm the accepted values)
    batch_size : int
        batch size when computing transformed feature embedding, -1 use
        full data.
    encrypte_param
        encrypt param; stored on the instance as ``encrypt_param``
    encrypted_mode_calculator_param
        encrypted mode calculator param
    predict_param
        predict param
    mode : {"plain", "encrypted"}
        plain: will not use any encrypt algorithms, data exchanged in
        plaintext
        encrypted: use paillier to encrypt gradients
    communication_efficient : bool
        will use communication efficient or not. When communication
        efficient is enabled, the FTL model updates gradients by several
        local rounds using intermediate data
    local_round : int
        local update round when using communication efficient
    callback_param
        callback param; stored as a deep copy
    """
    super(FTLParam, self).__init__()
    self.alpha = alpha
    self.tol = tol
    self.n_iter_no_change = n_iter_no_change
    self.validation_freqs = validation_freqs
    # The dict defaults are created once at definition time. Copy them so
    # that mutating one instance's optimizer / nn_define cannot leak into
    # other instances (or into the caller's own dict).
    self.optimizer = copy.deepcopy(optimizer)
    self.nn_define = copy.deepcopy(nn_define)
    self.epochs = epochs
    self.intersect_param = copy.deepcopy(intersect_param)
    self.config_type = config_type
    self.batch_size = batch_size
    self.encrypted_mode_calculator_param = copy.deepcopy(
        encrypted_mode_calculator_param)
    self.encrypt_param = copy.deepcopy(encrypte_param)
    self.predict_param = copy.deepcopy(predict_param)
    self.mode = mode
    self.communication_efficient = communication_efficient
    self.local_round = local_round
    self.callback_param = copy.deepcopy(callback_param)
def check(self):
    """Validate the workflow parameters that apply to ``self.method``.

    ``self.method`` is first normalised through
    ``self.check_and_change_lower``. The table / namespace / partition
    attributes relevant to that method are then type-checked in a fixed
    order, followed by the dataio, work_mode, predict and evaluate checks.

    Type checks compare the exact type name, so subclasses are rejected
    (for example a ``bool`` does not pass as ``int``).

    Returns
    -------
    bool
        Always True when every check passes.

    Raises
    ------
    ValueError
        On the first attribute that has the wrong type or value.
    """
    descr = "workflow param's "

    self.method = self.check_and_change_lower(
        self.method,
        ['train', 'predict', 'cross_validation', 'intersect', 'binning',
         'feature_select', 'one_vs_rest_train', "one_vs_rest_predict"],
        descr)

    def ensure_type(attr, type_name, template):
        # Fetch lazily so a failing earlier check raises before later
        # attributes are even read.
        value = getattr(self, attr)
        if type(value).__name__ != type_name:
            raise ValueError(template.format(value))

    method = self.method
    uses_model = method in ("train", "predict", "cross_validation")

    if method in ('train', 'binning', 'feature_select'):
        ensure_type(
            "train_input_table", "str",
            "workflow param's train_input_table {} not supported, should be str type")
        ensure_type(
            "train_input_namespace", "str",
            "workflow param's train_input_namespace {} not supported, should be str type")

    if uses_model:
        ensure_type(
            "model_table", "str",
            "workflow param's model_table {} not supported, should be str type")
        ensure_type(
            "model_namespace", "str",
            "workflow param's model_namespace {} not supported, should be str type")

    if method == 'predict':
        ensure_type(
            "predict_input_table", "str",
            "workflow param's predict_input_table {} not supported, should be str type")
        ensure_type(
            "predict_input_namespace", "str",
            "workflow param's predict_input_namespace {} not supported, should be str type")
        ensure_type(
            "predict_output_table", "str",
            "workflow param's predict_output_table {} not supported, should be str type")
        ensure_type(
            "predict_output_namespace", "str",
            "workflow param's predict_output_namespace {} not supported, should be str type")

    if uses_model:
        ensure_type(
            "predict_result_partition", "int",
            "workflow param's predict_result_partition {} not supported, should be int type")
        ensure_type(
            "evaluation_output_table", "str",
            "workflow param's evaluation_output_table {} not supported, should be str type")
        ensure_type(
            "evaluation_output_namespace", "str",
            "workflow param's evaluation_output_namespace {} not supported, should be str type")

    if method == 'cross_validation':
        ensure_type(
            "data_input_table", "str",
            "workflow param's data_input_table {} not supported, should be str type")
        ensure_type(
            "data_input_namespace", "str",
            "workflow param's data_input_namespace {} not supported, should be str type")
        ensure_type(
            "n_splits", "int",
            "workflow param's n_splits {} not supported, should be int type")
        if self.n_splits <= 0:
            raise ValueError(
                "workflow param's n_splits must be greater or equal to 1")

    # The intersect outputs are optional: only type-checked when set.
    if self.intersect_data_output_table is not None:
        ensure_type(
            "intersect_data_output_table", "str",
            "workflow param's intersect_data_output_table {} not supported, should be str type")

    if self.intersect_data_output_namespace is not None:
        ensure_type(
            "intersect_data_output_namespace", "str",
            "workflow param's intersect_data_output_namespace {} not supported, should be str type")

    DataIOParam.check(self.dataio_param)

    ensure_type(
        "work_mode", "int",
        "workflow param's work_mode {} not supported, should be int type")
    if self.work_mode not in [0, 1]:
        raise ValueError(
            "workflow param's work_mode must be 0 (represent to standalone mode) or 1 (represent to cluster mode)")

    if uses_model:
        PredictParam.check(self.predict_param)
        EvaluateParam.check(self.evaluate_param)

    LOGGER.debug("Finish workerflow parameter check!")
    return True