Example #1
def main(args, max_evals):
    # Hyperparameter search space
    model_path = f"./checkpoint/{args.experiment_id}_ckpt.pth"
    trials_path = f"./results/{args.experiment_id}_trials.p"
    display_step = args.iterations//100 if args.iterations > 1000 else 10 # relative display steps
    space = {#------------------------------------- Architecture -------------------------------------#
             'experiment_id': hp.choice(label='experiment_id', options=[args.experiment_id]),
             'input_size': hp.choice(label='input_size', options=[SIZE]),
             'n_classes': hp.choice(label='n_classes', options=[4_000]),
             #------------------------------ Optimization Regularization -----------------------------#
             'iterations': hp.choice(label='iterations', options=[args.iterations]),
             'display_step': scope.int(hp.choice(label='display_step', options=[display_step])),
             'batch_size': scope.int(hp.choice(label='batch_size', options=[512])),
             #'initial_lr': hp.loguniform(label='lr', low=np.log(5e-3), high=np.log(0.1)),
             'initial_lr': scope.float(hp.choice(label='initial_lr', options=[0.1])),
             'lr_decay': scope.float(hp.choice(label='lr_decay', options=[0.5])),
             'adjust_lr_step': hp.choice(label='adjust_lr_step', options=[300_000//3]),
             'weight_decay': hp.choice(label='weight_decay', options=[5e-4]),
             'with_center_loss': hp.choice(label='with_center_loss', options=[bool(args.with_center_loss)]),
             'initial_clr': hp.choice(label='initial_clr', options=[0.01, 0.05, 0.1, 0.5]),
             'alpha': hp.choice(label='alpha', options=[0.1, 0.01]),
             #'display_step': scope.int(hp.choice(label='eval_epochs', options=[3_000])),
             #--------------------------------------   Others   --------------------------------------#
             'path': hp.choice(label='path', options=[model_path]),
             'trials_path': hp.choice(label='trials_path', options=[trials_path]),
             'random_seed': scope.int(hp.quniform('random_seed', 1, 10, 1))}

    # Hyperparameter search
    trials = Trials()
    fmin_objective = partial(fit_and_log, trials=trials, verbose=True)
    best_model = fmin(fmin_objective, space=space, algo=tpe.suggest, max_evals=max_evals, trials=trials)

    # Save output
    with open(trials_path, "wb") as f:
        pickle.dump(trials, f)
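In the space above, passing a single-element options list to hp.choice pins a value as a run constant, while scope.int and scope.float cast the sampled values. A quick way to sanity-check that a space yields the expected types is to draw a few raw samples with hyperopt.pyll.stochastic; a minimal sketch on a small illustrative space (not the full space from this example):

import numpy as np
from hyperopt import hp
from hyperopt.pyll import scope
from hyperopt.pyll.stochastic import sample

# Illustrative space: a pinned constant, a quantized integer and a log-uniform float.
space = {
    'batch_size': scope.int(hp.choice('batch_size', [512])),
    'random_seed': scope.int(hp.quniform('random_seed', 1, 10, 1)),
    'initial_lr': hp.loguniform('initial_lr', np.log(5e-3), np.log(0.1)),
}

for _ in range(3):
    # Each draw is a plain dict, e.g. {'batch_size': 512, 'initial_lr': 0.02, 'random_seed': 7}
    print(sample(space))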
Example #2
def bayes_tune(plot_results):
    space = {
        "hidden_layers":
        hp.choice("options", [{
            "hidden_layers":
            1,
            "network": (X_train_std.shape[1],
                        scope.int(hp.quniform("1_hidden_1", 5, 32, 1)), 1)
        }, {
            "hidden_layers":
            2,
            "network": (X_train_std.shape[1],
                        scope.int(hp.quniform("2_hidden_1", 5, 32, 1)),
                        scope.int(hp.quniform("2_hidden_2", 5, 32, 1)), 1)
        }]),
        "learning_rate":
        hp.loguniform("learning_rate", np.log(0.0001), np.log(0.2)),
        "iterations":
        scope.int(hp.quniform("iterations", 500, 5000, 1)),
        "reg_param":
        hp.loguniform("reg_param", np.log(0.001), np.log(1))
    }

    bayes_trials = Trials()
    best = fmin(objective,
                space,
                algo=tpe.suggest,
                max_evals=250,
                trials=bayes_trials)

    if plot_results:
        dataplot.plot_results_bayes(bayes_trials)
Example #3
    def tune_hyperparameters(self, X, y):
        space = {
            'num_iterations':
            scope.int(hp.quniform('num_iterations', 100, 2000, 1)),
            'n_estimators':
            scope.int(hp.quniform('n_estimators', 100, 1000, 1)),
            'learning_rate':
            hp.choice('learning_rate', [0.005, 0.01, 0.05]),
            'num_leaves':
            hp.choice('num_leaves', [15, 31, 60, 90]),
            'max_depth':
            hp.choice('max_depth', range(2, 20, 2)),
            'bagging_fraction':
            hp.quniform('bagging_fraction', 0.5, 0.9, 0.1),
            'min_split_gain':
            hp.quniform('min_split_gain', 0.4, 0.9, 0.1),
            # 'min_data_in_leaf': hp.quniform('min_data_in_leaf', range(4, 80, 10)),
        }

        ts = Trials()
        best = fmin(lambda params: self.objective(params, X, y),
                    space,
                    algo=partial(tpe.suggest, n_startup_jobs=15),
                    max_evals=self.TUNIING_PARAMS["max_evals"],
                    verbose=0,
                    trials=ts)
        self.best_param = best
        self.is_tuned = True
Example #4
def _classification_incremental_sgd_classifier() -> dict:
    return {
        'loss':
        hp.choice('loss', [
            'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron',
            'squared_loss', 'huber', 'epsilon_insensitive',
            'squared_epsilon_insensitive'
        ]),
        'penalty':
        hp.choice('penalty', [None, 'l1', 'l2', 'elasticnet']),
        'alpha':
        hp.uniform('alpha', 0.0001, 0.5),
        'l1_ratio':
        hp.uniform('l1_ratio', 0.15, 1.0),
        'fit_intercept':
        hp.choice('fit_intercept', [True, False]),
        'tol':
        hp.uniform('tol', 1e-3, 0.5),
        'epsilon':
        hp.uniform('epsilon', 1e-3, 0.5),
        'learning_rate':
        hp.choice('learning_rate',
                  ['constant', 'optimal', 'invscaling', 'adaptive']),
        'eta0':
        scope.int(hp.quniform('eta0', 4, 30, 1)),
        'power_t':
        hp.uniform('power_t', 0.3, 0.7),
        # 'early_stopping': hp.choice('early_stopping', [True, False]), #needs to be false with partial_fit
        'n_iter_no_change':
        scope.int(hp.quniform('n_iter_no_change', 5, 30, 5)),
        'validation_fraction':
        0.1,
        'average':
        hp.choice('average', [True, False])
    }
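Factory functions like the one above only build the space; it still has to be wired into fmin with an objective, and fmin reports hp.choice parameters as index positions, so space_eval is needed to map the result back to concrete values. A minimal sketch with a placeholder objective (a real one would fit an SGDClassifier with **params and return its validation loss):

from hyperopt import STATUS_OK, Trials, fmin, space_eval, tpe

space = _classification_incremental_sgd_classifier()

def objective(params):
    # Placeholder: train and evaluate a model with **params here and return its loss.
    return {'loss': 0.0, 'status': STATUS_OK}

trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=10, trials=trials)
print(space_eval(space, best))  # resolves hp.choice indices back to the chosen values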
Example #5
def get_parameters(train_data,
                   kFold,
                   iterations,
                   save=False,
                   filepath='./result/loss_time_tpe.csv'):
    def objective(parameters):
        if save:
            loss, timepoint = object_function.cv_method(
                parameters, train_data, kFold, start)
            timepoint_dic.append(timepoint)
            loss_dic.append(loss)

        else:
            loss = object_function.cv_method(parameters, train_data, kFold)

        return loss

    configspace = {
        'boosting_type': hp.choice('boosting_type', ['gbdt']),
        'objective': hp.choice('objective', ['regression_l2']),
        # hp.quniform('max_depth', -10, 10, 1),
        'num_leaves': scope.int(hp.quniform('num_leaves', 10, 35, 1)),
        'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 1, 12,
                                                  1)),
        'max_bin': scope.int(hp.quniform('max_bin', 20, 255, 10)),
        'lambda_l2': scope.int(hp.quniform('lambda_l2', 0, 70, 5)),
        'feature_fraction': hp.uniform('feature_fraction', 0.005, 0.5),
        'min_gain_to_split': hp.uniform('min_gain_to_split', 0.0, 1),
        'verbose': hp.choice('verbose', [-1])
    }

    if save:
        trials = Trials()
        start = time.time()
        timepoint_dic = []
        loss_dic = []
        best = fmin(objective,
                    configspace,
                    algo=tpe.suggest,
                    max_evals=iterations,
                    trials=trials)

        best_parameters = space_eval(configspace, best)
        best_loss = trials.best_trial['result']['loss']

        save_to_csv.save(filepath, timepoint_dic, loss_dic)

    else:
        trials = Trials()
        best = fmin(objective,
                    configspace,
                    algo=tpe.suggest,
                    max_evals=iterations,
                    trials=trials)

        best_parameters = space_eval(configspace, best)
        best_loss = trials.best_trial['result']['loss']

    return best_parameters, best_loss
Example #6
 def __init__(self):
     self.search_space = {
         'learning_rate':
         hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)),
         'L1_flag':
         hp.choice('L1_flag', [True, False]),
         'hidden_size':
         scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256),
                                  1)),
         'batch_size':
         scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096),
                                  1)),
         'lmbda':
         hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)),
         'optimizer':
         hp.choice('optimizer', ["adam", "sgd", 'rms']),
         'margin':
         hp.uniform('margin', 0.5, 8.0),
         'distance_measure':
         hp.choice('distance_measure',
                   ["kl_divergence", "expected_likelihood"]),
         'cmax':
         hp.loguniform('cmax', np.log(0.05), np.log(0.2)),
         'cmin':
         hp.loguniform('cmin', np.log(1), np.log(5)),
         'epochs':
         hp.choice('epochs', [10])  # always choose 10 training epochs.
     }
Example #7
def flat_get_hyperspace(combination):
    # Create hyperspace for optimisation.
    param_hyperopt = {}

    if combination == 'DT':
        hyper = {
            'DT_criterion': hp.choice('DT_criterion', ['gini', 'entropy']),
            'DT_max_depth': scope.int(hp.quniform('DT_max_depth', 5, 15, 1))
        }
    elif combination == 'RF':
        hyper = {
            'RF_max_depth': scope.int(hp.quniform('RF_max_depth', 5, 15, 1)),
            'RF_n_estimators':
            scope.int(hp.quniform('RF_n_estimators', 10, 50, 5))
        }
    elif combination == 'NN':
        hyper = {
            'NN_dropout': hp.uniform('NN_dropout', 0, 0.5),
            'NN_nodes': scope.int(hp.quniform('NN_nodes', 5, 50, 5)),
            'NN_layers': scope.int(hp.quniform('NN_layers', 1, 2, 1))
        }
    elif combination == 'LR':
        hyper = {'LR_penalty': hp.choice('LR_penalty', ['l1', 'l2'])}
    else:
        # Guard so that `hyper` is always defined before the merge below.
        raise ValueError(f'Unsupported combination: {combination}')

    param_hyperopt = {**param_hyperopt, **hyper}

    return param_hyperopt
Example #8
    def _config_tuning_space(tuning_space_raw):
        if tuning_space_raw is None:
            return None

        hyper_obj = {}
        if "learning_rate" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"learning_rate": hp.loguniform('learning_rate', np.log(tuning_space_raw['learning_rate']['min']), np.log(tuning_space_raw['learning_rate']['max']))}}
        if "hidden_size" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"hidden_size": scope.int(hp.qloguniform('hidden_size', np.log(tuning_space_raw['hidden_size']['min']), np.log(tuning_space_raw['hidden_size']['max']), 1))}}
        if "ent_hidden_size" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"ent_hidden_size": scope.int(hp.qloguniform("ent_hidden_size", np.log(tuning_space_raw['ent_hidden_size']['min']), np.log(tuning_space_raw['ent_hidden_size']['max']), 1))}}
        if "rel_hidden_size" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"rel_hidden_size": scope.int(hp.qloguniform("rel_hidden_size", np.log(tuning_space_raw['rel_hidden_size']['min']), np.log(tuning_space_raw['rel_hidden_size']['max']), 1))}}
        if "batch_size" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"batch_size": scope.int(hp.qloguniform("batch_size", np.log(tuning_space_raw['batch_size']['min']), np.log(tuning_space_raw['batch_size']['max']), 1))}}
        if "margin" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"margin": hp.uniform("margin", tuning_space_raw["margin"]["min"], tuning_space_raw["margin"]["max"])}}
        if "lmbda" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"lmbda": hp.loguniform('lmbda', np.log(tuning_space_raw["lmbda"]["min"]), np.log(tuning_space_raw["lmbda"]["max"]))}}
        if "distance_measure" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"distance_measure": hp.choice('distance_measure', tuning_space_raw["distance_measure"])}}
        if "cmax" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"cmax": hp.loguniform('cmax', np.log(tuning_space_raw["cmax"]["min"]), np.log(tuning_space_raw["cmax"]["max"]))}}
        if "cmin" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"cmin": hp.loguniform('cmin', np.log(tuning_space_raw["cmin"]["min"]), np.log(tuning_space_raw["cmin"]["max"]))}}
        if "optimizer" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"optimizer": hp.choice("optimizer", tuning_space_raw["optimizer"])}}
        if "bilinear" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"bilinear": hp.choice('bilinear', tuning_space_raw["bilinear"])}}
        if "epochs" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"epochs": hp.choice("epochs", tuning_space_raw["epochs"])}}

        return hyper_obj
Example #9
    def get_default_parameter_space():
        """
        Return:
            dict of DistributionWrappers
        """

        return {
            'n_estimators':
            scope.int(hp.quniform('n_estimators', 5, 10, 5)),
            'learning_rate':
            hp.loguniform('learning_rate', -7, 0),
            'num_leaves':
            scope.int(hp.qloguniform('num_leaves', 1, 7, 1)),
            'feature_fraction':
            hp.uniform('feature_fraction', 0.5, 1),
            'bagging_fraction':
            hp.uniform('bagging_fraction', 0.5, 1),
            'min_data_in_leaf':
            scope.int(hp.qloguniform('min_data_in_leaf', 0, 6, 1)),
            'min_sum_hessian_in_leaf':
            hp.loguniform('min_sum_hessian_in_leaf', -16, 5),
            'lambda_l1':
            hp.loguniform('lambda_l1', -16, 2),
            'lambda_l2':
            hp.loguniform('lambda_l2', -16, 2),
        }
Example #10
 def __init__(self):
     self.search_space = {
       'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)),
       'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256),1)),
       'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096),1)),
       'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)),
       'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']),
       'epochs': hp.choice('epochs', [10]) # always choose 10 training epochs.
     }
Example #11
    def __call__(self):
        print_exp(self.exp_name)
        if self.only_q:
            pv = ParamValues(lr=hp.loguniform("lr", np.log(1e-4),
                                              np.log(1e-4)),
                             q=(18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
                                30),
                             epochs=(250, ),
                             batch=(32, ))
        elif self.only_batch:
            pv = ParamValues(lr=hp.loguniform("lr", np.log(1e-4),
                                              np.log(1e-3)),
                             q=(23, ),
                             epochs=(250, ),
                             batch=(16, 32, 50, 100, 128, 200))
        elif self.only_epochs:
            pv = ParamValues(lr=hp.loguniform("lr", np.log(1e-4),
                                              np.log(1e-4)),
                             q=(23, ),
                             epochs=(100, 120, 150, 170, 200, 250, 300, 400,
                                     500),
                             batch=(32, ))
        elif self.only_lr:
            pv = ParamValues(lr=hp.loguniform("lr", np.log(1e-4),
                                              np.log(1e-3)),
                             q=(23, ),
                             epochs=(250, ),
                             batch=(32, ))
        else:
            pv = ParamValues(
                lr=hp.loguniform("lr", np.log(1e-4), np.log(1e-3)),
                q=scope.int(hp.qloguniform("q", np.log(10), np.log(100), 1)),
                #q=(18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,35),
                epochs=scope.int(
                    hp.qloguniform("epochs", np.log(10), np.log(500), 10)),
                #epochs=(100, 125, 150, 175, 200, 250, 300, 400, 500, 700),
                batch=(4, 8, 16, 32, 64, 128, 256))

        hyper_params = {
            "data": {},
            "model": {
                "lr": pv.lr,
                #"encoding_dim": hp.choice("encoding_dim", pv.q),  ##
                "encoding_dim": pv.q,
            },
            "fit": {
                #"epochs": hp.choice("epochs", pv.epochs),  #
                "epochs": pv.epochs,
                "batch_size": hp.choice("batch_size", pv.batch)
            }
        }

        run = RunFN(self.metric, hyper_params, pv, self.data_path, self.sep,
                    self.run_on_mongodb, self.start_mongodb, self.db_name,
                    self.exp_name, self.ip, self.port, self.nr_of_trials,
                    self.nr_of_workers)
        run()
Example #12
 def __init__(self):
     self.search_space = {
       'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)),
       'L1_flag': hp.choice('L1_flag', [True, False]),
       'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(128),1)),
       'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096),1)),
       'margin': hp.uniform('margin', 0.0, 2.0),
       'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']),
       'epochs': hp.choice('epochs', [10]) # always choose 10 training epochs.
     }
Example #13
def main():
    base_name = 'stg2/01_lgb'
    seed = 201_000

    args = parse_args()
    cmd = CmdFactory(_CV)

    if not os.path.exists('artifacts/stg1'):
        os.makedirs('artifacts/stg1')

    X, y = load_train_data()
    X_test, _ = load_test_data()

    if args.type == 'hpt':
        space = {
            'learning_rate': hp.uniform('learning_rate', 0.01, 0.5),
            'max_depth': scope.int(hp.quniform("max_depth", 1, 7, 1)),
            'num_leaves': scope.int(hp.quniform('num_leaves', 2, 20, 1)),
            'feature_fraction': hp.uniform('feature_fraction', 0.4, 0.8),
            'bagging_fraction': hp.uniform('bagging_fraction', 0.4, 1.0),
            'max_bin': scope.int(hp.quniform("max_bin", 400, 700, 1)),
        }
        cmd.hpt(
            train_data=(X, y),
            space=space,
            seed=seed,
            trials_file='artifacts/{}_trials.pickle'.format(base_name),
            max_iter=args.max_iter,
            steps=args.trial_steps,
            n_class=10,
        )
    elif args.type == 'pred':
        params = {
            'learning_rate': 0.029191104512834937,
            'max_depth': 4,
            'num_leaves': 9,
            'feature_fraction': 0.7115094613220588,
            'bagging_fraction': 0.4897073688073762,
            'max_bin': 639,
        }
        cmd.pred(
            train_data=(X, y),
            test_data=X_test,
            params=params,
            seed=seed,
            out_tr='artifacts/{}_train.npy'.format(base_name),
            out_test='artifacts/{}_test.npy'.format(base_name),
            n_bags=args.n_bags,
            n_class=10,
        )
    else:
        raise ValueError('type must be hpt or pred.')
Example #14
def _classification_incremental_perceptron() -> dict:
    return {
        'penalty': hp.choice('penalty', [None, 'l1', 'l2', 'elasticnet']),
        'alpha': hp.uniform('alpha', 0.0001, 0.5),
        'fit_intercept': hp.choice('fit_intercept', [True, False]),
        'tol': hp.uniform('tol', 1e-3, 0.5),
        'shuffle': hp.choice('shuffle', [True, False]),
        'eta0': scope.int(hp.quniform('eta0', 4, 30, 1)),
        # 'early_stopping': hp.choice('early_stopping', [True, False]), #needs to be false with partial_fit
        'validation_fraction': 0.1,
        'n_iter_no_change': scope.int(hp.quniform('n_iter_no_change', 5, 30,
                                                  5))
    }
Example #15
 def run(self):
     """
     Hyper-parameter optimization with hyperopt.
     """
     if self.pp['net']:
         space = {
             # Qlearnnet
             'net_lr':
             hp.loguniform('net_lr', np.log(5e-7), np.log(1e-4)),
             'net_lr_decay':
             hp.loguniform('net_lr_decay', np.log(0.90), np.log(0.99)),
             # Singh
             # 'net_lr': hp.loguniform('net_lr', np.log(1e-7), np.log(5e-4)),
             'beta':
             hp.uniform('beta', 16, 30),
             # Double
             'net_copy_iter':
             hp.loguniform('net_copy_iter', np.log(5), np.log(150)),
             'net_creep_tau':
             hp.loguniform('net_creep_tau', np.log(0.01), np.log(0.7)),
             # Exp. replay
             'batch_size':
             scope.int(hp.uniform('batch_size', 8, 16)),
             'buffer_size':
             scope.int(hp.uniform('buffer_size', 2000, 10000)),
             # N-step
             'n_step':
             scope.int(hp.uniform('n_step', 3, 40)),
             # Policy
             'vf_coeff':
             hp.uniform('vf_coeff', 0.005, 0.5),
             'entropy_coeff':
             hp.uniform('entropy_coeff', 1.0, 100.0)
         }
     else:
         space = {
             'beta': hp.uniform('beta', 7, 23),
             'alpha': hp.uniform('alpha', 0.0001, 0.4),
             'alpha_decay': hp.uniform('alpha_decay', 0.9999, 0.9999999),
             'epsilon': hp.loguniform('epsilon', np.log(0.2), np.log(0.8)),
             'epsilon_decay': hp.uniform('epsilon_decay', 0.9995,
                                         0.9999999),
             'gamma': hp.uniform('gamma', 0.7, 0.90),
             'lambda': hp.uniform('lambda', 0.0, 1.0)
         }
     # Only optimize parameters specified in args
     space = {param: space[param] for param in self.pp['hopt']}
     if self.pp['hopt_fname'].startswith('mongo:'):
         self._hopt_mongo(space)
     else:
         self._hopt_pickle(space)
Example #16
 def get_default_parameter_space():
     """
     Return:
         dict of DistributionWrappers
     """
     return {
         'iterations': scope.int(hp.quniform('iterations', 5, 10, 5)),
         'depth': scope.int(hp.quniform('depth', 1, 11, 1)),
         'learning_rate': hp.loguniform('learning_rate', -5, -1),
         'rsm': hp.uniform('rsm', 0, 1),
         'leaf_estimation_method': hp.choice('leaf_estimation_method', ['Newton', 'Gradient']),
         'l2_leaf_reg': scope.int(hp.quniform('l2_leaf_reg', 1, 10, 1)),
         'bagging_temperature': hp.uniform('bagging_temperature', 0, 2),
     }
Example #17
    def _config_tuning_space(tuning_space_raw):
        if tuning_space_raw is None:
            return None

        hyper_obj = {}
        if "learning_rate" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"learning_rate": hp.loguniform('learning_rate', np.log(tuning_space_raw['learning_rate']['min']), np.log(tuning_space_raw['learning_rate']['max']))}}
        if "hidden_size" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"hidden_size": scope.int(hp.qloguniform('hidden_size', np.log(tuning_space_raw['hidden_size']['min']), np.log(tuning_space_raw['hidden_size']['max']), 1))}}
        if "ent_hidden_size" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"ent_hidden_size": scope.int(hp.qloguniform("ent_hidden_size", np.log(tuning_space_raw['ent_hidden_size']['min']), np.log(tuning_space_raw['ent_hidden_size']['max']), 1))}}
        if "rel_hidden_size" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"rel_hidden_size": scope.int(hp.qloguniform("rel_hidden_size", np.log(tuning_space_raw['rel_hidden_size']['min']), np.log(tuning_space_raw['rel_hidden_size']['max']), 1))}}
        if "batch_size" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"batch_size": scope.int(hp.qloguniform("batch_size", np.log(tuning_space_raw['batch_size']['min']), np.log(tuning_space_raw['batch_size']['max']), 1))}}
        if "margin" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"margin": hp.uniform("margin", tuning_space_raw["margin"]["min"], tuning_space_raw["margin"]["max"])}}
        if "lmbda" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"lmbda": hp.loguniform('lmbda', np.log(tuning_space_raw["lmbda"]["min"]), np.log(tuning_space_raw["lmbda"]["max"]))}}
        if "distance_measure" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"distance_measure": hp.choice('distance_measure', tuning_space_raw["distance_measure"])}}
        if "cmax" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"cmax": hp.loguniform('cmax', np.log(tuning_space_raw["cmax"]["min"]), np.log(tuning_space_raw["cmax"]["max"]))}}
        if "cmin" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"cmin": hp.loguniform('cmin', np.log(tuning_space_raw["cmin"]["min"]), np.log(tuning_space_raw["cmin"]["max"]))}}
        if "optimizer" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"optimizer": hp.choice("optimizer", tuning_space_raw["optimizer"])}}
        if "bilinear" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"bilinear": hp.choice('bilinear', tuning_space_raw["bilinear"])}}
        if "epochs" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"epochs": hp.choice("epochs", tuning_space_raw["epochs"])}}
        if "feature_map_dropout" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"feature_map_dropout": hp.choice('feature_map_dropout', tuning_space_raw["feature_map_dropout"])}}
        if "input_dropout" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"input_dropout": hp.choice('input_dropout', tuning_space_raw["input_dropout"])}}
        if "hidden_dropout" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"hidden_dropout": hp.choice('hidden_dropout', tuning_space_raw["hidden_dropout"])}}
        if "use_bias" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"use_bias": hp.choice('use_bias', tuning_space_raw["use_bias"])}}
        if "label_smoothing" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"label_smoothing": hp.choice('label_smoothing', tuning_space_raw["label_smoothing"])}}
        if "lr_decay" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"lr_decay": hp.choice('lr_decay', tuning_space_raw["lr_decay"])}}
        if "l1_flag" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"l1_flag": hp.choice('l1_flag', tuning_space_raw["l1_flag"])}}
        if "sampling" in tuning_space_raw:
            hyper_obj = {**hyper_obj, **{"sampling": hp.choice('sampling', tuning_space_raw["sampling"])}}

        return hyper_obj
Example #18
def create_hyperopt_space(string_space):
    """Convert the parameters passed from the command line into the corresponding hyperopt space definition.

    WARNING: Uses eval() to convert strings to names in the namespace.

    For example,

        {'C': 'float:hp.lognormal:[0, 1]', 'kernel': 'rbf', 'gamma': 'float:hp.lognormal:[0, 1]'}

    gets converted to

        {'C': hp.lognormal('C', 0, 1), 'kernel': 'rbf', 'gamma': hp.lognormal('gamma', 0, 1)}

    Args:
        string_space (dict): dictionary of space definitions taken from the raw JSON.
    Returns:
        (dict): space dictionary ready to be used by hyperopt.
    """
    space = {}
    for k, v in string_space.items():
        if ':' in v:  # contains specifications that need to be converted to hyperopt ranges
            p_type, p_func, p_params = v.split(':')
            p_params = ast.literal_eval(p_params)
            if p_type == 'int':
                # convert to integer after evaluation
                space[k] = scope.int(eval(p_func)(k, *p_params))  # pylint: disable=eval-used
            elif p_type == 'float':
                # no need to convert because hyperopt natively returns floats
                space[k] = eval(p_func)(k, *p_params)  # pylint: disable=eval-used
            else:
                raise TypeError("parameter types must be int or float")
        else:  # is a constant string
            space[k] = v
    return space
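As a usage sketch (assuming create_hyperopt_space and hp are importable in the same namespace, which the eval() call requires), the string-encoded definition from the docstring converts into a space that can be sampled or handed straight to fmin:

from hyperopt.pyll.stochastic import sample

string_space = {
    'C': 'float:hp.lognormal:[0, 1]',
    'kernel': 'rbf',
    'gamma': 'float:hp.lognormal:[0, 1]',
}
space = create_hyperopt_space(string_space)
# One random draw, e.g. {'C': 1.3, 'kernel': 'rbf', 'gamma': 0.6}
print(sample(space))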
Example #19
class Profile:

    LGBM_ORIGINAL_NAME = 'LGBM_ORIGINAL'

    LGBM_ORIGINAL = {
        'fixed_hyperparameters': {
            'learning_rate': 0.01,
            'num_leaves': 60,
            'feature_fraction': 0.6,
            'bagging_fraction': 0.6,
            'bagging_freq': 2,
            'num_iterations': 600,
            'min_data_in_leaf': 20,
            'boosting_type': 'gbdt',
            'objective': 'binary',
            'boost_from_average': False
        },
        'search_space': {
            'learning_rate':
            hp.loguniform('learning_rate', np.log(0.001), np.log(0.01)),
            'num_leaves':
            scope.int(hp.quniform('num_leaves', 10, 80, 5)),
            'feature_fraction':
            hp.loguniform('feature_fraction', np.log(0.6), np.log(0.9)),
            'bagging_fraction':
            hp.loguniform('bagging_fraction', np.log(0.6), np.log(0.9)),
            'bagging_freq':
            scope.int(hp.quniform('bagging_freq', 2, 10, 1)),
            'num_iterations':
            scope.int(hp.quniform('num_iterations', 100, 800, 50)),
            'min_data_in_leaf':
            scope.int(hp.quniform('min_data_in_leaf', 20, 50, 10)),
            'boosting_type':
            'gbdt',
            'objective':
            'binary',
            'boost_from_average':
            False
        }
    }

    NAME_PROFILE_MAP = {LGBM_ORIGINAL_NAME: LGBM_ORIGINAL}

    @staticmethod
    def parse_profile(profile_name):
        profile = Profile.NAME_PROFILE_MAP[profile_name]
        return profile['fixed_hyperparameters'], profile['search_space']
Example #20
def _regression_random_forest() -> dict:
    return {
        'n_estimators': hp.choice('n_estimators',
                                  np.arange(150, 1000, dtype=int)),
        'max_features': hp.choice('max_features',
                                  ['sqrt', 'log2', 'auto', None]),
        'max_depth': scope.int(hp.quniform('max_depth', 4, 30, 1)),
    }
Example #21
 def get_default_parameter_space():
     """
     Return:
         dict of DistributionWrappers
     """
     return {
         'n_estimators': scope.int(hp.quniform('n_estimators', 5, 10, 5)),
         'eta': hp.loguniform('eta', -7, 0),
         'max_depth': scope.int(hp.quniform('max_depth', 2, 10, 1)),
         'subsample': hp.uniform('subsample', 0.5, 1),
         'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
         'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
         'min_child_weight': hp.loguniform('min_child_weight', -16, 5),
         'gamma': hp.loguniform('gamma', -16, 2),
         'lambdax': hp.loguniform('lambdax', -16, 2),
         'alpha': hp.loguniform('alpha', -16, 2)
     }
Example #22
def get_search_space():
    return {
        'batch_size': scope.int(hp.uniform('batch_size', 30, 40)),
        'epochs': hp.choice('epochs', [2, 5]),
        'activation': hp.choice('activation', ['relu', 'elu']),
        'optimizer': hp.choice('optimizer', ['adam', 'sgd']),
        'dropout': hp.uniform('dropout', 0, 1)
    }
Example #23
    def BayesianOptOnCV(self):
        """
        We tried Bayesian optimization with k-fold cross-validation.
        Here it is implemented as a method.
        """
        
        skf = StratifiedKFold(n_splits=5)
        train = self.train
        y = self.y
        
        
        param_hyperopt= {
                        'learning_rate': hp.loguniform('learning_rate', np.log(0.001), np.log(1)),
                        'max_depth': scope.int(hp.quniform('max_depth', 3, 5, 1)),
                        'n_estimators': 80,
                        'colsample_bytree': hp.uniform('colsample_by_tree', 0.6, 1.0),
                        'reg_lambda': hp.quniform('reg_lambda', 700, 900, 10),
                        'gamma' : hp.quniform('gamma', 0, 20, 1),
                        'tree_method' : "gpu_hist"
                        }
        
        def objective_function(params):
            model = xgb.XGBClassifier(**params)
            # Collect fold scores per trial so earlier evaluations do not leak into the mean.
            cross_val_ndgc = []
            for train_index, validation_index in skf.split(train, y):

                X_train = train.iloc[train_index]
                y_train = y.iloc[train_index]
                X_test = train.iloc[validation_index]
                y_test = y.iloc[validation_index].to_numpy()

                model.fit(X_train, y_train)
                validation_probs = model.predict_proba(X_test)
                validation_preds = create_preds(validation_probs)

                cross_val_ndgc.append(ndgc(validation_preds, y_test))

            score = np.mean(cross_val_ndgc)
            return {'loss': -score, 'status': STATUS_OK}
    
        
        def bayesian_optimization():
            trials = Trials()
            best_param = fmin(objective_function, 
                              param_hyperopt, 
                              algo=tpe.suggest, 
                              max_evals=20, 
                              trials=trials,
                              rstate= np.random.RandomState(1))

            loss = [x['result']['loss'] for x in trials.trials]
            best_param_values = [x for x in best_param.values()]
            return best_param
        
        
        best_param = bayesian_optimization()
Example #24
def _classification_decision_tree() -> dict:
    return {
        'max_depth':
        scope.int(hp.quniform('max_depth', 4, 30, 1)),
        'min_samples_split':
        hp.choice('min_samples_split', np.arange(2, 10, dtype=int)),
        'min_samples_leaf':
        hp.choice('min_samples_leaf', np.arange(1, 10, dtype=int)),
    }
Example #25
def get_shift_space(indi, prefix):
    space = {
        'start': 1,
        'stop':
        scope.int(hp.quniform(indi + prefix + '__shift__stop', 2, 15, 1)),
        'step':
        1  # scope.int(hp.quniform(indi+prefix+'__shift__step', 1, 5, 1))
    }
    return hp.choice(indi + prefix + '__shift__params', [None, space])
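Because the returned node is an hp.choice over None and a nested sub-space, any objective that consumes it has to branch on what was sampled. A minimal sketch that only draws from the space (the 'close' and '_ema' labels are illustrative, and get_shift_space from the example above is assumed to be in scope):

from hyperopt.pyll.stochastic import sample

shift_node = get_shift_space('close', '_ema')

for _ in range(5):
    shift = sample(shift_node)
    if shift is None:
        print('no shift applied')
    else:
        # e.g. {'start': 1, 'step': 1, 'stop': 7} -> lags 1..6
        print('shift lags:', list(range(shift['start'], shift['stop'], shift['step'])))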
Example #26
def hyperopt_bayesian_optimization(X, y):

    param_space = {
        'max_depth': scope.int(hp.quniform('max_depth', 3, 15, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 100, 600, 1)),
        'criterion': hp.choice('criterion', ['gini', 'entropy']),
        'max_features': hp.uniform('max_features', 0.01, 1)
    }

    optimization_function = partial(optimize, x=X, y=y)

    trials = Trials()

    result = fmin(fn=optimization_function,
                  space=param_space,
                  algo=tpe.suggest,
                  max_evals=15,
                  trials=trials)

    print(result)
Example #27
def test_hyperopt2skopt_space():
    hyperopt_space = {
        'int_uniform': scope.int(hp.uniform('l_int_uniform', 1, 7)),
        'randint': hp.randint('l_randint', 7),
        'uniform': hp.uniform('l_uniform', -3, 3),
        'uniform_named': hp.uniform('l_uniform_named', low=1, high=10),
        'uniform_part_named': hp.uniform('l_uniform_part_named', 1, high=10),
        'unsupported': hp.loguniform('l_unsupported', -1, 5),
        'choice': hp.choice('choice', ['a', 'b', 4]),
        'random_param': 'just_one_val',
    }

    space, ind2names = hyperopt2skopt_space(hyperopt_space, sample_size=100)
    assert len(space) == len(ind2names)
    named_space = {ind2names[i]: space[i] for i in range(len(space))}

    int_uniform = named_space['int_uniform']
    assert isinstance(int_uniform, Integer)
    assert int_uniform.low == 1
    assert int_uniform.high == 7

    randint = named_space['randint']
    assert isinstance(randint, Integer)
    assert randint.low == 0
    assert randint.high == 7

    uniform = named_space['uniform']
    assert isinstance(uniform, Real)
    assert uniform.low == -3
    assert uniform.high == 3

    uniform_named = named_space['uniform_named']
    assert isinstance(uniform_named, Real)
    assert uniform_named.low == 1
    assert uniform_named.high == 10

    uniform_part_named = named_space['uniform_part_named']
    assert isinstance(uniform_part_named, Real)
    assert uniform_part_named.low == 1
    assert uniform_part_named.high == 10

    unsupported = named_space['unsupported']
    assert isinstance(unsupported, Categorical)
    assert len(unsupported.categories) == 100
    assert all([np.exp(-1) <= x <= np.exp(5) for x in unsupported.categories])

    choice = named_space['choice']
    assert isinstance(choice, Categorical)
    assert set(choice.categories) == {'a', 'b', 4}

    random_param = named_space['random_param']
    assert isinstance(random_param, Categorical)
    assert set(random_param.categories) == {'just_one_val'}
Example #28
    def get_default_parameter_space():
        """
        Return:
            dict of DistributionWrappers
        """

        return {
            'n_estimators':
            scope.int(hp.quniform('n_estimators', 100, 2000, 200)),
            'max_samples': hp.uniform('max_samples', 0.1, 1),
            'contamination': hp.loguniform('contamination', -5, 0),
        }
Example #29
def stage_2_optimization_guidedlda(
        data_vect,
        seed_topics,
        dictionary,
        feature_names,
        corpus,
        param_dict={},  # add **params_optimal_stage_1 in deploy
        metric_to_optimize='coherence_consistent',
        stage_2_params={
            'n_topics_min': 14,
            'n_topics_max': 16,
            'n_iter_param': 2000,
            'refresh_param': 200,
            'seed_confidence': 0,
            'max_eval_param': 1000,
            'timeout_param': 86400,
        },
        random_seed=420):
    '''
    Stage 2 optimization for GuidedLDA: tunes the number of topics (n_topics) while
    keeping the optimal parameters found in Stage 1 (param_dict) fixed.
    '''

    # Define some values of Stage 2 optimization

    # Param space for Stage 2.
    param_space_stage2 = {
        'n_topics':
        scope.int(
            hp.quniform('n_topics', stage_2_params['n_topics_min'],
                        stage_2_params['n_topics_max'], 1)),
    }

    # Stage 2 optimization with fixed optimal params from Stage 1.
    result_stage_2, trials_stage_2 = optimization_hyper_params_guidedlda(
        data_vect=data_vect,
        seed_topics=seed_topics,
        dictionary=dictionary,
        feature_names=feature_names,
        corpus=corpus,
        param_space=param_space_stage2,
        param_dict=param_dict,
        n_iter_param=stage_2_params['n_iter_param'],
        refresh_param=stage_2_params['refresh_param'],
        seed_confidence=stage_2_params['seed_confidence'],
        random_seed=random_seed,
        max_eval_param=stage_2_params['max_eval_param'],
        timeout_param=stage_2_params['timeout_param'],
        metric_to_optimize=metric_to_optimize)

    # Define optimal params values as joined dictionary from both Stages
    optimal_params_result = {**param_dict, **result_stage_2}

    return optimal_params_result, trials_stage_2
Example #30
def get_hyperspace(combination):
    # create hyperspace for optimisation (currently supports DT, RF, NN and LR).
    param_hyperopt = {}

    for node, clf in enumerate(combination):

        if clf == 'DT':
            hyper = {
                'DT_criterion_' + str(node):
                hp.choice('DT_criterion_' + str(node), ['gini', 'entropy']),
                'DT_max_depth_' + str(node):
                scope.int(hp.quniform('DT_max_depth_' + str(node), 5, 15, 1))
            }
        elif clf == 'RF':
            hyper = {
                'RF_max_depth_' + str(node):
                scope.int(hp.quniform('RF_max_depth_' + str(node), 5, 15, 1)),
                'RF_n_estimators_' + str(node):
                scope.int(
                    hp.quniform('RF_n_estimators_' + str(node), 10, 50, 5))
            }
        elif clf == 'NN':
            hyper = {
                'NN_dropout_' + str(node):
                hp.uniform('NN_dropout_' + str(node), 0, 0.5),
                'NN_nodes_' + str(node):
                scope.int(hp.quniform('NN_nodes_' + str(node), 5, 50, 5)),
                'NN_layers_' + str(node):
                scope.int(hp.quniform('NN_layers_' + str(node), 1, 2, 1))
            }
        elif clf == 'LR':
            hyper = {
                'LR_penalty_' + str(node):
                hp.choice('LR_penalty_' + str(node), ['l1', 'l2'])
            }

        param_hyperopt = {**param_hyperopt, **hyper}

    return param_hyperopt