Example no. 1
    def run(self):

        # split text
        Xs = self.split_lex_mturk()

        # optimize on the val data and test on the testing data
        self.evaluator.eval_data, self.evaluator.eval_targets, self.evaluator.eval_candidates, self.evaluator.eval_pos_tags = [x[0] for x in Xs]
        # lines = self.config['fdata'].read_text()
        # sens = [line.split() for line in lines.split('\n')[:-1]]
        # self.eval_data, self.eval_targets, self.eval_candidates, self.eval_pos_tags = sens,self.targets,self.candidates,self.pos_tags
        x0 = [10, 5, 0.03]
        space = [
            Integer(2, 50),
            Integer(2, 10),
            Real(10 ** -6, 10 ** -1, 'log-uniform')
        ]
        res = self.minimize(space, x0=x0, n_calls=40, verbose=True)

        # check the res and the evaluator consistency
        # print(res.x,res.fun)
        # print(self.evaluator.best_parameters,self.evaluator.best_score)

        # replace the eval data with the test data
        self.evaluator.eval_data, self.evaluator.eval_targets, self.evaluator.eval_candidates, self.evaluator.eval_pos_tags = [x[1] for x in Xs]
        # print(f'data acc: {-res.fun}')
        best_emb = self.evaluator.best_emb
        best_parameters = self.evaluator.best_parameters
        test_acc = self.evaluator.evaluate_emb(best_emb, best_parameters)
        with open('test_acc.pickle', 'wb') as f_acc:
            pickle.dump(test_acc, f_acc, pickle.HIGHEST_PROTOCOL)
        print(f'test acc: {test_acc}')
        dump(res, 'res-hyp.pickle', store_objective=False)
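
A hedged aside (not part of the snippet above): because the result is persisted with skopt's dump, it can later be reloaded with skopt.load for inspection; the filename matches the call above.

from skopt import load

res = load('res-hyp.pickle')
print('best parameters:', res.x)         # point with the best objective value
print('best score:', res.fun)            # objective value at res.x
print('evaluations run:', len(res.x_iters))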
Example no. 2
def run(results_dir, n_calls=200, acq_optimizer="lbfgs"):
    bounds = np.tile((-5., 5.), (7, 1))
    optimizers = [("gp_minimize", gp_minimize),
                  ("forest_minimize", forest_minimize),
                  ("gbrt_minimize", gbrt_minimize),
                  ("dummy_minimize", dummy_minimize)]

    for name, optimizer in optimizers:
        print(name)
        #        model_dir = os.path.join(results_dir, name)
        if not os.path.exists(results_dir):
            os.makedirs(results_dir, exist_ok=True)

        if name == "gp_minimize":
            res = optimizer(stybtang,
                            bounds,
                            random_state=0,
                            n_calls=n_calls,
                            noise=1e-10,
                            verbose=True,
                            acq_optimizer=acq_optimizer,
                            n_jobs=-1)
        elif name == "dummy_minimize":
            res = optimizer(stybtang, bounds, random_state=0, n_calls=n_calls)
        else:
            res = optimizer(stybtang, bounds, random_state=0, n_calls=n_calls)

        dump(res, os.path.join(results_dir, name))  # save into the results directory created above
 def optimize_all_xgb_params(self):
     res = forest_minimize(
         self.cv_test_comb_xgb_params,
         [
             (0.015, 0.035),
             (3, 8),
             (0.6, 0.9),
             (0.5, 1),
             (0, 0.5),
             (0.015, 0.035),
             (3, 8),
             (0.6, 0.9),
             (0.5, 1),
             (0, 0.5),
             (0.015, 0.035),
             (3, 8),
             (0.6, 0.9),
             (0.5, 1),
             (0, 0.5),
             #(0.015, 0.035), (3, 8), (0.6, 0.9), (0.5, 1), (0, 0.5),
             #(0.015, 0.035), (3, 8), (0.6, 0.9), (0.5, 1), (0, 0.5)
         ],
         x0=[
             0.0245, 5, 0.8, 1, 0, 0.03, 5, 0.8, 0.8, 0.4, 0.03, 5, 0.8, 1,
             0, 0.037, 5, 0.8, 0.8, 0.4, 0.033, 6, 0.8, 1, 0
         ],
         y0=0.05323593162735551,
         n_calls=75)
     dump(res, "xgb_all_opt.gz")
     print(res.x)
     print(res.fun)
     print("")
Example no. 4
def main():
    if rank == 0:
        hyperparameters = {
            'kernelSize1': np.arange(2, 10),
            'stride1': np.arange(1, 5),
            'dropout1': np.linspace(0.0, 0.8),
            'kernelSize2': np.arange(2, 10),
            'stride2': np.arange(1, 5),
            'dropout2': np.linspace(0.0, 0.8),
            'learningRate': np.linspace(0.001, 0.1)
        }

        hyperspace = HyperSpace(hyperparameters)
        all_intervals = hyperspace.fold_space()
        hyperspaces = hyperspace.hyper_permute(all_intervals)
        subspace_keys, subspace_boundaries = hyperspace.format_hyperspace(
            hyperspaces)
    else:
        subspace_keys, subspace_boundaries = None, None

    space = comm.scatter(subspace_boundaries, root=0)

    deadline = DeadlineStopper(18000)
    # Gaussian process minimization (see scikit-optimize skopt module for other optimizers)
    res_gp = gp_minimize(objective,
                         space,
                         n_calls=50,
                         callback=deadline,
                         random_state=0,
                         verbose=True)
    # Each worker will write their results to disk
    dump(res_gp, 'hyper_results/gp_subspace_' + str(rank))
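
A hedged aggregation sketch (not in the original): once all MPI ranks have finished, the per-rank result files written above can be gathered on a single node and compared; the glob pattern matches the dump path used in the example.

import glob
from skopt import load

results = [load(path) for path in sorted(glob.glob('hyper_results/gp_subspace_*'))]
best = min(results, key=lambda r: r.fun)
print('best subspace:', best.x, 'score:', best.fun)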
Example no. 5
def test_hyper_search_example_trainer():
    list_data = generate_toy_data(100000)
    data_split_hash = ExampleTrainer(list_data, seed=42).data_split_hash

    @use_named_args(space)
    def run_example_trainer(**kwargs):
        cfg = AttrDict(kwargs)
        cfg.batch_size = int(round(cfg.batch_size))
        cfg.act_name = 'leaky_relu'
        cfg.act_params = (cfg.leaky_param,)
        del cfg['leaky_param']
        cfg.max_epochs = 1024

        trainer = ExampleTrainer(list_data, seed=42, nbr_readouts=0, **cfg)
        assert data_split_hash == trainer.data_split_hash
        makedirs('data/hyper', exist_ok=True)
        path = cfg.get_hashed_path('data/hyper')
        try:
            trainer.restore_best_state(path)
        except EnvironmentError:
            trainer.train(path)
        return trainer.best_validation_score

    res_gp = skopt.gp_minimize(run_example_trainer, space, n_calls=1,
                               random_state=0, n_random_starts=1)
    print([d.name for d in res_gp.space.dimensions])
    ts = get_time_stamp()
    plt.close()
    plot_objective(res_gp)
    plt.savefig('data/hyper/%s-objective.png' % ts)
    plt.close()
    plot_evaluations(res_gp)
    plt.savefig('data/hyper/%s-evaluations.png' % ts)
    skopt.dump(res_gp, 'data/hyper/%s-result.gz' % ts, store_objective=False)
Example no. 6
def write_bo_policy(cost, cross_val=True):
    empirical_states = read_state_actions(cost)['states']
    train_envs = make_envs(cost, initial_states=empirical_states, n=N_TRAIN)
    pol, result = bo_policy(train_envs, max_cost=len(train_envs[0].tree),
                            normalize_voi=True, n_random_starts=10,
                            n_calls=N_CALLS, n_jobs=N_JOBS, return_result=True,)
    result.specs['args'].pop('func')  # can't pickle
    result.specs['info'] = {
        'cost': cost,
        'n_train': N_TRAIN,
        'n_calls': N_CALLS,
        'theta': pol.theta
    }

    if cross_val:
        cross_envs = make_envs(cost, initial_states=empirical_states, n=N_CROSS_VAL)
        n_consider = 5
        idx = result.func_vals.argsort()[:n_consider]
        top_x = np.array(result.x_iters)[idx]
        top_theta = [x2theta(x, True) for x in top_x]
        theta = max(top_theta,
                    key=lambda th: get_util(LiederPolicy(th), cross_envs, parallel=joblib.Parallel(N_JOBS)))
        result.specs['info']['theta'] = theta
    skopt.dump(result, filename(cost, note='human_states'))
    return result
def run_parallel_optimizer(optimizer, save_path='optimizer.p'):
    if n_initial_random_samples > 0:
        rand_results = Pool(n_jobs).map(get_random_samples, range(n_jobs))
        for res in rand_results:
            for x, y in zip(res.x_iters, res.func_vals):
                optimizer.tell(x, y)

    prior_xs = pd.read_csv('prior-xs.csv')[[dim.name for dim in space]]
    prior_xs.layer_sizes = prior_xs.layer_sizes.apply(
        lambda l: tuple(eval(l)))  # TODO dangerous
    prior_xs.q_clip = prior_xs.q_clip.apply(lambda s: int(s.split()[1][:-1]))
    prior_xs = prior_xs.values

    prior_ys = pd.read_csv('prior-ys.csv').values
    n_out_of_bounds = 0
    for x, y in zip(prior_xs, prior_ys):
        try:
            optimizer.tell(list(x), float(y[0]))
        except ValueError:
            n_out_of_bounds += 1
    print('out of bounds:', n_out_of_bounds, '/', len(prior_ys))

    while True:  # will run until interrupted
        try:
            xs = optimizer.ask(n_points=n_jobs)  # get a batch of suggestions
            ys = Pool(n_jobs).map(score_config, xs)  # evaluate them in parallel
            opt_result = optimizer.tell(xs, ys)  # report results back to the optimizer
            dump(opt_result, save_path, compress=9)
        except KeyboardInterrupt:
            print('Stopping hyper-parameter optimization process.')
            break
        except Exception as e:
            print('Error during optimization:', e)
Example no. 8
def train_models(models, params, Xtrain, Ytrain, kfold, filename):
    """
  train_models performs kfold bayesian hyperparameter tuning for different 
  models, and saves the output for model persistence.

  :param models: A single sklearn model object or list of sklearn model objects.
  :param params: A dictionary or list of dictionaries containing hyperparameters 
                to tune.
  :param Xtrain: A numpy array or pandas dataframe containing the training data.
  :param Ytrain: A numpy array or pandas dataframe containing the output data.
  :param kfold:  An integer or sklearn object determining the kfold operation 
                performed.
  :param filename: A string or list of paths to save the models (pickle).

  """
    no_of_cpus = multiprocessing.cpu_count()

    with parallel_backend('threading', n_jobs=no_of_cpus):
        for i in range(len(models)):
            opt = BayesSearchCV(estimator=models[i],
                                search_spaces=params[i],
                                n_iter=30,
                                cv=kfold,
                                n_jobs=-1,
                                random_state=0)

            mdls = []
            #bar.start()
            for j in range(Ytrain.shape[1]):
                _ = opt.fit(Xtrain, Ytrain[:, j])
                mdls.append(opt)
                dump(res=mdls, filename=filename[i])
Example no. 9
def main():
    if rank == 0:
        hyperparameters = {
            'kernelSize1': np.arange(2, 12),
            'stride1': np.arange(1, 10),
            'kernelSize2': np.arange(2, 12),
            'stride2': np.arange(1, 10),
            'kernelSize3': np.arange(2, 12),
            'kernelSize4': np.arange(1, 12),
            'kernelSize5': np.arange(2, 12)
        }

        hyperspace = HyperSpace(hyperparameters)
        all_intervals = hyperspace.fold_space()
        hyperspaces = hyperspace.hyper_permute(all_intervals)
        subspace_keys, subspace_boundaries = hyperspace.format_hyperspace(
            hyperspaces)
    else:
        subspace_keys, subspace_boundaries = None, None

    space = comm.scatter(subspace_boundaries, root=0)
    deadline = DeadlineStopper(18000)
    # Gaussian process (see scikit-optimize skopt module for other optimizers)
    res_gp = gp_minimize(objective,
                         space,
                         n_calls=20,
                         callback=deadline,
                         random_state=0,
                         verbose=True)
    dump(res_gp, 'hyper_results/gp_subspace_' + str(rank))
Example no. 10
def hyperbelt(objective,
              hyperparameters,
              results_path,
              max_iter=100,
              eta=3,
              verbose=True,
              n_evaluations=None,
              random_state=0):
    """
    Distributed HyperBand with SMBO - one hyperspace per node.

    Parameters
    ----------
    * `objective` [function]:
        User defined function which calls a learner
        and returns a metric of interest.

    * `hyperparameters` [list, shape=(n_hyperparameters,)]:

    * `results_path` [string]
        Path to save optimization results

    * `n_iterations` [int, default=50]
        Number of optimization iterations

    * `verbose` [bool, default=False]
        Verbosity of optimization.

    * `random_state` [int, default=0]
        Random state for reproducibility.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Setup savefile
    # Zero-pad the rank so result files sort by rank
    filename = 'hyperspace{:02d}'.format(rank)

    if not os.path.exists(results_path):
        os.makedirs(results_path, exist_ok=True)

    savefile = os.path.join(results_path, filename)

    if rank == 0:
        hyperspace = create_hyperspace(hyperparameters)
    else:
        hyperspace = None

    space = comm.scatter(hyperspace, root=0)

    result = hyperband(objective, space, max_iter, eta, random_state, verbose,
                       n_evaluations, rank)

    # Each worker will independently write their results to disk
    dump(result, savefile)
 def __call__(self, res):
     """
     Parameters
     ----------
     * `res` [`OptimizeResult`, scipy object]:
         The optimization result, as an OptimizeResult object.
     """
     skopt.dump(res, self.checkpoint_path, **self.dump_options)
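
A hedged resumption sketch (not part of the original): a result checkpointed this way can be reloaded with skopt.load and used to warm-start a new run via x0/y0, as Example no. 27 does later; the objective, space, and checkpoint path below are illustrative placeholders.

import skopt
from skopt.space import Real

space = [Real(-2.0, 2.0, name='x')]       # hypothetical search space

def objective(params):                    # hypothetical objective
    return (params[0] - 0.5) ** 2

checkpoint = skopt.load('checkpoint.pkl')  # file written by a checkpointing callback
res = skopt.gp_minimize(objective, space,
                        x0=checkpoint.x_iters,          # replay evaluated points
                        y0=list(checkpoint.func_vals),  # and their objective values
                        n_calls=20)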
Example no. 12
 def save_res(self):
     if 'minimizer_res' in self.res:
         results_fname = '_'.join(['res-hyp', self.name])
         dump(self.res['minimizer_res'],
              results_fname + '.pkl',
              store_objective=False)
         self.res.pop('minimizer_res')
     with open('_'.join(['res-exp', self.name]) + '.pkl', 'wb') as f:
         pickle.dump(self.res, f, pickle.HIGHEST_PROTOCOL)
def bayes_opt(objective_func, param_grid):
    res = gp_minimize(objective_func, param_grid, n_jobs=-1, acq_func='EI', n_calls=100, verbose=False)
    print('Best Hyperparameters: ')
    print_hyperparams(res)

    print('Best Hyperparameters MSE: ', res.fun)
    dump(res, RES_FILE_NAME)
    
    return res
Example no. 14
def find_best_hyperparameters(model, X, y, dynamic_params_space, scoring, plot, nfold, **HPO_params):
    
    # Filter these warnings - they are not consistent and arise even for float features
    from warnings import filterwarnings
    # simplefilter("ignore", UserWarning)
    filterwarnings("ignore", message="The objective has been evaluated at this point before", category=UserWarning)
  
    # Get model name
    model_name = model.__class__.__name__
    
    # Get dynamic parameter names:
    @use_named_args(dynamic_params_space)
    def get_params_names(**dynamic_params):
        return list(dynamic_params.keys())    
    param_names = get_params_names(dynamic_params_space)
        
    # Define an objective function
    @use_named_args(dynamic_params_space)
    def objective(**dynamic_params):
        #model.set_params(**static_params)
        model.set_params(**dynamic_params) 
        cv = StratifiedKFold(n_splits=nfold, random_state=seed, shuffle=True)
        scores = cross_validate(model, X, y, cv=cv, scoring = scoring, n_jobs=-1)
        val_score = np.mean(scores['test_score'])
        return -val_score
    
    print(model_name, 'model training...')
    # Load previously trained results and get starting point (x0) as best model from previous run
    try:
        res = load(r'output/models/'+model_name)
        x0 = res.x       
    # If not trained before -> no initial point provided
    except Exception:
        x0 = None
    
    res = forest_minimize(objective, dynamic_params_space, x0 = x0, **HPO_params)
    
    # add attribute - parameters names to the res
    res.param_names = param_names

    print('Optimized parameters:    ', res.param_names)
    print('Previous best parameters:', x0)
    print('Current  best parameters:', res.x)
    print('Best score:', -res.fun)
    
    # Save optimization result
    dump(res, r'output/models/'+model_name, store_objective=False)

    if plot:
        plt.figure(figsize=(5, 2))
        plot_convergence(res)
        try:
            # plot_objective does not work if only one parameter was searched for
            plot_objective(res)
        except Exception:
            pass
    plt.show()
Example no. 15
def test_dump_and_load_optimizer():
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_random_starts=1,
                    acq_optimizer="sampling")

    opt.run(bench1, n_iter=3)

    with tempfile.TemporaryFile() as f:
        dump(opt, f)
        f.seek(0)  # rewind before reading the dump back
        load(f)
Example no. 16
def optimize(param_names, fname):
    from parametersConfig import dbounds, N_TRIALS, optimizer, train_tools, train_actions

    pbounds = [dbounds[param] for param in param_names]

    with tqdm(total=N_TRIALS - 1, file=sys.stdout) as pbar:
        run_experiment = gen_run_experiment(pbar, param_names, train_tools, train_actions)
        res = optimizer(run_experiment, pbounds, n_calls=N_TRIALS)
        res.specs['args']['func'] = None  # the function can't be saved because it has pbar as input
    dump(res, fname, store_objective=False)
    return res
def output_results(filepaths, hyperp_of_interest_dict, hyperp_opt_result):

    ##################################
    #   Display Optimal Parameters   #
    ##################################
    print('=================================================')
    print('      Hyperparameter Optimization Complete')
    print('=================================================')
    print('Optimized Validation Loss: {}\n'.format(hyperp_opt_result.fun))
    print('Optimized Parameters:')
    hyperp_of_interest_list = list(hyperp_of_interest_dict.keys())
    for n, parameter_name in enumerate(hyperp_of_interest_list):
        print(parameter_name + ': {}'.format(hyperp_opt_result.x[n]))

    #####################################
    #   Save Optimization Information   #
    #####################################
    #=== Creating Directory for Outputs ===#
    if not os.path.exists(filepaths.directory_hyperp_opt_outputs):
        os.makedirs(filepaths.directory_hyperp_opt_outputs)

    #=== Save .pkl File ===#
    dump(hyperp_opt_result,
         filepaths.hyperp_opt_skopt_res,
         store_objective=False)

    #=== Write Optimal Set Hyperparameters ===#
    with open(filepaths.hyperp_opt_optimal_parameters, 'w') as optimal_set_txt:
        optimal_set_txt.write('Optimized Validation Loss: {}\n'.format(
            hyperp_opt_result.fun))
        optimal_set_txt.write('\n')
        optimal_set_txt.write('Optimized parameters:\n')
        for n, parameter_name in enumerate(hyperp_of_interest_list):
            optimal_set_txt.write(parameter_name +
                                  ': {}\n'.format(hyperp_opt_result.x[n]))

    #=== Write List of Scenarios Trained ===#
    with open(filepaths.hyperp_opt_scenarios_trained,
              'w') as scenarios_trained_txt:
        for scenario in hyperp_opt_result.x_iters:
            scenarios_trained_txt.write("%s\n" % scenario)

    #=== Write List of Validation Losses ===#
    validation_losses_dict = {}
    validation_losses_dict['validation_losses'] = hyperp_opt_result.func_vals
    df_validation_losses = pd.DataFrame(validation_losses_dict)
    df_validation_losses.to_csv(filepaths.hyperp_opt_validation_losses,
                                index=False)

    #=== Convergence Plot ===#
    plot_convergence(hyperp_opt_result)
    plt.savefig(filepaths.hyperp_opt_convergence)

    print('Outputs Saved')
Example no. 18
def save_result(tag: str,
                result: OptimizeResult,
                path: PosixPath = CACHE_ROOTPATH,
                extension: str = "pkl"):
    filepath = path / f"{tag}.{extension}"

    if not os.path.exists(path):
        os.makedirs(path)

    del result.specs['args']['func']
    dump(result, filepath, compress=True)
Example no. 19
def test_dump_and_load_optimizer():
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_random_starts=1,
                    acq_optimizer="sampling")

    opt.run(bench1, n_iter=3)

    with tempfile.TemporaryFile() as f:
        dump(opt, f)
        f.seek(0)
        load(f)
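
A hedged follow-on sketch (not part of the original test): a persisted Optimizer can be restored and driven further through its ask/tell interface; bench1 is skopt's built-in toy objective, reused here only to keep the sketch self-contained.

import tempfile
from skopt import Optimizer, dump, load
from skopt.benchmarks import bench1

opt = Optimizer([(-2.0, 2.0)], "ET", acq_optimizer="sampling")
opt.run(bench1, n_iter=3)

with tempfile.TemporaryFile() as f:
    dump(opt, f)
    f.seek(0)
    restored = load(f)

x = restored.ask()      # next suggested point
y = bench1(x)           # evaluate the objective there
restored.tell(x, y)     # feed the observation back into the model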
Example no. 20
def main():
    parser = argparse.ArgumentParser(description='Setup experiment.')
    parser.add_argument('--results_dir', type=str, help='Path to results directory.')
    args = parser.parse_args()

    hparams = [(2, 10),             # max_depth
               (10.0**-2, 10.0**0), # learning_rate
               (1, 10)]             # max_features

    res = hyperband(objective, hparams, max_iter=100, eta=3, verbose=True, random_state=0)
    results_path = os.path.join(args.results_dir, 'hyperband_gbm.pkl')
    skopt.dump(res, results_path)
Example no. 21
def run_exp(model, train, user_map, item_map, validation):
    if HYPER_PARAM_SEARCH:
        checkpoint_saver = CheckpointSaver(CHECKPOINT_NAME)
        res_gp = gp_minimize(objective,
                             space,
                             n_calls=HYPER_PARAM_SEARCH_N_ITER,
                             random_state=SEED,
                             callback=[checkpoint_saver])
        skopt.dump(res_gp, HYPER_PARAM_FILE_NAME, store_objective=False)
        plot_convergence(res_gp)
    else:
        model.fit(train, user_map, item_map, validation)
def bayes_opt(objective_func, param_grid, res_file):
    res = gp_minimize(objective_func,
                      param_grid,
                      n_jobs=-1,
                      acq_func='EI',
                      n_calls=100,
                      verbose=False)

    print('Best Hyperparameters MSE: ', res.fun)
    dump(res, res_file)  # Save results of hyperparameter tuning

    return res
Example no. 23
def reload_multiple(scenario,
                    init_iters,
                    addtl_iters,
                    seeds=range(5),
                    func_names=["gp", "dummy", "forest", "gbrt"],
                    verb_model=False,
                    verb=False,
                    mute_reload=True):
    """ Call reload() across several functions and seeds.
        Automatically dump results.

        PARAMETERS
        ----------
        scenario [int]:
            id of the scenario.
            Used to get_params().
            Determines the directory to save in.

        init_iters [int]:
            Iteration count of optimizations to load.

        addtl_iters [int]:
            Number of additional iterations to run the optimization for.
            New total is used in filenames upon saving.

        seeds [list of int]:
            Values to be passed as seeds.
            Used in filenames upon saving.

        func_names [list of str]:
            Names of optimization functions to reload.
            Used in filenames upon saving.
    """
    opt_params = get_params(scenario)
    for seed in seeds:
        print("Seed: " + str(seed))
        for func_name in func_names:
            print(func_name + ':')
            result_loaded = load("optims/scenario" + str(scenario) + '/' +
                                 func_name + '_' + str(seed) + "_" +
                                 str(init_iters) + ".opt")
            result = reload(result_loaded,
                            opt_params,
                            addtl_iters,
                            seed,
                            verb_model=verb_model,
                            verb=verb,
                            mute_reload=mute_reload)
            dump(
                result, "optims/scenario" + str(scenario) + '/' + func_name +
                '_' + str(seed) + "_" + str(init_iters + addtl_iters) + ".opt")
Example no. 24
def optimize_multiple(
        scenario,
        iterations,
        seeds=range(5),
        functions={
            "gp": gp_minimize,
            "dummy": dummy_minimize,
            "forest": forest_minimize,
            "gbrt": gbrt_minimize
        },
        verb_model=False,
        verb=False):
    """ Call optimize() across several functions and seeds.
        Automatically dump results.

        PARAMETERS
        ----------
        scenario [int]:
            id of the scenario.
            Used to get_params().
            Determines the directory to save in.

        iterations [int]:
            Number of iterations to run the optimization for.
            Used in filenames upon saving.

        seeds [list of int]:
            Values to be passed as seeds.
            Used in filenames upon saving.

        functions [dict of str:callable]:
            Optimization functions to call and their respective names.
            Names are used in the filenames upon saving.
    """
    opt_params = get_params(scenario)
    if (not os.path.isdir("optims/scenario" + str(scenario))):
        os.mkdir("optims/scenario" + str(scenario))

    for seed in seeds:
        print("Seed: " + str(seed))
        for func_name in functions.keys():
            print(func_name + ':')
            result = optimize(functions[func_name],
                              opt_params,
                              iterations,
                              seed,
                              verb_model=verb_model,
                              verb=verb)
            dump(
                result, "optims/scenario" + str(scenario) + '/' + func_name +
                '_' + str(seed) + "_" + str(iterations) + ".opt")
Example no. 25
 def checkpoint_callback(_result: skopt.utils.OptimizeResult) -> None:
     try:
         if len(_result.x_iters) % 10 == 1:
             _res = copy.deepcopy(_result)
             del _res.specs['args']['callback']
             del _res.specs['args']['func']
             skopt.dump(res=_res,
                        filename=os.path.join(args.output,
                                              "checkpoint.pkl"),
                        compress=True)
     except Exception as ex:
         message = f"Couldn't save checkpoint due to exception: {ex}. Skipping."
         logger.exception(message) if progress.disable else progress.write(
             message)
 def optimize_xgb_comb_params(self):
     res = forest_minimize(
         self.cv_test_combined_models_with_xgb_comb_params, [(0.015, 0.035),
                                                             (3, 6),
                                                             (0.6, 0.9),
                                                             (0.5, 1),
                                                             (0, 0.5)],
         x0=[0.03, 5, 0.8, 0.8, 0.4],
         y0=0.053262479044449806,
         n_calls=10)
     dump(res, "xgb_com_opt.gz")
     print(res.x)
     print(res.fun)
     print("")
Example no. 27
def main():
    parser = argparse.ArgumentParser(description='Setup experiment.')
    parser.add_argument('--results_dir', type=str, help='Path to results directory.')
    args = parser.parse_args()

    bounds = np.tile((-5., 5.), (5, 1))

    results = dummy_minimize(stybtang,
                             bounds,
                             verbose=True,
                             n_calls=1,
                             random_state=0)

    results_path = os.path.join(args.results_dir, 'hyperband_stybtang.pkl')
    dump(results, results_path)
def run_search(data, algo, out_name, evaluate):
    opts = dict()
    algo_mod = evaluate.module
    opts.update(getattr(algo_mod, 'options', {}))

    afn = dt.afname(algo)
    if not out_name:
        out_name = afn
    ddir = data_dir / data
    tdir = ddir / 'tuning'
    ofile = tdir / f'{out_name}.opt'
    cpfile = tdir / f'{out_name}.cp'

    if cpfile.exists():
        _log.info('loading checkpoint file %s', cpfile)
        initial = skopt.load(cpfile)
        opts['x0'] = initial.x_iters
        opts['y0'] = initial.func_vals
        opts['n_random_starts'] = max(0, 10 - len(initial.x_iters))
        opts['n_calls'] = 100 - len(initial.x_iters)
        _log.info('checkpoint has %d iterations', len(initial.x_iters))

    saver = skopt.callbacks.CheckpointSaver(cpfile)
    stopper = ImprovementStopper(0.01, min_runs=20)
    timer = skopt.callbacks.TimerCallback()

    with LogFile(tdir / f'{afn}-search.log'):
        res = skopt.gp_minimize(evaluate,
                                algo_mod.dimensions,
                                callback=[timer, saver, stopper],
                                **opts)
        _log.info('%s: optimal MRR of %f at %s after %d searches', algo,
                  -res.fun, res.x, len(res.x_iters))

    res.iter_time = timer.iter_time
    _log.info('writing results to %s', ofile)
    skopt.dump(res, ofile)
    with (tdir / f'{out_name}.json').open('w') as jsf:
        json.dump(
            {
                'params': [x.item() for x in res.x],
                'iters': len(res.x_iters),
                'MRR': -res.fun
            }, jsf)

    _log.info('removing checkpoint file %s', cpfile)
    if cpfile.exists():
        cpfile.unlink()
Example no. 29
            def process_results(res, oldDF):
                # Save best parameters
                best_params = {
                    par.name: res.x[i]
                    for i, par in enumerate(SPACE)
                }
                print('best parameters: ', best_params)

                newDF = pd.DataFrame(best_params, index=[iteration])
                df = oldDF.append(newDF) if iteration > 0 else newDF

                # Save tuning results
                resFP = tuningDir / '{}.gz'.format(iteration)
                skopt.dump(res, resFP, compress=9, store_objective=False)
                print('Saved tuning results to', resFP)
                return df
Example no. 30
def runBayes():
    """ Launch bayesian optimization in order to tune Tensorflow model's
        hyperparameter
    """
    print('==================================================')
    print('Bayesian optimization using Gaussian processes ...')
    print('Experiment version %s.%s.%s-%s-%s'
          % (config.VERSION
             , config.REVISION
             , config.MINOR_REVISION
             , config.POSITION
             , config.USER))
    print('==================================================')


    start = timeit.default_timer()  # -----------------
    r = skopt.gp_minimize(
        objective,
        space,
        n_calls=config.N_CALLS,
        random_state=config.SEED,
        n_jobs=config.N_JOBS_bayes,
        verbose=True)
    stop = timeit.default_timer()   # -----------------
    print('Bayesian Optimization took')
    print(stop - start)


    # save the model to disk
    f = os.path.join(
        # VERSION,  # folder
        # 'bayesOptResults' + MINOR_VERSION + '.' + MAJOR_VERSION + '.sav')
        config.experimentsfolder,
        'bayesOptResults.' \
        + config.VERSION \
        + '.' + config.REVISION \
        + '.' + config.MINOR_REVISION \
        + '-' + config.POSITION \
        + '-' + config.USER \
        + '-' + config.day_out \
        + '.sav')

    skopt.dump(r, f)  # skopt.dump accepts a filename, so there is no need to open the file manually

    print('OK')
Example no. 31
def saveoptresults(search_result, parameter_df, forecastdays, approach):

    alpha = search_result.x[0]

    parameter_df.loc[forecastdays, 'auc'] = sorted(
        zip(search_result.func_vals, search_result.x_iters))[0][0]

    parameter_df.loc[forecastdays, 'lambda_val'] = alpha

    parameterdfdir = 'parameter_df_' + reg + '_reg_appr' + str(
        approach) + ".csv"
    with open(parameterdfdir, 'w') as csv_file:
        parameter_df.to_csv(path_or_buf=csv_file, index=False)

    optimizationdirect = 'optresult_' + reg + '_reg_appr' + str(
        approach) + 'n' + str(forecastdays) + '.pkl'
    dump(search_result, optimizationdirect)
    return parameter_df
Example no. 32
def test_dump_and_load():
    res = gp_minimize(bench3,
                      [(-2.0, 2.0)],
                      x0=[0.],
                      acq_func="LCB",
                      n_calls=2,
                      n_random_starts=0,
                      random_state=1)

    # Test normal dumping and loading
    with tempfile.TemporaryFile() as f:
        dump(res, f)
        f.seek(0)
        res_loaded = load(f)
    check_optimization_results_equality(res, res_loaded)
    assert_true("func" in res_loaded.specs["args"])

    # Test dumping without objective function
    with tempfile.TemporaryFile() as f:
        dump(res, f, store_objective=False)
        f.seek(0)
        res_loaded = load(f)
    check_optimization_results_equality(res, res_loaded)
    assert_true(not ("func" in res_loaded.specs["args"]))

    # Delete the objective function and dump the modified object
    del res.specs["args"]["func"]
    with tempfile.TemporaryFile() as f:
        dump(res, f, store_objective=False)
        f.seek(0)
        res_loaded = load(f)
    check_optimization_results_equality(res, res_loaded)
    assert_true(not ("func" in res_loaded.specs["args"]))
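
A closing hedged sketch (not from the original tests): the same round-trip against a named, compressed file; the path is illustrative, and the objective is re-attached manually after loading because store_objective=False strips it.

from skopt import gp_minimize, dump, load
from skopt.benchmarks import bench3

res = gp_minimize(bench3, [(-2.0, 2.0)], n_calls=12, random_state=1)
dump(res, 'result.gz', compress=9, store_objective=False)

res_loaded = load('result.gz')
print(res_loaded.x, res_loaded.fun)
res_loaded.specs['args']['func'] = bench3  # restore the objective if later code needs it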