Example #1
 def fit(self, X, y, X_test=None):
     trials = Trials()  # collect trial results for the model-selection loop below
     try:
         fmin(
             get_objective_function(
                 X,
                 y,
                 X_test,
                 self.metrics_getter,
                 self.results_dumper,
                 callback=lambda result: self.results_dumper.add_result(result),
             ),
             space=self.params,
             algo=tpe.suggest,
             max_evals=self.max_evals,
             trials=trials,
         )
         min_loss = 10.0
         for el in trials.results:
             if el['status'] == STATUS_OK:
                 if el['loss'] < min_loss:
                     self._result_model = el['model']
                     min_loss = el['loss']
         print(self._result_model)
     finally:
         self.results_dumper.flush()
     self._result_model.fit(X, y)
     return self
Example #2
def random_driver(host=None, port=None, max_evals=10000, max_n_per_class=None):
    hyperopt.fmin(
            fn=slm_visitor_lfw_partial(max_n_per_class),
            space=search_space(bagging_fraction=1.0),
            algo=hyperopt.rand.suggest,
            max_evals=max_evals,
            trials=make_trials(host, port, exp_key='random'))
Example #3
 def work(self):
     fmin(
         fn=passthrough,
         space=self.bandit.expr,
         algo=partial(suggest_algos.ei,
                      warmup_cutoff=3),
         max_evals=10)
Example #4
    def run_hyperopt(self, config_file=None):
        ''' convenience function for running hyperopt '''

        best = None
        if self.space_type == 'modern':
            hyperspace = self.set_multilayer_dropout_space()
            best = fmin(self.compute_multilayer_dropout_objective, hyperspace, algo=tpe.suggest,
                        max_evals=100)
            return best
        elif self.space_type == 'old':
            hyperspace = self.set_old_space()
            best = fmin(self.compute_old_objective, hyperspace, algo=tpe.suggest,
                        max_evals=100)
            return best
        elif self.space_type == 'pretrain-finetune':
            if config_file is None:
                sys.exit(
                    'Cannot pre-train & fine-tune a network without its original config file')
            else:
                pretrain_best = self.learn_pretrain_settings(config_file)
                hyperspace = self.set_finetune_space(config_file)
                finetune_best = fmin(self.compute_finetune_objective, hyperspace, algo=tpe.suggest,
                                     max_evals=100)

                return pretrain_best, finetune_best

        else:
            sys.exit(
                'Space type not specified correctly; your choices are: "modern", "old", or "pretrain-finetune"')

        return best
Example #5
def simple_hp(dataset_info, hp_algo, learning_algo, hp_space,
              n_startup_trials, n_ok_trials, checkpoint_fname,
              host=None, port=None):

    search_space = build_search_space(dataset_info,
                                      learning_algo,
                                      hp_space,
                                      n_startup_trials,
                                      n_ok_trials,
                                      checkpoint_fname=checkpoint_fname)

    if 'tpe' in hp_algo.__globals__['__name__']:
        hp_algo=functools.partial(
                    hp_algo,
                    n_startup_jobs=n_startup_trials)

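    # NB: objective, mongo_dbname and max_evals are module-level names in the source project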
    trials = make_trials(host, port, exp_key=mongo_dbname)

    # -- minimize the objective over the space
    fmin(fn=objective,
         space=search_space,
         algo=hp_algo,
         max_evals=max_evals,
         trials=trials,
         rstate=np.random.RandomState(seed=63))

    return trials
Example #6
def test_har6(suggest=hp_gpsmbo.hpsuggest.suggest, seed=1, iters=10):
    # -- see shovel/hps.py for this test with debugging scaffolding
    #    run it by typing e.g.
    # 
    #       shovel hps.run_har6 --seed=9
    #
    #    That should do a run that fails by only getting to -3.2
    mins = []
    for ii in range(int(seed), int(seed) + int(iters)):
        print('SEED', ii)
        space = {
            'a': hp.uniform('a', 0, 1),
            'b': hp.uniform('b', 0, 1),
            'c': hp.uniform('c', 0, 1),
            'x': hp.uniform('x', 0, 1),
            'y': hp.uniform('y', 0, 1),
            'z': hp.uniform('z', 0, 1),
        }
        trials = hyperopt.Trials()
        hyperopt.fmin(
            fn=har6.har6,
            space=space,
            trials=trials,
            algo=partial(suggest, stop_at=-3.32),
            rstate=np.random.RandomState(ii),
            max_evals=100)
        mins.append(min(trials.losses()))

    assert np.sum(np.array(mins) > -3.32) < 3
Example #7
def base_minimizer(model, data, algo, max_evals, trials, rseed=1337, full_model_string=None):

    if full_model_string is not None:
        model_str = full_model_string
    else:
        model_str = get_hyperopt_model_string(model, data)
    write_temp_files(model_str)

    try:
        from temp_model import keras_fmin_fnct, get_space
    except:
        print("Unexpected error: {}".format(sys.exc_info()[0]))
        raise
    try:
        os.remove('./temp_model.py')
        os.remove('./temp_model.pyc')
    except OSError:
        pass

    try:  # for backward compatibility.
        best_run = fmin(keras_fmin_fnct,
                        space=get_space(),
                        algo=algo,
                        max_evals=max_evals,
                        trials=trials,
                        rseed=rseed)
    except TypeError:
        best_run = fmin(keras_fmin_fnct,
                        space=get_space(),
                        algo=algo,
                        max_evals=max_evals,
                        trials=trials,
                        rstate=np.random.RandomState(rseed))

    return best_run
Example #8
 def setUp(self):
     domain = self.domain = many_dists()
     trials = self.trials = Trials()
     fmin(lambda x: x,
         space=domain.expr,
         trials=trials,
         algo=rand.suggest,
         max_evals=200)
Example #9
 def setUp(self):
     bandit = self.bandit = hyperopt.bandits.many_dists()
     trials = self.trials = Trials()
     fmin(lambda x: x,
         space=bandit.expr,
         trials=trials,
         algo=rand.suggest,
         max_evals=200)
Example #10
def test_duplicate_label_is_error():
    trials = Trials()

    def fn(xy):
        x, y = xy
        return x ** 2 + y ** 2

    fmin(fn=fn, space=[hp.uniform("x", -5, 5), hp.uniform("x", -5, 5)], algo=rand.suggest, max_evals=500, trials=trials)
Example #11
 def work(self):
     trials = Trials()
     space = self.bandit.expr
     fmin(
         fn=passthrough,
         space=space,
         trials=trials,
         algo=anneal.suggest,
         max_evals=10)
Example #12
 def work(self):
     # -- smoke test that things simply run,
     #    for each type of several search spaces.
     trials = Trials()
     fmin(passthrough,
         space=self.bandit.expr,
         algo=partial(tpe.suggest, n_EI_candidates=3),
         trials=trials,
         max_evals=10)
Example #13
def optimize(experimental_fmwk, space, max_evals, trials):

    global eval_number
    eval_number = 0

    fmin(experimental_fmwk, space, algo=tpe.suggest,
         trials=trials, max_evals=max_evals)

    return get_best(trials)
Example #14
    def work(self):
        bandit = self.bandit
        assert bandit.name is not None
        algo = partial(
            tree.suggest,
            # XXX (begin)
            n_trees=10,
            logprior_strength=1.0,
            # XXX (end)
                )
        LEN = self.LEN.get(bandit.name, 75)

        trials = Trials()
        fmin(fn=passthrough,
            space=self.bandit.expr,
            trials=trials,
            algo=algo,
            max_evals=LEN)
        assert len(trials) == LEN

        if 1:
            rtrials = Trials()
            fmin(fn=passthrough,
                space=self.bandit.expr,
                trials=rtrials,
                algo=rand.suggest,
                max_evals=LEN)
            print('RANDOM BEST 6:', list(sorted(rtrials.losses()))[:6])

        if 0:
            plt.subplot(2, 2, 1)
            plt.scatter(range(LEN), trials.losses())
            plt.title('TPE losses')
            plt.subplot(2, 2, 2)
            plt.scatter(range(LEN), ([s['x'] for s in trials.specs]))
            plt.title('TPE x')
            plt.subplot(2, 2, 3)
            plt.title('RND losses')
            plt.scatter(range(LEN), rtrials.losses())
            plt.subplot(2, 2, 4)
            plt.title('RND x')
            plt.scatter(range(LEN), ([s['x'] for s in rtrials.specs]))
            plt.show()
        if 0:
            plt.hist(
                    [t['x'] for t in self.experiment.trials],
                    bins=20)

        #print(trials.losses())
        print('OPT BEST 6:', list(sorted(trials.losses()))[:6])
        #logx = np.log([s['x'] for s in trials.specs])
        #print('TPE MEAN', np.mean(logx))
        #print('TPE STD ', np.std(logx))
        thresh = self.thresholds[bandit.name]
        print('Thresh', thresh)
        assert min(trials.losses()) < thresh
Example #15
def test_bug1_anneal():
    space = hp.choice('preprocess_choice', [
        {'pwhiten': hp.pchoice('whiten_randomPCA',
                               [(.3, False), (.7, True)])},
        {'palgo': False},
        {'pthree': 7}])
    fmin(fn=lambda x: 1,
         space=space,
         algo=anneal.suggest,
         max_evals=50)
Example #16
 def test_basic(self):
     domain = self._domain_cls()
     # print 'domain params', domain.params, domain
     # print 'algo params', algo.vh.params
     trials = Trials()
     fmin(lambda x: x, domain.expr,
          trials=trials,
          algo=suggest,
          max_evals=self._n_steps)
     assert trials.average_best_error(domain) - domain.loss_target < .2
Example #17
 def test_catch_eval_exceptions_False(self):
     with self.assertRaises(TestFmin.SomeError):
         fmin(self.eval_fn,
              space=hp.uniform('x', 0, 1),
              algo=rand.suggest,
              trials=self.trials,
              max_evals=2,
              catch_eval_exceptions=False)
     print(len(self.trials))
     assert len(self.trials) == 0
     assert len(self.trials._dynamic_trials) == 1
Example #18
def tpe_driver(host=None, port=None, max_evals=10000, max_n_per_class=None):
    tpe_suggest=partial(
        hyperopt.tpe.suggest,
        n_startup_jobs=50, # -- number of random jobs before optimization
        )
    hyperopt.fmin(
            fn=slm_visitor_lfw_partial(max_n_per_class),
            space=search_space(bagging_fraction=1.0),
            algo=tpe_suggest,
            max_evals=max_evals,
            trials=make_trials(host, port, exp_key='random'))
Example #19
    def test_anneal(self):
        N = 100
        fmin(self.objective,
            space=self.space,
            trials=self.trials,
            algo=partial(anneal.suggest),
            max_evals=N)

        a_vals = [t['misc']['vals']['a'][0] for t in self.trials.trials]
        counts = np.bincount(a_vals)
        print(counts)
        assert counts[3] > N * .6
Example #20
    def test_tpe(self):
        N = 100
        fmin(self.objective,
            space=self.space,
            trials=self.trials,
            algo=partial(tpe.suggest, n_startup_jobs=10),
            max_evals=N)

        a_vals = [t['misc']['vals']['a'][0] for t in self.trials.trials]
        counts = np.bincount(a_vals)
        print(counts)
        assert counts[3] > N * .6
Example #21
def main():
	from hyperopt import fmin,tpe,hp,Trials
	from hyperopt.mongoexp import MongoTrials
	import os 

	fit_params=eval(open('fit_parameters.txt').read())
	fit_params['root']=os.getcwd()
	directory=init_directory(fit_params)
	if fit_params['optimization']=='hyperopt':
		space=search_space(fit_params)
		trials=Trials()
		best=fmin(run,space=space,algo=tpe.suggest,max_evals=fit_params['max_evals'],trials=trials)
		plot_results(trials.trials)

	#https://github.com/hyperopt/hyperopt/wiki/Parallelizing-Evaluations-During-Search-via-MongoDB
	''' commands for MongoDB
	mongod --dbpath . --port 1234
	export PYTHONPATH=$PYTHONPATH:/home/pduggins/influence_susceptibility_conformity
	hyperopt-mongo-worker --mongo=localhost:1234/foo_db --poll-interval=0.1
	'''
	if fit_params['optimization']=='mongodb':
		space=search_space(fit_params)
		space['directory']=directory
		trials=MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp4')
		best=fmin(run,space=space,algo=tpe.suggest,max_evals=fit_params['max_evals'],trials=trials)
		plot_results(trials.trials)

	if fit_params['optimization']=='evolve':
		from pathos.multiprocessing import ProcessingPool as Pool
		from pathos.helpers import freeze_support #for Windows
		import numpy as np
		import pandas as pd
		# freeze_support()
		evo_pop=init_evo_pop(fit_params)
		pool = Pool(nodes=fit_params['threads'])

		for g in range(fit_params['generations']):
			exp_params=[value['P'] for value in evo_pop.values()]
			fitness_list=pool.map(run, exp_params)
			# new_gen_list=tournament_selection(fitness_list,fit_params)
			new_gen_list=rank_proportional_selection(fitness_list)
			remade_pop=remake(evo_pop,new_gen_list)
			mutated_pop=mutate(remade_pop,evo_pop,fit_params)
			evo_pop=mutated_pop
			# crossed_pop=crossover(mutated_pop)
			# evo_pop=crossed_pop
			mean_F=np.average([evo_pop[ind]['F'] for ind in evo_pop.keys()])
			std_F=np.std([evo_pop[ind]['F'] for ind in evo_pop.keys()])
			print('\nGeneration %s: mean_F=%s, std F=%s' % (g+1, mean_F, std_F))

		out_pop=pd.DataFrame([evo_pop])
		out_pop.reset_index().to_json('evo_pop.json',orient='records')
Example #22
def base_minimizer(model, data, functions, algo, max_evals, trials,
                   rseed=1337, full_model_string=None, notebook_name=None,
                   verbose=True, stack=3, keep_temp=False):
    if full_model_string is not None:
        model_str = full_model_string
    else:
        model_str = get_hyperopt_model_string(model, data, functions, notebook_name, verbose, stack)
    temp_file = './temp_model.py'
    write_temp_files(model_str, temp_file)

    if 'temp_model' in sys.modules:
        del sys.modules["temp_model"]

    try:
        from temp_model import keras_fmin_fnct, get_space
    except:
        print("Unexpected error: {}".format(sys.exc_info()[0]))
        raise
    try:
        if not keep_temp:
            os.remove(temp_file)
            os.remove(temp_file + 'c')
    except OSError:
        pass

    try:
        # for backward compatibility.
        return (
            fmin(keras_fmin_fnct,
                 space=get_space(),
                 algo=algo,
                 max_evals=max_evals,
                 trials=trials,
                 rseed=rseed,
                 return_argmin=True),
            get_space()
        )
    except TypeError:
        pass

    return (
        fmin(keras_fmin_fnct,
             space=get_space(),
             algo=algo,
             max_evals=max_evals,
             trials=trials,
             rstate=np.random.RandomState(rseed),
             return_argmin=True),
        get_space()
    )
Example #23
def hyperopt_x2_iterates(n_iters=100):
    iterates = []
    trials = Trials()
    random = np.random.RandomState(0)

    def fn(params):
        iterates.append(params['x'])
        return params['x']**2

    for i in range(n_iters):
        fmin(fn=fn, algo=tpe.suggest, max_evals=i+1, trials=trials,
             space={'x': hp.uniform('x', -10, 10)},
             **HyperoptTPE._hyperopt_fmin_random_kwarg(random))

    return np.array(iterates)
Example #24
def xgb_parameter_search():
    from hyperopt import fmin, tpe, hp
    from kagura.xgbwrapper import XGBWrapper

    xs = load("xs")
    ys = load("ys")

    if args.tiny:
        tmp, xs, tmp, ys = stratified_split(xs, ys)

    train_xs, test_xs, train_ys, test_ys = stratified_split(xs, ys)

    def target_func(params):
        eta, max_depth, subsample, colsample_bytree = params
        global model
        model = XGBWrapper(
            eta=eta, max_depth=max_depth, test=(test_xs, test_ys),
            subsample=subsample, colsample_bytree=colsample_bytree,
            num_class=10
        )

        model.fit(train_xs, train_ys)
        log_loss = model.score(test_xs, test_ys)
        logging.info(
            "hyperopt eta=%f,max_depth=%d,subsample=%f"
            ",colsample_bytree=%f,log_loss=%f,best_iteration=%d",
            eta, max_depth, subsample, colsample_bytree,
            log_loss, model.bst.best_iteration)

        name = 'xgb_%f_%d_%f_%f_%f' % (eta, max_depth, subsample, colsample_bytree, log_loss)
        model.bst.save_model(name)
        return log_loss

    default_space = [
             hp.uniform('eta', 0, 1),
             hp.choice('max_depth', [4, 5, 6, 7, 8, 9]),
             hp.uniform('subsample', 0.4, 1),
             hp.uniform('colsample_bytree', 0.4, 1)]
    narrow_space = [
             hp.uniform('eta', 0.1, 0.4),
             hp.choice('max_depth', [5, 6]),
             hp.uniform('subsample', 0.8, 1),
             hp.uniform('colsample_bytree', 0.8, 1)]
    fmin(fn=target_func,
         space=narrow_space,
         algo=tpe.suggest,
         max_evals=10000)

    return
Example #25
def run_all_dl(csvfile = saving_fp, 
                space = [hp.quniform('h1', 100, 550, 1), 
                        hp.quniform('h2', 100, 550, 1),
                        hp.quniform('h3', 100, 550, 1),
                        #hp.choice('activation', ["RectifierWithDropout", "TanhWithDropout"]),
                        hp.uniform('hdr1', 0.001, 0.3),
                        hp.uniform('hdr2', 0.001, 0.3),
                        hp.uniform('hdr3', 0.001, 0.3),
                        hp.uniform('rho', 0.9, 0.999), 
                        hp.uniform('epsilon', 1e-10, 1e-4)]):
          # maxout works well with dropout (Goodfellow et al 2013), and rectifier has worked well with image recognition (LeCun et al 1998)
          start_save(csvfile = csvfile)
          trials = Trials()
          print "Deep learning..."
          best = fmin(objective,
                      space = space,
                      algo=tpe.suggest,
                      max_evals=evals,
                      trials=trials)
          print(best)
          print(trials.losses())
          with open('output/dlbest.pkl', 'wb') as output:
            pickle.dump(best, output, -1)
          with open('output/dltrials.pkl', 'wb') as output:
            pickle.dump(trials, output, -1)
Example #26
File: run.py Project: benbo/botc
def main():

    usage = "%prog text.json labels.csv feature_dir output_dir"
    parser = OptionParser(usage=usage)
    parser.add_option('-m', dest='max_iter', default=4,
                      help='Maximum iterations of Bayesian optimization; default=%default')

    (options, args) = parser.parse_args()
    max_iter = int(options.max_iter)

    global data_filename, label_filename, feature_dir, output_dir, log_filename

    data_filename = args[0]
    label_filename = args[1]
    feature_dir = args[2]
    output_dir = args[3]

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    log_filename = os.path.join(output_dir, 'log.txt')

    with open(log_filename, 'w') as logfile:
        logfile.write(','.join([data_filename, label_filename, feature_dir, output_dir]))

    trials = Trials()
    best = fmin(call_experiment,
                space=space,
                algo=tpe.suggest,
                max_evals=max_iter,
                trials=trials)

    print(space_eval(space, best))
    print(trials.losses())
Example #27
    def test_random(self):
        # test that a space with a pchoice in it is
        # (a) accepted by rand.suggest and
        # (b) handled correctly.
        N = 150
        fmin(self.objective,
            space=self.space,
            trials=self.trials,
            algo=rand.suggest,
            max_evals=N)

        a_vals = [t['misc']['vals']['a'][0] for t in self.trials.trials]
        counts = np.bincount(a_vals)
        print(counts)
        assert counts[3] > N * .35
        assert counts[3] < N * .60
Example #28
def test_landing_screen():

    # define an objective function
    def objective(args):
        case, val = args
        if case == 'case 1':
            return val
        else:
            return val ** 2

    # define a search space
    from hyperopt import hp
    space = hp.choice('a',
        [
            ('case 1', 1 + hp.lognormal('c1', 0, 1)),
            ('case 2', hp.uniform('c2', -10, 10))
        ])

    # minimize the objective over the space
    import hyperopt
    best = hyperopt.fmin(objective, space,
        algo=hyperopt.tpe.suggest,
        max_evals=100)

    print(best)
    # -> {'a': 1, 'c2': 0.01420615366247227}

    print(hyperopt.space_eval(space, best))
Example #29
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-me", "--maxeval", default=60)

    args = parser.parse_args()
    bo = BaseflightOptimize()
    
    # for i in range(3):
    #     params = np.random.uniform(0, 20, (6,))
    #     # params.dtype = int
    #     print bo.objective(params.astype(int))
    # # while bo.running:
    # #     time.sleep(1.0)

    from hyperopt import hp, fmin, tpe, Trials
    space = [
        # hp.quniform("alt_p", 5, 120, 1),
        hp.quniform("alt_p", 5, 60, 1),
        hp.quniform("alt_i", 0, 100, 1),
        # hp.quniform("alt_d", 0, 50, 1),
        hp.quniform("alt_d", 20, 70, 1),
        # hp.quniform("vel_p", 10, 120, 1),
        hp.quniform("vel_p", 20, 90, 1),
        # hp.quniform("vel_i", 0, 100, 1),
        hp.quniform("vel_i", 0, 60, 1),
        hp.quniform("vel_d", 0, 50, 1),
        ]
        
    trials = Trials()
    best = fmin(bo.objective, space, algo=tpe.suggest, max_evals=int(args.maxeval), trials=trials)
    print "best", best
Example #30
    def load_dataset(self):
        X = dataset_df.drop("year_0", axis=1).values
        y = dataset_df["year_0"].values
        return (X, y)


logging.info("Running grid search for alpha (num trials = %d)...",
             args.gs_num_trials)


def objective(alpha):
    class OneTimeExperiment(ExperimentData):
        def build_model(self):
            model = linear_model.Ridge(alpha)
            return model

    experiment = OneTimeExperiment(ALL_EVALUATORS)
    stats = experiment.run_train_test_validation()
    return {'loss': stats["MSE"], 'status': STATUS_OK}


best_alpha = fmin(objective,
                  space=hp.loguniform("alpha_p", -7, 0.1),
                  algo=tpe.suggest,
                  max_evals=args.gs_num_trials,
                  verbose=1)
logging.info("Best alpha found: %f", best_alpha["alpha_p"])

logging.info("Saving best parameters to: %s", args.parameters_path)
json.dump(best_alpha, open(args.parameters_path, "w"))
Example #31
                  'boosting_type': 'goss',
                  'subsample': 1.0
              }]),
    'min_child_samples':
    hp.quniform('min_child_samples', 20, 500, 5),
    'subsample_for_bin':
    hp.quniform('subsample_for_bin', 20000, 300000, 20000),
}

# Global variable
global ITERATION

ITERATION = 0

algo = partial(tpe.suggest, n_startup_jobs=-1)
best = fmin(objective, space, algo=algo,
            max_evals=MAX_EVALS)  # max_evals is the maximum number of models to train; larger values make it easier to find the optimum
print('*****************************')
print('best\n', best)

# Sort the trials with lowest loss (highest AUC) first
bayes_trials_results = sorted(bayes_trials.params_results,
                              key=lambda x: x['loss'])
print('bayes_trials_results[:2]', bayes_trials_results[:2])


#################################################################
Example #32
def grid_search(args: HyperoptArgs):
    # Create loggers
    logger = create_logger(name='hyperparameter_optimization',
                           save_dir=args.log_dir,
                           quiet=True)
    train_logger = create_logger(name='train',
                                 save_dir=args.save_dir,
                                 quiet=args.quiet)

    # Run grid search
    results = []

    # Define hyperparameter optimization
    def objective(hyperparams: Dict[str, Union[int, float]]) -> float:
        # Convert hyperparams from float to int when necessary
        for key in INT_KEYS:
            hyperparams[key] = int(hyperparams[key])

        # Copy args
        hyper_args = deepcopy(args)

        # Update args with hyperparams
        if args.save_dir is not None:
            folder_name = '_'.join(f'{key}_{value}'
                                   for key, value in hyperparams.items())
            hyper_args.save_dir = os.path.join(hyper_args.save_dir,
                                               folder_name)
        for key, value in hyperparams.items():
            setattr(hyper_args, key, value)

        # Record hyperparameters
        logger.info(hyperparams)

        # Cross validate
        mean_score, std_score = cross_validate(hyper_args, train_logger)

        # Record results
        temp_model = MoleculeModel(hyper_args)
        num_params = param_count(temp_model)
        logger.info(f'num params: {num_params:,}')
        logger.info(f'{mean_score} +/- {std_score} {hyper_args.metric}')

        results.append({
            'mean_score': mean_score,
            'std_score': std_score,
            'hyperparams': hyperparams,
            'num_params': num_params
        })

        # Deal with nan
        if np.isnan(mean_score):
            if hyper_args.dataset_type == 'classification':
                mean_score = 0
            else:
                raise ValueError(
                    'Can\'t handle nan score for non-classification dataset.')

        return (1 if hyper_args.minimize_score else -1) * mean_score

    fmin(objective, SPACE, algo=tpe.suggest, max_evals=args.num_iters)

    # Report best result
    results = [
        result for result in results if not np.isnan(result['mean_score'])
    ]
    best_result = min(results,
                      key=lambda result:
                      (1
                       if args.minimize_score else -1) * result['mean_score'])
    logger.info('best')
    logger.info(best_result['hyperparams'])
    logger.info(f'num params: {best_result["num_params"]:,}')
    logger.info(
        f'{best_result["mean_score"]} +/- {best_result["std_score"]} {args.metric}'
    )

    # Save best hyperparameter settings as JSON config file
    makedirs(args.config_save_path, isfile=True)

    with open(args.config_save_path, 'w') as f:
        json.dump(best_result['hyperparams'], f, indent=4, sort_keys=True)
Example #33
    model.add(LSTM(int(params['units3']),return_sequences=True))
    model.add(Dropout(params['dropout3']))

    model.add(LSTM(int(params['units4']),return_sequences=False))

    model.add(Dropout(params['dropout4']))
    model.add(Dense(trainY.shape[1]))
    model.add(Activation("relu"))

    model.compile(loss='mse', optimizer='adam')
    model.fit(trainX, trainY, epochs=30, batch_size=int(params['batch_size']), verbose=2)
    y_hat = model.predict(testX)

    diff = retrive(y_hat) - y_glob

    #rmse=np.sqrt((diff**2).mean())
    mae=(np.abs(diff)).mean()
    return {'loss': mae, 'status': STATUS_OK}



trials = Trials()
best = fmin(trainModel, space, algo=tpe.suggest, max_evals=30, trials=trials)
fo = open("res3.txt", "w")
fo.write( repr(best))
fo.close()

Example #34
print(param_hyperopt)

def objective(params):
    model_pipeline.set_params(**params)
    shuffle = KFold(n_splits=5, shuffle=True, random_state=1)
    score = cross_val_score(model_pipeline, X_train, y_train, cv=shuffle, n_jobs=1)
    # binarize to have ROC-scoring
    return 1-score.mean()

### The trials object will store details of each iteration
trials = Trials()

### Run the hyperparameter search using the tpe algorithm
best = fmin(fn = objective,
            space = param_hyperopt,
            algo = tpe.suggest,
            max_evals = 10,
            trials = trials,
            rstate= np.random.RandomState(1))


### Get the values of the optimal parameters
best_params = space_eval(param_hyperopt, best)

### Fit the model with the optimal hyperparamters
model_pipeline.set_params(**best_params)
model_pipeline.fit(X_train, y_train)

### Score with the test data
y_score = model_pipeline.predict_proba(X_test)
# auc_score = roc_auc_score(y_test, y_score[:,1])
y_predict = model_pipeline.predict(X_test)
Example #35
def adaboost_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum,
                     learn_options):
    '''
    AdaBoostRegressor from scikitlearn.
    '''

    if learn_options['adaboost_version'] == 'python':
        if not learn_options['adaboost_CV']:
            clf = en.GradientBoostingRegressor(
                loss=learn_options['adaboost_loss'],
                learning_rate=learn_options['adaboost_learning_rate'],
                n_estimators=learn_options['adaboost_n_estimators'],
                alpha=learn_options['adaboost_alpha'],
                subsample=1.0,
                min_samples_split=2,
                min_samples_leaf=1,
                max_depth=learn_options['adaboost_max_depth'],
                init=None,
                random_state=None,
                max_features=None,
                verbose=0,
                max_leaf_nodes=None,
                warm_start=False)

            clf.fit(X[train], y[train].flatten())
            y_pred = clf.predict(X[test])[:, None]
        else:  # optimize the parameters if the adaboosted algorithm

            if learn_options["algorithm_hyperparam_search"] == "bo":
                print()

                from hyperopt import hp, fmin, tpe, rand

                def adaboost_scoring_bo(params):
                    # label_encoder = sklearn.preprocessing.LabelEncoder()
                    # label_encoder.fit(y_all['Target gene'].values[train])
                    # gene_classes = label_encoder.transform(y_all['Target gene'].values[train])
                    # n_folds = len(np.unique(gene_classes))
                    cv = sklearn.cross_validation.KFold(
                        y_all['Target gene'].values[train].shape[0],
                        n_folds=20,
                        shuffle=True)
                    est = en.GradientBoostingRegressor(
                        n_estimators=1000,
                        learning_rate=params['learning_rate'],
                        max_depth=params['max_depth'],
                        min_samples_leaf=params['min_samples_leaf'],
                        max_features=params['max_features'])
                    scorer = cross_val_score(est,
                                             X[train],
                                             y[train].flatten(),
                                             cv=cv,
                                             n_jobs=20)
                    return np.median(scorer)

                space = {
                    'learning_rate': hp.uniform('learning_rate', 0.001, 0.1),
                    'max_depth': hp.quniform('max_depth', 1, 8, 1),
                    'min_samples_leaf': hp.quniform('min_samples_leaf', 3, 20,
                                                    1),
                    'max_features': hp.uniform('max_features', 0.05, 1.0)
                }

                best = fmin(adaboost_scoring_bo,
                            space,
                            algo=tpe.suggest,
                            max_evals=50,
                            verbose=1)
                print(best)
                clf = en.GradientBoostingRegressor(
                    n_estimators=learn_options['adaboost_n_estimators'],
                    learning_rate=best['learning_rate'],
                    max_depth=int(best['max_depth']),  # hp.quniform returns floats; sklearn expects int
                    min_samples_leaf=int(best['min_samples_leaf']),
                    max_features=best['max_features'])

                clf.fit(X[train], y[train].flatten())
            elif learn_options["algorithm_hyperparam_search"] == "grid":
                n_jobs = 20

                print "Adaboost with GridSearch"
                from sklearn.grid_search import GridSearchCV
                #param_grid = {'learning_rate': [0.1, 0.05, 0.01],
                #              'max_depth': [4, 5, 6, 7],
                #              'min_samples_leaf': [5, 7, 10, 12, 15],
                #              'max_features': [1.0, 0.5, 0.3, 0.1]}
                param_grid = {
                    'learning_rate': [0.1, 0.01],
                    'max_depth': [4, 7],
                    'min_samples_leaf': [5, 15],
                    'max_features': [1.0, 0.1]
                }

                label_encoder = sklearn.preprocessing.LabelEncoder()
                label_encoder.fit(y_all['Target gene'].values[train])
                gene_classes = label_encoder.transform(
                    y_all['Target gene'].values[train])
                n_folds = len(np.unique(gene_classes))
                cv = sklearn.cross_validation.StratifiedKFold(gene_classes,
                                                              n_folds=n_folds,
                                                              shuffle=True)

                est = en.GradientBoostingRegressor(
                    loss=learn_options['adaboost_loss'],
                    n_estimators=learn_options['adaboost_n_estimators'])
                clf = GridSearchCV(est,
                                   param_grid,
                                   n_jobs=n_jobs,
                                   verbose=1,
                                   cv=cv,
                                   scoring=spearman_scoring,
                                   iid=False).fit(X[train], y[train].flatten())
                print(clf.best_params_)
            else:
                raise Exception(
                    "if using adaboost_CV then need to specify grid (grid search) or bo (bayesian optimization)"
                )

            y_pred = clf.predict(X[test])[:, None]
    else:
        raise NotImplementedError

    return y_pred, clf
Example #36
    def _hyperopt(self,
                  X,
                  y,
                  params,
                  random_state=1,
                  higher_better=True,
                  n_iter=50):
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.3, random_state=random_state)
        train_data = lgb.Dataset(X_train, label=y_train)
        valid_data = lgb.Dataset(X_val, label=y_val)

        space = {
            "learning_rate":
            hp.loguniform("learning_rate", np.log(0.001), np.log(0.07)),
            "max_depth":
            hp.choice("max_depth", [3, 4]),
            "num_leaves":
            hp.choice("num_leaves", np.linspace(10, 150, 50, dtype=int)),
            "feature_fraction":
            hp.quniform("feature_fraction", 0.5, 0.99, 0.1),
            "bagging_fraction":
            hp.quniform("bagging_fraction", 0.5, 0.99, 0.1),
            "bagging_freq":
            hp.choice("bagging_freq", np.linspace(0, 50, 10, dtype=int)),
            "reg_alpha":
            hp.uniform("reg_alpha", 0, 2),
            "reg_lambda":
            hp.uniform("reg_lambda", 0, 2),
            "min_child_weight":
            hp.uniform('min_child_weight', 0.5, 10),
        }

        def objective(hyperparams):
            model = lgb.train({**params, **hyperparams},
                              train_data,
                              n_iter,
                              valid_data,
                              early_stopping_rounds=25,
                              verbose_eval=0)
            score = model.best_score["valid_0"][params["metric"]]
            if higher_better:
                return {'loss': -score, 'status': STATUS_OK}
            else:
                return {'loss': score, 'status': STATUS_OK}

        trials = Trials()
        best = fmin(fn=objective,
                    space=space,
                    trials=trials,
                    algo=tpe.suggest,
                    max_evals=2,
                    verbose=1,
                    rstate=np.random.RandomState(1))

        hyperparams = space_eval(space, best)
        log(f"{params['metric']} = {-trials.best_trial['result']['loss']:0.4f} {hyperparams}"
            )
        return hyperparams
Example #37
    def hyper_tune(self, early_stop=100, num_trial=100):
        """
        Hyper Parameter Tuning for LightGBM classifier

        Parameters
        ----------
        early_stop: Int
            Number of early-stopping rounds used when fitting each candidate model
        num_trial: Int
            Number of trials to explore in the hyperparameter search space

        Returns
        -------
        best_hyperparams: Dict
            Set of LightGBM hyperparameters that lead to the best performance on the validation set
        """
        # define hyperparameters need to be tuned and space to search over
        space = {
            "num_leaves": hp.quniform("num_leaves", 10, 300, 10),
            "min_data_in_leaf": hp.quniform("min_data_in_leaf", 30, 400, 10),
            "learning_rate": hp.uniform("learning_rate", 0.01, 0.2),
            "lambda_l1": hp.uniform("lambda_l1", 0.01, 0.2),
            "lambda_l2": hp.uniform("lambda_l2", 0, 0.2),
            "max_bin": hp.quniform("max_bin", 10, 200, 10),
            "bagging_fraction": hp.uniform("bagging_fraction", 0.1, 0.9),
            "feature_fraction": hp.uniform("feature_fraction", 0.1, 0.9),
        }

        # define the hyperparameter tuning goal
        def objective(space):
            params = {
                "num_leaves": int(space["num_leaves"]),
                "min_data_in_leaf": int(space["min_data_in_leaf"]),
                "learning_rate": space["learning_rate"],
                "lambda_l1": space["lambda_l1"],
                "lambda_l2": space["lambda_l2"],
                "max_bin": int(space["max_bin"]),
                "bagging_fraction": space["bagging_fraction"],
                "feature_fraction": space["feature_fraction"],
                "objective": "regression",
                "max_depth": -1,
                "boosting": "goss",
                "bagging_seed": 11,
                "n_estimators": 99999999,
                "verbosity": -1,
                "nthread": -1,
                "random_state": 1024,
                "metric": "l2"
            }

            lgbm = LGBMRegressor(**params)
            lgbm.fit(
                self.X_train,
                self.y_train,
                eval_set=[(self.X_val, self.y_val)],
                eval_metric="l2",
                early_stopping_rounds=early_stop,
            )

            valid_pred = lgbm.predict(self.X_val)
            valid_pred = clip_outputs(valid_pred, self.clip_min, self.clip_max)

            valid_pred = np.around(valid_pred).astype(int)
            score_acc = accuracy_score(self.y_val, valid_pred)

            print("SCORE:", score_acc)
            return {"loss": -score_acc, "status": STATUS_OK}

        # set the hyperparameter tuning configurations and start tuning
        trials = Trials()
        rstate_generated = np.random.default_rng(1024)
        best_hyperparams = fmin(
            fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=num_trial,
            trials=trials,
            rstate=rstate_generated,
        )

        # print out the best hyperparameters
        print("finish tuning, get the best parameters", best_hyperparams)
        return best_hyperparams
Example #38
    def hyper_tune(self, early_stop=100, num_trial=100):
        """
        Hyper Parameter Tuning for LightGBM classifier

        Parameters
        ----------
        early_stop: Int
            Number of early-stopping rounds used when fitting each candidate model
        num_trial: Int
            Number of trials to explore in the hyperparameter search space

        Returns
        -------
        best_hyperparams: Dict
            Set of LightGBM hyperparameters that lead to the best performance on the validation set
        """
        # define hyperparameters need to be tuned and space to search over
        space = {
            'num_leaves': hp.quniform('num_leaves', 10, 300, 10),
            'min_data_in_leaf': hp.quniform('min_data_in_leaf', 30, 400, 10),
            'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
            "lambda_l1": hp.uniform("lambda_l1", 0.01, 0.2),
            "lambda_l2": hp.uniform("lambda_l2", 0, 0.2),
            'max_bin': hp.quniform('max_bin', 10, 200, 10),
            'bagging_fraction': hp.uniform('bagging_fraction', 0.1, 0.9),
            'feature_fraction': hp.uniform('feature_fraction', 0.1, 0.9)
        }

        # define the hyperparameter tuning goal
        def objective(space):
            params = {
                'num_leaves': int(space['num_leaves']),
                'min_data_in_leaf': int(space['min_data_in_leaf']),
                'learning_rate': space['learning_rate'],
                "lambda_l1": space["lambda_l1"],
                "lambda_l2": space["lambda_l2"],
                "max_bin": int(space['max_bin']),
                "bagging_fraction": space['bagging_fraction'],
                "feature_fraction": space['feature_fraction'],
                'objective': 'binary',
                'max_depth': -1,
                "boosting": "goss",
                "bagging_seed": 11,
                'n_estimators': 99999999,
                "verbosity": -1,
                "nthread": -1,
                "random_state": 1024,
                'metric': 'auc'
            }

            lgbm = LGBMClassifier(**params)
            lgbm.fit(self.X_train,
                     self.y_train,
                     eval_set=[(self.X_val, self.y_val)],
                     eval_metric='AUC',
                     early_stopping_rounds=early_stop)

            valid_pred_prob = lgbm.predict_proba(self.X_val)[:, 1]

            score_auc = roc_auc_score(self.y_val, valid_pred_prob)
            valid_pred = lgbm.predict(self.X_val)

            # score_acc = accuracy_score(self.y_val, valid_pred)
            precision, recall, f, _ = precision_recall_fscore_support(
                self.y_val, valid_pred, average='macro')

            print("SCORE:", score_auc)
            return {'loss': -score_auc, 'status': STATUS_OK}

        # set the hyperparameter tuning configurations and start tuning
        trials = Trials()
        rstate_generated = np.random.default_rng(1024)
        best_hyperparams = fmin(fn=objective,
                                space=space,
                                algo=tpe.suggest,
                                max_evals=num_trial,
                                trials=trials,
                                rstate=rstate_generated)

        # print out the best hyperparameters
        print("finish tuning, get the best parameters", best_hyperparams)
        return best_hyperparams
Example #39
    def model(self):
        #cname = sys._getframe().f_code.co_name
        cname = 'et'
        train, y, test = self.train_, self.y_, self.test_
        train.drop('id', axis=1, inplace=True)
        test.drop('id', axis=1, inplace=True)
        from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval

        def step_et(params):
            clf = ensemble.ExtraTreesRegressor(**params)
            cv = model_selection.cross_val_score(clf,
                                                 train,
                                                 y,
                                                 scoring=metrics.make_scorer(
                                                     metrics.log_loss),
                                                 cv=5,
                                                 n_jobs=-2)
            score = np.mean(cv)
            print(cname, score, params, self.now())
            return dict(loss=score, status=STATUS_OK)

        space_et = dict(n_estimators=hp.choice('n_estimators', range(50,
                                                                     1500)),
                        min_samples_split=hp.choice('min_samples_split',
                                                    range(2, 10)),
                        min_samples_leaf=hp.choice('min_samples_leaf',
                                                   range(1, 10)),
                        max_features=hp.choice(
                            'max_features', range(4, min(20, train.shape[1]))),
                        random_state=1)
        trs = self.load('et_trials')
        if trs is None or self.debug_:
            tr = Trials()
        else:
            tr, _ = trs
        if len(tr.trials) > 0:
            print('reusing %d trials, best was:' % (len(tr.trials)),
                  space_eval(space_et, tr.argmin))
            best = tr.argmin
        while len(tr.trials) < 30:
            print(len(tr.trials), end=' ')
            best = fmin(step_et,
                        space_et,
                        algo=partial(tpe.suggest, n_startup_jobs=1),
                        max_evals=len(tr.trials) + 1,
                        trials=tr)
            self.save('et_trials', (tr, space_et))
        et_params = space_eval(space_et, best)
        print(et_params)

        N_splits = self.num_splits_
        N_seeds = self.num_seeds_

        v, z = self.v_, self.z_
        skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True)
        cv = []
        for s in range(N_seeds):
            scores = []
            cname2 = cname + str(s)
            v[cname2], z[cname2] = 0, 0
            et_params['random_state'] = s + 4242
            for n, (itrain, ival) in enumerate(skf.split(train, y)):
                clf = ensemble.ExtraTreesRegressor(**et_params)
                clf.fit(train.iloc[itrain], y[itrain])
                p = clf.predict(train.iloc[ival])
                v.loc[ival, cname2] += p
                score = metrics.log_loss(y[ival], p)
                z[cname2] += clf.predict(test)
                print(
                    cname, 'seed %d step %d of %d: ' %
                    (et_params['random_state'], n + 1, skf.n_splits), score,
                    self.now())
                scores.append(score)
            z[cname2] /= N_splits
            cv.append(np.mean(scores))
            print('seed %d loss: ' % (et_params['random_state']), scores,
                  np.mean(scores), np.std(scores))
            z['y'] = z[cname2]

        print('cv:', cv, np.mean(cv), np.std(cv))
        return cv, None
Example #40
    def suggest(self, history, searchspace):
        """
        Suggest params to maximize an objective function based on the
        function evaluation history using a tree of Parzen estimators (TPE),
        as implemented in the hyperopt package.

        Use of this function requires that hyperopt be installed.
        """
        # This function is very odd, because as far as I can tell there's
        # no real documented API for any of the internals of hyperopt. Its
        # execution model is that hyperopt calls your objective function
        # (instead of merely providing you with suggested points, and then
        # you calling the function yourself), and it's very tricky (for me)
        # to use the internal hyperopt data structures to get these predictions
        # out directly.

        # So the path we take in this function is to construct a synthetic
        # hyperopt.Trials database from the `history`, and then call
        # hyperopt.fmin with a dummy objective function that logs the value
        # used, and then return that value to our client.

        # The form of the hyperopt.Trials database isn't really documented in
        # the code -- most of this comes from reverse engineering it, by
        # running fmin() on a simple function and then inspecting the form of
        # the resulting trials object.
        if 'hyperopt' not in sys.modules:
            raise ImportError('No module named hyperopt')

        random = check_random_state(self.seed)
        hp_searchspace = searchspace.to_hyperopt()

        trials = Trials()
        for i, (params, scores, status) in enumerate(history):
            if status == 'SUCCEEDED':
                # we're doing maximization, hyperopt.fmin() does minimization,
                # so we need to swap the sign
                result = {'loss': -np.mean(scores), 'status': STATUS_OK}
            elif status == 'PENDING':
                result = {'status': STATUS_RUNNING}
            elif status == 'FAILED':
                result = {'status': STATUS_FAIL}
            else:
                raise RuntimeError('unrecognized status: %s' % status)

            # the vals key in the trials dict is basically just the params
            # dict, but enum variables (hyperopt hp.choice() nodes) are
            # different, because the index of the parameter is specified
            # in vals, not the parameter itself.

            vals = {}
            for var in searchspace:
                if isinstance(var, EnumVariable):
                    # get the index in the choices of the parameter, and use
                    # that.
                    matches = [
                        i for i, c in enumerate(var.choices)
                        if c == params[var.name]
                    ]
                    assert len(matches) == 1
                    vals[var.name] = matches
                else:
                    # the other big difference is that all of the param values
                    # are wrapped in length-1 lists.
                    vals[var.name] = [params[var.name]]

            trials.insert_trial_doc({
                'misc': {
                    'cmd': ('domain_attachment', 'FMinIter_Domain'),
                    'idxs': dict((k, [i]) for k in hp_searchspace.keys()),
                    'tid': i,
                    'vals': vals,
                    'workdir': None
                },
                'result': result,
                'tid': i,
                # bunch of fixed fields that hyperopt seems to require
                'owner': None,
                'spec': None,
                'state': 2,
                'book_time': None,
                'exp_key': None,
                'refresh_time': None,
                'version': 0
            })

        trials.refresh()
        chosen_params_container = []

        def suggest(*args, **kwargs):
            return tpe.suggest(*args,
                               **kwargs,
                               gamma=self.gamma,
                               n_startup_jobs=self.seeds)

        def mock_fn(x):
            # http://stackoverflow.com/a/3190783/1079728
            # to get around there being no nonlocal keyword in python2
            chosen_params_container.append(x)
            return 0

        fmin(fn=mock_fn,
             algo=suggest,
             space=hp_searchspace,
             trials=trials,
             max_evals=len(trials.trials) + 1,
             **self._hyperopt_fmin_random_kwarg(random))
        chosen_params = chosen_params_container[0]

        return chosen_params
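
A minimal distillation of the trick above (a sketch, not the project's code): run fmin for exactly one more evaluation than the Trials object already holds, with a dummy objective that records the point it was handed instead of computing a loss.

import hyperopt
from hyperopt import fmin, hp, tpe, Trials

trials = Trials()
suggested = []

def mock_fn(params):
    suggested.append(params)  # capture the point the suggest algo proposed
    return 0.0                # dummy loss; it is never used

fmin(fn=mock_fn,
     space={'x': hp.uniform('x', -1.0, 1.0)},
     algo=tpe.suggest,
     trials=trials,
     max_evals=len(trials.trials) + 1)

print(suggested[0])  # e.g. {'x': 0.123...}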
Example #41
df_tr, df_te = train_test_split(df, test_size=0.3, random_state=1)
dtrain = lgb.Dataset(
	df_tr.iloc[:, :-1],
	label=df_tr['income_label'],
	categorical_feature=categorical_columns,
	feature_name=all_columns,
	free_raw_data=False)

maxevals = 100
early_stop_dict = {}
objective.i = 0
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=maxevals,
            trials=trials)

best['num_boost_round'] = early_stop_dict[trials.best_trial['tid']]
best['num_leaves'] = int(best['num_leaves'])
best['verbose'] = -1
model = lgb.LGBMClassifier(**best)
model.fit(dtrain.data,
	dtrain.label,
	feature_name=all_columns,
	categorical_feature=categorical_columns)

X_te = df_te.iloc[:,:-1].values
y_te = df_te['income_label'].values
preds = model.predict(X_te)
Example #42
def hyperopt_search(dataset, param_grid, n_iter, sample, trials_init):

    if trials_init:
        with open(trials_init, 'rb') as f:
            trials = pickle.load(f)
            n_trials_init = len(trials.trials)
    else:
        trials = Trials()
        n_trials_init = 0

    def objective(params):
        all_data = tmer2_gmtkn_parser("datasets/{}".format(dataset))
        sampled_data = random.sample(all_data, sample)
        sampled_systems = [k["atoms"] for k in sampled_data]
        sampled_stoichiometry = [k["stoichiometry"] for k in sampled_data]
        sampled_reference_value = [
            k["reference_value"] / 627.509 for k in sampled_data
        ]
        sampled_charges = [k["charges"] for k in sampled_data]
        sampled_multiplicities = [k["multiplicities"] for k in sampled_data]
        sampled_reactions = [k["reaction"] for k in sampled_data]

        logging.info("New iteration...")
        start = time.time()

        losses = []
        reactions = []

        for syst, stoich, ref_v, charges, multi, reacs in zip(
                sampled_systems, sampled_stoichiometry,
                sampled_reference_value, sampled_charges,
                sampled_multiplicities, sampled_reactions):
            output = calc_reaction(syst, stoich, ref_v, charges, multi, reacs,
                                   params)
            losses.append(output[0])
            reactions.append(output[1])

        reactions_losses = dict(zip(reactions, losses))

        logging.info("Total loss: {:.4f}".format(sum(losses)))
        logging.info("Iteration took {:.1f} s\n".format(time.time() - start))

        return {
            'loss': sum(losses) / sample,
            'params': params,
            'reactions_losses': reactions_losses,
            'status': STATUS_OK
        }

    current_time = time.strftime("%Y_%m_%d__%H_%M_%S", time.localtime())
    trials_filename = "{0}_{1}_iters_{2}_in_batch_{3}.pickle".format(
        dataset, n_iter, sample, current_time)

    for n in range(n_trials_init + 1, n_iter + 1):
        try:
            fmin(objective,
                 param_grid,
                 algo=tpe.suggest,
                 trials=trials,
                 max_evals=n)
            with open(trials_filename, 'wb') as f:
                pickle.dump(trials, f)
        except IndexError as err:
            print(err)
            with open(trials_filename, 'rb') as f:
                trials = pickle.load(f)

    return trials_filename
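
The checkpoint/resume idiom above can be distilled into a self-contained sketch (the file name and search bounds here are assumptions, not the project's): grow max_evals one evaluation at a time and pickle the Trials object after each step, so an interrupted run restarts where it left off.

import pickle
from hyperopt import fmin, hp, tpe, Trials

def objective(x):
    return (x - 0.5) ** 2

space = hp.uniform('x', 0, 1)
try:
    with open('trials.pickle', 'rb') as f:
        trials = pickle.load(f)   # resume a previous run
except FileNotFoundError:
    trials = Trials()             # fresh start

for n in range(len(trials.trials) + 1, 51):
    fmin(objective, space, algo=tpe.suggest, trials=trials, max_evals=n)
    with open('trials.pickle', 'wb') as f:
        pickle.dump(trials, f)    # checkpoint after every evaluation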
Example #43
    def optimize(self,
                 max_evals: Optional[int] = None,
                 epoch: Optional[int] = None,
                 build: Optional[int] = None) -> float:

        target_build = build if build else self.latest_build(
            stage='dev') if self.latest_build(stage='dev') else 0
        # if _restored_model_info was set and we are in optimizing state
        # we are resuming optimization, load trials for this evaluation
        if (self.state
                and self.state.type == ModelDescriptorStateType.optimizing
                and hasattr(self, '_restored_model_info')
                and self._restored_model_info and isfile(self.trials_path)):

            trials = pickle.load(open(self.trials_path, 'rb'))
            completed_evals = self.state.completed_evals
            target_evals = self.state.target_evals
        else:
            trials = Trials()
            # to make sure we don't hit warning on resume before first epoch completes
            pickle.dump(trials, open(self.trials_path, 'wb'))
            completed_evals = 0
            target_evals = max_evals if max_evals else 10

            self._update_state(
                ModelDescriptorOptimizingState(build=target_build,
                                               completed_evals=0,
                                               target_evals=max_evals))

        # set inside the objective
        best_run_path: Optional[str] = None

        def objective(space: Dict[str, Any]) -> Dict[str, Any]:
            nonlocal epoch
            nonlocal best_run_path

            if (self.state
                    and self.state.type == ModelDescriptorStateType.optimizing
                    and hasattr(self, '_restored_model_info')
                    and self._restored_model_info
                    and isfile(self.trials_path)):

                model, model_state, history_dict = self._restored_model_info
                self._restored_model_info = None
                # model_state target_epoch should be used for all subsequent trainings
                epoch = model_state.training_target_epochs

                print(
                    f'Hyperopt eval: {self.state.completed_evals + 1}/{self.state.target_evals}, restored model'
                )
            else:
                model = self.create_model(space)
                model_state = TrainingState(version=self._version,
                                            build=target_build)
                model_state.training_target_epochs += epoch if epoch else 10
                model_state.mode = ModelDescriptorStateType.optimizing.value
                print(
                    f'Hyperopt eval: {self.state.completed_evals + 1}/{self.state.target_evals}, fresh model'
                )

            best_run_path = f'{self.data_path}/dev/{model_state.build}.v{model_state.version}/best_hparam_space.json'
            validation_loss = self._train_dev_core(model, model_state, {})
            self.state.completed_evals += 1
            self.state.save_to_file(self.descriptor_path)
            return {'loss': validation_loss, 'status': STATUS_OK}

        best_run = None
        for i in range(completed_evals, target_evals):
            best_run = fmin(objective,
                            space=self.hyperopt_space(),
                            algo=tpe.suggest,
                            max_evals=i + 1,
                            trials=trials,
                            verbose=1)
            pickle.dump(trials, open(self.trials_path, 'wb'))

        self._update_state(None)
        if isfile(self.trials_path):
            remove(self.trials_path)

        if best_run_path:
            with open(best_run_path, 'w') as json_file:
                print(json.dumps(best_run), file=json_file)
        else:
            warn(
                'best_run_path is not set and thus best hyperparameter space is not preserved'
            )

        losses = trials.losses()
        best_losses_index = numpy.argmin(losses)
        return losses[best_losses_index]
Ejemplo n.º 44
0
def ensembleSelection(feat_folder,
                      model_folder,
                      model_list,
                      cdf,
                      cdf_test,
                      subm_prefix,
                      hyperopt_max_evals=10,
                      w_min=-1.,
                      w_max=1.,
                      bagging_replacement=False,
                      bagging_fraction=0.5,
                      bagging_size=10,
                      init_top_k=5,
                      pruning_fraction=0.2):
    ## load all the prediction
    maxNumValid = 12000
    pred_list_valid = np.zeros(
        (len(model_list), config.n_runs, config.n_folds, maxNumValid),
        dtype=float)  # four dimensions in total
    Y_list_valid = np.zeros((config.n_runs, config.n_folds, maxNumValid),
                            dtype=float)
    cdf_list_valid = np.zeros(
        (config.n_runs, config.n_folds, config.n_classes), dtype=float)
    numValidMatrix = np.zeros((config.n_runs, config.n_folds), dtype=int)
    p_ens_list_valid = np.zeros((config.n_runs, config.n_folds, maxNumValid),
                                dtype=float)

    numTest = 22513

    ## model to idx
    model2idx = dict()
    kappa_list = dict()
    for i, model in enumerate(model_list):
        model2idx[model] = i
        kappa_list[model] = 0
    print("============================================================")
    print("Load model...")
    for model in model_list:
        model_id = model2idx[model]
        print("model: %s" % model)
        kappa_cv = np.zeros((config.n_runs, config.n_folds), dtype=float)
        ## load cvf
        for run in range(config.n_runs):
            for fold in range(config.n_folds):
                path = "%s/Run%d/Fold%d" % (model_folder, run + 1, fold + 1)
                pred_file = "%s/valid.pred.%s.csv" % (path, model)
                cdf_file = "%s/Run%d/Fold%d/valid.cdf" % (feat_folder, run + 1,
                                                          fold + 1)
                this_p_valid = pd.read_csv(pred_file, dtype=float)
                numValidMatrix[run][fold] = this_p_valid.shape[0]
                pred_list_valid[
                    model_id, run,
                    fold, :numValidMatrix[run][fold]] = this_p_valid[
                        "prediction"].values  # store the predictions
                Y_list_valid[run,
                             fold, :numValidMatrix[run][fold]] = this_p_valid[
                                 "target"].values  # store the targets
                ## load cdf
                if cdf is None:
                    cdf_list_valid[run, fold, :] = np.loadtxt(cdf_file,
                                                              dtype=float)
                else:
                    cdf_list_valid[run, fold, :] = cdf
                ## compute kappa
                score = getScore(
                    pred_list_valid[model_id, run,
                                    fold, :numValidMatrix[run][fold]],
                    cdf_list_valid[run, fold, :])
                kappa_cv[run][fold] = quadratic_weighted_kappa(
                    score, Y_list_valid[run, fold, :numValidMatrix[run][fold]])

        print("kappa: %.6f" % np.mean(kappa_cv))
        kappa_list[model] = np.mean(kappa_cv)

    cdf_mean_init = np.mean(np.mean(cdf_list_valid, axis=0), axis=0)
    cdf_mean_init = cdf_mean_init.tolist()
    cdf_mean_init.insert(0, 0)
    pdf_mean_init = np.diff(np.asarray(cdf_mean_init))

    sorted_models = sorted(kappa_list.items(), key=lambda x: x[1])[::-1]

    # greedy ensemble
    print("============================================================")
    print("Perform ensemble selection...")
    best_bagged_model_list = [[] for _ in range(bagging_size)]
    best_bagged_model_weight = [[] for _ in range(bagging_size)]
    num_model = len(model_list)
    #print bagging_size
    for bagging_iter in range(bagging_size):
        rng = np.random.RandomState(2015 + 100 * bagging_iter)
        if bagging_replacement:
            sampleSize = int(num_model * bagging_fraction)
            index_base = rng.randint(num_model, size=sampleSize)
        else:
            randnum = rng.uniform(size=num_model)
            index_base = [
                i for i in range(num_model) if randnum[i] < bagging_fraction
            ]
        this_sorted_models = [sorted_models[i] for i in sorted(index_base)]

        #print this_model_list
        best_model_list = []
        best_model_weight = []
        best_kappa = 0
        best_model = None
        p_ens_list_valid_tmp = np.zeros(
            (config.n_runs, config.n_folds, maxNumValid), dtype=float)
        #### initialization
        w_ens, this_w = 0, 1.0
        if init_top_k > 0:
            cnt = 0
            kappa_cv = np.zeros((config.n_runs, config.n_folds), dtype=float)
            for model, kappa in this_sorted_models:
                if cnt >= init_top_k:
                    continue
                print("add to the ensembles the following model")
                print("model: %s" % model)
                print("kappa: %.6f" % kappa)
                this_p_list_valid = pred_list_valid[model2idx[model]]
                for run in range(config.n_runs):
                    for fold in range(config.n_folds):
                        numValid = numValidMatrix[run][fold]
                        if cnt == 0:
                            this_w = 1.0
                        p_ens_list_valid_tmp[run, fold, :numValid] = (
                            w_ens * p_ens_list_valid_tmp[run, fold, :numValid]
                            + this_w * this_p_list_valid[run, fold, :numValid]
                        ) / (w_ens + this_w)
                        #p_ens_list_valid_tmp[run,fold,:numValid] = p_ens_list_valid_tmp[run,fold,:numValid].argsort().argsort()
                        if cnt == init_top_k - 1:
                            cdf = cdf_list_valid[run, fold, :]
                            true_label = Y_list_valid[run, fold, :numValid]
                            score = getScore(
                                p_ens_list_valid_tmp[run, fold, :numValid],
                                cdf)
                            kappa_cv[run][fold] = quadratic_weighted_kappa(
                                score, true_label)
                best_model_list.append(model)
                best_model_weight.append(this_w)
                w_ens += this_w
                cnt += 1
            print("Init kappa: %.6f (%.6f)" %
                  (np.mean(kappa_cv), np.std(kappa_cv)))
        #### ensemble selection with replacement
        iteration = 0
        while True:
            iteration += 1
            for model, _ in this_sorted_models:
                this_p_list_valid = pred_list_valid[model2idx[model]]

                ## hyperopt for the best weight
                trials = Trials()
                param_space = {'weight2': hp.uniform('weight2', w_min, w_max)}
                obj = lambda param: ensembleSelectionObj(
                    param, p_ens_list_valid_tmp, 1., this_p_list_valid,
                    Y_list_valid, cdf_list_valid, numValidMatrix)
                best_params = fmin(obj,
                                   param_space,
                                   algo=tpe.suggest,
                                   trials=trials,
                                   max_evals=hyperopt_max_evals)
                this_w = best_params['weight2']
                this_w *= w_ens
                # score the ensemble blended with the current prediction
                kappa_cv = np.zeros((config.n_runs, config.n_folds),
                                    dtype=float)
                for run in range(config.n_runs):
                    for fold in range(config.n_folds):
                        numValid = numValidMatrix[run][fold]
                        p1 = p_ens_list_valid_tmp[run, fold, :numValid]
                        p2 = this_p_list_valid[run, fold, :numValid]
                        true_label = Y_list_valid[run, fold, :numValid]
                        cdf = cdf_list_valid[run, fold, :]
                        p_ens = (w_ens * p1 + this_w * p2) / (w_ens + this_w)
                        score = getScore(p_ens, cdf)
                        kappa_cv[run][fold] = quadratic_weighted_kappa(
                            score, true_label)
                if np.mean(kappa_cv) > best_kappa:
                    best_kappa, best_model, best_weight = np.mean(
                        kappa_cv), model, this_w
            if best_model is None:
                break
            print("Iter: %d" % iter)
            print("    model: %s" % best_model)
            print("    weight: %s" % best_weight)
            print("    kappa: %.6f" % best_kappa)

            best_model_list.append(best_model)
            best_model_weight.append(best_weight)
            # valid
            this_p_list_valid = pred_list_valid[model2idx[best_model]]
            for run in range(config.n_runs):
                for fold in range(config.n_folds):
                    numValid = numValidMatrix[run][fold]
                    p_ens_list_valid_tmp[run, fold, :numValid] = (
                        w_ens * p_ens_list_valid_tmp[run, fold, :numValid] +
                        best_weight * this_p_list_valid[run, fold, :numValid]
                    ) / (w_ens + best_weight)
            best_model = None
            w_ens += best_weight

        kappa_cv = np.zeros((config.n_runs, config.n_folds), dtype=float)
        cutoff = np.zeros((3), dtype=float)
        for run in range(config.n_runs):
            for fold in range(config.n_folds):
                numValid = numValidMatrix[run][fold]
                true_label = Y_list_valid[run, fold, :numValid]
                cdf = cdf_list_valid[run, fold, :]
                p_ens_list_valid[run, fold, :numValid] = (
                    bagging_iter * p_ens_list_valid[run, fold, :numValid] +
                    p_ens_list_valid_tmp[run, fold, :numValid]) / (
                        bagging_iter + 1.)
                score, cutoff_tmp = getScore(
                    p_ens_list_valid[run, fold, :numValid], cdf, "valid")
                kappa_cv[run][fold] = quadratic_weighted_kappa(
                    score, true_label)
                cutoff += cutoff_tmp
        cutoff /= float(config.n_runs * config.n_folds)
        cutoff *= (22513 / ((2. / 3) * 10158))
        print("Bag %d, kappa: %.6f (%.6f)" %
              (bagging_iter + 1, np.mean(kappa_cv), np.std(kappa_cv)))
        best_kappa_mean = np.mean(kappa_cv)
        best_kappa_std = np.std(kappa_cv)
        best_bagged_model_list[bagging_iter] = best_model_list
        best_bagged_model_weight[bagging_iter] = best_model_weight

        ## save the current prediction
        # use cdf
        output = ensembleSelectionPrediction(
            model_folder, best_bagged_model_list[:(bagging_iter + 1)],
            best_bagged_model_weight[:(bagging_iter + 1)], cdf_test)
        sub_file = "%s_[InitTopK%d]_[BaggingSize%d]_[BaggingFraction%s]_[Mean%.6f]_[Std%.6f]_cdf.csv" % (
            subm_prefix, init_top_k, bagging_iter + 1, bagging_fraction,
            best_kappa_mean, best_kappa_std)
        output.to_csv(sub_file, index=False)
        # use cutoff
        output = ensembleSelectionPrediction(
            model_folder, best_bagged_model_list[:(bagging_iter + 1)],
            best_bagged_model_weight[:(bagging_iter + 1)], cdf_test, cutoff)
        sub_file = "%s_[InitTopK%d]_[BaggingSize%d]_[BaggingFraction%s]_[Mean%.6f]_[Std%.6f]_cutoff.csv" % (
            subm_prefix, init_top_k, bagging_iter + 1, bagging_fraction,
            best_kappa_mean, best_kappa_std)
        output.to_csv(sub_file, index=False)
    return best_kappa_mean, best_kappa_std, best_bagged_model_list, best_bagged_model_weight
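ensembleSelectionObj is defined elsewhere in the original project. Given the call site above — weight1 fixed at 1.0, 'weight2' searched by hyperopt — a plausible reconstruction (a sketch reusing the helpers from this example, not the author's verified code) negates the mean kappa so that fmin minimizes it:

def ensembleSelectionObj(param, p1_list, weight1, p2_list,
                         true_label_list, cdf_list, numValidMatrix):
    weight2 = param['weight2']
    kappa_cv = np.zeros((config.n_runs, config.n_folds), dtype=float)
    for run in range(config.n_runs):
        for fold in range(config.n_folds):
            numValid = numValidMatrix[run][fold]
            # blend the running ensemble (p1) with the candidate model (p2)
            p1 = p1_list[run, fold, :numValid]
            p2 = p2_list[run, fold, :numValid]
            p_ens = (weight1 * p1 + weight2 * p2) / (weight1 + weight2)
            score = getScore(p_ens, cdf_list[run, fold, :])
            kappa_cv[run][fold] = quadratic_weighted_kappa(
                score, true_label_list[run, fold, :numValid])
    # negative kappa: higher kappa means lower loss
    return -np.mean(kappa_cv)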
Ejemplo n.º 45
0
            colsample_bytree=parameter['colsample_bytree'],
            n_estimators=parameter['n_estimators'],
            random_state=2020,
            n_jobs=6)
        model.fit(train_x, train_y)

        # valid set
        valid_prediction = model.predict(valid_x)
        r2 = metrics.r2_score(valid_y, valid_prediction)
        return {'loss': -r2, 'status': STATUS_OK, 'model': model}

    # hyper parameter optimization
    trials = Trials()
    best = fmin(hyperopt_my_xgb,
                space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=50)
    print(best)

    # load the best model parameters
    args['max_depth'] = list(range(3, 10, 1))[best['max_depth']]
    args['min_child_weight'] = list(range(1, 6, 1))[best['min_child_weight']]
    args['gamma'] = [i / 50 for i in range(10)][best['gamma']]
    args['reg_lambda'] = [1e-5, 1e-2, 0.1, 1][best['reg_lambda']]
    args['reg_alpha'] = [1e-5, 1e-2, 0.1, 1][best['reg_alpha']]
    args['lr'] = [0.01, 0.05, 0.001, 0.005][best['lr']]
    args['n_estimators'] = list(range(100, 300, 20))[best['n_estimators']]
    args['colsample_bytree'] = [i / 100.0 for i in range(75, 90, 5)
                                ][best['colsample_bytree']]
    args['subsample'] = [i / 100.0
Ejemplo n.º 46
0
                      axis=1)

    ids_train = pd.read_pickle('data/drop_duplicates/ids_train.pkl',
                               compression='gzip')
    target = pd.read_pickle('data/drop_duplicates/target.pkl',
                            compression='gzip')

    train = train[list(
        set(train.columns.values) - set(col_nuls) - set(col_less_10))]

    xgb_train = xgb.DMatrix(train, label=target)

    trials = Trials()
    best = fmin(fn=score,
                space=space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=150)
'''
2017-08-31 15:22:56 - INFO - Training with params:
2017-08-31 15:22:56 - INFO - {'colsample_bytree': 1.0, 'eval_metric': 'auc', 'min_child_weight': 6, 'subsample': 0.6000000000000001, 'eta': 0.01, 'objective': 'binary:logistic', 'alpha': 0.9, 'booster': 'gbtree', 'seed': 2017, 'max_depth': 4, 'gamma': 0.05, 'lambda': 0.6000000000000001}
2017-08-31 15:42:43 - INFO - score = 0.701148
2017-08-31 15:42:43 - INFO - best_rounds = 1296.000000        
2017-08-31 16:06:34 - INFO - Training with params:
2017-08-31 16:06:34 - INFO - {'colsample_bytree': 1.0, 'eval_metric': 'auc', 'min_child_weight': 6, 'subsample': 0.6000000000000001, 'eta': 0.01, 'objective': 'binary:logistic', 'alpha': 0.5, 'booster': 'gbtree', 'seed': 2017, 'max_depth': 7, 'gamma': 0.01, 'lambda': 0.6000000000000001}
2017-08-31 16:32:25 - INFO - score = 0.700087
2017-08-31 16:32:25 - INFO - best_rounds = 970.000000
2017-08-31 16:32:25 - INFO - Training with params:
2017-08-31 16:32:25 - INFO - {'colsample_bytree': 0.9, 'eval_metric': 'auc', 'min_child_weight': 6, 'subsample': 0.6000000000000001, 'eta': 0.01, 'objective': 'binary:logistic', 'alpha': 0.4, 'booster': 'gbtree', 'seed': 2017, 'max_depth': 4, 'gamma': 0.03, 'lambda': 0.7000000000000001}
2017-08-31 16:50:31 - INFO - score = 0.700914
2017-08-31 16:50:31 - INFO - best_rounds = 1294.000000
Ejemplo n.º 47
0
    #   ("CB",   CatBoostClassifier,             params_CB,   0),
    #   ("NGB",  NGBClassifier,                  params_NGB,  0),
    #   ("RGF",  RGFClassifier,                  params_RGF,  0),
    #   ("FRGF", FastRGFClassifier,              params_FRGF, 0)
]

for model_name, model_fn, model_params, model_evals in models:

    print(f"{model_name}...")
    optimizeModel = partial(func2minimize,
                            model_name=model_name,
                            model_fn=model_fn)

    result = fmin(fn=optimizeModel,
                  space=model_params,
                  algo=tpe.suggest,
                  max_evals=model_evals,
                  trials=Trials())

    print(result)  # -> {'a': 1, 'c2': 0.01420615366247227}

#import hyperopt
#print(hyperopt.space_eval(params_space, result))  # -> ('case 2', 0.01420615366247227)
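
The commented-out space_eval call above points at a detail worth remembering: for hp.choice and hp.randint parameters, fmin returns the index of the winning option rather than the option itself, and hyperopt.space_eval maps the result back into the original space. A minimal sketch with a toy objective and space:

import hyperopt
from hyperopt import fmin, hp, tpe

toy_space = {'clf': hp.choice('clf', ['svm', 'rf', 'xgb']),
             'lr': hp.loguniform('lr', -5, 0)}
toy_best = fmin(lambda p: 0.0, toy_space, algo=tpe.suggest, max_evals=5)
print(toy_best)                                  # e.g. {'clf': 2, 'lr': 0.014...}
print(hyperopt.space_eval(toy_space, toy_best))  # e.g. {'clf': 'xgb', 'lr': 0.014...}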

################################ Save HTML


def highlight_max(s):
    is_max = s == s.max()
    return ['font-weight: bold' if v else '' for v in is_max]


def get_tranformer_score(tranformer):
    xrf = tranformer
    dpredict = xgb.DMatrix(X_test)
    prediction = xrf.predict(dpredict, ntree_limit=xrf.best_ntree_limit)

    return mean_squared_error(y_test, prediction)


#------------------------------------------06-------------------
algo = partial(tpe.suggest, n_startup_jobs=1)
best = fmin(xgboost_factory,
            space,
            algo=algo,
            max_evals=100,
            pass_expr_memo_ctrl=None)

#------------------------------------07------------
RMSE = xgboost_factory(best)
print('best :', best)
print('best param after transform :')
argsDict_tranform(best, isPrint=True)
print('rmse of the best xgboost:', np.sqrt(RMSE))
#

# # #
# d= joblib.load('threexgboost.pkl')
#
# y= d.predict(DATA)
        writer = csv.writer(log_handler)
        headers = ['trial_counter', 'log_loss_mean', 'log_loss_std', 'spend_time']
        for k, v in sorted(param_space.items()):
            headers.append(k)
        print(headers)
        writer.writerow(headers)
        log_handler.flush()

        print("************************************************************")
        print("Search for the best params")
        # global trial_counter
        trial_counter = 0
        trials = Trials()
        # the lambda is not executed here; it merely defines a function
        objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name)
        # objective is passed to fmin, where it is called and the wrapper receives its three parameters
        best_params = fmin(objective, param_space, algo=tpe.suggest,
                           trials=trials, max_evals=param_space["max_evals"])
        for f in int_feat:
            if f in best_params:
                best_params[f] = int(best_params[f])
        print("************************************************************")
        print("Best params")
        for k, v in best_params.items():
            print("        %s: %s" % (k, v))
        trial_log_losses = -np.asarray(trials.losses(), dtype=float)
        best_log_loss_mean = max(trial_log_losses)
        ind = np.where(trial_log_losses == best_log_loss_mean)[0][0]
        best_log_loss_std = trials.trial_attachments(trials.trials[ind])['std']
        print("log_loss stats")
        print("        Mean: %.6f\n        Std: %.6f" % (best_log_loss_mean, best_log_loss_std))
Ejemplo n.º 50
0
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        bandit = opt_q_uniform(self.target)
        prior_weight = 2.5
        gamma = 0.20
        algo = partial(
            tpe.suggest,
            prior_weight=prior_weight,
            n_startup_jobs=2,
            n_EI_candidates=128,
            gamma=gamma,
        )

        trials = Trials()
        fmin(passthrough,
             space=bandit.expr,
             algo=algo,
             trials=trials,
             max_evals=self.LEN)
        if self.show_vars:
            import hyperopt.plotting

            hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1)

        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        idxs = idxs["x"]
        vals = vals["x"]

        losses = trials.losses()

        from hyperopt.tpe import ap_split_trials
        from hyperopt.tpe import adaptive_parzen_samplers

        qu = scope.quniform(1.01, 10, 1)
        fn = adaptive_parzen_samplers["quniform"]
        fn_kwargs = dict(size=(4, ), rng=np.random)
        s_below = pyll.Literal()
        s_above = pyll.Literal()
        b_args = [s_below, prior_weight] + qu.pos_args
        b_post = fn(*b_args, **fn_kwargs)
        a_args = [s_above, prior_weight] + qu.pos_args
        a_post = fn(*a_args, **fn_kwargs)

        # print b_post
        # print a_post
        fn_lpdf = getattr(scope, a_post.name + "_lpdf")
        print(fn_lpdf)
        # calculate the llik of b_post under both distributions
        a_kwargs = dict([(n, a) for n, a in a_post.named_args
                         if n not in ("rng", "size")])
        b_kwargs = dict([(n, a) for n, a in b_post.named_args
                         if n not in ("rng", "size")])
        below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs)
        above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs)
        new_node = scope.broadcast_best(b_post, below_llik, above_llik)

        print("=" * 80)

        do_show = self.show_steps

        for ii in range(2, 9):
            if ii > len(idxs):
                break
            print("-" * 80)
            print("ROUND", ii)
            print("-" * 80)
            all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]
            below, above = ap_split_trials(idxs[:ii], vals[:ii], idxs[:ii],
                                           losses[:ii], gamma)
            below = below.astype("int")
            above = above.astype("int")
            print("BB0", below)
            print("BB1", above)
            # print 'BELOW',  zip(range(100), np.bincount(below, minlength=11))
            # print 'ABOVE',  zip(range(100), np.bincount(above, minlength=11))
            memo = {b_post: all_vals, s_below: below, s_above: above}
            bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node],
                                       memo=memo)
            # print bl - al
            print("BB2", dict(list(zip(all_vals, bl - al))))
            print("BB3", dict(list(zip(all_vals, bl))))
            print("BB4", dict(list(zip(all_vals, al))))
            print("ORIG PICKED", vals[ii])
            print("PROPER OPT PICKS:", nv)

            # assert np.allclose(below, [3, 3, 9])
            # assert len(below) + len(above) == len(vals)

            if do_show:
                plt.subplot(8, 1, ii)
                # plt.scatter(all_vals,
                #    np.bincount(below, minlength=11)[2:], c='b')
                # plt.scatter(all_vals,
                #    np.bincount(above, minlength=11)[2:], c='c')
                plt.scatter(all_vals, bl, c="g")
                plt.scatter(all_vals, al, c="r")
        if do_show:
            plt.show()
Ejemplo n.º 51
0
    # for epochs in range(5):
    for ep in range(20):
        optimize_history = optimize_model.fit(train_data, H_t, batch_size=1, nb_epoch=1,
                                              validation_data=(val_data, H_val), shuffle=False)
        optimize_model.reset_states()

    loss_v = optimize_history.history['val_loss']
    print(loss_v)

    loss_out = loss_v[-1]

    return {'loss': loss_out, 'status': STATUS_OK}


trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, trials=trials, max_evals=20)

#Building Stateful Model
lstm_hidden = hyperopt.space_eval(space, best)
print(lstm_hidden)
tsteps = 24
out_dim = 24

lstm_model = build_lstm_v1.lstm_model_110(lstm_hidden, train_data.shape[2], tsteps)
save_model = lstm_model

##callbacks for Early Stopping
callbacks = [EarlyStopping(monitor='val_loss', patience=3)]

#parameters for simulation
attempt_max = 5
Ejemplo n.º 52
0
             
             path_to_trials = path_to_setup + sname + '_' + str(class_ids[0]) + 'x' + str(class_ids[1]) + '.pkl'
             
             trials = base.Trials()
             try:
                 print('Trying to pickle file')
                 trials = pickle.load(open(path_to_trials, 'rb'))
             except FileNotFoundError:
                 print('No trial file at specified path, creating new one')
                 trials = base.Trials()
             else:
                 print('File found')
             
             try:
                 print('Size of object: ' + str(len(trials)))
                 best = fmin(objective, space=space, algo=tpe.suggest, max_evals=len(trials) + n_iter, trials=trials, verbose=1)
                 pickle.dump(trials, open(path_to_trials, 'wb'))
                 # print(suj, class_ids, best)
             except:
                 print('Exception raised')
                 pickle.dump(trials, open(path_to_trials, 'wb'))
                 # print('\n', suj, class_ids, trials.best_trial['misc']['vals'])
                 raise
             
             acc = (-1) * trials.best_trial['result']['loss']
             # print(suj, class_ids, str(round(acc*100,2))+'%')
             
             best = trials.best_trial['misc']['vals']
 
             df.append(np.r_[[suj, class_ids[0], class_ids[1], int(args['ncsp']), int(args['nbands']), args['acc'][0], acc, 
                               args['tmin'][0], args['tmax'][0], args['fl'][0], args['fh'][0], args['clf'][0], args['clf_details'][0]], 
Ejemplo n.º 53
0
                    num_boost_round=20,
                    valid_sets=lgb_eval,
                    early_stopping_rounds=5)

    # print('Save model...')
    # save model to file
    # gbm.save_model('model.txt')

    # print('Start predicting...')
    # predict
    y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
    # eval
    print('The rmse of prediction is:',
          mean_squared_error(y_test, y_pred)**0.5)
    return mean_squared_error(y_test, y_pred)**0.5


space = {
    "learning_rate": hp.uniform("learning_rate", 0.01,
                                1),  #[0,1,2,3,4,5] -> [50,]
    "feature_fraction": hp.uniform("feature_fraction", 0.01,
                                   0.99),  #[0,1,2,3,4,5] -> 0.05,0.06
    "bagging_fraction": hp.uniform("bagging_fraction", 0.25,
                                   0.95),  #[0,1,2,3] -> [0.7,0.8,0.9,1.0]
    "bagging_freq": hp.randint("bagging_freq", 10),  #
}
algo = partial(tpe.suggest, n_startup_jobs=1)
best = fmin(GBM, space, algo=algo, max_evals=4)

print('best:' + str(best))
print('gbmBest:' + str(GBM(best)))
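
As in this example (and in example n.º 50 above), tpe.suggest is configured through functools.partial: n_startup_jobs sets how many purely random evaluations run before the TPE model takes over, and gamma is the quantile of trials treated as the "good" distribution. A minimal sketch with a toy objective:

from functools import partial
from hyperopt import fmin, hp, tpe

# 20 random warm-up evaluations, then model the best 25% of trials as "good".
tuned_tpe = partial(tpe.suggest, n_startup_jobs=20, gamma=0.25)
best = fmin(lambda p: (p['x'] - 0.3) ** 2,
            {'x': hp.uniform('x', 0, 1)},
            algo=tuned_tpe, max_evals=50)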
Ejemplo n.º 54
0
    # store
    new_row = pd.DataFrame([np.append([avg_score], list(params.values()))],
                           columns=np.append(['score'], list(params.keys())))
    df_results = df_results.append(new_row, ignore_index=True)
    #np.savetxt('hyperopt_preds/pred' + str(df_results.index.max()) + '.txt', predict, fmt='%s')
    df_results.to_csv('hyperopt_results_sgd.csv')
    print("\tScore {0}\n\n".format(avg_score))
    return {'loss': -avg_score, 'status': STATUS_OK}


if __name__ == '__main__':
    train, target, test, _, _, _, _ = load_preprocessing()

    space = {
        'alpha': hp.loguniform('alpha', -7, -1),
        #'eta': hp.quniform('eta', 0.0001, 0.01, 0.0001),
        'penalty': 'elasticnet',
        'loss': 'log',
        #'class_weight': None,
        'power_t': 0.5,
        'n_jobs': -1,
        'l1_ratio': hp.uniform('l1_ratio', 0, 0.5)
    }
    df_results = pd.DataFrame(columns=np.append(['score'], list(space.keys())))
    best = fmin(eval_param, space, algo=tpe.suggest, max_evals=300)

    print(best)

    # Send a text message with twillio
    #from my_phone import send_text
    #send_text("Best params: " + str(best))
Ejemplo n.º 55
0
            evals=watchlist2)
    score = log_loss(dtestCV2.get_label(), model.predict(dtestCV2))
    print("\tScore {0}\n\n".format(score))
    return {'loss': score, 'status': STATUS_OK}

if __name__ == "__main__":
    # load data
    dtrain1 = xgb.DMatrix('dtrain1.buffer')
    dtestCV1 = xgb.DMatrix('dtestCV1.buffer')
    dtrain2 = xgb.DMatrix('dtrain2.buffer')
    dtestCV2 = xgb.DMatrix('dtestCV2.buffer')

    # Set the hyperparameter space
    space = {'eta': hp.quniform('eta', 0.025, 0.5, 0.025),
             'max_depth': hp.choice('max_depth', np.arange(1, 16, dtype=int)),
             'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1),
             'subsample': hp.quniform('subsample', 0.5, 1, 0.05),
             'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
             'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1, 0.05),
             'num_class': 38,
             'eval_metric': 'mlogloss',
             'objective': 'multi:softprob'}

    # Evaluate the loss and find the optimal parameters
    best1 = fmin(objective1, space=space, algo=tpe.suggest, max_evals=100)
    print("Optimal parameters for dtrain1 are:", best1)

    best2 = fmin(objective2, space=space, algo=tpe.suggest, max_evals=100)
    print("Optimal parameters for dtrain2 are:", best2)

Ejemplo n.º 56
0
    print(input)
    output_fd = open(data_file, 'a')
    for x in input:
        output_fd.write('%f ' % x)
    output_fd.write('P\n')
    output_fd.close()
    while True:
        input_fd = open(data_file, 'r')
        last_line = ''
        for line in input_fd:
            last_line = line
        strs = last_line.split()
        if len(strs) < len(input) + 1 or strs[len(input)] == 'P':
            input_fd.close()
            time.sleep(1)
            continue
        else:
            input_fd.close()
            return float(strs[len(input)])


search_space = (hp.uniform('lr', -5, 0), hp.quniform('slack', 0, 3, 1),
                hp.quniform('batchsize', 0, 3, 1))

os.system('rm %s' % data_file)
best = fmin(fn=my_func,
            space=search_space,
            algo=tpe.suggest,
            max_evals=10000000)
print(best)
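
my_func above turns fmin into one end of a file-based handshake: each request line ends in the marker 'P', and the objective polls the file until an answer line appears whose final field is the computed loss. The counterpart process is not shown; a hedged sketch of what it might look like (evaluate is a placeholder for the real training routine):

import time

def serve_requests(data_file, evaluate):
    while True:
        with open(data_file, 'r') as f:
            lines = f.readlines()
        # A pending request is a line whose last token is the marker 'P'.
        if lines and lines[-1].split() and lines[-1].split()[-1] == 'P':
            params = [float(x) for x in lines[-1].split()[:-1]]
            loss = evaluate(params)
            # Append the same params with the loss in place of the marker.
            with open(data_file, 'a') as f:
                f.write(' '.join('%f' % p for p in params) + ' %f\n' % loss)
        time.sleep(1)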
def main():
    args = initialise_app()

    is_training = args["do_train"] != "False"
    is_precompile = args["precompile"]

    if not is_training and args["single_file"]:
        if args["early_gc_disable"]:
            gc.disable()

        # We need to predict at least once to pre-compile models.
        raw_sequence_data = PhysioNet2017DataSource()
        raw_sequence_data.load_single(args["single_file"])

        args_copy = dict(args)

        # No need to split into test / train set. We are only evaluating once.
        args_copy["test_set_fraction"] = 1.
        # No need to save meta params for single predictions.
        args_copy["meta_params_file"] = None

        seq_train = collect_level1_features(args_copy, raw_sequence_data.copy())

        # Don't waste cycles on GC - app is terminated after one prediction anyway.
        gc.disable()
    else:
        if args["cache_level1"]:
            raw_sequence_data = load_sequence_data(args)
            seq_train = collect_level1_features(args, raw_sequence_data)
            pickle.dump(seq_train,
                        open(args["cache_file"], "wb"),
                        pickle.HIGHEST_PROTOCOL)
            print("INFO: Saved level 1 cache.", file=sys.stderr)
        else:
            print("INFO: Loading level 1 cache.", file=sys.stderr)
            seq_train = pickle.load(open(args["cache_file"], "rb"))

        if args["load_meta_params"]:
            print("INFO: Loading meta params from file: ", args["load_meta_params"], file=sys.stderr)
            split_indices, _ = pickle.load(open(args["load_meta_params"], "rb"))
        else:
            print("INFO: Not using meta params.", file=sys.stderr)
            split_indices = None

        print('INFO: Train set balance is: ', seq_train.get_class_balance(), file=sys.stderr)

    # Remove superfluous secondary output nodes.
    seq_train.x = np.concatenate((seq_train.x[:, :30:2],
                                  seq_train.x[:, 30:]),
                                 axis=-1)

    if not args["load_existing"]:
        from af_classifier.model_builder import ModelBuilder

        num_outputs = 4
        num_inputs = seq_train.x[0].shape[-1]

        nn_model = ModelBuilder.build_nn_model(num_inputs,
                                               num_outputs,
                                               p_dropout=float(args["dropout"]),
                                               num_units=int(args["num_units"]),
                                               learning_rate=float(args["learning_rate"]),
                                               num_layers=int(args["num_recurrent_layers"]),
                                               noise=float(args["noise"]))
    else:
        print("INFO: Loading existing model: ", args["load_existing"], file=sys.stderr)
        nn_model = ModelFactory.get_model(args["load_existing"])

    if is_training:
        from af_classifier.model_trainer import ModelTrainer

        if args["do_hyperopt"]:
            # Perform a hyper parameter search optimising loss.
            from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

            # Define hyper parameter space over which to do optimisation.
            space = {
                'num_layers': hp.uniform('num_layers', 1, 5),
                'num_units': hp.uniform('num_units', 64, 256),
                'dropout': hp.uniform('dropout', 0.4, .85),
                'num_epochs': hp.uniform('num_epochs', 120, 1500)
            }

            def train_hyper_opts(params):
                from sklearn.cross_validation import StratifiedKFold

                num_folds = int(1./float(args["test_set_fraction"]))

                seq_train.to_indexed()

                # Cross validation on validation set to select blender model hyperparameters.
                skf = StratifiedKFold(seq_train.y, n_folds=num_folds)

                scores = np.zeros(num_folds)
                for i, indices in enumerate(skf):
                    train_idx, test_idx = indices

                    # Prepare the training and test set for this fold.
                    train_set = seq_train.__class__(seq_train.x[train_idx],
                                                    seq_train.y[train_idx])
                    test_set = seq_train.__class__(seq_train.x[test_idx],
                                                   seq_train.y[test_idx])

                    train_set.to_categorical(num_outputs)
                    test_set.to_categorical(num_outputs)

                    num_inputs = train_set.x[0].shape[-1]

                    class_weight = None
                    if args["class_weighted"]:
                        class_weight = equal_class_weights(train_set)
                        print("INFO: Class weights are", class_weight, file=sys.stderr)

                    opt_model = ModelBuilder.build_nn_model(num_inputs,
                                                            num_outputs,
                                                            p_dropout=float(params["dropout"]),
                                                            num_units=int(np.round(params["num_units"])),
                                                            learning_rate=float(args["learning_rate"]),
                                                            num_layers=int(np.round(params["num_layers"])))

                    score, acc = ModelTrainer.train_model(opt_model,
                                                          train_set,
                                                          test_set,
                                                          int(params["num_epochs"]),
                                                          int(args["batch_size"]),
                                                          do_early_stopping=False,
                                                          with_checkpoints=False,
                                                          do_eval=True,
                                                          save_best_only=True,
                                                          checkpoint_path=join(args["output_directory"],
                                                                               args["model_name"]),
                                                          class_weight=class_weight,
                                                          report_min=False)

                    scores[i] = score

                seq_train.to_categorical(num_outputs)

                print("INFO: Tested with params:", params, file=sys.stderr)
                print("INFO: Scores were:", scores, file=sys.stderr)

                return {'loss': np.mean(scores), 'status': STATUS_OK}

            trials = Trials()
            best = fmin(train_hyper_opts, space, algo=tpe.suggest, max_evals=int(args["num_epochs"]), trials=trials)
            print("INFO: Best config was:", best, file=sys.stderr)
        else:
            seq_test = seq_train.copy()

            class_weight = None
            if args["class_weighted"]:
                class_weight = equal_class_weights(seq_train)
                print("INFO: Class weights are", class_weight, file=sys.stderr)

            ModelTrainer.train_model(nn_model,
                                     seq_train,
                                     seq_test,
                                     int(args["num_epochs"]),
                                     int(args["batch_size"]),
                                     do_early_stopping=False,
                                     with_checkpoints=True,
                                     do_eval=False,
                                     save_best_only=False,
                                     checkpoint_path=join(args["output_directory"], args["model_name"]),
                                     class_weight=class_weight)


    if args["single_file"]:
        # Run blender once to fully initialise it.
        # Without this the blender model would not be usable after unpickling.
        y = nn_model.predict(seq_train.x)

        # Select the maximum activation as our predicted class index.
        y_idx = np.argmax(y, axis=1)[0]

        print("INFO: Predictions are:", y, file=sys.stderr)
        print("F:", seq_train.x[0], file=sys.stderr)

        print_answer_line(basename(args["single_file"]), y_idx)

        if is_precompile:
            ModelFactory.save_all_models_precompiled()
    else:
        from af_classifier.model_trainer import ModelTrainer

        ModelTrainer.create_confusion_matrix(nn_model, seq_train)

        if not is_training:
            seq_validation = load_sequence_data({"dataset": args["level2_dataset"]})

            args_copy = dict(args)

            # No need to split into test / train set. We are only evaluating once.
            args_copy["test_set_fraction"] = 1.
            # No need to save meta params for single predictions.
            args_copy["meta_params_file"] = None

            seq_validation = collect_level1_features(args_copy, seq_validation)

            # Remove superfluous secondary output nodes.
            seq_validation.x = np.concatenate((seq_validation.x[:, :30:2],
                                               seq_validation.x[:, 30:]),
                                              axis=-1)

            y_pred = nn_model.predict(seq_validation.x)

            file_list, _ = PhysioNet2017DataSource.read_reference(args["level2_dataset"])

            # Prepare the answers.txt output for the entry.zip distribution.
            y_pred_idx = np.argmax(y_pred, axis=-1)
            for i, y_idx in enumerate(y_pred_idx):
                print_answer_line(file_list[i], y_idx)
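
train_hyper_opts above embeds k-fold cross-validation inside the hyperopt objective, so each candidate configuration is scored by its mean fold loss instead of a single split. Stripped of the application-specific parts, the pattern looks roughly like this (the estimator, the metric, and the in-scope X/y data are illustrative assumptions):

import numpy as np
from hyperopt import STATUS_OK, fmin, hp, tpe
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

def cv_objective(params):
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    scores = []
    for train_idx, test_idx in skf.split(X, y):  # X, y assumed to be in scope
        model = RandomForestClassifier(
            n_estimators=int(params['n_estimators']),
            max_depth=int(params['max_depth']))
        model.fit(X[train_idx], y[train_idx])
        scores.append(log_loss(y[test_idx], model.predict_proba(X[test_idx])))
    # hyperopt minimizes the mean loss over folds
    return {'loss': float(np.mean(scores)), 'status': STATUS_OK}

cv_space = {'n_estimators': hp.quniform('n_estimators', 50, 500, 50),
            'max_depth': hp.quniform('max_depth', 2, 12, 1)}
# best = fmin(cv_objective, cv_space, algo=tpe.suggest, max_evals=30)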
def evaluate_agent(tasks, tier):
    """Evaluates the random agent on the given tasks/tier.

  Args:
      tasks: A list of task instances (strings) in the split to evaluate.
      tier: A string of the action tier.

  Returns:
      A Evaluator object updated with the results of all the siulations.
  """

    # Create a simulator for the task and tier.
    simulator = phyre.initialize_simulator(tasks, tier)
    evaluator = phyre.Evaluator(tasks)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    empty_action = phyre.simulator.scene_if.UserInput()
    tasks_solved = 0
    for task_index in tqdm(range(len(tasks)), desc='Evaluate tasks'):
        task_id = tasks[task_index]
        task_data = task_data_dict[task_id]
        _, _, images, _ = phyre.simulator.magic_ponies(task_data,
                                                       empty_action,
                                                       need_images=True,
                                                       stride=100)

        evaluator.maybe_log_attempt(task_index,
                                    phyre.simulation_cache.NOT_SOLVED)

        seq_data = ImgToObj.getObjectAndGoalSequence(images)
        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        simFunc = partial(evalAction,
                          initial_img=images[0],
                          seq_data=seq_data,
                          goal_type=goal_type,
                          simulator=simulator,
                          task_index=task_index,
                          evaluator=evaluator)
        space = {
            'x': hp.uniform('x', 0, 1),
            'y': hp.uniform('y', 0, 1),
            'r': hp.uniform('r', 0, 1),
        }
        trials = Trials()

        max_evals = 0

        solved_task = False
        best_score = 0

        while evaluator.get_attempts_for_task(
                task_index) < phyre.MAX_TEST_ATTEMPTS and not solved_task:
            max_evals += phyre.MAX_TEST_ATTEMPTS - evaluator.get_attempts_for_task(
                task_index)
            if best_score > -1.0:
                best = fmin(simFunc,
                            space=space,
                            algo=hyperopt.rand.suggest,
                            max_evals=max_evals,
                            trials=trials,
                            rstate=np.random.RandomState(0),  # random.seed(0) returns None; fmin needs a numpy RandomState (np assumed imported)
                            show_progressbar=False)
            else:
                best = fmin(simFunc,
                            space=space,
                            algo=tpe.suggest,
                            max_evals=max_evals,
                            trials=trials,
                            rstate=np.random.RandomState(0),  # same fix as above
                            show_progressbar=False)
            counter = Counter(result['solved'] for result in trials.results)
            solved_task = counter[True] > 0
            tasks_solved += solved_task
            best_score = trials.best_trial['result']['loss']

    print(tasks_solved, "Tasks solved out of ", len(tasks), "Total Tasks")
    return (evaluator.get_aucess(), tasks_solved, len(tasks))
Ejemplo n.º 59
0
def hyper(corpus_directory, word_delimiter="|", tag_delimiter="/",
          num_step=60, valid_split=0.1, epochs=5, shuffle=False):
    """Hyperas"""

    # Initialize global variable
    globals()['num_step'] = num_step
    globals()['epochs'] = epochs
    globals()['shuffle'] = shuffle

    # Load train dataset
    train_dataset = Corpus(corpus_directory, word_delimiter, tag_delimiter)

    # Create index for character and tag
    char_index = index_builder(constant.CHARACTER_LIST,
                               constant.CHAR_START_INDEX)
    tag_index = index_builder(constant.TAG_LIST, constant.TAG_START_INDEX)

    # Generate input
    inb = InputBuilder(train_dataset, char_index, tag_index, num_step)
    x_true = inb.x
    y_true = inb.y

    # Split training and validation dataset
    x_train, x_test, y_train, y_test = train_test_split(x_true, y_true,
                                                        test_size=valid_split,
                                                        random_state=constant.SEED)

    # Bind dataset to global variable
    globals()['x_train'] = x_train
    globals()['y_train'] = y_train
    globals()['x_test'] = x_test
    globals()['y_test'] = y_test

    print("[ORIGINAL]", len(x_true), len(y_true))
    print("[SPLIT]", len(x_train), len(y_train), len(x_test), len(y_test))

    # Stop whenever you like (Ctrl+C)
    while True:
        # Initialize Trials
        trials_path = "checkpoint/trials.pickle"

        try:
            trials = pickle.load(open(trials_path, "rb"))
            max_trials = len(trials.trials) + 1

            print("Running trails #{}".format(max_trials))

        except (FileNotFoundError, EOFError):
            trials = Trials()
            max_trials = 1

            print("Create new trials")

        # Run Hyperopt
        best = fmin(model, space=space, algo=tpe.suggest, max_evals=max_trials,
                    trials=trials)

        # Display best model
        print("[BEST MODEL]")
        print("Checkpoint Directory;", trials.best_trial["result"]["checkpoint_directory"])
        print("Params;", trials.best_trial["result"]["params"])

        # Save Trials
        pickle.dump(trials, open(trials_path, "wb"))
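
The checkpoint_directory lookup above relies on another handy hyperopt behavior: the dict returned by an objective may carry arbitrary extra keys besides 'loss' and 'status', and those keys stay attached to each trial. A minimal sketch:

from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

def objective(params):
    # any extra (picklable) keys are stored alongside the trial
    return {'loss': (params['x'] - 0.5) ** 2, 'status': STATUS_OK,
            'params': params, 'note': 'anything picklable'}

trials = Trials()
fmin(objective, {'x': hp.uniform('x', 0, 1)},
     algo=tpe.suggest, max_evals=10, trials=trials)
print(trials.best_trial['result']['params'])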
Ejemplo n.º 60
-2
def test_branin(suggest=hp_gpsmbo.hpsuggest.suggest, seed=1, iters=10):
    import matplotlib.pyplot as plt
    plt.ion()
    mins = []
    all_ys = []
    for ii in range(int(seed), int(seed) + int(iters)):
        print('SEED', ii)
        space = branin()
        trials = hyperopt.Trials()
        hyperopt.fmin(
            fn=lambda x: x,
            space=space.expr,
            trials=trials,
            algo=partial(suggest, stop_at=0.398),
            rstate=np.random.RandomState(ii),
            max_evals=50)
        plt.subplot(2, 1, 1)
        plt.cla()
        ys = trials.losses()
        all_ys.append(ys)
        for ys_jj in all_ys:
            plt.plot(ys_jj)
        plt.plot(trials.losses())
        plt.subplot(2, 1, 2)
        plt.cla()
        for ys_jj in all_ys:
            plt.plot(ys_jj)
        plt.ylim(0, 1)
        plt.axhline(np.min(ys))
        plt.annotate('min=%f' % np.min(ys), xy=(1, np.min(ys)))
        plt.draw()
        mins.append(min(ys))
        print('MINS', mins)
    assert np.max(mins) < 0.398