def xgb_bayes_search(exp, param_keys=None, param_vals=None, num_proc=None):
    if num_proc is None:
        num_proc = 4
    if param_keys is None:
        param_keys = ['model_type', 'max_depth', 'min_child_weight', 'subsample',
                      'colsample_bytree', 'learning_rate', 'silent', 'objective',
                      'nthread', 'n_estimators', 'seed']
    if param_vals is None:
        param_space = {'model_type': XGBClassifier,
                       'max_depth': hp.quniform('max_depth', 6, 9, 1),
                       'min_child_weight': hp.quniform('min_child_weight', 3, 7, 1),
                       'subsample': hp.uniform('subsample', 0.5, 1.0),
                       'colsample_bytree': hp.uniform('colsample', 0.5, 1.0),
                       'learning_rate': hp.uniform('eta', 0.01, 0.02),
                       'silent': 1, 'objective': 'binary:logistic',
                       'nthread': num_proc, 'n_estimators': 400, 'seed': 9438}
    else:
        param_space = param_vals
    bs = param_search.BayesSearch(SklearnModel, exp, param_keys, param_space,
                                  cv_out='xgb-bayes-scores.pkl',
                                  cv_pred_out='xgb-bayes-preds.pkl')
    best_param, best_score = bs.search_by_cv()
    param_search.write_cv_res_csv(cv_out='xgb-bayes-scores.pkl',
                                  cv_csv_out='xgb-bayes-scores.csv')
    return best_param, best_score
def decision_tree(name,
                  criterion=None,
                  splitter=None,
                  max_features=None,
                  max_depth=None,
                  min_samples_split=None,
                  min_samples_leaf=None,
                  presort=False,
                  random_state=None):

    def _name(msg):
        return '%s.%s_%s' % (name, 'sgd', msg)

    rval = scope.sklearn_DecisionTreeClassifier(
        criterion=hp.choice(
            _name('criterion'),
            ['gini', 'entropy']) if criterion is None else criterion,
        splitter=hp.choice(
            _name('splitter'),
            ['best', 'random']) if splitter is None else splitter,
        max_features=hp.choice(
            _name('max_features'),
            ['sqrt', 'log2', None]) if max_features is None else max_features,
        max_depth=max_depth,
        min_samples_split=hp.quniform(
            _name('min_samples_split'),
            1, 10, 1) if min_samples_split is None else min_samples_split,
        min_samples_leaf=hp.quniform(
            _name('min_samples_leaf'),
            1, 5, 1) if min_samples_leaf is None else min_samples_leaf,
        presort=presort,
        random_state=_random_state(_name('rstate'), random_state),
    )
    return rval
def main():
    # Setup log
    dir_path = os.path.dirname(os.path.realpath(__file__))
    fmt = "%(levelname) -10s %(asctime)s %(module)s:%(lineno)s %(funcName)s %(message)s"
    handler = logging.FileHandler(os.path.join(dir_path, 'optimizer.log'), mode='w')
    handler.setFormatter(logging.Formatter(fmt))
    log.addHandler(handler)
    log.setLevel(logging.DEBUG)

    try:
        optimizer = Optimizer(GhoshModel, sys.argv[3], sys.argv[1], sys.argv[2])
    except Exception as e:
        log.error(e)

    space = {
        LEARN: hp.uniform(LEARN, 0.0000001, 0.0001),
        KERNEL: hp.quniform(KERNEL, 8, 3, 1),
        BATCH: hp.quniform(BATCH, 128, 4, 1)
    }
    log.info("Space:")
    log.info(space)

    best = fmin(optimizer.objective, space=space, algo=tpe.suggest, max_evals=100)
    print(best)
    log.info(str(best))
def search_space(fit_params):
    from hyperopt import hp
    import numpy as np
    space = {
        'seed': None,  # np.random.randint(1,1000000000),
        'mean_intolerance': hp.quniform('mean_intolerance', 0.7, 1.0, 0.1),
        'mean_susceptibility': hp.quniform('mean_susceptibility', 1, 5, 1),
        'mean_conformity': hp.quniform('mean_conformity', 0.1, 0.5, 0.1),
        'std_intolerance': 0.3,
        'std_susceptibility': 0.7,
        'std_conformity': 0.3,
        # 'std_intolerance': hp.quniform('std_intolerance', 0.1, 0.5, 0.1),
        # 'std_susceptibility': hp.quniform('std_susceptibility', 0.1, 0.5, 0.1),
        # 'std_conformity': hp.quniform('std_conformity', 0.1, 0.5, 0.1),
        'gridsize': fit_params['gridsize'],
        'popsize': fit_params['popsize'],
        't_sim': fit_params['t_sim'],
        't_measure': fit_params['t_measure'],
        'mean_init_opinion': fit_params['mean_init_opinion'],
        'std_init_opinion': fit_params['std_init_opinion'],
        'mean_social_reach': fit_params['mean_social_reach'],
        'std_social_reach': fit_params['std_social_reach'],
        'dataset': fit_params['dataset'],
        'optimization': fit_params['optimization'],
        'sim_threshold': fit_params['sim_threshold'],
        'loss_metric': fit_params['loss_metric'],
        'issue': fit_params['issue'],
        'averages': fit_params['averages'],
        'root': fit_params['root'],
    }
    return space
def nn_bayes_search(train_fname, test_fname, out_fname_prefix='nn-bayes'):
    exp = ExperimentL1(train_fname=train_fname, test_fname=test_fname)
    param_keys = ['in_size', 'hid_size', 'batch_size', 'in_dropout',
                  'hid_dropout', 'nonlinearity', 'updates', 'learning_rate',
                  # 'l1_reg', 'l2_reg',
                  'num_epochs']
    param_space = {'in_size': exp.train_x.shape[1],
                   'hid_size': hp.quniform('hid', 10, 300, 5),
                   'batch_size': hp.quniform('bsize', 200, 5000, 50),
                   'in_dropout': hp.uniform('in_drop', 0.0, 0.5),
                   'hid_dropout': hp.uniform('hid_drop', 0.0, 0.6),
                   'updates': hp.choice('updates', [nesterov_momentum, adam]),
                   'nonlinearity': hp.choice('nonlinear', [sigmoid, tanh, rectify]),
                   'learning_rate': hp.uniform('lr', 0.0001, 0.1),
                   # 'learning_rate': 0.01,
                   # 'l1_reg': hp.uniform('l1_reg', 0.0, 0.000001),
                   # 'l2_reg': hp.uniform('l2_reg', 0.0, 0.000001),
                   'num_epochs': hp.quniform('epochs', 200, 1000, 50),
                   }
    bs = param_search.BayesSearch(LasagneModel, exp,
                                  model_param_keys=param_keys,
                                  model_param_space=param_space,
                                  cv_out=out_fname_prefix + '-scores.pkl',
                                  cv_pred_out=out_fname_prefix + '-preds.pkl',
                                  refit_pred_out=out_fname_prefix + '-refit-preds.pkl',
                                  dump_round=1, use_lower=0, n_folds=5)
    bs.search_by_cv(max_evals=301)
    param_search.write_cv_res_csv(bs.cv_out, bs.cv_out.replace('.pkl', '.csv'))
def optimize(trials, X, y, y_ix, reps, max_evals):
    space = {
        'n_estimators': hp.quniform('n_estimators', 30, 150, 30),
        'criterion': hp.choice('criterion', ['gini', 'entropy']),
        'max_depth': hp.quniform('max_depth', 1, 5, 1),
        'min_samples_split': hp.quniform('min_samples_split', 1, 9, 2),
        'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 5, 1),
        'bootstrap': False,
        'oob_score': False,
        'n_jobs': -1
    }
    s = Score(X, y, y_ix, reps)
    best = fmin(s.get_score,
                space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=max_evals
                )
    best["n_estimators"] = int(best["n_estimators"])
    best["max_depth"] = int(best["max_depth"])
    best["min_samples_split"] = int(best["min_samples_split"])
    best["min_samples_leaf"] = int(best["min_samples_leaf"])
    best["criterion"] = ['gini', 'entropy'][best["criterion"]]
    del s
    return best
def get_cnn_model(model_num, search_space):
    space = cnn_space(search_space)
    hparams = {'model_' + model_num: 'CNN',
               'word_vectors_' + model_num: ('word2vec', True),
               'delta_' + model_num: True,
               'flex_' + model_num: (True, .15),
               'filters_' + model_num: hp.quniform('filters_' + model_num, *space['filters_'], 1),
               'kernel_size_' + model_num: hp.quniform('kernel_size_' + model_num, *space['kernel_size_'], 1),
               'kernel_increment_' + model_num: hp.quniform('kernel_increment_' + model_num, *space['kernel_increment_'], 1),
               'kernel_num_' + model_num: hp.quniform('kernel_num_' + model_num, *space['kernel_num_'], 1),
               'dropout_' + model_num: hp.uniform('dropout_' + model_num, *space['dropout_']),
               'batch_size_' + model_num: hp.quniform('batch_size_' + model_num, *space['batch_size_'], 1),
               'activation_fn_' + model_num: hp.choice('activation_fn_' + model_num, space['activation_fn_'])}

    if space['no_reg']:
        hparams['regularizer_cnn_' + model_num] = hp.choice('regularizer_cnn_' + model_num, [
            (None, 0.0),
            ('l2', hp.uniform('l2_strength_cnn_' + model_num, *space['l2_'])),
            ('l2_clip', hp.uniform('l2_clip_norm_' + model_num, *space['l2_clip_']))
        ])
    else:
        hparams['regularizer_cnn_' + model_num] = hp.choice('regularizer_cnn_' + model_num, [
            ('l2', hp.uniform('l2_strength_cnn_' + model_num, *space['l2_'])),
            ('l2_clip', hp.uniform('l2_clip_norm_' + model_num, *space['l2_clip_']))
        ])

    if space['search_lr']:
        hparams['learning_rate_' + model_num] = hp.lognormal('learning_rate_' + model_num, 0, 1) / 3000
    else:
        hparams['learning_rate_' + model_num] = .0003

    return hparams
def xgb_model_stacking(exp_l2, out_fname_prefix, use_lower=0):
    from xgboost.sklearn import XGBClassifier
    param_keys = ['model_type', 'max_depth', 'min_child_weight', 'subsample',
                  'colsample_bytree', 'learning_rate', 'silent', 'objective',
                  'nthread', 'n_estimators', 'seed']
    param_space = {'model_type': XGBClassifier,
                   'max_depth': hp.quniform('max_depth', 2, 9, 1),
                   'min_child_weight': hp.quniform('min_child_weight', 1, 7, 1),
                   'subsample': hp.uniform('subsample', 0.1, 1.0),
                   'colsample_bytree': hp.uniform('colsample', 0.3, 1.0),
                   'learning_rate': hp.uniform('eta', 0.01, 0.02),
                   'silent': 1, 'objective': 'binary:logistic',
                   'nthread': 3,
                   'n_estimators': hp.quniform('n', 100, 1000, 50),
                   'seed': hp.choice('seed', [1234, 53454, 6676, 12893])}
    # param_space = {'model_type': XGBClassifier, 'max_depth': hp.quniform('max_depth', 3, 9, 1),
    #                'min_child_weight': hp.quniform('min_child_weight', 3, 7, 1),
    #                'subsample': hp.uniform('subsample', 0.1, 1.0),
    #                'colsample_bytree': hp.uniform('colsample_bytree', 0.1, 0.6),
    #                'learning_rate': hp.uniform('eta', 0.01, 0.02),
    #                'silent': 1, 'objective': 'binary:logistic',
    #                'nthread': 4, 'n_estimators': 600, 'seed': hp.choice('seed', [1234,53454,6676,12893])}
    # l2 model output
    bs = param_search.BayesSearch(SklearnModel, exp_l2, param_keys, param_space,
                                  cv_out=out_fname_prefix + '-scores.pkl',
                                  cv_pred_out=out_fname_prefix + '-preds.pkl',
                                  refit_pred_out=out_fname_prefix + '-refit-preds.pkl',
                                  dump_round=10, use_lower=use_lower)
    best = bs.search_by_cv()
    param_search.write_cv_res_csv(bs.cv_out, bs.cv_out.replace('.pkl', '.csv'))
    return best
def init2(self):
    Files.mkdir("../model/others")
    self.fpath = "../model/others/hopt_keras.txt"
    if config.test:
        self.max_evals = 1
    else:
        self.max_evals = 50
    self.space = {
        "adadelta_eps": hp.loguniform("adadelta_eps", np.log(1e-07), np.log(1e-05)),
        "adadelta_lr": hp.loguniform("adadelta_lr", np.log(0.1), np.log(1.0)),
        "adadelta_rho_m": hp.loguniform("adadelta_rho_m", np.log(0.01), np.log(0.1)),
        "decay": hp.loguniform("decay", np.log(0.0001), np.log(0.1)),
        "dropout1": hp.quniform('dropout1', 0.1, 0.5, 0.1),
        "dropout2": hp.quniform('dropout2', 0.1, 0.5, 0.1),
        "h1": hp.quniform('h1', 50, 500, 10),  # 450.0,
        "h2": hp.quniform('h2', 20, 250, 5),  # 200.0,
        "nb_epochs": None,  # 22,
    }
    self.i_folds = [("B", 0)]
    self.output_items = []
    self.output_items += ["loss", "n", "rsme"]
    self.output_items += ["loss{}".format(i) for i, i_fold in enumerate(self.i_folds)]
    self.output_items += ["n{}".format(i) for i, i_fold in enumerate(self.i_folds)]
    self.output_items += ["rsme{}".format(i) for i, i_fold in enumerate(self.i_folds)]
def get_xgboost_params(name="xgboost_common"):
    return scope.get_xgb_model(
        n_estimators=scope.int(
            hp.quniform(
                get_full_name(name, "n_estimators"),
                1, 200, 1,
            ),
        ),
        max_depth=scope.int(
            hp.quniform(
                get_full_name(name, 'max_depth'),
                1, 13, 1,
            ),
        ),
        min_child_weight=scope.int(
            hp.quniform(
                get_full_name(name, 'min_child_weight'),
                1, 6, 1,
            ),
        ),
        subsample=scope.int(
            hp.uniform(
                get_full_name(name, 'subsample'),
                0.5, 1,
            ),
        ),
        gamma=hp.uniform(
            get_full_name(name, 'gamma'),
            0.5, 1,
        ),
        nthread=1,
        seed=RANDOM_STATE,
    )
def run_all_dl(csvfile=saving_fp,
               space=[hp.quniform('h1', 100, 550, 1),
                      hp.quniform('h2', 100, 550, 1),
                      hp.quniform('h3', 100, 550, 1),
                      # hp.choice('activation', ["RectifierWithDropout", "TanhWithDropout"]),
                      hp.uniform('hdr1', 0.001, 0.3),
                      hp.uniform('hdr2', 0.001, 0.3),
                      hp.uniform('hdr3', 0.001, 0.3),
                      hp.uniform('rho', 0.9, 0.999),
                      hp.uniform('epsilon', 1e-10, 1e-4)]):
    # maxout works well with dropout (Goodfellow et al 2013), and rectifier
    # has worked well with image recognition (LeCun et al 1998)
    start_save(csvfile=csvfile)
    trials = Trials()
    print "Deep learning..."
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                max_evals=evals,
                trials=trials)
    print best
    print trials.losses()
    with open('output/dlbest.pkl', 'w') as output:
        pickle.dump(best, output, -1)
    with open('output/dltrials.pkl', 'w') as output:
        pickle.dump(trials, output, -1)
def optimize(trials, X, y, max_evals):
    space = {
        'n_estimators': hp.quniform('n_estimators', 100, 500, 50),
        'criterion': hp.choice('criterion', ['gini', 'entropy']),
        'max_depth': hp.quniform('max_depth', 1, 7, 1),
        'min_samples_split': hp.quniform('min_samples_split', 1, 9, 2),
        'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 5, 1),
        'bootstrap': True,
        'oob_score': True,
        'n_jobs': -1
    }
    s = Score(X, y)
    best = fmin(s.get_score,
                space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=max_evals
                )
    best['n_estimators'] = int(best['n_estimators'])
    best['max_depth'] = int(best['max_depth'])
    best['min_samples_split'] = int(best['min_samples_split'])
    best['min_samples_leaf'] = int(best['min_samples_leaf'])
    best['criterion'] = ['gini', 'entropy'][best['criterion']]
    best['bootstrap'] = True
    best['oob_score'] = True
    best['n_jobs'] = -1
    del s
    return best
def param_space_reg_skl_etr():
    return {
        'task': 'reg_skl_etr',
        'n_estimators': hp.quniform("n_estimators", min_num_round, max_num_round, num_round_step),
        'max_features': hp.quniform("max_features", 0.05, 1.0, 0.05),
        'n_jobs': nb_job,
        'random_state': rdmseed,
        "max_evals": max_evals,
    }
def optimize(trials):
    space = (
        hp.quniform('n_estimators', 100, 200, 100),
        hp.quniform('learning_rate', 0.1, 0.2, 0.1),
        hp.quniform('max_depth', 5, 6, 1),
        hp.quniform('subsample', 0.5, 1, 0.5)
    )
    best = fmin(run_est_func, space, algo=tpe.suggest, trials=trials, max_evals=10)
    print best
def optimize(X_train, Y_train, X_test, Y_test):
    space = {
        'min_child_weight': hp.choice('min_child_weight', np.arange(1, 10, 1, dtype=np.int64)),
        'subsample': hp.quniform('subsample', 0.6815, 0.6816, 1e-5),
        'gamma': hp.quniform('gamma', 0.5, 1.0, 0.01),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.701, 0.702, 1e-4),
    }
    best = fmin(lambda params: score(X_train, Y_train, X_test, Y_test, params),
                space, algo=tpe.suggest, trials=Trials(), max_evals=1000)
    print best
def random_forest(name,
                  n_estimators=None,
                  criterion=None,
                  max_features=None,
                  max_depth=None,
                  min_samples_split=None,
                  min_samples_leaf=None,
                  bootstrap=None,
                  oob_score=None,
                  n_jobs=1,
                  random_state=None,
                  verbose=False):

    def _name(msg):
        return '%s.%s_%s' % (name, 'random_forest', msg)

    """
    Out of bag estimation only available if bootstrap=True
    """
    bootstrap_oob = hp.choice(_name('bootstrap_oob'),
                              [(True, True),
                               (True, False),
                               (False, False)])

    rval = scope.sklearn_RandomForestClassifier(
        n_estimators=scope.int(hp.quniform(
            _name('n_estimators'),
            1, 50, 1)) if n_estimators is None else n_estimators,
        criterion=hp.choice(
            _name('criterion'),
            ['gini', 'entropy']) if criterion is None else criterion,
        max_features=hp.choice(
            _name('max_features'),
            ['sqrt', 'log2', None]) if max_features is None else max_features,
        max_depth=max_depth,
        min_samples_split=hp.quniform(
            _name('min_samples_split'),
            1, 10, 1) if min_samples_split is None else min_samples_split,
        min_samples_leaf=hp.quniform(
            _name('min_samples_leaf'),
            1, 5, 1) if min_samples_leaf is None else min_samples_leaf,
        bootstrap=bootstrap_oob[0] if bootstrap is None else bootstrap,
        oob_score=bootstrap_oob[1] if oob_score is None else oob_score,
        # bootstrap=hp.choice(
        #     _name('bootstrap'),
        #     [True, False]) if bootstrap is None else bootstrap,
        # oob_score=hp.choice(
        #     _name('oob_score'),
        #     [True, False]) if oob_score is None else oob_score,
        n_jobs=n_jobs,
        random_state=_random_state(_name('rstate'), random_state),
        verbose=verbose,
    )
    return rval
def param_space_reg_skl_gbm():
    return {
        'task': 'reg_skl_gbm',
        'n_estimators': hp.quniform("n_estimators", min_num_round, max_num_round, num_round_step),
        'learning_rate': hp.quniform("learning_rate", 0.01, 0.5, 0.01),
        'max_features': hp.quniform("max_features", 0.05, 1.0, 0.05),
        'max_depth': hp.quniform('max_depth', 1, 15, 1),
        'subsample': hp.quniform('subsample', 0.5, 1, 0.1),
        'random_state': rdmseed,
        "max_evals": max_evals,
    }
def optimize(trials, X, y, max_evals):
    space = {
        'num_boost_round': hp.quniform('num_boost_round', 10, 150, 10),
        'eta': hp.quniform('eta', 0.1, 0.3, 0.1),
        'gamma': hp.quniform('gamma', 0, 1, 0.2),
        'max_depth': hp.quniform('max_depth', 1, 8, 1),
        'min_child_weight': hp.quniform('min_child_weight', 1, 5, 1),
        'subsample': hp.quniform('subsample', 0.7, 1, 0.1),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1, 0.1),
        'colsample_bylevel': hp.quniform('colsample_bylevel', 0.5, 1, 0.1),
        'silent': 1,
        'objective': 'binary:logistic'
    }
    s = Score(X, y)
    best = fmin(s.get_score,
                space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=max_evals
                )
    best['max_depth'] = int(best['max_depth'])
    best['min_child_weight'] = int(best['min_child_weight'])
    best['num_boost_round'] = int(best['num_boost_round'])
    del s
    return best
def __init__(self):
    min_nb_layers = 3
    max_nb_layers = 4
    corruption = lambda name: hp.uniform(name, 0, 1)
    learning_rate = lambda name: hp.uniform(name, 0.5, 1)
    nb_neurons = lambda name: hp.quniform(name, 100, 800, 2)
    nb_epochs = lambda name: hp.quniform(name, 20, 50, 2)
    templates = {
        "corruption": corruption,
        "learning_rate": learning_rate,
        "nb_neurons": nb_neurons,
        "nb_epochs": nb_epochs
    }
    ILC_HP_Params.__init__(self, templates,
                           min_nb_layers=min_nb_layers,
                           max_nb_layers=max_nb_layers)
def param_space_rank_xgb_linear():
    return {
        'task': 'ranking',
        'booster': 'gblinear',
        'objective': 'rank:pairwise',
        'eta': hp.quniform('eta', 0.01, 1, 0.01),
        'lambda': hp.quniform('lambda', 0, 5, 0.05),
        'alpha': hp.quniform('alpha', 0, 0.5, 0.005),
        'lambda_bias': hp.quniform('lambda_bias', 0, 3, 0.1),
        'num_round': hp.quniform('num_round', min_num_round, max_num_round, num_round_step),
        'nthread': nb_job,
        'silent': 1,
        'seed': rdmseed,
        "max_evals": max_evals,
    }
def optimize(trials):
    space = {
        'n_estimators': hp.quniform('n_estimators', 100, 1000, 100),
        'learning_rate': hp.quniform('learning_rate', 0.05, 0.5, 0.05),
        # 'max_depth': hp.quniform('max_depth', 1, 13, 2),
        'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1),
        'subsample': hp.quniform('subsample', 0.5, 1, 0.05),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1, 0.05),
    }
    best = fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=250)
    print best
def optimize(trials):
    space = {
        'eta': hp.uniform('eta', 0.05, 0.3),
        'max_depth': hp.quniform('max_depth', 1, 8, 1),
        'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1),
        'subsample': hp.uniform('subsample', 0.5, 1),
        'gamma': hp.uniform('gamma', 0.5, 1),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    }
    best = fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=500)
    print '-------------------------------'
    print 'best parameters are: '
    print best
    return best
def get_xgboost_model(model_num):
    return {
        'model_' + model_num: 'XGBoost',
        'eta_' + model_num: hp.loguniform('eta_' + model_num, -5, 0),
        'gamma_' + model_num: hp.uniform('gamma_' + model_num, 0, 10),
        'max_depth_' + model_num: hp.quniform('max_depth_' + model_num, 1, 30, 1),
        'min_child_weight_' + model_num: hp.uniform('min_child_weight_' + model_num, 0, 10),
        'max_delta_step_' + model_num: hp.uniform('max_delta_step_' + model_num, 0, 10),
        'num_round_' + model_num: hp.quniform('num_round_' + model_num, 1, 10, 1),
        'subsample_' + model_num: 1,
        'regularizer_xgb_' + model_num: hp.choice('regularizer_xgb_' + model_num, [
            ('l1', hp.loguniform('l1_strength_xgb_' + model_num, -5, 5)),
            ('l2', hp.loguniform('l2_strength_xgb_' + model_num, -5, 5))
        ])
    }
def main():
    from hyperopt import hp, fmin, tpe, Trials, rand

    space = [
        hp.quniform("alt_p", -20, 20, 1),
        hp.quniform("alt_i", -20, 20, 1),
        hp.quniform("alt_d", -20, 20, 1),
    ]

    trials = Trials()
    # print "dir(trials)", dir(trials)
    # print trials.trials

    # best = fmin(objective, space, algo=rand.suggest, max_evals=100, trials=trials, rseed=1010101010)
    best = fmin(objective, space, algo=tpe.suggest, max_evals=500, trials=trials, rseed=1010101010)

    # print trials.view()
    # print "spec", trials.specs
    print "best", best
def test_preproc(self):
    """
    As a domain expert, I have a particular pre-processing that I believe
    reveals important patterns in my data. I would like to know how good a
    classifier can be built on top of my preprocessing algorithm.
    """
    # -- for testing purpose, suppose that the RBM is our "domain-specific
    #    pre-processing"
    algo = SklearnClassifier(
        partial(
            hyperopt_estimator,
            preprocessing=hp.choice('pp', [
                # -- VQ (alone)
                [
                    hpc.colkmeans('vq0', n_init=1),
                ],
                # -- VQ -> RBM
                [
                    hpc.colkmeans('vq1',
                                  n_clusters=scope.int(
                                      hp.quniform(
                                          'vq1.n_clusters', 1, 5, q=1)),
                                  n_init=1),
                    hpc.rbm(name='rbm:alone', verbose=0)
                ],
                # -- VQ -> RBM -> PCA
                [
                    hpc.colkmeans('vq2',
                                  n_clusters=scope.int(
                                      hp.quniform(
                                          'vq2.n_clusters', 1, 5, q=1)),
                                  n_init=1),
                    hpc.rbm(name='rbm:pre-pca', verbose=0),
                    hpc.pca('pca')
                ],
            ]),
            classifier=hpc.any_classifier('classif'),
            algo=tpe.suggest,
            max_evals=10,
        ))
    mean_test_error = self.view.protocol(algo)
    print('mean test error:', mean_test_error)
def test_sparse_random_projection(self):
    # restrict n_components to be less than or equal to data dimension
    # to prevent sklearn warnings from printing during tests
    n_components = scope.int(hp.quniform(
        'preprocessing.n_components', low=1, high=8, q=1
    ))
    model = hyperopt_estimator(
        classifier=components.gaussian_nb('classifier'),
        preprocessing=[
            components.sparse_random_projection(
                'preprocessing',
                n_components=n_components,
            )
        ],
        algo=rand.suggest,
        trial_timeout=5.0,
        max_evals=5,
    )

    X_train = np.random.randn(1000, 8)
    Y_train = (self.X_train[:, 0] > 0).astype('int')
    X_test = np.random.randn(1000, 8)
    Y_test = (self.X_test[:, 0] > 0).astype('int')

    model.fit(X_train, Y_train)
    model.score(X_test, Y_test)
def set_finetune_space(self, config_file):
    ''' Given the original deep net architecture, and a set of pretrained weights
    and biases, define the configuration space to search for finetuning parameters '''

    # we know these fields won't change, so go ahead and set them as
    # defaults now
    model_params = nt.get_model_params(config_file)
    optim_params = nt.get_optim_params(config_file)
    default_finetune_model_params = {k: model_params[k] for k in ('num_hids', 'activs', 'd', 'k')}
    default_finetune_model_params['loss_terms'] = ['cross_entropy']
    default_finetune_optim_params = {k: optim_params[k] for k in ('optim_method', 'optim_type')}

    # define the space of hyperparameters we wish to search
    search_finetune_model_params = {'l1_reg': hp.choice('l1_reg', [None, hp.loguniform('l1_decay', log(1e-5), log(10))]),
                                    'l2_reg': hp.choice('l2_reg', [None, hp.loguniform('l2_decay', log(1e-5), log(10))])}
    search_finetune_optim_params = {'learn_rate': hp.uniform('learn_rate', 0, 1),
                                    'rho': hp.uniform('rho', 0, 1),
                                    'num_epochs': hp.qloguniform('num_epochs', log(10), log(5e3), 1),
                                    'batch_size': hp.quniform('batch_size', 128, 1024, 1),
                                    'init_method': hp.choice('init_method', ['gauss', 'fan-io']),
                                    'scale_factor': hp.uniform('scale_factor', 0, 1)}

    # combine the default and search parameters into a dictionary to define the
    # full space - this is what will be passed into the objective function
    finetune_model_params = self.merge_default_search(
        default_finetune_model_params, search_finetune_model_params)
    finetune_optim_params = self.merge_default_search(
        default_finetune_optim_params, search_finetune_optim_params)

    finetune_hyperspace = {
        'finetune_model_params': finetune_model_params,
        'finetune_optim_params': finetune_optim_params}

    return finetune_hyperspace
def helper_neighbors():
    return hp.choice('neighbor_type', [
        {'ktype': 'kneighbors', 'n_neighbors': hp.quniform('num', 3, 19, 1)},
        {'ktype': 'radiusneighbors', 'radius': hp.uniform('rad', 0, 2), 'out_label': 1}
    ])
def run(self):
    print "ensemble iter %d, model (%d, %s)" % (self.ensemble_iter, self.model, self.model_library[self.model])
    # jump for the first max model
    # if self.ensemble_iter == 1 and self.model == self.sorted_model[0]:
    #     return
    obj = lambda param: ensemble_selection_obj(param, self.model_pred_tmp,
                                               self.model_valid_pred[self.model],
                                               self.valid_labels, self.num_valid_matrix)
    param_space = {
        'weight': hp.quniform('weight', 0, 1, 0.01),
    }
    trials = Trials()
    # trials = MongoTrials('mongo://172.16.13.7:27017/ensemble/jobs', exp_key='exp%d_%d' % (ensemble_iter, model))
    best_param = fmin(obj,
                      space=param_space,
                      algo=tpe.suggest,
                      max_evals=config.ensemble_max_evals,
                      trials=trials)
    best_w = best_param['weight']

    gini_cv_tmp = np.zeros((config.kiter, config.kfold), dtype=float)
    for iter in range(config.kiter):
        for fold in range(config.kfold):
            p1 = self.model_pred_tmp[iter, fold, :self.num_valid_matrix[iter, fold]]
            p2 = self.model_valid_pred[self.model, iter, fold, :self.num_valid_matrix[iter, fold]]
            y_true = self.valid_labels[iter, fold, :self.num_valid_matrix[iter, fold]]
            y_pred = ensemble_algorithm(p1, p2, best_w)
            score = ml_score(y_true, y_pred)
            gini_cv_tmp[iter, fold] = score
    print "Iter %d, ml_score %f, Model %s, Weight %f" % (self.ensemble_iter, np.mean(gini_cv_tmp), self.model_library[self.model], best_w)
    if (np.mean(gini_cv_tmp) - self.best_gini[0]) >= 0.000001:
        self.best_gini[0], self.best_model[0], self.best_weight[0] = np.mean(gini_cv_tmp), self.model, best_w
def ts_lagselector(name, lower_lags=1, upper_lags=1):
    rval = scope.ts_LagSelector(
        lag_size=scope.int(
            hp.quniform(name + '.lags', lower_lags - .5, upper_lags + .5, 1))
    )
    return rval
p_test = model.predict(d_test)

sub = pd.DataFrame()
sub['test_id'] = df_test['test_id']
sub['is_duplicate'] = p_test
# sub.to_csv('../results/' + args.save + '.csv')
sub.to_csv('../results/' + 'leaky' + '.csv')

##### Tune
space = {
    "booster": "gbtree",
    "objective": 'binary:logistic',
    "scale_pos_weight": pos_wt[0],
    # "n_estimators": hp.quniform("n_estimators", 500, 10000, 100),
    "learning_rate": 0.1,  # hp.qloguniform("learning_rate", 0.01, 0.1, 0.01),
    "gamma": hp.quniform("gamma", 0.0, 3.0, 0.1),
    "alpha": hp.quniform("reg_alpha", 0.0, 3.0, 0.1),
    "lambda": hp.quniform("reg_lambda", 0.0, 3.0, 0.1),
    "min_child_weight": hp.quniform("min_child_weight", 0., 30, 0.2),
    "max_depth": hp.choice("max_depth", np.arange(3, 20, dtype=int)),
    "subsample": hp.quniform("subsample", 0.3, 1.0, 0.05),
    "colsample_bytree": hp.quniform("colsample_bytree", 0.3, 1.0, 0.05),
    "colsample_bylevel": hp.quniform("colsample_bylevel", 0.1, 1, 0.05),
    "nthread": 2,
    "seed": 12345,  # hp.quniform("n_estimators", [1,2017,12345,1695,23,54]),
    "eval_metric": "logloss"
}


def objective(space):
    plst = list(space.items())
    watchlist = [(xgtrain, 'train'), (xgval, 'val')]
def main(output_path):
    # Set up Hyperopt
    def target_transform(y, mu=200):
        return np.log(y + mu)

    def target_inverse_transform(y_tr, mu=200):
        return np.exp(y_tr) - mu

    start_time = datetime.now()

    # Read and preprocess data
    df = pd.read_csv('/home/ledovsky/allstate/run_res/feat_train_2.csv')
    X = df.drop(['loss', 'id'], 1)
    y = df.loss

    # X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=2016)

    n_folds = 4
    cv = KFold(n_splits=n_folds, shuffle=True, random_state=2016)

    def evaluate_lightgbm(params):
        print 'new iteration ', datetime.now().strftime('%H:%M')

        model = GBMRegressor(
            num_threads=8,
            num_iterations=5000,
            verbose=False,
            early_stopping_round=25,
            bagging_seed=2016,
            metric='l1',
            learning_rate=0.1,
            max_depth=12,
            num_leaves=int(params['num_leaves']),
            # num_leaves=127,
            # feature_fraction=params['feature_fraction'],
            # bagging_fraction=params['bagging_fraction'],
            feature_fraction=0.7,
            bagging_fraction=0.7,
            min_data_in_leaf=int(params['min_data_in_leaf']),
            max_bin=int(params['max_bin']),
            # lambda_l1=params['lambda_l1'],
            # lambda_l2=params['lambda_l2']
        )

        for val, train in cv.split(X):
            X_train = X.iloc[train].values
            y_train = y.iloc[train].values
            X_val = X.iloc[val].values
            y_val = y.iloc[val].values

            model.fit(X_train, target_transform(y_train),
                      test_data=[(X_val, target_transform(y_val))])

            best_iter = model.best_round
            y_pred = target_inverse_transform(model.predict(X_val))
            y_pred_train = target_inverse_transform(model.predict(X_train))
            mae = mean_absolute_error(y_val, y_pred)
            mae_train = mean_absolute_error(y_train, y_pred_train)
            break

        # best_iter /= float(n_folds)
        # mae /= n_folds
        # mae_train /= n_folds

        run_time = datetime.now() - start_time

        return {
            'loss': mae,
            'mae_train': mae_train,
            'status': STATUS_OK,
            'best_round': best_iter
        }

    space = {
        # 'max_depth': hp.quniform('max_depth', 13, 13, 1),
        'num_leaves': hp.quniform('num_leaves', 101, 501, 50),
        'max_bin': hp.quniform('max_bin', 63, 511, 64),
        # 'bagging_fraction': hp.quniform('bagging_fraction', 0.6, 0.8, 0.1),
        # 'feature_fraction': hp.quniform('feature_fraction', 0.6, 0.8, 0.1),
        'min_data_in_leaf': hp.quniform('min_data_in_leaf', 1, 501, 50),
        # 'lambda_l1': hp.loguniform('lambda_l1', -5, 0),
        # 'lambda_l2': hp.loguniform('lambda_l2', -5, 0),
    }

    trials = Trials()
    # trials = MongoTrials('mongo://localhost:27017/allstate/jobs', exp_key='lightgbm_2')

    # Run optimization
    fmin(fn=evaluate_lightgbm, space=space, algo=tpe.suggest, max_evals=30, trials=trials)

    # Print output
    result = dumps(trials.trials)
    with open(output_path, 'w') as f:
        f.write(result)
"wb") as fout: pickle.dump(test_y, fout) cv_iter += 1 del train # set up search space if cls_method == "rf": space = {'max_features': hp.uniform('max_features', 0, 1)} elif cls_method == "xgboost": space = { 'learning_rate': hp.uniform("learning_rate", 0, 1), 'subsample': hp.uniform("subsample", 0.5, 1), 'max_depth': scope.int(hp.quniform('max_depth', 4, 30, 1)), 'colsample_bytree': hp.uniform("colsample_bytree", 0.5, 1), 'min_child_weight': scope.int(hp.quniform('min_child_weight', 1, 6, 1)) } elif cls_method == "logit": space = {'C': hp.uniform('C', -15, 15)} elif cls_method == "svm": space = { 'C': hp.uniform('C', -15, 15), 'gamma': hp.uniform('gamma', -15, 15) } # optimize parameters
def run(column_set='raw', train_delta=True, scale=True, evals=40):
    print(sklearn.__version__)
    train = pd.read_pickle('train_rolling.pkl')
    test = pd.read_pickle('test_rolling.pkl')

    # X, y train
    def Xandy(df):
        if column_set == 'raw':
            X = df.drop(['next point norm close', 'norm close integral'], axis=1)
        elif column_set == 'pid':
            # last price, integral price, last relative volume
            X = df[['norm close 143', 'norm close integral', 'norm volume 143']]
            # first derivative
            X['norm close 143 d'] = df['norm close 143'] - df['norm close 142']
            # second derivative
            X['norm close 143 dd'] = ((df['norm close 143'] - df['norm close 142']) -
                                      (df['norm close 142'] - df['norm close 141']))
            # first derivative relative volume
            X['norm volume 143 d'] = df['norm volume 143'] - df['norm volume 142']
            # second derivative relative volume
            X['norm volume 143 dd'] = ((df['norm volume 143'] - df['norm volume 142']) -
                                       (df['norm volume 142'] - df['norm volume 141']))
        else:
            raise ValueError()
        return (X, df['next point norm close'])

    X, y = Xandy(train)
    print(len(X), len(X.columns))
    if train_delta:
        y = y - train['norm close 143']

    X_test, y_test = Xandy(test)
    print(len(X_test), len(X_test.columns))
    y_test = y_test - test['norm close 143'].values

    # fit scaler
    if scale:
        scaler = StandardScaler().fit(X)
        X = scaler.transform(X)
        X_test = scaler.transform(X_test)

    # fmin search
    space = {
        'alpha': hp.uniform('alpha', 0.1, 1e-6),
        'first_layer_size': hp.quniform('first_layer_size', 5, 100, 5),
        'layer_decay': hp.uniform('layer_decay', 0.15, 0.5),
    }

    def make_layers(init, rate):
        layers = []
        layer_size = init
        while layer_size > 1:
            layers.append(layer_size)
            layer_size = int(layer_size - init * rate)
        return tuple(layers)

    def f(x):
        layers = make_layers(int(x['first_layer_size']), x['layer_decay'])
        start_time = timeit.default_timer()
        mlpr = MLPRegressor(hidden_layer_sizes=layers,
                            activation="relu",
                            solver='adam',
                            alpha=x['alpha'],
                            learning_rate="constant",
                            max_iter=200,
                            random_state=123).fit(X, y)
        score = mlpr.score(X_test, y_test)
        print(layers)
        print('score: %f; time: %f' % (score, timeit.default_timer() - start_time))
        return -score

    trials = Trials()
    best = fmin(fn=f, space=space, trials=trials, algo=tpe.suggest, max_evals=evals)
    print(best)

    layers = make_layers(int(best['first_layer_size']), best['layer_decay'])
    print(layers)
    mlpr = MLPRegressor(hidden_layer_sizes=layers,
                        activation="relu",
                        solver='adam',
                        alpha=best['alpha'],
                        learning_rate="constant",
                        max_iter=200,
                        random_state=123).fit(X, y)
    y_mlpr = mlpr.predict(X_test)

    x_plt = range(len(X_test))
    plt.figure(figsize=(20, 10))
    plt.scatter(x_plt, y_test, label='data', s=4, color='k')
    plt.plot(x_plt, y_mlpr, label='MLPR %f' % r2_score(y_test, y_mlpr))
    plt.legend()
    plt.show()

    f, ax = plt.subplots(1)
    xs = [t['misc']['vals']['alpha'] for t in trials.trials]
    ys = [t['result']['loss'] for t in trials.trials]
    ax.scatter(xs, ys, s=20, linewidth=0.01, alpha=0.75)
    ax.set_title('$R^2$ $vs$ $alpha$ ', fontsize=18)
    ax.set_xlabel('$alpha$', fontsize=16)
    ax.set_ylabel('$R^2$', fontsize=16)
from rlpy.Domains import HIVTreatment
from rlpy.Agents import SARSA, Q_LEARNING
from rlpy.Representations import *
from rlpy.Policies import eGreedy
from rlpy.Experiments import Experiment
import numpy as np
from hyperopt import hp

param_space = {
    'resolution': hp.quniform("resolution", 3, 30, 1),
    'lambda_': hp.uniform("lambda_", 0., 1.),
    'boyan_N0': hp.loguniform("boyan_N0", np.log(1e1), np.log(1e5)),
    'initial_learn_rate': hp.loguniform("initial_learn_rate", np.log(5e-2), np.log(1))
}


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136,
                    lambda_=0.0985,
                    initial_learn_rate=0.090564,
                    resolution=13.,
                    num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
}

df = data()
df = df[df["ad"] == outlier]
agg = df.groupby("ad").sum().iloc[outlier]["av"]

# define a search space
a1_min = min(list(df["a1"].unique()))
a1_max = max(list(df["a1"].unique()))
a2_min = min(list(df["a2"].unique()))
a2_max = max(list(df["a2"].unique()))
q = 1

space = hp.choice(
    "parameters",
    [
        {
            "a1_lower": hp.quniform("a1_lower", a1_min, a1_max, q),
            "a1_upper": hp.quniform("a1_upper", a1_min, a1_max, q),
            "a2_lower": hp.quniform("a2_lower", a2_min, a2_max, q),
            "a2_upper": hp.quniform("a2_upper", a2_min, a2_max, q),
        },
    ],
)

trials = Trials()

# minimize the objective over the space
best = fmin(objective, space, algo=tpe.suggest, max_evals=1000, trials=trials)

print(space_eval(space, best))
#%%
# Test if it really works
scores = testClassifier(data, bst['learner'], limit, feats, 5)
print("{:.5f} accuracy for {}, {}, {}".format(np.mean(scores), limit, feats, bst['learner']))

#%% Test lightgbm cv
train_set = lgb.Dataset(data[0], data[1], data[2])

model = lgb.LGBMClassifier()

# Discrete uniform distribution
num_leaves = {'num_leaves': hp.quniform('num_leaves', 30, 150, 1)}

# Learning rate log uniform distribution
learning_rate = {
    'learning_rate': hp.loguniform('learning_rate', np.log(0.005), np.log(0.2))
}

# Define the search space
space = {
    'class_weight': hp.choice('class_weight', [None, 'balanced']),
    'boosting_type': hp.choice('boosting_type', [{
        'boosting_type': 'gbdt',
        'subsample': hp.uniform('gdbt_subsample', 0.5, 1)
from copy import deepcopy
import json
from typing import Dict, Union
import os

from hyperopt import fmin, hp, tpe
import numpy as np

from chemprop.args import HyperoptArgs
from chemprop.models import MoleculeModel
from chemprop.nn_utils import param_count
from chemprop.train import cross_validate
from chemprop.utils import create_logger, makedirs


SPACE = {
    'hidden_size': hp.quniform('hidden_size', low=300, high=2400, q=100),
    'depth': hp.quniform('depth', low=2, high=6, q=1),
    'dropout': hp.quniform('dropout', low=0.0, high=0.4, q=0.05),
    'ffn_num_layers': hp.quniform('ffn_num_layers', low=1, high=3, q=1)
}
INT_KEYS = ['hidden_size', 'depth', 'ffn_num_layers']


def grid_search(args: HyperoptArgs):
    # Create loggers
    logger = create_logger(name='hyperparameter_optimization', save_dir=args.log_dir, quiet=True)
    train_logger = create_logger(name='train', save_dir=args.save_dir, quiet=args.quiet)
# In[29]:

# 0.7454817543455738
# 0.7455201258585626


# In[30]:

print(classification_report(y_dev, pred_dev))


# # Bayesian Optimization

# In[36]:

param_hyperopt = {
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(1)),
    'max_depth': scope.int(hp.quniform('max_depth', 5, 15, 1)),
    'n_estimators': scope.int(hp.quniform('n_estimators', 1000, 15000, 1000)),
    'num_leaves': scope.int(hp.quniform('num_leaves', 5, 50, 1)),
    'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart']),
    'colsample_bytree': hp.uniform('colsample_by_tree', 0.6, 1.0),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
}


# In[38]:

def hyperopt(param_space, X_train, y_train, X_test, y_test, num_eval):
    start = time.time()

    def objective_function(params):
from sklearn.metrics import roc_auc_score as AUC, log_loss, accuracy_score as accuracy

from hyperopt import hp
from hyperopt.pyll.stochastic import sample

from load_data import x_train, y_train, x_test, y_test

# trees_per_iteration = 5

space = {
    'criterion': hp.choice('c', ('gini', 'entropy')),
    'bootstrap': hp.choice('b', (True, False)),
    'class_weight': hp.choice('cw', ('balanced', 'balanced_subsample', None)),
    'max_depth': hp.quniform('md', 2, 10, 1),
    'max_features': hp.choice('mf', ('sqrt', 'log2', None)),
    'min_samples_split': hp.quniform('msp', 2, 20, 1),
    'min_samples_leaf': hp.quniform('msl', 1, 10, 1),
}

#

def get_params():
    params = sample(space)
    new_params = {}
    for k, v in params.items():
        if type(v) == float and int(v) == v:
            new_params[k] = int(v)
task = args.task

config_filename = 'lgb_{}.config'.format(task)

# The model's main tuning parameter is the length of the character N-grams (shingles)
shingle_len = args.shingle_len

# Set up logging to a file
ruchatbot.utils.logging_helpers.init_trainer_logging(
    os.path.join(tmp_folder, 'lgb_{}.log'.format(task)))

if run_mode == 'hyperopt':
    ho_samples, ho_shingle2id = load_samples(input_path, lemmatize, 300000)

    space = {
        'num_leaves': hp.quniform('num_leaves', 20, 100, 1),
        # 'shingle_len': hp.quniform('shingle_len', 3, 3, 1),
        'min_data_in_leaf': hp.quniform('min_data_in_leaf', 5, 100, 1),
        # 'feature_fraction': hp.uniform('feature_fraction', 1.0, 1.0),
        # 'bagging_fraction': hp.uniform('bagging_fraction', 1.0, 1.0),
        'learning_rate': hp.loguniform('learning_rate', -2, -1.2),
        # 'min_sum_hessian_in_leaf': hp.loguniform('min_sum_hessian_in_leaf', 0, 2.3),
    }

    hyperopt_log_writer = open(
        os.path.join(tmp_folder, 'lgb_{}.hyperopt.txt'.format(task)), 'w')

    trials = Trials()

    best = hyperopt.fmin(fn=objective,
                         space=space,
                         algo=HYPEROPT_ALGO,
        model.fit(xtrain, ytrain)
        pred = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest, pred)
        accuracies.append(fold_acc)

    return -1.0 * np.mean(accuracies)


# read the dataset
if __name__ == '__main__':
    df = pd.read_csv('input/train.csv')
    X = df.drop('price_range', axis=1).values
    y = df.price_range.values

    param_space = {
        'max_depth': scope.int(hp.quniform('max_depth', 3, 15, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 100, 600, 1)),
        'criterion': hp.choice('criterion', ['gini', 'entropy']),
        'max_features': hp.uniform('max_features', 0.01, 1)
    }

    optimization_function = partial(optimize, x=X, y=y)

    trials = Trials()

    result = fmin(fn=optimization_function,
                  space=param_space,
                  max_evals=15,
                  algo=tpe.suggest,
                  trials=trials)
""" Cart-pole balancing with iFDD+ """ from rlpy.Domains import InfCartPoleBalance from rlpy.Agents import Greedy_GQ, SARSA, Q_Learning from rlpy.Representations import * from rlpy.Policies import eGreedy from rlpy.Experiments import Experiment import numpy as np from hyperopt import hp param_space = { 'discretization': hp.quniform("discretization", 5, 40, 1), 'discover_threshold': hp.loguniform("discover_threshold", np.log(1e-2), np.log(1e1)), #'lambda_': hp.uniform("lambda_", 0., 1.), 'boyan_N0': hp.loguniform("boyan_N0", np.log(1e1), np.log(1e5)), 'initial_learn_rate': hp.loguniform("initial_learn_rate", np.log(1e-3), np.log(1)) } def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=0.013461679, lambda_=0., boyan_N0=484.78006, initial_learn_rate=0.5651405,
    model.fit(x=x_train, y=y_train, batch_size=params['batchSize'], epochs=1)
    # Turns out that according to the 6 tests I ran, 1 epoch is enough to see which
    # network is the most accurate, testing wasn't extensive though and there was
    # definitely a great deal of variation
    score = model.evaluate(x_test, y_test, batch_size=128)
    print("Accuracy on Testing Data:", str(score[1] * 100) + "%")
    print("Hyperparameters: " + str(params))
    sess.close()
    return {'loss': score[0], 'status': STATUS_OK}


batchSizes = [32, 64, 128]

parameters = {
    'secondDropout': hp.uniform("secondDropout", 0, .5),
    'firstDropout': hp.uniform("firstDropout", 0, .5),
    'layer3Filters': hp.quniform("layer3Filters", 16, 128, 1),
    'layer2Filters': hp.quniform("layer2Filters", 16, 128, 1),
    'layer2Kernel': hp.quniform("layer2Kernel", 3, 6, 1),
    'denseNodes': hp.quniform("denseNodes", 256, 2048, 1),
    'batchSize': hp.choice("batchSize", batchSizes),
    'learningRate': hp.uniform("learningRate", .000000001, .0001),
    'layer1Filters': hp.quniform("layer1Filters", 16, 128, 1),
    'layer3Kernel': hp.quniform("layer3Kernel", 3, 6, 1),
    'layer1Kernel': hp.quniform("layer1Kernel", 3, 6, 1),
    'layer4Filters': hp.quniform("layer4Filters", 16, 128, 1),
    'layer4Kernel': hp.quniform("layer4Kernel", 3, 6, 1)
}

best = fmin(fn=runCNN,
            space=parameters,
            algo=tpe.suggest,
            max_evals=10000,
                    parse_dates=True)

default_parameters = {
    'ohlcv': ohlcv,
    'fastlen': 24,
    'slowlen': 27,
    'filterlen': 1,
    'buyfilterth': 82,
    'sellfilterth': 82,
    'rsiperiod': 14,
    'overBought': 70.1,
    'overSold': 29.9,
}

hyperopt_parameters = {
    'fastlen': hp.quniform('fastlen', 1, 100, 1),
    'slowlen': hp.quniform('slowlen', 1, 100, 1),
    # 'filterlen': hp.loguniform('filterlen', 0, 5),
    # 'buyfilterth': hp.loguniform('buyfilterth', 0, 3),
    # 'sellfilterth': hp.loguniform('sellfilterth', 0, 3),
    # 'rsiperiod': hp.quniform('rsiperiod', 1, 30, 1),
    # 'overBought': hp.quniform('overBought', 60, 90, 2),
    # 'overSold': hp.quniform('overSold', 1, 40, 2),
}

best, report = BacktestIteration(sma_cross_backtest, default_parameters,
                                 hyperopt_parameters, 0,
                                 maximize=lambda r: r.All.ProfitFactor)
report.DataFrame.to_csv('TradeData.csv')
X_train3 = StandardScaler().fit_transform(featuresTRAIN(features, 1, 2))

if h == 0:
    y_val = [y_val1, y_val2, y_val3]
    y_train = [y_train1, y_train2, y_train3]
    X_val = [X_val1, X_val2, X_val3]
    X_train = [X_train1, X_train2, X_train3]
else:
    y_val = [y_val1[h:], y_val2[h:], y_val3[h:]]
    y_train = [y_train1[h:], y_train2[h:], y_train3[h:]]
    X_val = [X_val1[:-h], X_val2[:-h], X_val3[:-h]]
    X_train = [X_train1[:-h], X_train2[:-h], X_train3[:-h]]

# Parameter Space
SPACE_FNN = dict([('split', hp.quniform('split', 2, 100, 1)),
                  ('leaf', hp.quniform('leaf', 2, 20, 1)),
                  ('features', hp.quniform('features', 1, 5, 1)),
                  ('estimators', hp.choice('estimators', [50, 100, 150, 300]))
                  ])

trials = Trials()

# Try to minimize MSE over different configurations
best = fmin(objective, SPACE_FNN, algo=tpe.suggest, trials=trials, max_evals=50)
print(best)

# loss = [t['result']['loss'] for t in trials.trials]
# print(np.array(loss.sort()))

pruebas.append(trials)

with open('RFR_trials_t+' + str(h + 1) + '.pkl', 'wb') as f:
from math import log


# In[42]:

from hyperopt.pyll.stochastic import sample


# In[43]:

space = {
    'learning_rate': hp.loguniform('learning_rate', log(0.01), log(0.3)),
    'max_depth': hp.quniform('max_depth', 2, 20, 1),
    'min_child_weight': hp.quniform('min_child_weight', 1, 10, 1),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'colsample': hp.choice('colsample', [
        ('colsample_bytree', hp.uniform('colsample_bytree', 0.5, 1.0)),
        ('colsample_bylevel', hp.uniform('colsample_bylevel', 0.5, 1.0))
    ]),
    'gamma': hp.loguniform('gamma', log(1e-8), log(10)),
    'reg_alpha': hp.loguniform('reg_alpha', log(1), log(100)),
    'reg_lambda': hp.uniform('reg_lambda', 0.1, 10),
    'scale_pos_weight': hp.uniform('scale_pos_weight', 1, 100),
    'silent': 1,
    'eval_metric': 'auc',
    'objective': 'binary:logistic'
}
def main(output_path):
    # Read and preprocess data
    df = pd.read_csv('../raw_data/train.csv')
    df_test = pd.read_csv('../raw_data/test.csv')

    X = df.drop(['loss'], 1)
    y = df.loss
    X_test = df_test

    fe = FeatureExtractor().fit(X, X_test)
    X_tr = fe.transform(X)

    X_train, X_val, y_train, y_val = train_test_split(X_tr, y, test_size=0.2, random_state=2016)

    # Set up Hyperopt
    space = {
        'max_depth': hp.quniform('max_depth', 10, 15, 1),
        'num_leaves': hp.quniform('num_leaves', 50, 500, 50),
        'bagging_fraction': hp.quniform('bagging_fraction', 0.5, 0.9, 0.05),
        'feature_fraction': hp.quniform('feature_fraction', 0.3, 0.9, 0.05),
        'min_data_in_leaf': hp.quniform('min_data_in_leaf', 10, 300, 30),
        'lambda_l1': hp.loguniform('lambda_l1', -5, 2),
        'lambda_l2': hp.loguniform('lambda_l2', -5, 2),
    }

    def evaluate_lightgbm(params):
        print 'new iteration ', datetime.now().strftime('%H:%M')

        model = GBMRegressor(
            num_threads=6,
            num_iterations=5000,
            verbose=False,
            early_stopping_round=25,
            bagging_seed=2016,
            metric='l1',
            learning_rate=0.01,
            max_depth=int(params['max_depth']),
            num_leaves=int(params['num_leaves']),
            feature_fraction=params['feature_fraction'],
            bagging_fraction=params['bagging_fraction'],
            min_data_in_leaf=int(params['min_data_in_leaf']),
            lambda_l1=params['lambda_l1'],
            lambda_l2=params['lambda_l2'])

        model.fit(X_train.values, target_transform(y_train.values),
                  test_data=[(X_val.values, target_transform(y_val.values))])

        best_iter = model.best_round
        y_pred = target_inverse_transform(model.predict(X_val))
        mae = mean_absolute_error(y_val, y_pred)

        return {'loss': mae, 'status': STATUS_OK, 'best_round': best_iter}

    trials = Trials()

    # Run optimization
    fmin(fn=evaluate_lightgbm, space=space, algo=tpe.suggest, max_evals=100, trials=trials)

    # Print output
    result = dumps(trials.trials)
    with open(output_path, 'w') as f:
        f.write(result)
    num_round = 50
    start = time.time()
    model = xgb.train(params, trainDM, num_round)
    end = time.time()

    pred = model.predict(validDM, ntree_limit=model.best_iteration + 1)
    auc = roc_auc_score(validation_labels, pred)

    print "SCORE:", auc, " Train Time:", (end - start)

    return {'loss': 1 - auc, 'status': STATUS_OK, 'eval_time': (end - start)}


space = {
    'eta': hp.quniform('eta', 0.01, 0.3, 0.02),
    'max_depth': hp.choice('max_depth', np.arange(0, 14, dtype=int)),
    'min_child_weight': hp.quniform('min_child_weight', 1, 100, 1),
    'subsample': hp.quniform('subsample', 0.5, 1, 0.1),
    'gamma': hp.quniform('gamma', 0.1, 1, 0.1),
    'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1, 0.05),
    'alpha': hp.quniform('alpha', 0.0, 2, 0.1),
    'lambda': hp.quniform('lambda', 0.0, 2, 0.1),
    'eval_metric': 'auc',
    'objective': 'binary:logistic',
    'nthread': 48,
    'booster': 'gbtree',
    'tree_method': 'hist',
    'grow_policy': 'lossguide',
    'max_leaves': hp.choice('max_leaves', np.arange(0, 255, dtype=int)),
    'silent': 1
def __init__(self):
    self.parameter_space = {
        'n_estimators': hp.quniform('n_estimators', 1, 500, 1),
        'max_depth': hp.quniform('max_depth', 1, 100, 1),
        'max_features': hp.uniform('max_features', 0, 1)
    }
def __init__(self):
    super().__init__()
    self.parameter_space['filters'] = hp.quniform('filters', 1, 10, 1)
def _build_space(nlayer, data_augmentation, n_exogenous_inputs):
    """Function that generates the hyperparameter/feature search space

    Parameters
    ----------
    nlayer : int
        Number of layers of the DNN model
    data_augmentation : bool
        Boolean that selects whether augmenting data is considered
    n_exogenous_inputs : int
        Number of exogenous inputs in the market under study

    Returns
    -------
    dict
        Dictionary defining the search space
    """
    # Defining the hyperparameter space. First the neural net hyperparameters,
    # later the input features
    space = {
        'batch_normalization': hp.choice('batch_normalization', [False, True]),
        'dropout': hp.uniform('dropout', 0, 1),
        'lr': hp.loguniform('lr', np.log(5e-4), np.log(0.1)),
        'seed': hp.quniform('seed', 1, 1000, 1),
        'neurons1': hp.quniform('neurons1', 50, 500, 1),
        'activation': hp.choice('activation', [
            "relu", "softplus", "tanh", 'selu', 'LeakyReLU', 'PReLU', 'sigmoid'
        ]),
        'init': hp.choice('init', [
            'Orthogonal', 'lecun_uniform', 'glorot_uniform', 'glorot_normal',
            'he_uniform', 'he_normal'
        ]),
        'reg': hp.choice('reg', [
            {'val': None, 'lambda': 0},
            {'val': 'l1', 'lambda': hp.loguniform('lambdal1', np.log(1e-5), np.log(1))}
        ]),
        'scaleX': hp.choice('scaleX', ['No', 'Norm', 'Norm1', 'Std', 'Median', 'Invariant']),
        'scaleY': hp.choice('scaleY', ['No', 'Norm', 'Norm1', 'Std', 'Median', 'Invariant'])
    }

    if nlayer >= 2:
        space['neurons2'] = hp.quniform('neurons2', 25, 400, 1)
    if nlayer >= 3:
        space['neurons3'] = hp.quniform('neurons3', 25, 300, 1)
    if nlayer >= 4:
        space['neurons4'] = hp.quniform('neurons4', 25, 200, 1)
    if nlayer >= 5:
        space['neurons5'] = hp.quniform('neurons5', 25, 200, 1)

    # Defining the possible input features as hyperparameters
    space['In: Day'] = hp.choice('In: Day', [False, True])
    space['In: Price D-1'] = hp.choice('In: Price D-1', [False, True])
    space['In: Price D-2'] = hp.choice('In: Price D-2', [False, True])
    space['In: Price D-3'] = hp.choice('In: Price D-3', [False, True])
    space['In: Price D-7'] = hp.choice('In: Price D-7', [False, True])

    for n_ex in range(1, n_exogenous_inputs + 1):
        space['In: Exog-' + str(n_ex) + ' D'] = hp.choice(
            'In: Exog-' + str(n_ex) + ' D', [False, True])
        space['In: Exog-' + str(n_ex) + ' D-1'] = hp.choice(
            'In: Exog-' + str(n_ex) + ' D-1', [False, True])
        space['In: Exog-' + str(n_ex) + ' D-7'] = hp.choice(
            'In: Exog-' + str(n_ex) + ' D-7', [False, True])

    if data_augmentation:
        # For the multiple output model, we allow as an option to use the 24 horizons in a day
        # during training, i.e. not only predict 00 to 23, but 01 to 24, 02 to 01, etc.
        # For testing the evaluation is normal
        space['24 datapoints per day'] = hp.choice('24 datapoints per day', [False, True])

    return space
    XGB_model.fit(XGB_X_train, XGB_Y_train, eval_set=eval_set, eval_metric="rmse",
                  early_stopping_rounds=10, verbose=1)
    XGB_predictions = XGB_model.predict(XGB_X_cv)
    XGB_rmse = np.sqrt(mean_squared_error(XGB_predictions, XGB_Y_cv))
    return {'loss': XGB_rmse, 'status': STATUS_OK}


print(" \n\nBest Model Search processing for XgBoost...")

space = {'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.5)),
         'max_depth': hp.choice("x_max_depth", range(4, 16, 1)),
         'num_leaves': hp.choice('num_leaves', range(2, 300, 1)),
         'min_child_weight': hp.quniform('x_min_child', 1, 10, 1),
         'feature_fraction': hp.uniform('feature_fraction', 0.1, 1.0),
         'bagging_fraction': hp.uniform('bagging_fraction', 0.1, 1.0),
         'subsample': hp.uniform('subsample', 0.1, 1.0),
         'gamma': hp.uniform('x_gamma', 0.1, 0.5),
         'colsample_bytree': hp.uniform('x_colsample_bytree', 0.7, 1),
         'reg_lambda': hp.uniform('x_reg_lambda', 0, 1),
         }

trials = Trials()
best = fmin(fn=XGB_fine_tune, space=space, algo=tpe.suggest, max_evals=10, trials=trials)
print("BEST PARAMETERS:", best)
    while True:
        input_fd = open(data_file, 'r')
        last_line = ''
        for line in input_fd:
            last_line = line
        strs = last_line.split()
        if len(strs) < len(input) + 1 or strs[len(input)] == 'P':
            input_fd.close()
            time.sleep(1)
            continue
        else:
            input_fd.close()
            return float(strs[len(input)])


search_space = (hp.uniform('lr', -5, 0),
                hp.quniform('slack', 0, 3, 1),
                hp.quniform('batchsize', 0, 3, 1),
                hp.uniform('momentum', 0, 1),
                hp.uniform('lr2', -5, 0),
                hp.quniform('slack2', 0, 3, 1),
                hp.quniform('batchsize2', 0, 3, 1),
                hp.uniform('momentum2', 0, 1),
                hp.uniform('lr3', -5, 0),
                hp.quniform('slack3', 0, 3, 1),
                hp.quniform('batchsize3', 0, 3, 1),
                hp.uniform('momentum3', 0, 1),
                hp.uniform('lr4', -5, 0),
                hp.quniform('slack4', 0, 3, 1),
                hp.quniform('batchsize4', 0, 3, 1),
                hp.uniform('momentum4', 0, 1))

os.system('rm %s' % data_file)
        str.join(' ', ['{}={}'.format(k, v) for k, v in sorted_params]),
        nbrounds, submit_guid))
    log_writer.flush()

    end = time.time()
    elapsed = int(end - start)
    # print('elapsed={}'.format(elapsed))

    return {'loss': cv_logloss, 'status': STATUS_OK}


# --------------------------------------------------------------------------------

space = {
    'max_depth': hp.quniform("max_depth", 5, 6, 1),
    'min_child_weight': hp.quniform('min_child_weight', 1, 20, 1),
    'subsample': hp.uniform('subsample', 0.75, 1.0),
    'gamma': hp.loguniform('gamma', -5.0, 0.0),
    'eta': hp.loguniform('eta', -3, -1.6),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.90, 1.0),
    'colsample_bylevel': hp.uniform('colsample_bylevel', 0.90, 1.0),
}

# --------------------------------------------------------------

trials = Trials()

best = hyperopt.fmin(fn=objective,
                     space=space,
                     algo=HYPEROPT_ALGO,
                     max_evals=N_HYPEROPT_PROBES,
# Keras Models -> keras
# Regularized Greedy Forest Models -> rgf

########################################
## Parameter Space for XGBoost models ##
########################################
## In the early stage of the competition, I mostly focus on
## raw tfidf features and linear booster.

## regression with linear booster
param_space_reg_xgb_linear = {
    'task': 'regression',
    'booster': 'gblinear',  # can be gbtree, gblinear or dart
    'objective': 'reg:linear',
    'eval_metric': 'logloss',
    'eta': hp.quniform('eta', 0.01, 1, 0.01),
    'lambda': hp.quniform('lambda', 0, 5, 0.05),
    'alpha': hp.quniform('alpha', 0, 0.5, 0.005),
    'lambda_bias': hp.quniform('lambda_bias', 0, 3, 0.1),
    'num_round': hp.quniform('num_round', xgb_min_num_round, xgb_max_num_round, xgb_num_round_step),
    'nthread': xgb_nthread,
    'silent': 1,
    'seed': xgb_random_seed,
    "max_evals": hyperopt_param["xgb_max_evals"],
}

## regression with tree booster
param_space_reg_xgb_tree = {
    'task': 'regression',
    'booster': 'gbtree',  # can be gbtree, gblinear or dart
    'objective': 'reg:linear',
import numpy as np
from hyperopt import hp
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier

from config import random_seed
from utils.python_utils import quniform_int

steps = [
    ('knn', KNeighborsClassifier(n_neighbors=5, n_jobs=-1))
]

model = Pipeline(steps=steps)

params_space = {
    'knn__n_neighbors': quniform_int('n_neighbors', 1, 50, 2),
    'knn__weights': hp.choice('weights', ['uniform', 'distance']),
    'knn__p': hp.quniform('p', 2.5, 5.5, 1),
}
path = "https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls" data = pd.read_excel(path, header=1, index_col=0) data = data.rename(columns={'default payment next month': "default"}) print2(data.head()) X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1], data.iloc[:, -1], stratify=data.iloc[:, -1], test_size=0.3) print2(X_train.shape, X_test.shape, y_train.shape, y_test.shape) # Set up space dictionary with specified hyperparameters space = { 'max_depth': hp.quniform('max_depth', 2, 10, 2), 'learning_rate': hp.uniform('learning_rate', 0.001, 0.9) } # Set up objective function n = 1 def objective(params): params = { 'max_depth': int(params["max_depth"]), 'learning_rate': params["learning_rate"] } gbm_clf = GradientBoostingClassifier(n_estimators=100, **params) best_score = cross_val_score(gbm_clf, X_train,
val_data = numpy.reshape(val_data, (X_day_val.shape[0], 24, val_data.shape[1]))
test_data = numpy.reshape(test_data, (X_day_e.shape[0], 24, test_data.shape[1]))

X_sch_t = numpy.reshape(X_sch_t, (X_day_t.shape[0], 24, X_sch_t.shape[1]))
X_sch_val = numpy.reshape(X_sch_val, (X_day_val.shape[0], 24, X_sch_val.shape[1]))
X_sch_e = numpy.reshape(X_sch_e, (X_day_e.shape[0], 24, X_sch_e.shape[1]))

# H_t = numpy.reshape(H_t, (H_mean_t.shape[0], 24, 1))
# H_e = numpy.reshape(H_e, (H_mean_e.shape[0], 24, 1))
H_t = numpy.reshape(H_t, (H_mean_t.shape[0], 24))
H_val = numpy.reshape(H_val, (H_mean_v.shape[0], 24))
H_e = numpy.reshape(H_e, (H_mean_e.shape[0], 24))

# This block is for optimizing LSTM layers
space = {
    'Layer1': hp.quniform('Layer1', 10, 100, 5),
    'Layer2': hp.quniform('Layer2', 10, 100, 5),
    'Layer3': hp.quniform('Layer3', 5, 20, 1),
    # 'D1': hp.uniform('D1', 0, 0.5),
    # 'D2': hp.uniform('D2', 0, 0.5),
    # 'layer2_units': hp.quniform('layer2_units', 10, 50, 1)
}


def objective(params):
    # optimize_model = build_lstm_v1.lstm_model_102(params, train_data.shape[2], 24, 24)
    # optimize_model = build_lstm_v1.lstm_model_106(params, train_data.shape[2], 24)
    optimize_model = build_lstm_v1.lstm_model_106(params, train_data.shape[2], 24)

    # for epochs in range(5):
    for ep in range(5):
""" Cart-pole balancing with tabular representation """ from rlpy.Domains import InfCartPoleBalance from rlpy.Agents import Q_Learning from rlpy.Representations import * from rlpy.Policies import eGreedy from rlpy.Experiments import Experiment import numpy as np from hyperopt import hp param_space = { 'discretization': hp.quniform("resolution", 4, 40, 1), 'lambda_': hp.uniform("lambda_", 0., 1.), 'boyan_N0': hp.loguniform("boyan_N0", np.log(1e1), np.log(1e5)), 'initial_learn_rate': hp.loguniform("initial_learn_rate", np.log(5e-2), np.log(1)) } def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=753, initial_learn_rate=.7, discretization=20., lambda_=0.75): opt = {} opt["exp_id"] = exp_id