def suggest_multiple_from_name(dbname, host, port, bandit_algo_names,
                               bandit_names, exp_keys, N, bandit_args_list,
                               bandit_kwargs_list, bandit_algo_args_list,
                               bandit_algo_kwargs_list, mql=1, refresh=False):
    """Build one bandit algorithm per experiment key and interleave them.

    The list arguments are consumed in lock-step (``zip``): for each position
    a bandit is created with ``json_call(name, args, kwargs)``, a
    ``MongoTrials`` handle restricted to the matching ``exp_key`` is opened,
    and the algorithm is created with the bandit prepended to its positional
    arguments and a ``'cmd'`` entry added to its keyword arguments.

    When the bandit has arguments, the pickled ``(name, args, kwargs)`` triple
    is stored as a trials attachment and ``'cmd'`` points at that attachment;
    otherwise ``'cmd'`` is ``('bandit_json evaluate', name)``.

    :param mql: value assigned to the experiment's ``max_queue_len``.
    :param refresh: NOTE(review): accepted but not used -- both MongoTrials
        handles are opened with ``refresh=False``; confirm this is intended.
    :return: the ``hyperopt.Experiment`` when ``N`` is None; otherwise the
        experiment is run for ``N`` trials (blocking) and None is returned.
    """
    port = int(port)
    mongo_url = 'mongo://%s:%d/%s/jobs' % (host, port, dbname)
    trials = MongoTrials(mongo_url, refresh=False)

    algos = []
    per_experiment = zip(bandit_names, bandit_algo_names, bandit_args_list,
                         bandit_kwargs_list, bandit_algo_args_list,
                         bandit_algo_kwargs_list, exp_keys)
    for bn, ban, ba, bk, baa, bak, ek in per_experiment:
        bandit = json_call(bn, ba, bk)
        subtrials = MongoTrials(mongo_url, refresh=False, exp_key=ek)

        # Work on a copy: writing 'cmd' into the caller's dict would leak
        # into the caller and into any other experiment sharing that dict.
        algo_kwargs = dict(bak)
        if ba or bk:
            attachment_name = 'bandit_data_' + ek
            subtrials.attachments[attachment_name] = cPickle.dumps(
                (bn, ba, bk))
            algo_kwargs['cmd'] = ('driver_attachment', attachment_name)
        else:
            algo_kwargs['cmd'] = ('bandit_json evaluate', bn)

        algos.append(json_call(ban, (bandit,) + baa, algo_kwargs))

    algo = InterleaveAlgo(algos, exp_keys)
    exp = hyperopt.Experiment(trials, algo, poll_interval_secs=.1)
    exp.max_queue_len = mql
    if N is not None:
        exp.run(N, block_until_done=True)
    else:
        return exp
def test_trial_attachments():
    """Run fmin in a thread against a temporary Mongo and serve it in-process.

    A single MongoWorker processes ``max_evals`` jobs (errors are printed, not
    raised); afterwards the database must hold exactly ``max_evals`` trials,
    all in the DONE state.
    """
    exp_key = "A"
    max_evals = 3
    with TempMongo() as tm:
        mj = tm.mongo_jobs("foo")
        trials = MongoTrials(tm.connection_string("foo"), exp_key=exp_key)
        space = hp.uniform("x", -10, 10)

        driver = threading.Thread(
            target=fmin_thread_fn, args=(space, trials, max_evals)
        )
        driver.start()

        worker = MongoWorker(
            mj=mj, logfilename=None, workdir="mongoexp_test_dir"
        )
        # One attempt per expected job, whether or not the attempt succeeds.
        for _ in range(max_evals):
            try:
                worker.run_one("hostname", 10.0, erase_created_workdir=True)
                print("worker: ran job")
            except Exception as exc:
                print(f"worker: encountered error : {str(exc)}")
                traceback.print_exc()

        driver.join()

        everything = MongoTrials(tm.connection_string("foo"))
        assert len(everything) == max_evals
        assert trials.count_by_state_synced(JOB_STATE_DONE) == max_evals
        assert trials.count_by_state_unsynced(JOB_STATE_DONE) == max_evals
def _refresh_trials():
    """Create the module-level TRIALS handle on first use, then refresh it.

    TRIALS_REFRESHED is set to the current time just before the refresh.
    """
    global TRIALS, TRIALS_REFRESHED

    needs_handle = TRIALS is None
    if needs_handle:
        TRIALS = MongoTrials('mongo://localhost:1234/covid/jobs')

    TRIALS_REFRESHED = datetime.now()
    TRIALS.refresh()
def transfer_trials(fromdb, todb):
    """
    Insert all of the documents in `fromdb` into `todb`.

    Both databases are reached on localhost:44556.  Each document is
    deep-copied and its ``_id`` removed so the destination assigns its own.

    :param fromdb: name of the source database.
    :param todb: name of the destination database.
    """
    from_trials = MongoTrials('mongo://localhost:44556/%s/jobs' % fromdb)
    to_trials = MongoTrials('mongo://localhost:44556/%s/jobs' % todb)

    from_docs = [copy.deepcopy(doc) for doc in from_trials]
    for doc in from_docs:
        doc.pop('_id', None)

    # insert_trial_docs takes a list of documents; the previous code handed
    # it a single dict, so the collection was not actually transferred.
    if from_docs:
        to_trials.insert_trial_docs(from_docs)
def validate_from_tids(dbname):
    """Check tid consistency of every trial in database `dbname`.

    Asserts that each trial's ``misc['tid']`` equals its ``tid`` and prints a
    message for every trial whose ``misc['from_tid']`` is not the tid of a
    trial in the same database.

    :param dbname: database name on localhost:44556.
    :raises AssertionError: if a trial's ``tid`` and ``misc['tid']`` differ.
    """
    trials = MongoTrials('mongo://localhost:44556/%s/jobs' % dbname,
                         refresh=False)
    trials.refresh()
    tdict = {t['tid']: t for t in trials}

    # %-formatting prints the same text as the old print statement did and
    # parses on both Python 2 and Python 3.
    print('TIDS %s' % (list(tdict),))
    for tid, t in tdict.items():
        misc = t['misc']
        assert misc['tid'] == tid
        if 'from_tid' in misc and misc['from_tid'] not in tdict:
            print('WTF gave us %s %s' % (tid, misc['from_tid']))
def run_optimization(level=1):
    """Run (or resume) the hyperparameter search for one level.

    The level is skipped when the next level already has trials, or when this
    level already has at least ``max_evals`` trials with status ``'ok'``.

    Level 1 takes its evaluation budget from ``LEVEL_DEFS[0][1]`` with depth
    1.  Higher levels read ``(depth, new_budget, extend_budget)`` from
    ``LEVEL_DEFS[level - 1]``; if the level has no trials yet, it is seeded
    through ``configure_next_level`` and a suggestion function is built from
    the returned documents.

    :param level: 1-based level number.
    :raises ValueError: if ``level`` is smaller than 1 (previously this
        surfaced later as an UnboundLocalError).
    """
    if level < 1:
        raise ValueError(f"level must be >= 1, got {level!r}")

    mongo_url = 'mongo://localhost:1234/covid/jobs'

    print(f"Optimizing at level {level}")
    set_random_seeds(4)

    next_lvl_trials = MongoTrials(mongo_url, exp_key=f'covid-{level+1}')
    if len(next_lvl_trials.trials) > 0:
        print(f"Already completed level {level} -- skipping")
        return

    exp_key = f'covid-{level}'
    trials = MongoTrials(mongo_url, exp_key=exp_key)
    suggestion_box = hyperopt.tpe.suggest

    if level == 1:
        max_evals = LEVEL_DEFS[0][1]
        depth = 1
    else:
        depth, new_budget, extend_budget = LEVEL_DEFS[level - 1]
        # Minimum one per node for the expensive ones -- no point wasting
        # compute time
        num_new = int(np.ceil((new_budget / depth) / NUM_NODES) * NUM_NODES)
        if len(trials.trials) == 0:
            print("Generating estimates from previous level")
            result_docs = configure_next_level(level, depth, extend_budget)
            suggestion_box = create_suggestion_box(result_docs)
        last_level_trials = MongoTrials(mongo_url,
                                        exp_key=f'covid-{level-1}')
        prev_level_count = sum(
            1 for loss in last_level_trials.losses() if loss is not None)
        max_evals = prev_level_count + num_new

    # Re-sync with the database before counting finished trials.
    # NOTE(review): the collapsed source does not show whether this refresh
    # was limited to level > 1; it now runs for every level -- confirm.
    trials.refresh()

    objective = functools.partial(test_parameterization, num_epochs=depth)
    completed = sum(1 for status in trials.statuses() if status == 'ok')
    if completed >= max_evals:
        print(f"Already completed level {level} -- skipping")
    else:
        best = hyperopt.fmin(objective,
                             space=SEARCH_SPACE,
                             algo=suggestion_box,
                             max_evals=max_evals,
                             trials=trials)
        print(best)
def main():
    """Load the default config and search space, then run fmin over Mongo.

    The search space is the ``space`` attribute of the module named by
    ``args.space_config`` with its last three characters removed; that module
    is looked up with PATH_OPT_CONFIGS placed first on ``sys.path``.
    """
    sys.path.insert(0, PATH_OPT_CONFIGS)
    args = parse_args()

    with open(args.default_config, 'r') as stream:
        default_config = yaml.load(stream, Loader=yaml.FullLoader)
    default_config = replace_path_config(default_config)

    space_module_name = args.space_config[:-3]
    space = __import__(space_module_name).space

    train_script = os.path.abspath(
        os.path.join(os.getcwd(), "..", "src", "train.py"))
    exp = ExperimentRunner(
        default_config=default_config,
        results_dir=os.path.abspath(args.results_dir),
        tmp_dir=os.path.abspath(args.tmp_dir),
        python_path=sys.executable,
        train_path=train_script,
        debug=args.debug,
        num_gpus=args.num_gpus,
    )

    trials = MongoTrials('mongo://localhost:1234/tweet_sent/jobs',
                         exp_key=args.exp_name)
    fmin(exp, space, trials=trials, algo=tpe.suggest,
         max_evals=args.num_trials)
def do_test(runs, flder):
    """Optimise ``k`` and ``f`` with TPE, storing trials in MongoDB.

    The objective is ``bid_and_zero_optimizer.loss_for_batch`` bound to the
    training frame, ``runs``, ``flder`` and a ``log.txt`` path inside
    ``flder``.  After ``fmin`` returns, its result and the value of
    ``get_the_best_loss(trials)`` are printed.

    :param runs: forwarded to the objective as ``runs``.
    :param flder: forwarded to the objective; also the directory of the log.
    """
    df = load_train()
    space = {
        'k': hp.qnormal('k', 25, 10, 1),
        'f': hp.loguniform('f', log(0.1), log(5)),
    }
    trials = MongoTrials(
        'mongo://10.20.0.144:27017/bid_exp_family_and_zero_column/jobs',
        exp_key='exp1')

    log_file = os.path.join(flder, 'log.txt')
    objective = partial(bid_and_zero_optimizer.loss_for_batch,
                        df=df, runs=runs, flder=flder, log_file=log_file)
    best = fmin(objective, space=space, algo=tpe.suggest, trials=trials,
                max_evals=10000)

    # print(...) with one argument emits the same text as the old print
    # statements and parses on both Python 2 and Python 3.
    print('')
    print('curr={}'.format(best))
    print('best={}'.format(get_the_best_loss(trials)))
def main(output_path):
    """Tune LightGBM hyperparameters with TPE and dump the trials to a file.

    :param output_path: file that receives ``dumps(trials.trials)``.
    """
    # Search space (max_depth is pinned: its lower and upper bound are equal).
    space = dict(
        max_depth=hp.quniform('max_depth', 13, 13, 1),
        num_leaves=hp.quniform('num_leaves', 50, 500, 50),
        bagging_fraction=hp.quniform('bagging_fraction', 0.5, 0.9, 0.05),
        feature_fraction=hp.quniform('feature_fraction', 0.25, 0.55, 0.05),
        min_data_in_leaf=hp.quniform('min_data_in_leaf', 100, 500, 50),
        lambda_l1=hp.loguniform('lambda_l1', -3, 2),
        lambda_l2=hp.loguniform('lambda_l2', -3, 2),
    )

    # Trials are stored in MongoDB (an in-memory Trials() would also work).
    trials = MongoTrials('mongo://localhost:27017/allstate/jobs',
                         exp_key='lightgbm_2')

    fmin(fn=evaluate_lightgbm, space=space, algo=tpe.suggest,
         max_evals=200, trials=trials)

    serialized = dumps(trials.trials)
    with open(output_path, 'w') as out:
        out.write(serialized)
def main(train_data, validation_data, test_data, trials_output, vw_args,
         max_evals, outer_loss_function, mongo, timeout):
    """Search the FTRL / regularisation options and return 0.

    :param mongo: MongoTrials connection string; when falsy an in-memory
        ``Trials`` object is used instead.
    :return: always 0.
    """
    # Disabled dimensions kept for reference:
    #   '--passes': hp.quniform('passes', 1, 5, 1)
    #   '--learning_rate': hp.loguniform('learning_rate', log(0.01), log(10))
    #   '--classweight 1:': hp.loguniform('classweight_pos', log(1), log(1000))
    #   '--classweight -1:': hp.loguniform('classweight_neg', log(0.001), log(1))
    space = {
        '--ftrl_alpha': hp.loguniform('ftrl_alpha', log(1e-5), log(1e-1)),
        '--ftrl_beta': hp.uniform('ftrl_beta', 0.01, 1.),
        '--l1': hp.loguniform('l1', log(1e-8), log(1e-1)),
        '--l2': hp.loguniform('l2', log(1e-8), log(1e-1)),
    }

    if mongo:
        trials = MongoTrials(mongo)
    else:
        trials = Trials()

    objective = Objective(
        train_data=train_data,
        validation_data=validation_data,
        test_data=test_data,
        vw_args=vw_args,
        outer_loss_function=outer_loss_function,
        timeout=timeout,
    )

    search(space, objective, trials=trials, trials_output=trials_output,
           max_evals=max_evals)
    return 0
def main():
    """Connect to MongoDB (retrying on timeout), run fmin, save the result.

    Host, port and experiment name come from the MONGO_DB_HOST,
    MONGO_DB_PORT and EXPERIMENT_NAME environment variables.  Only when
    JOB_NAME equals "ps" is ``results.json`` written under the logs path.
    """
    env = os.environ
    host = env["MONGO_DB_HOST"]
    port = env["MONGO_DB_PORT"]
    exp_name = env["EXPERIMENT_NAME"]
    connect_str = "mongo://{0}:{1}/foo_db/jobs".format(host, port)

    # Retry until the server can be selected; timeouts are ignored.
    while True:
        try:
            trials = MongoTrials(connect_str, exp_key=exp_name)
        except ServerSelectionTimeoutError:
            continue
        break

    space = {'x': hp.uniform('x', -2, 2)}
    best = fmin(obj, space=space, trials=trials, algo=tpe.suggest,
                max_evals=100)

    if env["JOB_NAME"] != "ps":
        return

    save_path = os.path.join(get_logs_path("./logs"), "results.json")
    with open(save_path, "w") as out:
        # The result is JSON-encoded to a string, and that string is then
        # written as a JSON value.
        json.dump(json.dumps(best), out)
def load_hp(hp_fname, host, port):
    """Load the search space, trials and startup-job count.

    The source is a pickle file when ``hp_fname`` is given; otherwise it is
    the MongoDB at ``host:port``, using the module-level ``mongo_dbname`` as
    both database name and experiment key.

    :param hp_fname: path of a pickle holding a dict with the keys
        ``'hp_space'``, ``'trials'`` and ``'n_startup_jobs'``, or None.
    :param host: MongoDB host, used only when ``hp_fname`` is None.
    :param port: MongoDB port (converted with ``int``).
    :return: ``(hp_space, trials, n_startup_jobs)``.
    :raises ValueError: if both ``hp_fname`` and ``host`` are None.
    """
    if hp_fname is not None:
        # NOTE: unpickling can execute arbitrary code -- only load files
        # from a trusted source.
        # Binary mode is required for pickles; the context manager closes
        # the handle (it was previously opened in text mode and leaked).
        with open(hp_fname, 'rb') as fh:
            saved = cPickle.load(fh)
        hp_space = saved['hp_space']
        trials = saved['trials']
        n_startup_jobs = saved['n_startup_jobs']
    elif host is not None:
        trials = MongoTrials(
            'mongo://%s:%d/%s/jobs' % (host, int(port), mongo_dbname),
            exp_key=mongo_dbname,
            refresh=True)
        # -- retrieve hp_space and n_startup_jobs from trials attachment
        first_misc = trials.miscs[0]
        cmd0, cmd1 = first_misc['cmd']
        assert cmd0 == 'domain_attachment'
        blob = trials.attachments[cmd1]
        domain = cPickle.loads(blob)
        spec = hyperopt.base.spec_from_misc(first_misc)
        memo = domain.memo_from_config(spec)
        argdict = hyperopt.pyll.rec_eval(domain.expr, memo=memo)
        hp_space = argdict['hp_space']
        n_startup_jobs = argdict['n_startup_trials']
    else:
        raise ValueError('No Pickle file nor MongoDB host informed')
    return hp_space, trials, n_startup_jobs
def do_test(runs):
    """Optimise the manager and bid parameters with TPE over MongoDB.

    The objective is ``bid_and_mngr_optimizer.loss_for_batch`` bound to the
    training frame and ``runs``.  After ``fmin`` returns, its result and the
    value of ``get_the_best_loss(trials)`` are printed.

    :param runs: forwarded to the objective as ``runs``.
    """
    experiment_name = 'bid_and_mngr_hcc_trials'
    gc_host = '35.187.46.132'
    df = load_train()

    n_choices = [2, 3, 4, 5, 6, 7, 10]
    space = {
        'mngr_k': hp.quniform('mngr_k', 3, 50, 1),
        'mngr_f': hp.loguniform('mngr_f', log(0.1), log(5)),
        'mngr_n': hp.choice('mngr_n', list(n_choices)),
        'bid_k': hp.quniform('bid_k', 3, 50, 1),
        'bid_f': hp.loguniform('bid_f', log(0.1), log(5)),
        'bid_n': hp.choice('bid_n', list(n_choices)),
    }

    db_path = 'mongo://{}:27017/{}/jobs'.format(gc_host, experiment_name)
    trials = MongoTrials(db_path, exp_key='exp1')

    objective = partial(bid_and_mngr_optimizer.loss_for_batch,
                        df=df, runs=runs)
    best = fmin(objective, space=space, algo=tpe.suggest, trials=trials,
                max_evals=10000)

    # print(...) with one argument emits the same text as the old print
    # statements and parses on both Python 2 and Python 3.
    print('')
    print('curr={}'.format(best))
    print('best={}'.format(get_the_best_loss(trials)))
def main():
    """Connect to MongoDB (retrying on timeout), run fmin, save the result.

    MONGO_DB_HOST and MONGO_DB_PORT are required environment variables;
    EXPERIMENT_NAME defaults to 'cifar10-hyperopt' and PS_MODEL_PATH to the
    current directory.  ``results.json`` is written only when TYPE is "ps".
    """
    env = os.environ
    host = env["MONGO_DB_HOST"]
    port = env["MONGO_DB_PORT"]
    exp_name = env.get("EXPERIMENT_NAME", 'cifar10-hyperopt')
    data_dir = os.path.abspath(env.get('PS_MODEL_PATH', os.getcwd()))
    connect_str = "mongo://{0}:{1}/foo_db/jobs".format(host, port)

    # Retry until the server can be selected, logging every failed attempt.
    while True:
        try:
            logging.info('Launching MongoTrials for {}'.format(exp_name))
            trials = MongoTrials(connect_str, exp_key=exp_name)
        except ServerSelectionTimeoutError:
            logging.warning('No MongoDB server is available for an operation')
            continue
        break

    space = {'x': hp.uniform('x', -2, 2)}
    best = fmin(obj, space=space, trials=trials, algo=tpe.suggest,
                max_evals=100)

    if env["TYPE"] != "ps":
        return

    save_path = os.path.join(data_dir, "results.json")
    with open(save_path, "w") as out:
        logging.debug('Saving results.json to {}'.format(data_dir))
        logging.info('Results: {}'.format((str(best))))
        # JSON-encoded string written as a JSON value (double encoding).
        json.dump(json.dumps(best), out)
def task2(msg):
    """Run a 300-evaluation TPE search and print the ten best trials.

    :param msg: printed after the results.
    :return: the value returned by ``fmin``.
    """
    tpe_trials = MongoTrials('mongo://localhost:27018/foo_db/jobs',
                             exp_key='exp1')
    opt_params = fmin(
        fn=objective,
        space=hyper_params_space,
        algo=tpe.suggest,
        max_evals=300,
        trials=tpe_trials,
        rstate=np.random.RandomState(100),
    )

    # One column for the loss plus one per searched parameter.
    scores = [result['loss'] for result in tpe_trials.results]
    columns = {'score': scores}
    for param in ('timeperiod', 'nbdevup', 'nbdevdn'):
        columns[param] = tpe_trials.idxs_vals[1][param]
    tpe_results = pd.DataFrame(columns)
    tpe_results.sort_values(by=['score'], inplace=True)

    print(tpe_results.head(10))
    print(opt_params)
    print(msg)
    print('task2 is running')
    return opt_params
def hyper_tune(train_model, hparam_def, algo=tpe.suggest, max_evals=25,
               func=fmin):
    """
    Prepare and run a hyper parameter tune.

    A MongoTrials connection is attempted repeatedly; after each server
    selection timeout ``_hyper_tune_check()`` decides whether to try again.

    :param train_model: User model to tune
    :param hparam_def: User hyper tune param definition
    :param algo: Search algorithm
    :param max_evals: Allow up to this many function evaluations before
        returning
    :param func: function that runs the hyper tune logic, by default the
        hyperopt fmin function
    :return: None if no connection could be made, otherwise whatever
        ``func`` returns
    """
    mongo_connect_str = get_mongo_conn_str()

    while True:
        try:
            trials = MongoTrials(mongo_connect_str,
                                 exp_key=_experiment_name())
        except ServerSelectionTimeoutError:
            logger.warning(
                "Hyper Tune - MongoTrials server selection Timeout Error")
            if not _hyper_tune_check():
                return None
            continue

        return func(train_model, space=hparam_def, trials=trials,
                    algo=algo, max_evals=max_evals)
def main():
    """Connect to MongoDB (retrying on timeout), tune CIFAR-10, save result.

    MONGO_DB_HOST and MONGO_DB_PORT are required environment variables;
    EXPERIMENT_NAME defaults to 'cifar10-hyperopt' and PS_MODEL_PATH to the
    current directory.  ``results.json`` is written only when TYPE is
    "worker".
    """
    env = os.environ
    host = env["MONGO_DB_HOST"]
    port = env["MONGO_DB_PORT"]
    exp_name = env.get("EXPERIMENT_NAME", 'cifar10-hyperopt')
    data_dir = os.path.abspath(env.get('PS_MODEL_PATH', os.getcwd()))
    connect_str = "mongo://{0}:{1}/foo_db/jobs".format(host, port)

    # Retry until the server can be selected; timeouts are ignored.
    while True:
        try:
            trials = MongoTrials(connect_str, exp_key=exp_name)
        except ServerSelectionTimeoutError:
            continue
        break

    space = {
        'lr': hp.loguniform('lr', -10, 2),
        'momentum': hp.uniform('momentum', 0.1, 0.9),
    }
    best = fmin(train_cifar10, space=space, trials=trials,
                algo=tpe.suggest, max_evals=25)

    if env["TYPE"] != "worker":
        return

    save_path = os.path.join(data_dir, "results.json")
    with open(save_path, "w") as out:
        logging.debug('Saving results.json to {}'.format(data_dir))
        logging.info('Results: {}'.format((str(best))))
        # JSON-encoded string written as a JSON value (double encoding).
        json.dump(json.dumps(best), out)
def _fmin_parallel(
    queue: multiprocessing.Queue,
    fn: Callable,
    exp_key: str,
    space: dict,
    algo: Callable = tpe.suggest,
    max_evals: int = 100,
    show_progressbar: bool = False,
    mongo_port_address: str = "localhost:1234/scvi_db",
):
    """Launches a ``hyperopt`` minimization procedure.

    A ``MongoTrials`` object is created for ``exp_key``, ``fmin`` is run to
    completion, and the trials object is put on ``queue``.

    :param queue: receives the trials object once ``fmin`` has returned.
    :param fn: objective passed to ``fmin``.
    :param exp_key: experiment key for the ``MongoTrials`` object.
    :param space: search space passed to ``fmin``.
    :param algo: suggestion algorithm passed to ``fmin``.
    :param max_evals: evaluation budget passed to ``fmin``.
    :param show_progressbar: passed to ``fmin``.
    :param mongo_port_address: ``host:port/db`` part of the Mongo URL;
        ``/jobs`` is appended.
    """
    # URLs always use forward slashes; os.path.join would insert a backslash
    # on Windows, so the POSIX flavour is used explicitly.
    import posixpath

    logger.debug("Instantiating trials object.")
    # instantiate Trials object
    trials = MongoTrials(
        as_mongo_str(posixpath.join(mongo_port_address, "jobs")),
        exp_key=exp_key,
    )

    # run hyperoptimization
    logger.debug("Calling fmin.")
    fmin(
        fn=fn,
        space=space,
        algo=algo,
        max_evals=max_evals,
        trials=trials,
        show_progressbar=show_progressbar,
    )
    logger.debug("fmin returned.")

    # queue.put uses pickle so remove attribute containing thread.lock
    if hasattr(trials, "handle"):
        logger.debug("Deleting Trial handle for pickling.")
        del trials.handle
    logger.debug("Putting Trials in Queue.")
    queue.put(trials)
def _fmin_parallel(
    queue: multiprocessing.Queue,
    fn: Callable,
    exp_key: str,
    space: dict,
    algo: Callable = tpe.suggest,
    max_evals: int = 100,
    fmin_timer: float = None,
    show_progressbar: bool = False,
    mongo_port_address: str = "localhost:1234/scvi_db",
):
    """Launches a ``hyperopt`` minimization procedure.

    ``fmin`` runs in a daemon thread so that this function can stop waiting
    once ``fmin_timer`` seconds have elapsed.  In both cases the trials
    object is then put on ``queue``.

    :param queue: receives the trials object.
    :param fn: objective passed to ``fmin``.
    :param exp_key: experiment key for the ``MongoTrials`` object.
    :param space: search space passed to ``fmin``.
    :param algo: suggestion algorithm passed to ``fmin``.
    :param max_evals: evaluation budget passed to ``fmin``.
    :param fmin_timer: maximum number of seconds to wait for ``fmin``; a
        falsy value (None or 0) means wait until it finishes.
    :param show_progressbar: passed to ``fmin``.
    :param mongo_port_address: ``host:port/db`` part of the Mongo URL;
        ``/jobs`` is appended.
    """
    # URLs always use forward slashes; os.path.join would insert a backslash
    # on Windows, so the POSIX flavour is used explicitly.
    import posixpath

    logger.debug("Instantiating trials object.")
    # instantiate Trials object
    trials = MongoTrials(
        as_mongo_str(posixpath.join(mongo_port_address, "jobs")),
        exp_key=exp_key,
    )

    # run hyperoptimization in another thread to enable the use of fmin_timer
    fmin_kwargs = {
        "fn": fn,
        "space": space,
        "algo": algo,
        "max_evals": max_evals,
        "trials": trials,
        "show_progressbar": show_progressbar,
    }
    fmin_thread = threading.Thread(target=fmin, kwargs=fmin_kwargs)
    logger.debug("Calling fmin.")
    # set fmin thread as daemon so it stops when the main process terminates
    fmin_thread.daemon = True
    fmin_thread.start()
    started_threads.append(fmin_thread)

    # join() returns as soon as the thread ends (or the timeout expires),
    # instead of noticing only at the next 10-second poll.
    if fmin_timer:
        logger.debug(
            "Timer set, fmin will run for at most {timer}".format(
                timer=fmin_timer)
        )
        fmin_thread.join(timeout=fmin_timer)
    else:
        logger.debug("No timer, waiting for fmin")
        fmin_thread.join()
    logger.debug("fmin returned or timer ran out.")

    # NOTE(review): if the timer ran out, the fmin thread may still be using
    # `trials` while it is modified and pickled below -- confirm this is
    # acceptable.
    # queue.put uses pickle so remove attribute containing thread.lock
    if hasattr(trials, "handle"):
        logger.debug("Deleting Trial handle for pickling.")
        del trials.handle
    logger.debug("Putting Trials in Queue.")
    queue.put(trials)
def hyperOptMain(max_evals, max_trials):
    '''Run the training using hyper optimization.

    ``fmin`` is called ``max_trials`` times with ``main`` as the objective.
    On call ``i`` the evaluation budget is
    ``max_evals * i + <number of previously loaded trials>``, so every call
    adds ``max_evals`` new evaluations.  After each call the trials are
    summarised and pickled to the trials file.

    max_evals  -- number of new evaluations per call to fmin
    max_trials -- number of calls to fmin
    '''
    # Command-line options that could become search dimensions:
    #('--num_point', type=int, default=1024, help='Point Number [256/512/1024/2048] [default: 1024]')
    #('--max_epoch', type=int, default=250, help='Epoch to run [default: 250]')
    #('--batch_size', type=int, default=32, help='Batch Size during training [default: 32]')
    #('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]')
    #('--momentum', type=float, default=0.9, help='Initial learning rate [default: 0.9]')
    #('--optimizer', default='adam', help='adam or momentum [default: adam]')
    #('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]
    space = {
        #'num_points': hp.choice('num_points',[256,512,1024,2048]),
        'batch_size': hp.choice('batch_size', [2, 4, 8, 16, 32]),
        #'learning_rate': hp.uniform('learning_rate', 0.01, 0.001),
        #'momentum': hp.uniform('momentum',0.1, 0.9),
        #'optimizer': hp.choice('optimizer',['adam','momentum']),
        #'decay_step': hp.uniform('decay_step',10000, 200000),
        #'decay_rate': hp.uniform('decay_rate',0.1, 0.7)
    }
    #max_evals = 3
    #https://github.com/hyperopt/hyperopt/issues/267
    # check if any trials file are given to continue hyperopt on
    trials = Trials()
    if TRIALS_PATH:
        trialFilePath = os.path.join(TRIALS_PATH, TRIALS_FILE_NAME)
        if os.path.exists(trialFilePath):
            # NOTE: unpickling can execute arbitrary code -- the trials file
            # must come from a trusted source.
            with open(trialFilePath, "rb") as f:
                trials = pickle.load(f)
            log_string("Loaded trials.")
    #otherwise create a new one in the log directory
    prevTrialsCount = len(trials)
    if not prevTrialsCount:
        trialFilePath = os.path.join(LOG_DIR, TRIALS_FILE_NAME)
    # NOTE(review): the source lost its indentation, so it is unclear whether
    # this branch belongs under `if not prevTrialsCount:`.  As written, mongo
    # mode replaces any loaded trials while prevTrialsCount keeps the count
    # of the loaded ones -- confirm against the original file.
    if FLAGS.mongo_mode == 1:
        trials = MongoTrials('mongo://localhost:27017/hyperopt/jobs', exp_key='exp{}'.format(uuid.uuid4()))
    # https://github.com/hyperopt/hyperopt-sklearn/issues/80
    # Changing the number of initial evaluations to 1 instead of the default 20 runs
    eval_runs = 0
    for i in range(1, max_trials + 1):
        eval_runs = max_evals * i + prevTrialsCount
        #print ("max:{}, i:{} and prev count:{}".format(max_evals,i,prevTrialsCount))
        best = fmin(
            main,
            space=space,
            algo=tpe.suggest,  #partial(tpe.suggest, n_startup_jobs=1), #tpe.suggest,
            max_evals=eval_runs,  #increase the eval count otherwise only previous runs will be used
            trials=trials)
        summarizeTrials(i, best, trials)
        # Persist after every call so the search can be resumed.
        with open(trialFilePath, "wb") as w:
            pickle.dump(trials, w)
        log_string("Written trials on run {}.".format(i))
def make_trials(host, port, exp_key, refresh=True):
    """Return in-memory Trials, or MongoTrials when a host/port is given.

    The database name comes from the module-level ``dbname``.
    """
    if (host, port) != (None, None):
        url = 'mongo://%s:%d/%s/jobs' % (host, int(port), dbname)
        return MongoTrials(url, exp_key=exp_key, refresh=refresh)
    return Trials()
def load_trials(trials_path, mongo_key=None, reset_trials=False):
    """Return a trials object for ``trials_path``.

    * A string starting with ``'mongo:'`` yields ``MongoTrials`` keyed by
      ``mongo_key``.
    * An existing file is unpickled, unless ``reset_trials`` is true.
    * Anything else (None, missing file, reset requested) yields a fresh
      ``Trials()``.

    :param trials_path: Mongo connection string, pickle file path, or None.
    :param mongo_key: experiment key used for the Mongo case.
    :param reset_trials: ignore an existing pickle file and start fresh.
    """
    if isinstance(trials_path, str) and trials_path.startswith('mongo:'):
        return MongoTrials(trials_path, exp_key=mongo_key)

    if (reset_trials or trials_path is None
            or not os.path.isfile(trials_path)):
        return Trials()

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # The context manager closes the handle, which previously leaked.
    with open(trials_path, 'rb') as handle:
        return pickle.load(handle)
def target_func1(evals):
    """Minimise ``math.sin`` over x in [-2, 2] with TPE; return fmin's result.

    :param evals: evaluation budget passed to ``fmin`` as ``max_evals``.
    """
    mongo_trials = MongoTrials('mongo://localhost:27017/mongo_hpo/jobs',
                               exp_key='exp1')
    search_space = hp.uniform('x', -2, 2)
    return fmin(math.sin, search_space, trials=mongo_trials,
                algo=tpe.suggest, max_evals=evals)
def tune_parameters():
    """Run a 1000-evaluation TPE search for the topic-model recommender.

    Trials live in a Mongo database whose name is derived from the item type
    and from whether context is used.  Losses, the best trial and the trial
    count are printed at the end.
    """
    from utils.constants import Constants

    suffix = '_context' if Constants.USE_CONTEXT else '_nocontext'
    mongo_url = ('mongo://localhost:1234/topicmodel_'
                 + Constants.ITEM_TYPE + suffix + '/jobs')
    trials = MongoTrials(mongo_url, exp_key='exp1')
    print('Connected to %s' % mongo_url)

    # Disabled dimensions: 'lda_alpha' (uniform 0..1), 'lda_beta'
    # (uniform 0..2) and 'topic_weighting_method'
    # (choice of 'probability', 'binary', 'all_topics').
    candidate = {
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: hp.uniform(
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5),
        Constants.TOPIC_MODEL_ITERATIONS_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1),
        Constants.TOPIC_MODEL_PASSES_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1),
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1),
        Constants.USE_CONTEXT_FIELD: True,
    }
    space = hp.choice(Constants.USE_CONTEXT_FIELD, [candidate])

    fmin(run_recommender, space=space, algo=tpe.suggest, max_evals=1000,
         trials=trials)

    print('losses', sorted(trials.losses()))
    print('best', trials.best_trial['result'],
          trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
def main():
    """
    Perform hyperparameter optimization with hyperopt in three steps:

    1. create a trials object backed by the mongo database where all results
       are stored;
    2. define the stochastic search space from which hyperopt samples
       hyperparameter configurations;
    3. run the minimization of the objective function.
    """
    trials = MongoTrials('mongo://localhost:1234/otto-sqrt-pca-95-5/jobs',
                         exp_key='15-11-03')

    # Search space.  The dense part is a chain of layers: DL1 -> DL2 ->
    # optionally DL3 (the 'MD2' choice is either 0 or a third layer).
    third_layer = {
        'num_units': hp.quniform('DL3', 512, 2048, 512),
        'more_layers': 0,
    }
    second_layer = {
        'num_units': hp.quniform('DL2', 512, 2048, 512),
        'more_layers': hp.choice('MD2', [0, third_layer]),
    }
    first_layer = {
        'num_units': hp.quniform('DL1', 512, 2048, 512),
        'more_layers': second_layer,
    }
    space = {
        'dense_part': first_layer,
        'leakiness': hp.choice('leak', [0, 0.01, 0.15]),
        'weight_init': hp.choice('weight', ['orto', 'uni']),
        'input_dropout': hp.quniform('p_in', 0.1, 0.4, 0.1),
        'learning_rate': hp.choice('lr', [0.001, 0.01, 0.025, 0.05, 0.1]),
    }

    # Optimize
    best = fmin(objective, space=space, algo=tpe.suggest, max_evals=100,
                trials=trials)
    print(trials.losses())
    print(best)
def hp_parallel(self):
    """Run a TPE search for ``self.task`` with Mongo-backed trials.

    The experiment key is the task id followed by 64 random bits, and the
    evaluation budget is ``self.max_evals * self.n_parallel``.  Scores are
    negated losses.

    :return: ``(self.accumulate_max(scores, max_evals, batch_size),
        search_path)`` where ``search_path`` is ``trials.vals`` with an
        added ``'score'`` entry.
    """
    unique_key = self.task.id + str(random.getrandbits(64))
    trials = MongoTrials('mongo://localhost:27017/foo_db/jobs',
                         exp_key=unique_key)
    batch_size = self.n_parallel

    best_params = fmin(
        fn=self.hp_objective,
        space=self.task.hp_space,
        algo=tpe.suggest,
        max_evals=self.max_evals * batch_size,
        trials=trials,
    )

    scores = []
    for trial in trials.trials:
        scores.append(-trial['result']['loss'])
    print("hp parallel task: %s, best: %s, params: %s" %
          (self.task.id, max(scores), best_params))

    search_path = trials.vals
    negated_losses = np.array(trials.losses()) * -1
    search_path['score'] = list(negated_losses)

    accumulated = self.accumulate_max(scores, self.max_evals, batch_size)
    return accumulated, search_path
def train(air_model, train_epochs=20):
    """
    Run TPE black box optimization of the neural network to use.

    After evaluating all points, the best configuration is trained once more
    with ``persist=True`` and the model status is set to TRAINED.

    :param air_model: model object; its ``status`` and ``best_model``
        attributes are updated and it is saved through ``save_model``.
    :param train_epochs: passed to ``fmin`` as ``max_evals``.
    """
    from db import get_model, save_model
    from model import ModelStatus

    info('Running training on new process')
    air_model.status = ModelStatus.TRAINING
    save_model(air_model)

    fspace = {
        # NEQP: the label is spelled 'optimzer' -- presumably intentional?
        # It is left untouched because the label is the key under which the
        # sampled value is recorded.
        'optimizer': hp.choice('optimzer', ['rmsprop', 'adagrad']),
        # Choose from 0 to 9 layers.
        'layers': hp.choice(
            'layers', [(str(x), layer_choice(x)) for x in range(10)]),
    }

    if config.DISTRIBUTED_HYPEROPT:
        # TODO: Probably not send all model from json. Just send the ids and
        # make the worker fetch it from the DB.
        fspace['model_json'] = air_model.to_json()
        trials = MongoTrials('mongo://localhost:27017/testdb/jobs',
                             exp_key='userid.trainingid',
                             workdir='/home/paezand/pusher/bottle_air')
        best = fmin(fn=run_model_fn, space=fspace, trials=trials,
                    algo=tpe.suggest, max_evals=train_epochs)
        # Run workers with
        # hyperopt-mongo-worker --mongo=$mongodbURL/testdb --poll-interval=0.1 --workdir=$bottle_air_dir
    else:
        # NEQP: did you check the option of doing a parallel search with
        # MongoDB?
        trials = Trials()
        best = fmin(fn=air_model.run_model(), space=fspace,
                    algo=tpe.suggest, max_evals=train_epochs, trials=trials)

    best_config = space_eval(fspace, best)

    # %-formatting prints the same text as the old print statements and
    # parses on both Python 2 and Python 3.
    print('best: %s' % (best_config,))
    print('trials:')
    for trial in trials.trials[:2]:
        print(trial)

    # Train and persist best model.
    model_fn = air_model.run_model(persist=True)
    model_fn(best_config)

    print('Training finished')
    air_model.status = ModelStatus.TRAINED
    air_model.best_model = best
    save_model(air_model)
def tune_parameters():
    """Search the topic-model recommender's hyperparameters with TPE.

    Uses Mongo-backed trials (database name built from the item type and the
    context flag), runs 1000 evaluations of ``run_recommender`` and prints
    the sorted losses, the best trial and the number of trials.
    """
    from utils.constants import Constants

    if Constants.USE_CONTEXT:
        context_name = '_context'
    else:
        context_name = '_nocontext'
    mongo_url = ''.join([
        'mongo://localhost:1234/topicmodel_',
        Constants.ITEM_TYPE,
        context_name,
        '/jobs',
    ])
    trials = MongoTrials(mongo_url, exp_key='exp1')
    print('Connected to %s' % mongo_url)

    # Not searched at the moment: 'lda_alpha' (uniform 0..1), 'lda_beta'
    # (uniform 0..2), 'topic_weighting_method' (choice of 'probability',
    # 'binary', 'all_topics').
    epsilon = hp.uniform(Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5)
    iterations = hp.quniform(
        Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1)
    passes = hp.quniform(Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1)
    num_topics = hp.quniform(
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1)
    option = {
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: epsilon,
        Constants.TOPIC_MODEL_ITERATIONS_FIELD: iterations,
        Constants.TOPIC_MODEL_PASSES_FIELD: passes,
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: num_topics,
        Constants.USE_CONTEXT_FIELD: True,
    }
    space = hp.choice(Constants.USE_CONTEXT_FIELD, [option])

    fmin(run_recommender, space=space, algo=tpe.suggest,
         max_evals=1000, trials=trials)

    best_trial = trials.best_trial
    print('losses', sorted(trials.losses()))
    print('best', best_trial['result'], trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
def make_trials(host, port, exp_key, refresh=True, dbname='dbname'):
    """Return in-memory Trials, or MongoTrials when a host/port is given.

    :param host: MongoDB host, or None together with ``port`` for in-memory
        trials.
    :param port: MongoDB port (converted with ``int``).
    :param exp_key: experiment key for the MongoTrials object.
    :param refresh: passed through to MongoTrials.
    :param dbname: database name; a warning is logged when it is left at the
        placeholder default ``'dbname'``.
    """
    if (host, port) == (None, None):
        return Trials()

    if dbname == 'dbname':
        # Logger.warn is a deprecated alias (removed in Python 3.13).
        logger.warning('You probably want to override the default dbname')
    url = 'mongo://%s:%d/%s/jobs' % (host, int(port), dbname)
    return MongoTrials(url, exp_key=exp_key, refresh=refresh)
def create_mongo_trials(self, mongo_uri):
    """Open MongoTrials under the next unused integer experiment key.

    The highest ``exp_key`` in ``netron.hyperopt_jobs`` is looked up and
    incremented; when the collection is empty the key is 0.

    :param mongo_uri: server URI, with or without a trailing slash.
    :return: ``(exp_key, MongoTrials)``.
    """
    jobs_col = MongoClient(mongo_uri)["netron"]["hyperopt_jobs"]
    newest = list(
        jobs_col.find({}, {"exp_key": 1})
        .sort("exp_key", pymongo.DESCENDING)
        .limit(1)
    )
    exp_key = int(newest[0]["exp_key"]) + 1 if newest else 0

    # Single-argument print(...) works on both Python 2 and Python 3.
    print("Current experiment key is %s" % exp_key)

    # Normalise the separator so a URI without a trailing slash no longer
    # yields a malformed '<host>:<port>netron/...' string.
    trials_uri = mongo_uri.rstrip('/') + '/netron/hyperopt_jobs'
    return exp_key, MongoTrials(trials_uri, exp_key=exp_key)
def run_fmin(self, online=True, upload=True, objective=objective_success,
             max_evals=3, wrap=None, **kwargs):
    """Run fmin over a 2-D uniform space through SigOptTrials.

    :param online: together with ``upload`` decides the ``online`` flag of
        SigOptTrials (``online and upload``).
    :param upload: when true and ``online`` is false, ``trials.upload()`` is
        called after the search.
    :param objective: function to minimise.
    :param max_evals: evaluation budget.
    :param wrap: ``'mongo'`` or ``'spark'`` to wrap MongoTrials or
        SparkTrials; anything else wraps no inner trials object.
    :param kwargs: forwarded to ``fmin``.
    :return: ``(trials, best)``; ``best`` is None when all trials failed.
    """
    project = 'hyperopt-integration-test'

    inner = None
    if wrap == 'spark':
        inner = SparkTrials()
    elif wrap == 'mongo':
        inner = MongoTrials('mongo://mongodb:27017/foo_db/jobs',
                            exp_key=str(uuid.uuid4()))
    trials = SigOptTrials(project=project, online=(online and upload),
                          trials=inner)

    search_space = {
        'x': hp.uniform('x', -10, 10),
        'y': hp.uniform('y', -10, 10),
    }
    try:
        best = fmin(objective, space=search_space, algo=tpe.suggest,
                    max_evals=max_evals, trials=trials, **kwargs)
    except hyperopt.exceptions.AllTrialsFailed:
        best = None

    if upload and not online:
        trials.upload()
    return trials, best
async def delete_gen(request, gen):
    """Delete all trials of generation ``gen`` once the request confirms it.

    Without ``really=yes`` in the query arguments a confirmation prompt is
    returned; with it the trials are deleted and the client is redirected to
    the status page.
    """
    confirmed = request.args.get('really', 'no') == 'yes'
    if not confirmed:
        return html(create_delete_prompt(f"GENERATION 'covid-{gen}'"))

    gen_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                             f'covid-{gen}')
    gen_trials.refresh()
    gen_trials.delete_all()
    return redirect("/status/?refresh=true")
def lfw_view2_randomL(host, dbname):
    """Extract view-2 features for the best 'randomL' trial and train on them.

    Among the trials of ``exp_keys['randomL']`` whose result status is
    STATUS_OK, the one with the smallest loss is selected.  Its spec is
    printed, features are written under the current directory, and
    ``train_view2`` is run with a precomputed-kernel libsvm.

    :param host: MongoDB host (port 44556 is used).
    :param dbname: database name; also the prefix of the feature namebase.
    """
    trials = MongoTrials('mongo://%s:44556/%s/jobs' % (host, dbname),
                         refresh=False)
    #B = main_lfw_driver(trials)
    #E = B.get_experiment(name=('random', 'foo'))
    mongo_trials = trials.view(exp_key=exp_keys['randomL'], refresh=True)

    docs = [d for d in mongo_trials.trials
            if d['result']['status'] == hyperopt.STATUS_OK]
    local_trials = hyperopt.trials_from_docs(docs)
    losses = local_trials.losses()
    best_doc = docs[np.argmin(losses)]

    #XXX: Potentially affected by the tid/injected jobs bug,
    #     but unlikely. Rerun just in case once dual svm solver is in.
    # Single-argument print(...) works on both Python 2 and Python 3.
    print(best_doc['spec'])

    namebase = '%s_randomL_%s' % (dbname, best_doc['tid'])
    model_spec = best_doc['spec']['model']
    get_view2_features(
        slm_desc=model_spec['slm'],
        preproc=model_spec['preproc'],
        comparison=best_doc['spec']['comparison'],
        namebase=namebase,
        basedir=os.getcwd(),
    )

    namebases = [namebase]
    basedirs = [os.getcwd()] * len(namebases)

    #train_view2(namebases=namebases, basedirs=basedirs)
    # running on the try2 database
    # finds id 1674
    #train err mean 0.0840740740741
    #test err mean 0.199666666667

    #running with libsvm:
    train_view2(namebases=namebases, basedirs=basedirs,
                use_libsvm={'kernel': 'precomputed'})
def __init__(self, host, port, db, config_key=None, model_name=None):
    """Connect to MongoDB and attach Mongo-backed trials to the tracker.

    :param host: MongoDB host.
    :param port: MongoDB port (formatted with ``%d``, so it must be an int).
    :param db: database name; its ``results`` collection is kept on
        ``self.mongo_collection``.
    :param config_key: forwarded to the parent constructor.
    :param model_name: forwarded to the parent constructor; ``self.model_name``
        is then used as the experiment key.
    :raises ConnectionFailure: if the server does not answer ``ismaster``.
    """
    super(ProgressTrackerMongo, self).__init__(model_name=model_name,
                                               config_key=config_key)
    self.client = MongoClient(host, port)
    try:
        # Issue a command so that an unreachable server is detected here.
        self.client.admin.command('ismaster')
    except ConnectionFailure:
        print("Server not available")
        # Re-raise the caught exception so its message and traceback are
        # kept (a fresh, empty ConnectionFailure used to replace it).
        raise

    self.state['trials'] = MongoTrials(
        'mongo://%s:%d/%s/jobs' % (host, port, db),
        exp_key=self.model_name)
    self.mongo_collection = self.client[db].results
def main(hyperparameter_search_args, tqdm=tqdm, fmin_kwargs=None):
    """Run one hyperparameter search and return its trials object.

    The search arguments are first written as JSON into the search
    directory.  Trials come from MongoDB when ``do_use_mongo`` is set, from
    a previous run when the rotation directory already held more than one
    entry, and are otherwise created fresh.

    :param hyperparameter_search_args: search configuration object.
    :param tqdm: progress-bar factory handed to the objective (and to
        ``read_or_recreate_trials``).
    :param fmin_kwargs: extra keyword arguments for ``fmin``.
    :return: the trials object used by ``fmin``.
    """
    args = hyperparameter_search_args
    extra_fmin_kwargs = {} if fmin_kwargs is None else fmin_kwargs

    search_dir = args.search_dir
    args_path = os.path.join(search_dir, HYPERPARAMETER_SEARCH_ARGS_FILENAME)
    args.to_json_file(args_path)
    hyperopt_space, constant_params = read_config(search_dir)

    rotation = args.rotation
    base_dir = os.path.join(search_dir, str(rotation))
    # Must be evaluated before the directory is (possibly) created below.
    already_existed = (os.path.exists(base_dir)
                       and len(os.listdir(base_dir)) > 1)
    if not os.path.isdir(base_dir):
        os.makedirs(base_dir)

    objective = ObjectiveFntr(
        base_dir,
        rotation,
        constant_params,
        tqdm,
        single_task=args.single_task_search,
        do_match_train_windows=args.do_match_train_windows,
        do_eicu=args.do_eicu,
    )
    algo = HP_ALGS[args.algo]

    if args.do_use_mongo:
        mongo_addr = '{base}/{db}/jobs'.format(base=args.mongo_addr,
                                               db=args.mongo_db)
        print("Parallelizing search via Mongo DB: %s" % mongo_addr)
        trials = MongoTrials(mongo_addr, exp_key=args.mongo_exp_key)
    elif already_existed:
        recreated = read_or_recreate_trials(search_dir, tqdm=tqdm)
        _, _, _, _, trials_by_rotation = recreated
        trials = trials_by_rotation[str(rotation)]
    else:
        trials = Trials()

    fmin(objective, space=hyperopt_space, algo=algo,
         max_evals=args.max_evals, trials=trials, **extra_fmin_kwargs)
    return trials
def tune_parameters():
    """Build the factorization-machine search space and report stored trials.

    The call to ``fmin`` is currently disabled, so this function only
    connects to the Mongo database for the configured item type / context
    flag / nested cross-validation cycle and prints the losses, the best
    trial and the number of trials already stored there.
    """
    # An in-memory Trials() was used here before the Mongo-backed one.
    from utils.constants import Constants

    context_name = '_context' if Constants.USE_CONTEXT else '_nocontext'
    cycle = '_' + str(Constants.NESTED_CROSS_VALIDATION_CYCLE)
    mongo_url = ('mongo://localhost:1234/' + Constants.ITEM_TYPE
                 + context_name + '_db_nested' + cycle + '/jobs')
    trials = MongoTrials(mongo_url, exp_key='exp1')
    print('Connected to %s' % mongo_url)

    # Search dimensions that are currently disabled:
    #   'fm_init_stdev'             uniform 0..2
    #   'fm_use_1way_interactions'  choice True/False
    #   'fm_use_bias'               choice True/False (label 'use_bias')
    #   'lda_alpha'                 uniform 0..1
    #   'lda_beta'                  uniform 0..2
    #   CONTEXT_EXTRACTOR_EPSILON   uniform 0..0.5
    #   TOPIC_MODEL_ITERATIONS      quniform 50..500 step 1
    #   TOPIC_MODEL_PASSES          quniform 1..100 step 1
    #   TOPIC_MODEL_NUM_TOPICS      quniform 1..1000 step 1, or a choice of
    #                               [10, 20, 30, 50, 75, 100, 150, 300]
    #   TOPIC_MODEL_TYPE            choice 'lda' / 'mnf'
    #   'topic_weighting_method'    choice 'probability' / 'binary' /
    #                               'all_topics'
    #   'use_no_context_topics_sum' choice True/False
    overrides = {
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.TOPN_NUM_ITEMS_FIELD: Constants.TOPN_NUM_ITEMS,
        Constants.NESTED_CROSS_VALIDATION_CYCLE_FIELD:
            Constants.NESTED_CROSS_VALIDATION_CYCLE,
        Constants.FM_ITERATIONS_FIELD: hp.quniform(
            Constants.FM_ITERATIONS_FIELD, 1, 500, 1),
        Constants.FM_NUM_FACTORS_FIELD: hp.quniform(
            Constants.FM_NUM_FACTORS_FIELD, 0, 200, 1),
        Constants.USE_CONTEXT_FIELD: Constants.USE_CONTEXT,
    }
    params = Constants.get_properties_copy()
    params.update(overrides)

    space = hp.choice(Constants.USE_CONTEXT_FIELD, [params])

    if not Constants.USE_CONTEXT:
        unwanted_args = [
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD,
            Constants.TOPIC_MODEL_ITERATIONS_FIELD,
            Constants.TOPIC_MODEL_PASSES_FIELD,
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD,
        ]
        named_args = space.pos_args[1].named_args
        # Iterate over a snapshot because entries are removed in place.
        for element in list(named_args):
            if element[0] in unwanted_args:
                space.pos_args[1].named_args.remove(element)

    # Disabled search:
    # best = fmin(
    #     run_recommender, space=space, algo=tpe.suggest,
    #     max_evals=100, trials=trials)

    print('losses', sorted(trials.losses()))
    print('best', trials.best_trial['result']['loss'],
          trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
P_hi = 0.9 P_lo = 0.9 V_hi = 10 V_lo = 20 threads = 8 date_time_string = str(datetime.datetime.now()).split('.')[0] date_time_string = reduce(lambda y,z: string.replace(y,z,"_"), [date_time_string,":", " ","-"]) log_file_prefix = def_logging_dir + "temp_" + date_time_string objective = make_objective(nengo_path, def_results_dir, log_file_prefix, date_time_string, D, num_neurons, neurons_per_dim, num_vectors, trial_length, learning_pres, testing_pres, clean_learning, learning_noise,testing_noise, num_runs, P_hi, P_lo, V_hi, V_lo, threads, dry_run) if use_mongo: trials = MongoTrials('mongo://localhost:1234/first_try/jobs', exp_key=exp_key) worker_call_string = \ ["hyperopt-mongo-worker", "--mongo=localhost:1234/first_try", "--max-consecutive-failures","1", "--reserve-timeout", "15.0", "--workdir",def_results_dir, ] print worker_call_string workers = [] for i in range(num_mongo_workers): #using Popen causes the processes to run in the background p = subprocess.Popen(worker_call_string) workers.append(p)
def work(self):
    """
    Run a small experiment with several workers running in parallel
    using Python threads.

    ``self.n_threads`` worker threads (each running
    ``self.worker_thread_fn``) are started against a ``TempMongo``
    database, then one experiment is launched for every key in
    ``self.exp_keys``.  Once the workers have been joined and every
    experiment has been blocked on, the trial counts are asserted per key
    and for the database as a whole.
    """
    n_threads = self.n_threads
    jobs_per_thread = self.jobs_per_thread
    n_trials_per_exp = n_threads * jobs_per_thread
    n_trials_total = n_trials_per_exp * len(self.exp_keys)

    with TempMongo() as tm:
        mj = tm.mongo_jobs('foodb')

        # Every worker gets jobs_per_thread jobs for each experiment key.
        n_jobs = jobs_per_thread * len(self.exp_keys)
        threads = []
        for ii in range(n_threads):
            threads.append(threading.Thread(
                target=self.worker_thread_fn,
                args=(('hostname', ii), n_jobs, 30.0)))
        for th in threads:
            th.start()

        exp_list = []
        trials_list = []
        try:
            for key in self.exp_keys:
                print('running experiment')
                trials = MongoTrials(tm.connection_string('foodb'), key)
                assert len(trials) == 0
                if hasattr(self, 'prep_trials'):
                    self.prep_trials(trials)

                bandit = self.bandit
                if self.use_stop:
                    # The stopping algorithm bounds the run itself, so the
                    # experiment is asked for an effectively unlimited
                    # number of trials and is not blocked on here.
                    bandit_algo = RandomStop(
                        n_trials_per_exp, bandit, cmd=self.cmd)
                    print(bandit_algo)
                    queue_len = 1
                    n_to_run = sys.maxint
                    block = False
                else:
                    bandit_algo = Random(bandit, cmd=self.cmd)
                    queue_len = 10000
                    n_to_run = n_trials_per_exp
                    # Only block here when there is a single experiment.
                    block = (len(self.exp_keys) == 1)

                exp = Experiment(
                    trials, bandit_algo, max_queue_len=queue_len)
                exp.run(n_to_run, block_until_done=block)

                exp_list.append(exp)
                trials_list.append(trials)
        finally:
            print('joining worker thread...')
            for th in threads:
                th.join()

        for exp in exp_list:
            exp.block_until_done()

        for trials in trials_list:
            n_done_synced = trials.count_by_state_synced(JOB_STATE_DONE)
            assert n_done_synced == n_trials_per_exp, (
                n_done_synced, n_trials_per_exp)
            n_done_unsynced = trials.count_by_state_unsynced(JOB_STATE_DONE)
            assert n_done_unsynced == n_trials_per_exp
            assert len(trials) == n_trials_per_exp, (
                'trials failure %d %d ' % (len(trials), n_trials_per_exp))
            assert len(trials.results) == n_trials_per_exp, (
                'results failure %d %d ' % (len(trials.results),
                                            n_trials_per_exp))

        all_trials = MongoTrials(tm.connection_string('foodb'))
        assert len(all_trials) == n_trials_total
# Fragment: tail of a routine whose ``def`` is outside this view, followed
# by the script entry point.
#
# The tail reads lines from ``f`` until one contains 'Trials:', reads the
# next three lines into ``results``, ``losses`` and ``statuses``, closes
# ``f``, and then executes the text after 'Trials: ' as the right-hand side
# of an assignment to ``trials``, which it returns.
# NOTE(review): ``exec`` runs text taken from the file as Python code --
# only safe for files you trust; TODO confirm where the file comes from.
#
# The ``__main__`` section opens the 'big_run3' experiment through
# MongoTrials, takes the 'test_bias', 'learn_bias' and 'learning_rate'
# values together with the losses, and keeps only the entries whose loss
# is below 1.
while line.find('Trials:') < 0: line = f.readline() trials = line results = f.readline() losses = f.readline() statuses = f.readline() f.close() trials = trials.split('Trials: ')[1] exec "trials=" + trials return trials if __name__=="__main__": trials = MongoTrials('mongo://localhost:1234/first_try/jobs', exp_key='big_run3') x_name = 'test_bias' y_name = 'learn_bias' z_name = 'learning_rate' x = trials.vals[x_name] y = trials.vals[y_name] z = trials.vals[z_name] w = trials.losses() indices = filter(lambda x: w[x] < 1, range(len(w))) x = [x[i] for i in indices] y = [y[i] for i in indices] z = [z[i] for i in indices] w = [w[i] for i in indices]