예제 #1
0
def test_trial_attachments():
    """End-to-end test: run ``fmin`` in a background thread against a
    temporary MongoDB while a ``MongoWorker`` in this thread consumes the
    queued jobs, then verify every trial reached JOB_STATE_DONE."""
    exp_key = "A"
    with TempMongo() as tm:
        mj = tm.mongo_jobs("foo")
        trials = MongoTrials(tm.connection_string("foo"), exp_key=exp_key)

        space = hp.uniform("x", -10, 10)
        max_evals = 3
        # The optimizer runs concurrently; it enqueues jobs the worker below
        # pulls off the Mongo jobs collection.
        fmin_thread = threading.Thread(target=fmin_thread_fn,
                                       args=(space, trials, max_evals))
        fmin_thread.start()

        mw = MongoWorker(mj=mj, logfilename=None, workdir="mongoexp_test_dir")
        n_jobs = max_evals
        while n_jobs:
            try:
                # 10.0 is the reserve timeout in seconds for one job.
                mw.run_one("hostname", 10.0, erase_created_workdir=True)
                print("worker: ran job")
            except Exception as exc:
                # Keep consuming even if a single job errors; the final
                # asserts will catch any failures.
                print(f"worker: encountered error : {str(exc)}")
                traceback.print_exc()
            n_jobs -= 1
        fmin_thread.join()
        # Fresh MongoTrials without exp_key sees every trial in the DB.
        all_trials = MongoTrials(tm.connection_string("foo"))

        assert len(all_trials) == max_evals
        assert trials.count_by_state_synced(JOB_STATE_DONE) == max_evals
        assert trials.count_by_state_unsynced(JOB_STATE_DONE) == max_evals
예제 #2
0
def run_optimization(level=1):
    """Run one level of a staged hyper-parameter optimization.

    Skips the level entirely if the *next* level already has trials
    (meaning this one finished earlier); otherwise runs ``fmin`` until the
    level's evaluation budget is met.

    :param level: 1-based index into the module-level ``LEVEL_DEFS`` table.
    """
    print(f"Optimizing at level {level}")

    set_random_seeds(4)

    # If the following level already has trials, this level must have been
    # completed on a previous invocation.
    next_lvl_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                  exp_key=f'covid-{level+1}')
    if len(next_lvl_trials.trials) > 0:
        print(f"Already completed level {level} -- skipping")
        return

    exp_key = f'covid-{level}'

    trials = MongoTrials('mongo://localhost:1234/covid/jobs', exp_key=exp_key)

    suggestion_box = hyperopt.tpe.suggest

    if level == 1:
        # LEVEL_DEFS rows look like (depth, new_budget, extend_budget);
        # level 1 only needs its new-trial budget.
        max_evals = LEVEL_DEFS[0][1]
        depth = 1

    elif level > 1:
        depth, new_budget, extend_budget = LEVEL_DEFS[level - 1]
        last_depth, _, _ = LEVEL_DEFS[level - 2]

        # Minimum one per node for the expensive ones -- no point wasting compute time
        num_new = int(np.ceil((new_budget / depth) / NUM_NODES) * NUM_NODES)

        if len(trials.trials) == 0:
            # First time at this level: seed it from the previous level's
            # best results and bias the suggestion algorithm toward them.
            print("Generating estimates from previous level")
            result_docs = configure_next_level(level, depth, extend_budget)
            num_to_extend = len(result_docs)

            suggestion_box = create_suggestion_box(result_docs)

        last_level_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                        exp_key=f'covid-{level-1}')
        prev_level_count = len(
            [x for x in last_level_trials.losses() if x is not None])

        # Budget: everything carried forward plus the fresh allocation.
        max_evals = prev_level_count + num_new
        trials.refresh()

    objective = functools.partial(test_parameterization, num_epochs=depth)

    if len([x for x in trials.statuses() if x == 'ok']) >= max_evals:
        print(f"Already completed level {level} -- skipping")
    else:
        best = hyperopt.fmin(objective,
                             space=SEARCH_SPACE,
                             algo=suggestion_box,
                             max_evals=max_evals,
                             trials=trials)

        print(best)
예제 #3
0
 def run_fmin(self,
              online=True,
              upload=True,
              objective=objective_success,
              max_evals=3,
              wrap=None,
              **kwargs):
     """Run ``fmin`` through a ``SigOptTrials`` wrapper and return the
     trials object together with the best point found.

     :param online: stream results to SigOpt while running (when combined
         with ``upload``).
     :param upload: if True and not online, upload results after the run.
     :param objective: objective function handed to ``fmin``.
     :param max_evals: evaluation budget.
     :param wrap: inner trials backend -- 'mongo', 'spark', or None for the
         default in-memory trials.
     :param kwargs: forwarded to ``fmin``.
     :return: tuple of (trials, best) where best is None if all trials failed.
     """
     project = 'hyperopt-integration-test'
     if wrap == 'mongo':
         # Random exp_key keeps repeated test runs isolated in the jobs DB.
         trials = MongoTrials('mongo://mongodb:27017/foo_db/jobs',
                              exp_key=str(uuid.uuid4()))
     elif wrap == 'spark':
         trials = SparkTrials()
     else:
         trials = None
     trials = SigOptTrials(project=project,
                           online=(online and upload),
                           trials=trials)
     try:
         best = fmin(objective,
                     space={
                         'x': hp.uniform('x', -10, 10),
                         'y': hp.uniform('y', -10, 10)
                     },
                     algo=tpe.suggest,
                     max_evals=max_evals,
                     trials=trials,
                     **kwargs)
     except hyperopt.exceptions.AllTrialsFailed:
         best = None
     if upload and not online:
         # Deferred upload mode: push everything to SigOpt at the end.
         trials.upload()
     return trials, best
예제 #4
0
def load_hp(hp_fname, host, port):
    """Load hyperopt search state from a pickle file or from a MongoDB.

    :param hp_fname: path to a pickle containing 'hp_space', 'trials' and
        'n_startup_jobs'; takes precedence over the Mongo options.
    :param host: MongoDB host to connect to when no pickle file is given.
    :param port: MongoDB port (converted to int).
    :return: tuple of (hp_space, trials, n_startup_jobs).
    :raises ValueError: when neither a pickle file nor a host is provided.
    """
    if hp_fname is not None:
        # Pickles must be read in binary mode ('rb'); text mode corrupts the
        # stream. Use a context manager so the handle is closed even if
        # unpickling fails.
        with open(hp_fname, 'rb') as f:
            hp = cPickle.load(f)
        hp_space = hp['hp_space']
        trials = hp['trials']
        n_startup_jobs = hp['n_startup_jobs']

    elif host is not None:

        trials = MongoTrials('mongo://%s:%d/%s/jobs' %
                             (host, int(port), mongo_dbname),
                             exp_key=mongo_dbname,
                             refresh=True)

        # -- retrieve hp_space and n_startup_jobs from trials attachment
        cmd0, cmd1 = trials.miscs[0]['cmd']
        assert cmd0 == 'domain_attachment'
        blob = trials.attachments[cmd1]
        domain = cPickle.loads(blob)
        spec = hyperopt.base.spec_from_misc(trials.miscs[0])

        # Re-evaluate the search domain for this spec to recover the
        # original fmin arguments.
        memo = domain.memo_from_config(spec)
        argdict = hyperopt.pyll.rec_eval(domain.expr, memo=memo)

        hp_space = argdict['hp_space']
        n_startup_jobs = argdict['n_startup_trials']

    else:
        raise ValueError('No Pickle file nor MongoDB host informed')

    return hp_space, trials, n_startup_jobs
예제 #5
0
def _fmin_parallel(
    queue: multiprocessing.Queue,
    fn: Callable,
    exp_key: str,
    space: dict,
    algo: Callable = tpe.suggest,
    max_evals: int = 100,
    show_progressbar: bool = False,
    mongo_port_address: str = "localhost:1234/scvi_db",
):
    """Launches a ``hyperopt`` minimization procedure.

    Runs ``fmin`` against a MongoDB-backed trials store and pushes the
    resulting ``MongoTrials`` object onto *queue* for the parent process.

    :param queue: queue the finished trials object is put on.
    :param fn: objective function to minimize.
    :param exp_key: experiment key identifying this run in the jobs DB.
    :param space: hyperopt search space.
    :param algo: suggestion algorithm (defaults to TPE).
    :param max_evals: maximum number of evaluations.
    :param show_progressbar: forwarded to ``fmin``.
    :param mongo_port_address: ``host:port/db`` prefix of the Mongo jobs DB.
    """
    logger.debug("Instantiating trials object.")
    # instantiate Trials object
    trials = MongoTrials(as_mongo_str(os.path.join(mongo_port_address,
                                                   "jobs")),
                         exp_key=exp_key)

    # run hyperoptimization
    logger.debug("Calling fmin.")
    _ = fmin(
        fn=fn,
        space=space,
        algo=algo,
        max_evals=max_evals,
        trials=trials,
        show_progressbar=show_progressbar,
    )
    logger.debug("fmin returned.")
    # queue.put uses pickle so remove attribute containing thread.lock
    if hasattr(trials, "handle"):
        logger.debug("Deleting Trial handle for pickling.")
        del trials.handle
    logger.debug("Putting Trials in Queue.")
    queue.put(trials)
예제 #6
0
def do_test(runs):
    """Tune the manager/building HCC feature hyper-parameters with TPE,
    storing trials in a remote MongoDB, and print the current/best losses.

    :param runs: number of runs per evaluated point (forwarded to the
        batch loss objective).
    """
    expiriment_name = 'bid_and_mngr_hcc_trials'
    gc_host = '35.187.46.132'

    df = load_train()
    space = {
        'mngr_k': hp.quniform('mngr_k', 3, 50, 1),
        'mngr_f': hp.loguniform('mngr_f', log(0.1), log(5)),
        'mngr_n': hp.choice('mngr_n', [2, 3, 4, 5, 6, 7, 10]),
        'bid_k': hp.quniform('bid_k', 3, 50, 1),
        'bid_f': hp.loguniform('bid_f', log(0.1), log(5)),
        'bid_n': hp.choice('bid_n', [2, 3, 4, 5, 6, 7, 10])
    }

    db_path = 'mongo://{}:27017/{}/jobs'.format(gc_host, expiriment_name)
    trials = MongoTrials(db_path, exp_key='exp1')

    objective = partial(bid_and_mngr_optimizer.loss_for_batch,
                        df=df,
                        runs=runs)
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=10000)

    # Python 3 print function (the original Python 2 print statements are a
    # syntax error under Python 3, which the rest of this module targets).
    print()
    print('curr={}'.format(best))
    print('best={}'.format(get_the_best_loss(trials)))
def main(output_path):
    """Tune LightGBM hyper-parameters with TPE using MongoDB-backed trials
    and write the serialized trials to *output_path*."""
    # Search space for the LightGBM regularization / structure parameters.
    search_space = {
        'max_depth': hp.quniform('max_depth', 13, 13, 1),
        'num_leaves': hp.quniform('num_leaves', 50, 500, 50),
        'bagging_fraction': hp.quniform('bagging_fraction', 0.5, 0.9, 0.05),
        'feature_fraction': hp.quniform('feature_fraction', 0.25, 0.55, 0.05),
        'min_data_in_leaf': hp.quniform('min_data_in_leaf', 100, 500, 50),
        'lambda_l1': hp.loguniform('lambda_l1', -3, 2),
        'lambda_l2': hp.loguniform('lambda_l2', -3, 2),
    }

    mongo_trials = MongoTrials('mongo://localhost:27017/allstate/jobs',
                               exp_key='lightgbm_2')

    # Run the optimization.
    fmin(fn=evaluate_lightgbm,
         space=search_space,
         algo=tpe.suggest,
         max_evals=200,
         trials=mongo_trials)

    # Persist the raw trial documents.
    serialized = dumps(mongo_trials.trials)
    with open(output_path, 'w') as out_file:
        out_file.write(serialized)
def main(train_data, validation_data, test_data, trials_output, vw_args,
         max_evals, outer_loss_function, mongo, timeout):
    """Search Vowpal Wabbit FTRL/regularization hyper-parameters and write
    the trials to *trials_output*; returns 0 on completion."""
    # Log-uniform priors for regularization strengths; uniform for beta.
    search_space = {
        '--ftrl_alpha': hp.loguniform('ftrl_alpha', log(1e-5), log(1e-1)),
        '--ftrl_beta': hp.uniform('ftrl_beta', 0.01, 1.),
        '--l1': hp.loguniform('l1', log(1e-8), log(1e-1)),
        '--l2': hp.loguniform('l2', log(1e-8), log(1e-1)),
    }

    # MongoDB-backed trials when a connection string was supplied,
    # otherwise an in-memory store.
    if mongo:
        trials = MongoTrials(mongo)
    else:
        trials = Trials()

    vw_objective = Objective(train_data=train_data,
                             validation_data=validation_data,
                             test_data=test_data,
                             vw_args=vw_args,
                             outer_loss_function=outer_loss_function,
                             timeout=timeout)

    search(search_space,
           vw_objective,
           trials=trials,
           trials_output=trials_output,
           max_evals=max_evals)

    return 0
예제 #9
0
def main():
    """Optimize CIFAR-10 training hyper-parameters with TPE against a
    MongoDB jobs queue, retrying until the server is reachable, and save
    the best point to results.json on worker nodes."""
    mongo_db_host = os.environ["MONGO_DB_HOST"]
    mongo_db_port = os.environ["MONGO_DB_PORT"]
    experiment_name = os.environ.get("EXPERIMENT_NAME", 'cifar10-hyperopt')
    data_dir = os.path.abspath(os.environ.get('PS_MODEL_PATH', os.getcwd()))

    mongo_connect_str = "mongo://{0}:{1}/foo_db/jobs".format(
        mongo_db_host, mongo_db_port)

    while True:
        try:
            trials = MongoTrials(mongo_connect_str, exp_key=experiment_name)
        except ServerSelectionTimeoutError:
            # Server not up yet -- log and retry instead of swallowing the
            # error silently (matches the logging used elsewhere here).
            logging.warning('No MongoDB server is available for an operation')
        else:
            space = {
                'lr': hp.loguniform('lr', -10, 2),
                'momentum': hp.uniform('momentum', 0.1, 0.9)
            }
            best = fmin(train_cifar10,
                        space=space,
                        trials=trials,
                        algo=tpe.suggest,
                        max_evals=25)

            if os.environ["TYPE"] == "worker":
                save_path = os.path.join(data_dir, "results.json")
                with open(save_path, "w") as f:
                    logging.debug('Saving results.json to {}'.format(data_dir))
                    logging.info('Results: {}'.format((str(best))))
                    json.dump(json.dumps(best), f)
            return
예제 #10
0
def task2(msg):
    """Run a 300-evaluation TPE search with MongoDB-backed trials, print the
    top-10 scoring parameter rows, and return the best parameters found.

    :param msg: opaque message echoed at the end (presumably a task id or
        label from the caller -- TODO confirm).
    :return: dict of best parameter values from ``fmin``.
    """
    tpe_trials = MongoTrials('mongo://localhost:27018/foo_db/jobs',
                             exp_key='exp1')
    # Fixed rstate makes the TPE suggestions reproducible.
    opt_params = fmin(fn=objective,
                      space=hyper_params_space,
                      algo=tpe.suggest,
                      max_evals=300,
                      trials=tpe_trials,
                      rstate=np.random.RandomState(100))
    # idxs_vals[1] maps each parameter name to its per-trial values, aligned
    # with the losses collected in tpe_trials.results.
    tpe_results = pd.DataFrame({
        'score': [x['loss'] for x in tpe_trials.results],
        'timeperiod':
        tpe_trials.idxs_vals[1]['timeperiod'],
        'nbdevup':
        tpe_trials.idxs_vals[1]['nbdevup'],
        'nbdevdn':
        tpe_trials.idxs_vals[1]['nbdevdn']
    })
    tpe_results.sort_values(by=['score'], inplace=True)

    print(tpe_results.head(10))
    print(opt_params)
    print(msg)
    print('task2 is running')
    return opt_params
예제 #11
0
def main():
    """Optimize a 1-D toy objective with TPE against a MongoDB jobs queue,
    retrying until the server accepts connections; the parameter-server
    node ("ps") writes the best point to results.json."""
    mongo_db_host = os.environ["MONGO_DB_HOST"]
    mongo_db_port = os.environ["MONGO_DB_PORT"]
    experiment_name = os.environ.get("EXPERIMENT_NAME", 'cifar10-hyperopt')
    data_dir = os.path.abspath(os.environ.get('PS_MODEL_PATH', os.getcwd()))

    mongo_connect_str = "mongo://{0}:{1}/foo_db/jobs".format(
        mongo_db_host, mongo_db_port)

    while True:
        try:
            logging.info(
                'Launching MongoTrials for {}'.format(experiment_name))
            trials = MongoTrials(mongo_connect_str, exp_key=experiment_name)
        except ServerSelectionTimeoutError:
            # Server not reachable yet; loop and retry.
            logging.warning('No MongoDB server is available for an operation')
            pass
        else:
            space = {'x': hp.uniform('x', -2, 2)}
            best = fmin(obj,
                        space=space,
                        trials=trials,
                        algo=tpe.suggest,
                        max_evals=100)

            if os.environ["TYPE"] == "ps":
                save_path = os.path.join(data_dir, "results.json")
                with open(save_path, "w") as f:
                    logging.debug('Saving results.json to {}'.format(data_dir))
                    logging.info('Results: {}'.format((str(best))))
                    # NOTE(review): json.dump of json.dumps double-encodes
                    # the result (a JSON string inside JSON) -- confirm
                    # readers expect that.
                    json.dump(json.dumps(best), f)
            return
def main():
    """Optimize a 1-D toy objective with TPE against a MongoDB jobs queue,
    retrying until the server is reachable; the "ps" job writes the best
    point to results.json under the logs directory."""
    mongo_db_host = os.environ["MONGO_DB_HOST"]
    mongo_db_port = os.environ["MONGO_DB_PORT"]
    experiment_name = os.environ["EXPERIMENT_NAME"]

    mongo_connect_str = "mongo://{0}:{1}/foo_db/jobs".format(
        mongo_db_host, mongo_db_port)

    while True:
        try:
            trials = MongoTrials(mongo_connect_str, exp_key=experiment_name)
        except ServerSelectionTimeoutError:
            # Server not up yet -- retry silently (busy loop, no backoff).
            pass
        else:
            space = {'x': hp.uniform('x', -2, 2)}
            best = fmin(obj,
                        space=space,
                        trials=trials,
                        algo=tpe.suggest,
                        max_evals=100)

            if os.environ["JOB_NAME"] == "ps":
                save_path = os.path.join(get_logs_path("./logs"),
                                         "results.json")
                with open(save_path, "w") as f:
                    # NOTE(review): json.dump of json.dumps double-encodes
                    # the result -- confirm readers expect that.
                    json.dump(json.dumps(best), f)

            return
예제 #13
0
def do_test(runs, flder):
    """Tune the 'bid exp-family and zero column' feature hyper-parameters
    with TPE, storing trials in a remote MongoDB, and print the
    current/best losses.

    :param runs: number of runs per evaluated point.
    :param flder: working folder; the objective's log file is created here.
    """
    df = load_train()
    space = {
        'k': hp.qnormal('k', 25, 10, 1),
        'f': hp.loguniform('f', log(0.1), log(5))
    }
    trials = MongoTrials(
        'mongo://10.20.0.144:27017/bid_exp_family_and_zero_column/jobs',
        exp_key='exp1')

    log_file = os.path.join(flder, 'log.txt')
    objective = partial(bid_and_zero_optimizer.loss_for_batch,
                        df=df,
                        runs=runs,
                        flder=flder,
                        log_file=log_file)
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=10000)

    # Python 3 print function (the original Python 2 print statements are a
    # syntax error under Python 3, which the rest of this module targets).
    print()
    print('curr={}'.format(best))
    print('best={}'.format(get_the_best_loss(trials)))
예제 #14
0
def hyper_tune(train_model,
               hparam_def,
               algo=tpe.suggest,
               max_evals=25,
               func=fmin):
    """
    Function to prepare and run hyper parameter tune.

    :param train_model: User model to tune
    :param hparam_def: User hyper tune param definition
    :param algo: Search algorithm
    :param max_evals: Allow up to this many function evaluations before returning
    :param func: function that will run hyper tune logic, by default hyperopt fmin function

    :return: None if there is no result or dict with result for tune
    """
    mongo_connect_str = get_mongo_conn_str()
    is_connection_available = True

    # Retry the MongoTrials connection until it succeeds or the retry
    # policy (_hyper_tune_check) says to give up.
    while is_connection_available:
        try:
            trials = MongoTrials(mongo_connect_str, exp_key=_experiment_name())
        except ServerSelectionTimeoutError:
            logger.warning(
                "Hyper Tune - MongoTrials server selection Timeout Error")
            # Returns False when retries are exhausted, ending the loop
            # (and returning None from this function).
            is_connection_available = _hyper_tune_check()
        else:
            return func(train_model,
                        space=hparam_def,
                        trials=trials,
                        algo=algo,
                        max_evals=max_evals)
예제 #15
0
def main():
    """Parse CLI args, import the configured search space by module name,
    and run an fmin over ExperimentRunner with MongoDB-backed trials."""
    sys.path.insert(0, PATH_OPT_CONFIGS)
    args = parse_args()
    with open(args.default_config, 'r') as stream:
        default_config = yaml.load(stream, Loader=yaml.FullLoader)
    default_config = replace_path_config(default_config)
    # args.space_config is a filename like "space.py"; strip ".py" and
    # import it as a module exposing a ``space`` attribute.
    space = __import__(args.space_config[:-3]).space

    exp = ExperimentRunner(default_config=default_config,
                           results_dir=os.path.abspath(args.results_dir),
                           tmp_dir=os.path.abspath(args.tmp_dir),
                           python_path=sys.executable,
                           train_path=os.path.abspath(
                               os.path.join(os.getcwd(), "..", "src",
                                            "train.py")),
                           debug=args.debug,
                           num_gpus=args.num_gpus)

    trials = MongoTrials('mongo://localhost:1234/tweet_sent/jobs',
                         exp_key=args.exp_name)
    best = fmin(exp,
                space,
                trials=trials,
                algo=tpe.suggest,
                max_evals=args.num_trials)
예제 #16
0
async def delete_gen(request, gen):
    """HTTP handler: delete all trials for generation *gen* when the
    request carries ``?really=yes``; otherwise show a confirmation page."""
    if request.args.get('really', 'no') == 'yes':
        # Second positional MongoTrials argument is the exp_key.
        gen_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                 f'covid-{gen}')
        gen_trials.refresh()
        gen_trials.delete_all()
        return redirect(f"/status/?refresh=true")
    return html(create_delete_prompt(f"GENERATION 'covid-{gen}'"))
예제 #17
0
def _refresh_trials():
    """Lazily create the module-level MongoTrials handle, stamp the refresh
    time, and pull the latest trial documents from the database."""
    global TRIALS, TRIALS_REFRESHED

    if TRIALS is None:
        TRIALS = MongoTrials('mongo://localhost:1234/covid/jobs')

    # Timestamp is recorded before the (potentially slow) refresh call.
    TRIALS_REFRESHED = datetime.now()
    TRIALS.refresh()
def target_func1(evals):
    """Minimize sin(x) over x in [-2, 2] with TPE using MongoDB-backed
    trials; returns the best point found."""
    mongo_trials = MongoTrials('mongo://localhost:27017/mongo_hpo/jobs',
                               exp_key='exp1')
    return fmin(math.sin,
                hp.uniform('x', -2, 2),
                trials=mongo_trials,
                algo=tpe.suggest,
                max_evals=evals)
예제 #19
0
def _fmin_parallel(
    queue: multiprocessing.Queue,
    fn: Callable,
    exp_key: str,
    space: dict,
    algo: Callable = tpe.suggest,
    max_evals: int = 100,
    fmin_timer: float = None,
    show_progressbar: bool = False,
    mongo_port_address: str = "localhost:1234/scvi_db",
):
    """Launches a ``hyperopt`` minimization procedure.

    Runs ``fmin`` in a daemon thread so an optional wall-clock timer
    (*fmin_timer*, in seconds) can cut the search short; the trials object
    is then pushed onto *queue* for the parent process.

    :param queue: queue the trials object is put on when done.
    :param fn: objective function to minimize.
    :param exp_key: experiment key identifying this run in the jobs DB.
    :param space: hyperopt search space.
    :param algo: suggestion algorithm (defaults to TPE).
    :param max_evals: maximum number of evaluations.
    :param fmin_timer: optional time budget in seconds; None waits forever.
    :param show_progressbar: forwarded to ``fmin``.
    :param mongo_port_address: ``host:port/db`` prefix of the Mongo jobs DB.
    """
    logger.debug("Instantiating trials object.")
    # instantiate Trials object
    trials = MongoTrials(
        as_mongo_str(os.path.join(mongo_port_address, "jobs")), exp_key=exp_key
    )

    # run hyperoptimization in another fork to enable the use of fmin_timer
    fmin_kwargs = {
        "fn": fn,
        "space": space,
        "algo": algo,
        "max_evals": max_evals,
        "trials": trials,
        "show_progressbar": show_progressbar,
    }
    fmin_thread = threading.Thread(target=fmin, kwargs=fmin_kwargs)
    logger.debug("Calling fmin.")
    # set fmin thread as daemon so it stops when the main process terminates
    fmin_thread.daemon = True
    fmin_thread.start()
    started_threads.append(fmin_thread)
    if fmin_timer:
        logging.debug(
            "Timer set, fmin will run for at most {timer}".format(timer=fmin_timer)
        )
        # Poll every 10s until the budget is spent or fmin finishes early.
        start_time = time.monotonic()
        run_time = 0
        while run_time < fmin_timer and fmin_thread.is_alive():
            time.sleep(10)
            run_time = time.monotonic() - start_time
    else:
        logging.debug("No timer, waiting for fmin")
        # No budget: wait (with coarse 10s polling) until fmin completes.
        while True:
            if not fmin_thread.is_alive():
                break
            else:
                time.sleep(10)
    logger.debug("fmin returned or timer ran out.")
    # queue.put uses pickle so remove attribute containing thread.lock
    if hasattr(trials, "handle"):
        logger.debug("Deleting Trial handle for pickling.")
        del trials.handle
    logger.debug("Putting Trials in Queue.")
    queue.put(trials)
예제 #20
0
def hyperOptMain(max_evals, max_trials):
    '''Run the training using hyper optimization.

    Resumes from a pickled trials file when one exists, optionally switches
    to MongoDB-backed trials (FLAGS.mongo_mode), then runs max_trials
    rounds of fmin, checkpointing the trials after each round.

    :param max_evals: evaluations added per round.
    :param max_trials: number of fmin rounds to run.
    '''
    #('--num_point', type=int, default=1024, help='Point Number [256/512/1024/2048] [default: 1024]')
    #('--max_epoch', type=int, default=250, help='Epoch to run [default: 250]')
    #('--batch_size', type=int, default=32, help='Batch Size during training [default: 32]')
    #('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]')
    #('--momentum', type=float, default=0.9, help='Initial learning rate [default: 0.9]')
    #('--optimizer', default='adam', help='adam or momentum [default: adam]')
    #('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]
    space = {
        #'num_points': hp.choice('num_points',[256,512,1024,2048]),
        'batch_size': hp.choice('batch_size', [2, 4, 8, 16, 32]),
        #'learning_rate': hp.uniform('learning_rate', 0.01, 0.001),
        #'momentum': hp.uniform('momentum',0.1, 0.9),
        #'optimizer': hp.choice('optimizer',['adam','momentum']),
        #'decay_step': hp.uniform('decay_step',10000, 200000),
        #'decay_rate': hp.uniform('decay_rate',0.1, 0.7)
    }
    #max_evals = 3

    #https://github.com/hyperopt/hyperopt/issues/267
    # check if any trials file are given to continue hyperopt on
    trials = Trials()
    if TRIALS_PATH:
        trialFilePath = os.path.join(TRIALS_PATH, TRIALS_FILE_NAME)
        if os.path.exists(trialFilePath):
            with open(trialFilePath, "rb") as f:
                trials = pickle.load(f)
                log_string("Loaded trials.")
    #otherwise create a new one in the log directory
    prevTrialsCount = len(trials)
    if not prevTrialsCount:
        trialFilePath = os.path.join(LOG_DIR, TRIALS_FILE_NAME)

    # MongoDB mode replaces the local trials entirely; the pickle
    # checkpoints below then only snapshot the Mongo-backed object.
    if FLAGS.mongo_mode == 1:
        trials = MongoTrials('mongo://localhost:27017/hyperopt/jobs',
                             exp_key='exp{}'.format(uuid.uuid4()))

    # https://github.com/hyperopt/hyperopt-sklearn/issues/80
    # Changing the number of initial evaluations to 1 instead of the default 20 runs
    eval_runs = 0
    for i in range(1, max_trials + 1):
        # max_evals passed to fmin is cumulative over the trials object, so
        # each round raises the ceiling by max_evals on top of any resumed
        # trials.
        eval_runs = max_evals * i + prevTrialsCount
        #print ("max:{}, i:{} and prev count:{}".format(max_evals,i,prevTrialsCount))
        best = fmin(
            main,
            space=space,
            algo=tpe.
            suggest,  #partial(tpe.suggest, n_startup_jobs=1), #tpe.suggest,
            max_evals=
            eval_runs,  #increase the eval count otherwise only previous runs will be used
            trials=trials)

        summarizeTrials(i, best, trials)
        with open(trialFilePath, "wb") as w:
            pickle.dump(trials, w)
            log_string("Written trials on run {}.".format(i))
예제 #21
0
def make_trials(host, port, exp_key, refresh=True):
    """Build a trials store: in-memory when neither host nor port is given,
    otherwise MongoDB-backed using the module-level ``dbname``."""
    if (host, port) == (None, None):
        return Trials()
    connection = 'mongo://%s:%d/%s/jobs' % (host, int(port), dbname)
    return MongoTrials(connection, exp_key=exp_key, refresh=refresh)
예제 #22
0
def load_trials(trials_path, mongo_key=None, reset_trials=False):
    """Load a hyperopt trials store.

    :param trials_path: a ``mongo:`` connection string, a pickle file path,
        or None.
    :param mongo_key: exp_key used when connecting to MongoDB.
    :param reset_trials: if True, ignore an existing pickle and start fresh.
    :return: MongoTrials for mongo URIs, the unpickled trials for existing
        files, otherwise a fresh in-memory Trials.
    """
    is_mongo = trials_path.startswith('mongo:') if isinstance(trials_path, str) else False
    if is_mongo:
        return MongoTrials(trials_path, exp_key=mongo_key)
    if trials_path is not None and os.path.isfile(trials_path) and not reset_trials:
        # Context manager closes the file promptly; the original inline
        # open() leaked the handle until garbage collection.
        with open(trials_path, 'rb') as f:
            return pickle.load(f)
    return Trials()
예제 #23
0
def train(air_model, train_epochs=20):
    """Run TPE black-box optimization of the neural network to use.

    After evaluating all points, trains and persists the best model and
    sets the model status flag to TRAINED.

    :param air_model: model record; its status and best_model are updated
        and saved via the db helpers.
    :param train_epochs: fmin evaluation budget (max_evals).
    """
    from db import get_model, save_model
    from model import ModelStatus
    info('Running training on new process')
    air_model.status = ModelStatus.TRAINING
    save_model(air_model)

    fspace = {
        'optimizer':
        hp.choice('optimzer', [
            'rmsprop', 'adagrad'
        ]),  # NOTE(review): the label 'optimzer' (typo) is the stored param
             # name; fixing it would change persisted trial documents.
        'layers':
        hp.choice('layers', [(str(x), layer_choice(x))
                             for x in range(10)])  # Choose from 0 to 9 layers.
    }

    if config.DISTRIBUTED_HYPEROPT:
        # TODO: Probably not send all model from json. Just send the ids and make the worker fetch it from the DB.
        fspace['model_json'] = air_model.to_json()
        trials = MongoTrials('mongo://localhost:27017/testdb/jobs',
                             exp_key='userid.trainingid',
                             workdir='/home/paezand/pusher/bottle_air')
        best = fmin(fn=run_model_fn,
                    space=fspace,
                    trials=trials,
                    algo=tpe.suggest,
                    max_evals=train_epochs)
        # Run workers with
        # hyperopt-mongo-worker --mongo=$mongodbURL/testdb --poll-interval=0.1 --workdir=$bottle_air_dir
    else:
        trials = Trials()
        best = fmin(fn=air_model.run_model(),
                    space=fspace,
                    algo=tpe.suggest,
                    max_evals=train_epochs,
                    trials=trials)

    # Python 3 print function (the original Python 2 print statements are a
    # syntax error under Python 3).
    print('best:', space_eval(fspace, best))

    print('trials:')
    for trial in trials.trials[:2]:
        print(trial)

    model_fn = air_model.run_model(persist=True)
    model_fn(space_eval(fspace, best))  # Train and persist best model.

    print('Training finished')
    air_model.status = ModelStatus.TRAINED
    air_model.best_model = best
    save_model(air_model)
예제 #24
0
def make_trials(host, port, exp_key, refresh=True, dbname='dbname'):
    """Build a trials store: in-memory when neither host nor port is given,
    otherwise MongoDB-backed.

    :param host: MongoDB host, or None for in-memory trials.
    :param port: MongoDB port (converted to int), or None.
    :param exp_key: experiment key for the Mongo-backed trials.
    :param refresh: forwarded to MongoTrials.
    :param dbname: database name; the placeholder default triggers a warning.
    """
    if (host, port) == (None, None):
        trials = Trials()
    else:
        if dbname == 'dbname':
            # logger.warn is a deprecated alias for logger.warning.
            logger.warning('You probably want to override the default dbname')
        trials = MongoTrials('mongo://%s:%d/%s/jobs' %
                             (host, int(port), dbname),
                             exp_key=exp_key,
                             refresh=refresh)
    return trials
예제 #25
0
    def create_mongo_trials(self, mongo_uri):
        """Create a MongoTrials with a fresh experiment key.

        The key is one greater than the highest exp_key already stored in
        the ``netron.hyperopt_jobs`` collection (0 when empty).

        :param mongo_uri: base Mongo URI; the jobs path is appended.
        :return: tuple of (exp_key, MongoTrials).
        """
        jobs_col = MongoClient(mongo_uri)["netron"]["hyperopt_jobs"]
        last_job = jobs_col.find({}, {"exp_key": 1}).sort("exp_key", pymongo.DESCENDING).limit(1)
        last_job = list(last_job)
        if len(last_job) > 0:
            exp_key = int(last_job[0]["exp_key"]) + 1
        else:
            exp_key = 0
        # Python 3 print function (original used a Python 2 print statement).
        print("Current experiment key is %s" % exp_key)

        mongo_uri = mongo_uri + 'netron/hyperopt_jobs'
        return exp_key, MongoTrials(mongo_uri, exp_key=exp_key)
예제 #26
0
 def __init__(self, host, port, db, config_key=None, model_name=None):
     """Connect to MongoDB, verify the server is reachable, and set up a
     MongoTrials store plus a results collection for progress tracking.

     :param host: MongoDB host.
     :param port: MongoDB port.
     :param db: database name (also used in the MongoTrials jobs path).
     :param config_key: forwarded to the base tracker.
     :param model_name: forwarded to the base tracker; also used as exp_key.
     """
     super(ProgressTrackerMongo, self).__init__(model_name=model_name,
                                                config_key=config_key)
     self.client = MongoClient(host, port)
     try:
         # Cheap server command used purely as a connectivity check.
         self.client.admin.command('ismaster')
     except ConnectionFailure:
         print("Server not available")
         # NOTE(review): this raises a *new* ConnectionFailure, dropping the
         # original message; a bare ``raise`` would preserve it.
         raise ConnectionFailure
     self.state['trials'] = MongoTrials('mongo://%s:%d/%s/jobs' %
                                        (host, port, db),
                                        exp_key=self.model_name)
     # ``db`` is rebound here from the db *name* to the database handle.
     db = self.client[db]
     self.mongo_collection = db.results
예제 #27
0
def tune_hyperparams(
    task_type,
    dataset,
    model,
    train_data,
    tuning_step_size,
    max_tuning_time,
    max_trials_wo_improvement,
    tuning_step_max_time,
    mongo_address,
):
    """Tune *model*'s hyper-parameters on *dataset* with repeated timed
    tuning steps until the time budget runs out or too many steps pass
    without improvement, then hand the trials to process_tuning_result.

    Models without an ``hp_space`` attribute are evaluated once and the
    function returns early.
    """
    kfold, train_data = create_kfold(task_type, dataset, train_data)
    objective_fct = create_tuning_objective(dataset, model, train_data, kfold)

    # No tuning for models without hyper-parameters
    is_model_tunable = hasattr(model, "hp_space")
    if not is_model_tunable:
        loss = objective_fct(None)
        print(f"Resulting {dataset.metric}: {-loss}")
        return

    # Optionally cap the wall-clock time of each individual tuning step.
    if tuning_step_max_time > 0:
        make_tuning_step_w_timeout = set_timeout(make_tuning_step, tuning_step_max_time)
    else:
        make_tuning_step_w_timeout = make_tuning_step

    # Tuning loop
    if mongo_address is not None:
        trials = MongoTrials(
            mongo_address, exp_key=f"{dataset.__name__}-{model.__name__}"
        )
    else:
        trials = Trials()
    start_time = time.time()
    rstate = np.random.RandomState(RANDOM_STATE)
    n_trials_wo_improvement = 0
    time_left = True
    while time_left and n_trials_wo_improvement < max_trials_wo_improvement:
        try:
            make_tuning_step_w_timeout(
                objective_fct, model.hp_space, trials, rstate, tuning_step_size
            )
        except TimeoutError:
            # A timed-out step is simply dropped; progress so far stays in
            # ``trials``.
            pass
        n_trials_wo_improvement = update_n_trials_wo_improvement(trials)
        time_left = (time.time() - start_time) < max_tuning_time
    tuning_time = time.time() - start_time

    process_tuning_result(trials, tuning_time, model, dataset)
예제 #28
0
def main(hyperparameter_search_args, tqdm=tqdm, fmin_kwargs=None):
    """Run a hyper-parameter search for one rotation.

    Persists the search arguments, builds the objective, selects a trials
    backend (MongoDB, resumed-from-disk, or fresh in-memory), runs fmin,
    and returns the trials.

    :param hyperparameter_search_args: argument object with search_dir,
        rotation, algo, mongo settings, etc.
    :param tqdm: progress-bar factory forwarded to the objective.
    :param fmin_kwargs: extra keyword arguments passed through to fmin.
    :return: the trials object used for the search.
    """
    if fmin_kwargs is None: fmin_kwargs = {}

    search_dir = hyperparameter_search_args.search_dir
    hyperparameter_search_args.to_json_file(
        os.path.join(search_dir, HYPERPARAMETER_SEARCH_ARGS_FILENAME))

    hyperopt_space, constant_params = read_config(search_dir)

    rotation = hyperparameter_search_args.rotation
    base_dir = os.path.join(search_dir, str(rotation))
    # More than one entry in the rotation dir means a previous search left
    # results behind that we can resume from.
    already_existed = os.path.exists(base_dir) and len(
        os.listdir(base_dir)) > 1
    if not os.path.isdir(base_dir): os.makedirs(base_dir)

    objective = ObjectiveFntr(
        base_dir,
        rotation,
        constant_params,
        tqdm,
        single_task=hyperparameter_search_args.single_task_search,
        do_match_train_windows=hyperparameter_search_args.
        do_match_train_windows,
        do_eicu=hyperparameter_search_args.do_eicu,
    )

    algo = HP_ALGS[hyperparameter_search_args.algo]

    if hyperparameter_search_args.do_use_mongo:
        mongo_addr = '{base}/{db}/jobs'.format(
            base=hyperparameter_search_args.mongo_addr,
            db=hyperparameter_search_args.mongo_db)
        print("Parallelizing search via Mongo DB: %s" % mongo_addr)
        trials = MongoTrials(mongo_addr,
                             exp_key=hyperparameter_search_args.mongo_exp_key)
    elif already_existed:
        # Resume: recreate trials from disk and pick this rotation's entry.
        _, _, _, _, trials = read_or_recreate_trials(search_dir, tqdm=tqdm)
        trials = trials[str(rotation)]
    else:
        trials = Trials()

    best = fmin(objective,
                space=hyperopt_space,
                algo=algo,
                max_evals=hyperparameter_search_args.max_evals,
                trials=trials,
                **fmin_kwargs)

    return trials
예제 #29
0
def main():
    """Optimize fn_obj over x in [-2, 2] with TPE using MongoDB-backed
    trials, printing timestamps before and after the search."""
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    mongo_trials = MongoTrials('mongo://localhost:27017/ts_hyperopt/jobs',
                               exp_key='exp1')
    best_point = fmin(fn_obj,
                      hp.uniform('x', -2, 2),
                      trials=mongo_trials,
                      algo=tpe.suggest,
                      max_evals=100)

    print(best_point)
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
예제 #30
0
 def __init__(self,
              tuning_metric='mse',
              trials='trials',
              bottom_coding=None,
              transform=None,
              **kwargs):
     """Initialize hyperparameters.

     :param tuning_metric: metric name used when tuning (default 'mse').
     :param trials: the literal string 'trials' selects in-memory Trials;
         any other value switches to MongoDB-backed trials.
     :param bottom_coding: forwarded to the base regressor.
     :param transform: forwarded to the base regressor.
     :param kwargs: accepted for interface compatibility; not used here.
     """
     super(LightGBM, self).__init__(bottom_coding=bottom_coding,
                                    transform=transform)
     self.model = LGBMRegressor
     self.tuning_metric = tuning_metric
     self.trials = Trials() \
         if trials == 'trials' \
         else MongoTrials('mongo://localhost:1234/foo_db/jobs',
                          exp_key='exp1')
     self.set_parameters()