def get_status(client, namespace):
    """Summarize the state of the HPO stored in the work queue for *namespace*.

    Reads the latest HPO_ITEM message from the work queue, rebuilds the
    optimizer from its serialized args/kwargs/state, and tallies trials.

    Parameters
    ----------
    client: queue/monitor client exposing ``monitor().messages(...)``
    namespace: str, queue namespace the HPO was registered under

    Returns
    -------
    dict with keys ``status`` ('completed' | 'pending'), ``completed``,
    ``broken``, ``pending`` (trial counts) and ``missing`` (trials not yet
    created out of the optimizer's total budget).

    Raises
    ------
    RuntimeError: if no HPO_ITEM message exists for this namespace.
    """
    messages = client.monitor().messages(WORK_QUEUE, namespace, mtype=HPO_ITEM)

    # Keep the last matching message: later messages supersede earlier ones.
    hpo_state = None
    for m in messages:
        if m.mtype == HPO_ITEM:
            hpo_state = m.message

    # Raise instead of assert so the check survives `python -O`.
    if hpo_state is None:
        raise RuntimeError(f'No HPO found for namespace {namespace}')

    args = hpo_state['hpo']['args']
    kwargs = hpo_state['hpo']['kwargs']

    # Rebuild the optimizer from its serialized constructor arguments
    # (previously re-read hpo_state['hpo']['kwargs'] inline, leaving the
    # `kwargs` local unused).
    hpo = HPOptimizer(*args, **kwargs)

    if hpo_state['hpo_state']:
        hpo.load_state_dict(hpo_state['hpo_state'])

    state = dict(completed=0, broken=0, pending=0)
    state['status'] = 'completed' if hpo.is_done() else 'pending'

    for uid, trial in hpo.trials.items():
        # A trial with a recorded objective is finished; otherwise pending.
        if trial.objective:
            state['completed'] += 1
        else:
            state['pending'] += 1

    # TODO: We don't detect any broken trial so far.
    state['missing'] = hpo.hpo.count - len(hpo.trials)
    return state
def main(**kwargs):
    """Run the classification experiment end-to-end.

    Optionally searches hyper-parameters with Hyperband first (when the
    task exposes a non-empty search space), then re-trains a final task
    with the best parameters found.

    Expected keys in **kwargs (read via the Namespace below): at least
    ``verbose``, ``experiment_name``, ``uri``, ``database``, ``min_epochs``
    and ``epochs`` — TODO confirm against the CLI that builds kwargs.
    """
    show_dict(kwargs)
    # Wrap kwargs for attribute-style access (args.verbose, args.epochs, ...).
    args = Namespace(**kwargs)
    set_verbose_level(args.verbose)

    device = fetch_device()
    experiment_name = args.experiment_name.format(**kwargs)

    # save partial results here
    state_storage = StateStorage(
        folder=option('state.storage', '/tmp/olympus/classification'))

    def main_task():
        # Factory: builds a fresh classification task; HPO calls it once
        # per trial. Metrics logging is attached only when a URI is given.
        task = classification_baseline(device=device, storage=state_storage, **kwargs)

        if args.uri is not None:
            logger = metric_logger(args.uri, args.database, experiment_name)
            task.metrics.append(logger)

        return task

    space = main_task().get_space()

    # If space is not empty we search the best hyper parameters
    params = {}

    if space:
        show_dict(space)
        hpo = HPOptimizer('hyperband', space=space,
                          fidelity=Fidelity(args.min_epochs, args.epochs).to_dict())

        hpo_task = HPO(hpo, main_task)
        hpo_task.metrics.append(ElapsedRealTime())

        trial = hpo_task.fit(objective='validation_accuracy')
        print(f'HPO is done, objective: {trial.objective}')
        params = trial.params
    else:
        print('No hyper parameter missing, running the experiment...')

    # ------
    # Run the experiment with the best hyper parameters
    # -------------------------------------------------

    # NOTE(review): `params` is initialized to {} and only ever reassigned
    # from trial.params, so this check is always true unless trial.params
    # can be None — confirm the intended guard.
    if params is not None:
        # Train using train + valid for the final result
        final_task = classification_baseline(device=device, **kwargs, hpo_done=True)
        final_task.init(**params)
        final_task.fit(epochs=args.epochs)

        print('=' * 40)
        print('Final Trial Results')
        show_dict(flatten(params))
        final_task.report(pprint=True, print_fun=print)
        print('=' * 40)
def build_robo(model_type, n_init=2, count=5):
    """Build a RoBO-backed HPOptimizer over a fixed two-dimensional space.

    Parameters
    ----------
    model_type: RoBO surrogate model identifier, forwarded verbatim.
    n_init: number of initial random evaluations before model-based search.
    count: total number of trials the optimizer may produce.
    """
    search_space = {
        'x': 'uniform(-5, 10)',
        'y': 'uniform(0, 15)',
    }
    return HPOptimizer(
        'robo',
        model_type=model_type,
        count=count,
        n_init=n_init,
        fidelity=FIDELITY.to_dict(),
        space=search_space,
    )
def get_hpo(client, namespace, partial=False):
    """Reconstruct the HPOptimizer recorded for *namespace*.

    Prefers the completed result state; when ``partial`` is true, falls back
    to the in-progress work state first.

    Returns
    -------
    (hpo, remote_call): the rebuilt optimizer and the stored ``work`` entry.

    Raises
    ------
    RuntimeError: when no usable state exists for this namespace.
    """
    state = get_hpo_work_state(client, namespace) if partial else None

    if state is None:
        state = get_hpo_result_state(client, namespace)

    if state is None:
        raise RuntimeError(f'No HPO for namespace {namespace} or HPO is not completed')

    remote_call = state['work']

    # Rebuild the optimizer from its serialized constructor arguments,
    # then restore its internal state if any was saved.
    hpo = HPOptimizer(*state['hpo']['args'], **state['hpo']['kwargs'])
    if state['hpo_state']:
        hpo.load_state_dict(state['hpo_state'])

    return hpo, remote_call
def run_master_hpo(hpo_name, uri):
    """HPO is in the main process, works along side the workers"""
    search_space = {
        'a': 'uniform(0, 1)',
        'b': 'uniform(0, 1)',
        'c': 'uniform(0, 1)',
        'lr': 'uniform(0, 1)',
    }
    optimizer = HPOptimizer(hpo_name, count=30, fidelity=FIDELITY, space=search_space)

    # The group owns the message queue / server lifetime; everything below
    # runs while it is alive.
    with HPOWorkGroup(uri, 'olympus', 'classification-master-1',
                      clean=True, launch_server=True) as group:
        group.launch_workers(10)

        # Drive the optimization from this (master) process.
        group.run_hpo(optimizer, my_trial)
        group.wait()

        # Snapshot results, then discard the local archive.
        group.archive('data.zip')
        os.remove('data.zip')
def parallel_hpo(**kwargs):
    """Run one worker of a distributed Hyperband search.

    Command-line flags (--rank, --uri, --experiment) are parsed into the
    same namespace as **kwargs so workers can synchronize through the
    shared database.
    """
    args = argparse.Namespace(**kwargs)

    # Arguments required for the HPO workers to synchronize
    parser = argparse.ArgumentParser()
    parser.add_argument('--rank', type=int,
                        help='Worker rank, use to initialize the HPO')
    parser.add_argument('--uri', type=str, default='cockroach://192.168.0.1:8123',
                        help='Resource URI pointing to the database')
    parser.add_argument('--experiment', type=str, default='classification',
                        help='Database namespace to use for this experiment')
    # Results land directly on `args`, merged with the kwargs above.
    parser.parse_args(namespace=args)

    search_space = dict(
        a='uniform(0, 1)',
        b='uniform(0, 1)',
        c='uniform(0, 1)',
        lr='uniform(0, 1)',
    )

    base_hpo = HPOptimizer('hyperband',
                           fidelity=Fidelity(1, 30).to_dict(),
                           space=search_space)

    # Wrap your HPO into Olympus ParallelHPO
    hpo = ParallelHPO(
        base_hpo,
        rank=args.rank,
        uri=args.uri,
        experiment=args.experiment)

    # Iterate over your configs distributed across workers
    for config in hpo:
        print('Worker: ', args.rank, config)
        validation_error = train(**config)
        hpo.observe(config, validation_error)

    # get the result of the HPO
    print(f'Worker {args.rank} is done')

    best_trial = hpo.result()
    if best_trial is not None:
        print(best_trial.params, best_trial.objective)
loader = DataLoader(splits, sampler_seed=1, batch_size=32) main_task = Classification( classifier=model, optimizer=optimizer, lr_scheduler=lr_schedule, dataloader=loader.train(), device=device, storage=StateStorage(folder=f'{base}/hpo_simple')) main_task.metrics.append( Accuracy(name='validation', loader=loader.valid(batch_size=64))) return main_task space = make_task().get_space() hp_optimizer = HPOptimizer('hyperband', fidelity=Fidelity(1, 30).to_dict(), space=space) hpo_task = HPO(hp_optimizer, make_task) result = hpo_task.fit(objective='validation_accuracy') print('Best Params:') print('-' * 40) print(f'validation_accuracy: {result.objective}') show_dict(result.params)