def test_fit_model():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)
    X, y = convert_data_to_xy(data, space, objective)
    fit_model(X, y, space, seed=1)
def main(**kwargs):
    show_dict(kwargs)
    args = Namespace(**kwargs)
    set_verbose_level(args.verbose)

    device = fetch_device()
    experiment_name = args.experiment_name.format(**kwargs)

    # Save partial results here
    state_storage = StateStorage(
        folder=option('state.storage', '/tmp/olympus/classification'))

    def main_task():
        task = classification_baseline(device=device, storage=state_storage,
                                       **kwargs)

        if args.uri is not None:
            logger = metric_logger(args.uri, args.database, experiment_name)
            task.metrics.append(logger)

        return task

    space = main_task().get_space()

    # If the space is not empty, search for the best hyperparameters
    params = {}
    if space:
        show_dict(space)
        hpo = HPOptimizer('hyperband', space=space,
                          fidelity=Fidelity(args.min_epochs, args.epochs).to_dict())

        hpo_task = HPO(hpo, main_task)
        hpo_task.metrics.append(ElapsedRealTime())

        trial = hpo_task.fit(objective='validation_accuracy')
        print(f'HPO is done, objective: {trial.objective}')
        params = trial.params
    else:
        print('No hyperparameters missing, running the experiment...')

    # -------------------------------------------------
    # Run the experiment with the best hyperparameters
    # -------------------------------------------------
    if params is not None:
        # Train using train + valid for the final result
        final_task = classification_baseline(device=device, **kwargs,
                                             hpo_done=True)
        final_task.init(**params)
        final_task.fit(epochs=args.epochs)

        print('=' * 40)
        print('Final Trial Results')
        show_dict(flatten(params))
        final_task.report(pprint=True, print_fun=print)
        print('=' * 40)
def test_fetch_hpos_valid_results_first_time(client):
    config = copy.deepcopy(CONFIG)
    num_trials = 5
    config['count'] = num_trials
    config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE + '1', foo, config, {'e': 2})
    register_hpo(client, NAMESPACE + '2', foo, config, {'e': 2})

    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 1
    worker.run()

    namespaces = {'hpo' + str(i): [NAMESPACE + str(i)] for i in range(1, 3)}

    data = defaultdict(dict)
    _ = fetch_hpos_valid_curves(client, namespaces, ['e'], data)

    assert len(data) == 2
    assert len(data['hpo1']) == 1
    assert len(data['hpo2']) == 1

    namespace = f'{NAMESPACE}1'
    assert data['hpo1'][namespace].attrs['namespace'] == namespace
    assert data['hpo1'][namespace].epoch.values.tolist() == [0, 1]
    assert data['hpo1'][namespace].order.values.tolist() == list(range(num_trials))
    assert data['hpo1'][namespace].seed.values.tolist() == [1]
    assert data['hpo1'][namespace].params.values.tolist() == list('abcd')
    assert data['hpo1'][namespace].noise.values.tolist() == ['e']
    assert data['hpo1'][namespace].obj.shape == (2, num_trials, 1)
    assert data['hpo1'][namespace].valid.shape == (2, num_trials, 1)
    # xarray datasets cannot be truth-tested with `==`; compare with `equals`
    assert data['hpo1'][namespace].equals(data['hpo2'][f'{NAMESPACE}2'])
def test_generate_biased_replicates_last_epoch():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    replicates = generate_biased_replicates(
        data,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0'],
        variables, objective, num_replicates, hpo_budget,
        early_stopping=False)

    best_trial_index = 6
    rng = numpy.random.RandomState(
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])
    for replicate in replicates:
        should_be = copy.deepcopy(defaults)
        for param in space.keys():
            assert replicate[param] == float(
                data.sel(order=best_trial_index)[param].values)
            should_be[param] = replicate[param]
        for variable in variables:
            assert replicate[variable] == rng.randint(2**30)
            should_be[variable] = replicate[variable]
        assert replicate['uid'] == compute_identity(should_be, IDENTITY_SIZE)
def test_plot(client):
    config = copy.deepcopy(CONFIG)
    num_trials = 10
    config['count'] = num_trials
    config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()
    register_hpo(client, NAMESPACE, foo, config, {'e': 2})

    worker = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    worker.max_retry = 0
    worker.run()

    data = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    plot(config['space'], 'obj', data, 'test.png')
def test_generate_simulated_fix():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    config = configs['random_search'][f'{NAMESPACE}-random-search-s-0']

    # Make sure the defaults have been replaced by randomized seeds
    assert config['defaults']['d'] != defaults['d']
    assert config['defaults']['e'] != defaults['e']

    replicates = generate_simulated_fix(
        data, config, variables, objective, hpo_budget, num_replicates,
        early_stopping=False)

    assert len(replicates) == num_replicates
    for i in range(1, num_replicates):
        assert replicates[i]['a'] != replicates[0]['a']
        assert replicates[i]['b'] != replicates[0]['b']
        assert replicates[i]['c'] != replicates[0]['c']
        assert replicates[i]['uid'] != replicates[0]['uid']
        assert replicates[i]['d'] == config['defaults']['d']
        assert replicates[i]['e'] == config['defaults']['e']
def test_save_load_results(client):
    config = copy.deepcopy(CONFIG)
    num_trials = 2
    config['count'] = num_trials
    config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()
    register_hpo(client, NAMESPACE, foo, config, {'e': 2})

    worker = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    worker.max_retry = 0
    worker.run()

    data = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    save_results(NAMESPACE, data, '.')
    assert load_results(NAMESPACE, '.')
def generate_mocked_replicates(num_replicates, num_experiments=5):
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 3}
    defaults = {'d': 1, 'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    randomize_seeds(configs['random_search'], variables, seed)

    hpos_ready = dict(random_search=[])
    data = dict(random_search=dict())
    for hpo_namespace, config in configs['random_search'].items():
        hpos_ready['random_search'].append(hpo_namespace)
        data['random_search'][hpo_namespace] = build_data(
            surrogate_budget, variables, defaults, space)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    return generate_replicates(
        ready_configs, data, variables, objective, hpo_budget,
        num_replicates, early_stopping=False)
def parallel_hpo(**kwargs):
    args = argparse.Namespace(**kwargs)

    # Arguments required for the HPO workers to synchronize
    parser = argparse.ArgumentParser()
    parser.add_argument('--rank', type=int,
                        help='Worker rank, used to initialize the HPO')
    parser.add_argument('--uri', type=str,
                        default='cockroach://192.168.0.1:8123',
                        help='Resource URI pointing to the database')
    parser.add_argument('--experiment', type=str, default='classification',
                        help='Database namespace to use for this experiment')
    parser.parse_args(namespace=args)

    params = {
        'a': 'uniform(0, 1)',
        'b': 'uniform(0, 1)',
        'c': 'uniform(0, 1)',
        'lr': 'uniform(0, 1)'
    }

    hpo = HPOptimizer('hyperband', fidelity=Fidelity(1, 30).to_dict(),
                      space=params)

    # Wrap your HPO into Olympus ParallelHPO
    hpo = ParallelHPO(
        hpo,
        rank=args.rank,
        uri=args.uri,
        experiment=args.experiment)

    # Iterate over your configs distributed across workers
    for config in hpo:
        print('Worker: ', args.rank, config)
        validation_error = train(**config)
        hpo.observe(config, validation_error)

    # Get the result of the HPO
    print(f'Worker {args.rank} is done')
    best_trial = hpo.result()
    if best_trial is not None:
        print(best_trial.params, best_trial.objective)
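# `train(**config)` in parallel_hpo above is user code, not part of Olympus.
# A minimal hypothetical stand-in (an assumption, for illustration only) that
# scores a config so the example runs end to end could look like this:
def train(a, b, c, lr, **kwargs):
    # Toy objective: lower is better, with the optimum at 0.5 per dimension.
    # Extra keys injected by the HPO (e.g. uid, epoch) land in **kwargs.
    return (a - 0.5) ** 2 + (b - 0.5) ** 2 + (c - 0.5) ** 2 + (lr - 0.5) ** 2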
def test_fetch_hpo_valid_results_no_epochs(client):
    config = copy.deepcopy(CONFIG)
    num_trials = 5
    config['count'] = num_trials
    config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()
    register_hpo(client, NAMESPACE, foo, config, {'e': 2})

    worker = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    worker.max_retry = 0
    worker.run()

    data = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    assert data.attrs['namespace'] == NAMESPACE
    assert data.epoch.values.tolist() == [0, 1]
    assert data.order.values.tolist() == list(range(num_trials))
    assert data.seed.values.tolist() == [1]
    assert data.params.values.tolist() == list('abcd')
    assert data.noise.values.tolist() == ['e']
    assert data.obj.shape == (2, num_trials, 1)
    assert data.valid.shape == (2, num_trials, 1)
def test_simulate_hpo():
    # fit a model
    # simulate
    # test what? ...
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)
    X, y = convert_data_to_xy(data, space, objective)
    model = fit_model(X, y, space, seed=1)

    sample = simulate_hpo(
        model, space, hpo_budget,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])

    assert sample.keys() == space.keys()
def test_reset_pool_size():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)',
        'd': 'uniform(lower=-1, upper=1)'
    }
    defaults = {'d': 1, 'e': 2}
    num_experiments = 5
    hpo = 'random_search'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=200, fidelity=fidelity,
        search_space=space, namespace=NAMESPACE, defaults=defaults)

    reset_pool_size(configs['random_search'])

    for config in configs['random_search'].values():
        assert config['pool_size'] is None
def test_randomize_seeds():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = ['d', 'e']
    defaults = {}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=200, fidelity=fidelity,
        search_space=space, namespace=NAMESPACE, defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed, compute_id=True)

    rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for variable in variables:
            assert config['defaults'][variable] == rng.randint(2**30)
        uid = config['defaults'].pop('uid')
        assert uid == compute_identity(config['defaults'], IDENTITY_SIZE)

    randomize_seeds(configs['random_search'], variables, seed, compute_id=False)

    rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for variable in variables:
            assert config['defaults'][variable] == rng.randint(2**30)
        assert 'uid' not in config['defaults']
def test_register_hpos(client):
    namespace = 'test-hpo'
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]  # , 'hyperband', 'bayesopt']
    budget = 200
    fidelity = Fidelity(1, 10, name='d').to_dict()
    num_experiments = 2
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'd': 'uniform(-1, 1)'
    }
    defaults = {'e': 2}

    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)
    stats = {}

    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=HPO_ITEM) == 0
    new_namespaces = register_hpos(client, namespace, foo, configs, defaults,
                                   stats)
    assert len(set(new_namespaces)) == len(configs)

    for hpo, hpo_namespaces in new_namespaces.items():
        for i, hpo_namespace in enumerate(hpo_namespaces):
            messages = client.monitor().messages(WORK_QUEUE, hpo_namespace,
                                                 mtype=HPO_ITEM)
            assert len(messages) == 1
            assert messages[0].message['hpo']['kwargs'] == configs[hpo][hpo_namespace]
            assert messages[0].message['work']['kwargs'] == defaults
def test_convert_xarray_to_scipy_results(client):
    config = copy.deepcopy(CONFIG)
    num_trials = 10
    config['count'] = num_trials
    config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()
    register_hpo(client, NAMESPACE, foo, config, {'e': 2})

    worker = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    worker.max_retry = 0
    worker.run()

    data = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    scipy_results = xarray_to_scipy_results(config['space'], 'obj', data)

    min_idx = numpy.argmin(data.obj.values[1, :, 0])

    assert scipy_results.x[0] == data.a.values[min_idx, 0]
    assert scipy_results.x[1] == data.b.values[min_idx, 0]
    assert scipy_results.x[2] == data.c.values[min_idx, 0]
    assert scipy_results.x[3] == numpy.log(data.d.values[min_idx, 0])
    assert scipy_results.fun == data.obj.values[1, min_idx, 0]
    assert len(scipy_results.x_iters) == num_trials
def test_convert_data_to_xy():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)
    X, y = convert_data_to_xy(data, space, objective, early_stopping=False)

    assert numpy.array_equal(X[:, 0], data['a'].values.reshape(-1))
    assert numpy.array_equal(X[:, 1], data['b'].values.reshape(-1))
    assert numpy.array_equal(X[:, 2], numpy.log(data['c'].values.reshape(-1)))
    assert numpy.array_equal(y, data[objective].isel(epoch=-1).values.reshape(-1))
    assert y.shape == (surrogate_budget, )
def test_register_hpo_is_actionable(client):
    """Test that the registered HPO has valid work items and can be executed."""
    namespace = 'test-hpo'
    config = {
        'name': 'random_search',
        'seed': 1,
        'count': 1,
        'fidelity': Fidelity(1, 10, name='d').to_dict(),
        'space': {
            'a': 'uniform(-1, 1)',
            'b': 'uniform(-1, 1)',
            'c': 'uniform(-1, 1)',
            'd': 'uniform(-1, 1)'
        }
    }
    defaults = {}
    register_hpo(client, namespace, foo, config, defaults)

    worker = TrialWorker(URI, DATABASE, 0, namespace)
    worker.max_retry = 0
    worker.run()

    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=WORK_ITEM) == 1
    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=HPO_ITEM) == 2

    messages = client.monitor().unread_messages(RESULT_QUEUE, namespace,
                                                mtype=HPO_ITEM)

    compressed_state = messages[0].message.get('hpo_state')
    assert compressed_state is not None
    state = decompress_dict(compressed_state)

    assert len(state['trials']) == 1
    assert state['trials'][0][1]['objectives'] == [10.715799430116764]
def test_check_diversified_search():
    space = {
        'a': 'uniform(0, 1)',
        'b': 'uniform(0, 1)',
    }

    def add(uid, epoch, a, b):
        return a + b

    hpo = DiversifiedSearch(Fidelity(0, 30, name='epoch'), space)
    print(hpo.budget)

    while not hpo.is_done():
        for args in hpo:
            epoch = args['epoch']
            for e in range(epoch):
                r = add(**args)
                args['epoch'] = e + 1
                hpo.observe(args, r)

    for p in hpo.result():
        print(p)
def test_fetch_hpos_valid_results_update(client):
    config = copy.deepcopy(CONFIG)
    num_trials = 5
    config['count'] = num_trials
    config['fidelity'] = Fidelity(0, 0, name='epoch').to_dict()

    namespaces = {
        f'hpo-{i}': [f'{NAMESPACE}-{i}-{j}' for j in range(1, 3)]
        for i in range(1, 3)
    }

    def run_hpos(namespaces):
        for namespace in namespaces:
            register_hpo(client, namespace, foo, config, {'e': 2})

        worker = TrialWorker(URI, DATABASE, 0, None)
        worker.max_retry = 0
        worker.timeout = 1
        worker.run()

    run_hpos([namespaces['hpo-1'][0]])

    data = defaultdict(dict)
    hpos_ready, remainings = fetch_hpos_valid_curves(client, namespaces,
                                                     ['e'], data)

    assert len(remainings) == 2
    assert len(remainings['hpo-1']) == 1
    assert len(remainings['hpo-2']) == 2
    assert len(hpos_ready) == 1
    assert len(hpos_ready['hpo-1']) == 1
    assert hpos_ready['hpo-1'][0] == f'{NAMESPACE}-1-1'

    assert len(data) == 1
    assert len(data['hpo-1']) == 1
    assert data['hpo-1'][f'{NAMESPACE}-1-1'].attrs['namespace'] == f'{NAMESPACE}-1-1'

    run_hpos([namespaces['hpo-1'][1], namespaces['hpo-2'][0]])

    hpos_ready, remainings = fetch_hpos_valid_curves(client, remainings,
                                                     ['e'], data)

    assert len(remainings) == 1
    assert len(remainings['hpo-2']) == 1
    assert len(hpos_ready) == 2
    assert len(hpos_ready['hpo-1']) == 1
    assert hpos_ready['hpo-1'][0] == f'{NAMESPACE}-1-2'
    assert len(hpos_ready['hpo-2']) == 1
    assert hpos_ready['hpo-2'][0] == f'{NAMESPACE}-2-1'

    assert len(data) == 2
    assert len(data['hpo-1']) == 2
    assert len(data['hpo-2']) == 1
    assert data['hpo-1'][f'{NAMESPACE}-1-2'].attrs['namespace'] == f'{NAMESPACE}-1-2'
    assert data['hpo-2'][f'{NAMESPACE}-2-1'].attrs['namespace'] == f'{NAMESPACE}-2-1'

    run_hpos([namespaces['hpo-2'][1]])

    hpos_ready, remainings = fetch_hpos_valid_curves(client, remainings,
                                                     ['e'], data)

    assert len(remainings) == 0
    assert len(hpos_ready) == 1
    assert len(hpos_ready['hpo-2']) == 1
    assert hpos_ready['hpo-2'][0] == f'{NAMESPACE}-2-2'

    assert len(data) == 2
    assert len(data['hpo-1']) == 2
    assert len(data['hpo-2']) == 2
    assert data['hpo-2'][f'{NAMESPACE}-2-2'].attrs['namespace'] == f'{NAMESPACE}-2-2'
def run(uri, database, namespace, function, num_experiments, budget, fidelity,
        space, objective, variables, defaults, sleep_time=60,
        do_full_train=False, save_dir='.', partial=False, register=True):

    # TODO: Add hyperband
    hpos = ['grid_search', 'nudged_grid_search', 'noisy_grid_search',
            'random_search', 'bayesopt']

    if fidelity is None:
        fidelity = Fidelity(1, 1, name='epoch').to_dict()

    # TODO: Add back when hyperband is implemented
    # if fidelity['min'] == fidelity['max']:
    #     hpos.remove(hpos.index('hyperband'))

    if num_experiments is None:
        num_experiments = 2

    client = new_client(uri, database)

    hpo_stats = fetch_all_hpo_stats(client, namespace)

    configs = generate_hpos(
        list(range(num_experiments)), hpos, budget, fidelity, space,
        namespace, defaults)

    variable_names = list(sorted(variables.keys()))

    if partial:
        namespaces = defaultdict(list)
        for hpo, hpo_configs in configs.items():
            for hpo_namespace, config in hpo_configs.items():
                namespaces[hpo].append(hpo_namespace)

        data = defaultdict(dict)
        fetch_hpos_valid_curves(client, namespaces, variable_names, data,
                                partial=True)

        data = consolidate_results(data)
        save_results(namespace, data, save_dir)
        return

    namespaces = register_hpos(
        client, namespace, function, configs,
        dict(list(variables.items()) + list(defaults.items())),
        hpo_stats, register)
    remainings = namespaces

    print_status(client, namespace, namespaces)

    data = defaultdict(dict)
    while sum(remainings.values(), []):
        hpos_ready, remainings = fetch_hpos_valid_curves(
            client, remainings, variable_names, data)

        # TODO: Implement full-train part
        if do_full_train:
            configs = generate_tests(data, defaults, registered)
            new_registered_tests = register_tests(client, namespace, function,
                                                  configs)

        if not sum(hpos_ready.values(), []):
            print_status(client, namespace, namespaces)
            time.sleep(sleep_time)

    # Save valid results
    data = consolidate_results(data)
    save_results(namespace, data, save_dir)

    if not do_full_train:
        return

    # TODO: Implement full-train part
    wait(completed)  # take the sum of all hpo_namespaces

    # NOTE & TODO: This should follow the same format as valid results, but
    # we need to make sure the mapping in order of trials is the same.
    data = fetch_results(client, namespace, namespaces)

    # Save test results
    save_results(namespace, data, save_dir)
from olympus.observers.msgtracker import METRIC_QUEUE, METRIC_ITEM
from olympus.hpo.parallel import RESULT_QUEUE, WORK_QUEUE
from olympus.hpo.worker import TrialWorker
from olympus.hpo import Fidelity

from studies import register_hpo, fetch_hpo_valid_curves
from studies import xarray_to_scipy_results, plot

URI = 'mongo://127.0.0.1:27017'
DATABASE = 'olympus'

NAMESPACE = 'test-hpo'
CONFIG = {
    'name': 'random_search',
    'seed': 0,
    'count': 1,
    'fidelity': Fidelity(1, 10, name='epoch').to_dict(),
    'space': {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)',
        'd': 'loguniform(lower=1, upper=10)',
    }
}
DEFAULTS = {}


def foo(uid, a, b, c, d, e=1, epoch=0, experiment_name=NAMESPACE, client=None):
    result = a + 2 * b - c**2 + d + e
    for i in range(epoch + 1):
        data = {'obj': i + result, 'valid': i + result, 'uid': uid, 'epoch': i}
        client.push(METRIC_QUEUE, experiment_name, data, mtype=METRIC_ITEM)
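# Note on the fixture (added for clarity, not used by the tests): with the
# Fidelity(1, 1, name='epoch') override that most tests apply, each trial
# calls foo with epoch=1, so the `for i in range(epoch + 1)` loop pushes two
# metric rows, one for epoch 0 and one for epoch 1. This is why the fetched
# curves assert epoch values of [0, 1] and obj/valid shapes of
# (2, num_trials, 1): (num_epochs, num_trials, num_seeds).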
def run(uri, database, namespace, function, num_experiments, num_simuls,
        fidelity, space, objective, variables, defaults, num_replicates=None,
        sleep_time=60, do_full_train=False, save_dir='.', seed=1,
        register=True, rep_types=REP_TYPES):

    hpo_budget = 100
    surrogate_budget = 200

    if num_replicates is None:
        num_replicates = num_experiments

    # We use 200 trials to fit the surrogate models (surrogate_budget is 200)
    # but we only need 100 for the ideal runs (hpo_budget is 100). Since
    # num_simuls is at most half the number of experiments, we can run only
    # num_experiments / 2 HPO runs and use the first 100 and last 100 trials
    # of each run as 2 separate ideal runs. This is possible because we are
    # using random search.
    assert (num_experiments % 2) == 0
    assert num_simuls <= (num_experiments / 2)
    num_ideal = num_experiments // 2

    hpo = 'random_search'

    # TODO: For each repetition, vary all sources of variation;
    #       when one HPO is done, create all biased and simulated replicates.

    if fidelity is None:
        fidelity = Fidelity(1, 1, name='epoch').to_dict()

    client = new_client(uri, database)

    configs = generate_hpos(
        list(range(num_ideal)), [hpo], surrogate_budget, fidelity, space,
        namespace, defaults)

    to_replicate = get_configs_to_replicate(configs, num_simuls)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, namespace)

    namespaces = register_hpos(client, namespace, function, configs,
                               defaults, hpo_stats, register=register)
    remainings = namespaces

    data_hpo = defaultdict(dict)
    all_replicates = dict(random_search=dict())
    while sum(remainings.values(), []):
        print_status(client, namespace, namespaces)
        hpos_ready, remainings = fetch_hpos_valid_curves(
            client, remainings, variable_names, data_hpo)

        ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

        replicates = generate_replicates(
            ready_configs, data_hpo, variables, objective, hpo_budget,
            num_replicates, early_stopping=False, rep_types=rep_types)

        registered_replicates = None  # only set when register is True
        if register:
            registered_replicates = register_all_replicates(
                client, function, namespace, replicates)

        if replicates.get('random_search'):
            all_replicates['random_search'].update(replicates['random_search'])
        if sum(remainings.values(), []) and not registered_replicates:
            time.sleep(sleep_time)

    wait(client, namespace, sleep=sleep_time)

    data_replicates = fetch_hpos_replicates(client, configs, all_replicates,
                                            variable_names, space, rep_types)

    # Save valid results
    data = consolidate_results(data_hpo, data_replicates, rep_types)
    save_results(namespace, data, save_dir)
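# Worked example of the budget split above (hypothetical numbers): with
# num_experiments = 10 and num_simuls = 5 (<= 10 / 2), we launch
# num_ideal = 10 // 2 = 5 random-search runs of surrogate_budget = 200
# trials each. The first 100 and last 100 trials of every run are treated as
# two independent hpo_budget = 100 ideal runs, recovering the 10 requested
# experiments. This only works for random search, where trials are i.i.d.,
# so any 100-trial subset is itself a valid 100-trial random search.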
def make_task():
    loader = DataLoader(splits, sampler_seed=1, batch_size=32)

    main_task = Classification(
        classifier=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=loader.train(),
        device=device,
        storage=StateStorage(folder=f'{base}/hpo_simple'))

    main_task.metrics.append(
        Accuracy(name='validation', loader=loader.valid(batch_size=64)))

    return main_task


space = make_task().get_space()

hp_optimizer = HPOptimizer('hyperband', fidelity=Fidelity(1, 30).to_dict(),
                           space=space)

hpo_task = HPO(hp_optimizer, make_task)
result = hpo_task.fit(objective='validation_accuracy')

print('Best Params:')
print('-' * 40)
print(f'validation_accuracy: {result.objective}')
show_dict(result.params)
def test_consolidate_results(client):
    num_experiments = 5
    num_replicates = 10
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 5}
    defaults = {'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, NAMESPACE)

    namespaces = register_hpos(client, NAMESPACE, foo, configs, defaults,
                               hpo_stats)

    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    data = defaultdict(dict)
    hpos_ready, remainings = fetch_hpos_valid_curves(client, namespaces,
                                                     variable_names, data)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    replicates = generate_replicates(
        ready_configs, data, variables, objective, hpo_budget,
        num_replicates, early_stopping=False)

    register(client, foo, NAMESPACE, replicates)

    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    print(fetch_vars_stats(client, NAMESPACE))

    data = fetch_hpos_replicates(client, configs, replicates, variable_names,
                                 space, data)
    data = consolidate_results(data)

    assert len(data) == 1
    assert len(data['random_search']) == 4

    hpo_reps = data['random_search']
    assert hpo_reps['ideal'].obj.shape == (6, surrogate_budget, num_experiments)
    assert hpo_reps['biased'].obj.shape == (6, num_replicates, num_experiments)
    assert hpo_reps['simul-fix'].obj.shape == (6, num_replicates, num_experiments)
    assert hpo_reps['simul-free'].obj.shape == (6, num_replicates, num_experiments)

    def count_unique(attr):
        return len(set(attr.values.reshape(-1).tolist()))

    # Test sources of variation
    # NOTE: In ideal, the source of variation varies across ideal runs after
    #       consolidation, but it stays fixed during the HPO itself.
    assert count_unique(hpo_reps['ideal']['d']) == num_experiments
    assert count_unique(hpo_reps['biased']['d']) == (num_replicates * num_experiments)
    assert count_unique(hpo_reps['simul-free']['d']) == (num_replicates * num_experiments)
    assert count_unique(hpo_reps['simul-fix']['d']) == num_experiments

    # Test HPs
    assert count_unique(hpo_reps['ideal']['a']) == (num_experiments * surrogate_budget)
    assert count_unique(hpo_reps['biased']['a']) == num_experiments
    assert count_unique(hpo_reps['simul-free']['a']) == (num_replicates * num_experiments)
    assert count_unique(hpo_reps['simul-fix']['a']) == (num_replicates * num_experiments)
    assert numpy.allclose(hpo_reps['simul-free']['a'].values,
                          hpo_reps['simul-fix']['a'].values)
def test_register_hpos_resume(client, monkeypatch):
    namespace = 'test-hpo'
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]  # , 'hyperband', 'bayesopt']
    budget = 200
    fidelity = Fidelity(1, 10, name='d').to_dict()
    num_experiments = 2
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'd': 'uniform(-1, 1)'
    }
    defaults = {}
    stats = {}

    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)

    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=HPO_ITEM) == 0
    new_namespaces = register_hpos(client, namespace, foo, configs, defaults,
                                   stats)
    assert len(set(new_namespaces)) == len(configs)

    print(new_namespaces)
    stats = {namespace: {} for namespace in sum(new_namespaces.values(), [])}

    more_configs = generate_hpos(range(num_experiments + 2), hpos, budget,
                                 fidelity, search_space, NAMESPACE, defaults)

    # Save new namespaces for test
    new_namespaces = defaultdict(list)

    def mock_register_hpo(client, namespace, function, config, defaults):
        new_namespaces[config['name']].append(namespace)
        return register_hpo(client, namespace, function, config, defaults)

    def flatten_configs(confs):
        return sum((list(configs.keys()) for configs in confs.values()), [])

    monkeypatch.setattr('olympus.studies.hpo.main.register_hpo',
                        mock_register_hpo)
    namespaces = register_hpos(client, namespace, foo, more_configs, defaults,
                               stats)

    assert (len(set(sum(new_namespaces.values(), [])))
            == len(flatten_configs(more_configs)) - len(flatten_configs(configs)))

    # Verify new registered configs
    for hpo, configs in more_configs.items():
        for hpo_namespace, config in configs.items():
            messages = client.monitor().messages(WORK_QUEUE, hpo_namespace,
                                                 mtype=HPO_ITEM)
            assert len(messages) == 1
            assert messages[0].message['hpo']['kwargs'] == config