def test_fit_model():
    """Fitting a surrogate model on mocked random-search data runs cleanly.

    The resulting model is exercised further in ``test_simulate_hpo``; here
    we only check that ``fit_model`` accepts data produced by ``build_data``
    and ``convert_data_to_xy``.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)
    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective)

    # Should not raise.
    fit_model(X, y, space, seed=1)
def test_generate_hpos():
    """Generated HPO configs have the expected namespaces, names and seeds."""
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]  # , 'hyperband', 'bayesopt']
    num_experiments = 10
    budget = 200
    fidelity = 'fidelity(1, 10)'
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'lr': 'uniform(-1, 1)'
    }
    defaults = {'d': 1, 'e': 2}

    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)

    assert len(configs) == len(hpos)
    # Grid searches are deterministic, hence a single config each.
    assert len(configs['grid_search']) == 1
    assert len(configs['nudged_grid_search']) == 1
    assert configs['grid_search'][f'{NAMESPACE}-grid-search-p-4'][
        'name'] == 'grid_search'
    assert configs['grid_search'][f'{NAMESPACE}-grid-search-p-4'].get(
        'nudge') is None
    assert (configs['nudged_grid_search']
            [f'{NAMESPACE}-grid-search-nudged-p-4']['name'] == 'grid_search')
    assert configs['nudged_grid_search'][
        f'{NAMESPACE}-grid-search-nudged-p-4']['nudge'] == 0.5

    # Seeded HPOs get one config per experiment.
    for hpo in hpos[2:]:
        for i in range(num_experiments):
            if hpo == 'noisy_grid_search':
                namespace = '{}-noisy-grid-search-p-4-s-{}'.format(
                    NAMESPACE, i)
            else:
                namespace = '{}-{}-s-{}'.format(NAMESPACE,
                                                hpo.replace('_', '-'), i)
            if hpo == 'bayesopt':
                # bayesopt (robo) draws its four seeds from a
                # per-experiment RNG, in this exact order.
                rng = numpy.random.RandomState(i)
                assert configs[hpo][namespace]['name'] == 'robo'
                assert configs[hpo][namespace]['model_seed'] == rng.randint(
                    2**30)
                assert configs[hpo][namespace]['prior_seed'] == rng.randint(
                    2**30)
                assert configs[hpo][namespace]['init_seed'] == rng.randint(
                    2**30)
                assert configs[hpo][namespace][
                    'maximizer_seed'] == rng.randint(2**30)
            else:
                assert configs[hpo][namespace]['name'] == hpo
                assert configs[hpo][namespace]['seed'] == i
def test_generate_biased_replicates_last_epoch():
    """Biased replicates copy the best trial's HPs and only re-draw seeds."""
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5
    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)
    randomize_seeds(configs['random_search'], variables, seed)
    data = build_data(surrogate_budget, variables, defaults, space)
    replicates = generate_biased_replicates(
        data,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0'],
        variables,
        objective,
        num_replicates,
        hpo_budget,
        early_stopping=False)
    # Index of the best trial within the mocked data; presumably determined
    # by build_data's deterministic generation — TODO confirm.
    best_trial_index = 6
    # The replicates' variable seeds are drawn from an RNG seeded with the
    # HPO config's own seed, in iteration order.
    rng = numpy.random.RandomState(
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])
    for replicate in replicates:
        should_be = copy.deepcopy(defaults)
        # Hyper-parameters are copied verbatim from the best trial...
        for param in space.keys():
            assert replicate[param] == float(
                data.sel(order=best_trial_index)[param].values)
            should_be[param] = replicate[param]
        # ...while the variables are freshly drawn per replicate.
        for variable in variables:
            assert replicate[variable] == rng.randint(2**30)
            should_be[variable] = replicate[variable]
        # The uid is the identity of the fully-resolved parameter dict.
        assert replicate['uid'] == compute_identity(should_be, IDENTITY_SIZE)
def test_generate_simulated_fix():
    """simul-fix replicates share the randomized seeds but re-sample HPs."""
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)
    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    config = configs['random_search'][f'{NAMESPACE}-random-search-s-0']

    # Make sure the defaults have been replaced by randomized seeds
    for variable in ('d', 'e'):
        assert config['defaults'][variable] != defaults[variable]

    replicates = generate_simulated_fix(data, config, variables, objective,
                                        hpo_budget, num_replicates,
                                        early_stopping=False)

    assert len(replicates) == num_replicates
    reference = replicates[0]
    for replicate in replicates[1:]:
        # Hyper-parameters and uid differ from the first replicate...
        for key in ('a', 'b', 'c', 'uid'):
            assert replicate[key] != reference[key]
        # ...while the randomized seeds stay fixed across replicates.
        for variable in ('d', 'e'):
            assert replicate[variable] == config['defaults'][variable]
def generate_mocked_replicates(num_replicates, num_experiments=5):
    """Build replicates from mocked HPO data, skipping any worker round-trip.

    Returns whatever ``generate_replicates`` produces for configs whose HPO
    data is entirely synthesized by ``build_data``.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 3}
    defaults = {'d': 1, 'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    randomize_seeds(configs['random_search'], variables, seed)

    # Pretend every HPO namespace already completed, and attach mocked data
    # for each of them.
    namespaces = list(configs['random_search'])
    hpos_ready = {'random_search': list(namespaces)}
    data = {
        'random_search': {
            namespace: build_data(surrogate_budget, variables, defaults,
                                  space)
            for namespace in namespaces
        }
    }

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    return generate_replicates(ready_configs, data, variables, objective,
                               hpo_budget, num_replicates,
                               early_stopping=False)
def test_simulate_hpo():
    """Simulating an HPO on a fitted surrogate yields one sample per HP.

    Fits a surrogate model on mocked data, simulates a full HPO run on it,
    and checks that the returned best sample covers exactly the search
    space's dimensions.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)
    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective)

    model = fit_model(X, y, space, seed=1)

    sample = simulate_hpo(
        model, space, hpo_budget,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])

    assert sample.keys() == space.keys()
def test_reset_pool_size():
    """After reset_pool_size every config's pool size is unset (None)."""
    search_space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)',
        'd': 'uniform(lower=-1, upper=1)'
    }
    defaults = {'d': 1, 'e': 2}
    num_experiments = 5
    hpo = 'random_search'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=200, fidelity=fidelity,
        search_space=search_space, namespace=NAMESPACE, defaults=defaults)

    reset_pool_size(configs['random_search'])

    assert all(config['pool_size'] is None
               for config in configs['random_search'].values())
def test_randomize_seeds():
    """randomize_seeds draws seeds from a seeded RNG; uid only if requested."""
    search_space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = ['d', 'e']
    seed = 2
    num_experiments = 5
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(
        list(range(num_experiments)), ['random_search'], budget=200,
        fidelity=fidelity, search_space=search_space, namespace=NAMESPACE,
        defaults={})

    # With compute_id=True, a uid is attached on top of the drawn seeds.
    randomize_seeds(configs['random_search'], variables, seed,
                    compute_id=True)
    expected_rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for name in variables:
            assert config['defaults'][name] == expected_rng.randint(2**30)
        # The uid must be popped first: it is the identity of the defaults
        # dict without the uid itself.
        popped_uid = config['defaults'].pop('uid')
        assert popped_uid == compute_identity(config['defaults'],
                                              IDENTITY_SIZE)

    # With compute_id=False, the same seeds are drawn but no uid is added.
    randomize_seeds(configs['random_search'], variables, seed,
                    compute_id=False)
    expected_rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for name in variables:
            assert config['defaults'][name] == expected_rng.randint(2**30)
        assert 'uid' not in config['defaults']
def test_register_hpos(client):
    """Registering HPO configs pushes exactly one HPO_ITEM message each."""
    namespace = 'test-hpo'
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]  # , 'hyperband', 'bayesopt']
    budget = 200
    fidelity = Fidelity(1, 10, name='d').to_dict()
    num_experiments = 2
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'd': 'uniform(-1, 1)'
    }
    defaults = {'e': 2}
    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)
    stats = {}

    # The work queue must start empty for this namespace.
    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=HPO_ITEM) == 0

    new_namespaces = register_hpos(client, namespace, foo, configs, defaults,
                                   stats)
    assert len(set(new_namespaces)) == len(configs)

    for hpo, hpo_namespaces in new_namespaces.items():
        for i, hpo_namespace in enumerate(hpo_namespaces):
            messages = client.monitor().messages(WORK_QUEUE, hpo_namespace,
                                                 mtype=HPO_ITEM)
            assert len(messages) == 1
            assert messages[0].message['hpo']['kwargs'] == configs[hpo][
                hpo_namespace]
            assert messages[0].message['work']['kwargs'] == defaults
def test_convert_data_to_xy():
    """HPs map to X columns (log scale for loguniform); y is the last epoch.

    Verifies the column layout of X against the raw data arrays and that y
    takes the objective value at the final epoch for each trial.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)
    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective, early_stopping=False)

    assert numpy.array_equal(X[:, 0], data['a'].values.reshape(-1))
    assert numpy.array_equal(X[:, 1], data['b'].values.reshape(-1))
    # loguniform dimensions are converted to log scale.
    assert numpy.array_equal(X[:, 2], numpy.log(data['c'].values.reshape(-1)))
    assert numpy.array_equal(
        y, data[objective].isel(epoch=-1).values.reshape(-1))
    assert y.shape == (surrogate_budget, )
def test_consolidate_results(client):
    """End-to-end: run HPOs and replicates through workers, then check the
    consolidated result shapes and the sources of variation per rep type."""
    num_experiments = 5
    num_replicates = 10
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 5}
    defaults = {'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, NAMESPACE)

    namespaces = register_hpos(client, NAMESPACE, foo, configs, defaults,
                               hpo_stats)

    # Drain the registered HPO trials with a short-lived worker.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    data = defaultdict(dict)
    hpos_ready, remainings = fetch_hpos_valid_curves(client, namespaces,
                                                     variable_names, data)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    replicates = generate_replicates(ready_configs, data, variables,
                                     objective, hpo_budget, num_replicates,
                                     early_stopping=False)
    register(client, foo, NAMESPACE, replicates)

    # A second worker pass runs the newly registered replicates.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    print(fetch_vars_stats(client, NAMESPACE))

    data = fetch_hpos_replicates(client, configs, replicates, variable_names,
                                 space, data)
    data = consolidate_results(data)

    assert len(data) == 1
    assert len(data['random_search']) == 4

    hpo_reps = data['random_search']
    assert hpo_reps['ideal'].obj.shape == (6, surrogate_budget,
                                           num_experiments)
    assert hpo_reps['biased'].obj.shape == (6, num_replicates,
                                            num_experiments)
    assert hpo_reps['simul-fix'].obj.shape == (6, num_replicates,
                                               num_experiments)
    assert hpo_reps['simul-free'].obj.shape == (6, num_replicates,
                                                num_experiments)

    def count_unique(attr):
        # Number of distinct values across the whole array.
        return len(set(attr.values.reshape(-1).tolist()))

    # Test sources of variation
    # NOTE: In ideal, source of variation will vary across ideal after
    #       consolidation but it stays fixed during the HPO itself
    assert count_unique(hpo_reps['ideal']['d']) == num_experiments
    assert count_unique(hpo_reps['biased']['d']) == (num_replicates *
                                                     num_experiments)
    assert count_unique(hpo_reps['simul-free']['d']) == (num_replicates *
                                                         num_experiments)
    assert count_unique(hpo_reps['simul-fix']['d']) == num_experiments

    # Test HPs
    assert count_unique(hpo_reps['ideal']['a']) == (num_experiments *
                                                    surrogate_budget)
    assert count_unique(hpo_reps['biased']['a']) == num_experiments
    assert count_unique(hpo_reps['simul-free']['a']) == (num_replicates *
                                                         num_experiments)
    assert count_unique(hpo_reps['simul-fix']['a']) == (num_replicates *
                                                        num_experiments)
    assert numpy.allclose(hpo_reps['simul-free']['a'].values,
                          hpo_reps['simul-fix']['a'].values)
def run(uri, database, namespace, function, num_experiments, num_simuls,
        fidelity, space, objective, variables, defaults,
        num_replicates=None, sleep_time=60, do_full_train=False,
        save_dir='.', seed=1, register=True, rep_types=REP_TYPES):
    """Run the HPO study end to end and save the consolidated results.

    Registers the HPO runs, polls until all of them complete, generates and
    registers the replicates as HPOs finish, then fetches everything back,
    consolidates, and saves to ``save_dir``.

    When ``register`` is False nothing is pushed to the queue; the function
    only polls and consolidates existing results.
    """
    hpo_budget = 100
    surrogate_budget = 200

    if num_replicates is None:
        num_replicates = num_experiments

    # We use 200 trials to fit the surrogate models (surrogate_budget is 200)
    # but we only need 100 for the ideal (hpo_budget is 100)
    # therefore, since num_simuls is at least half smaller than number of
    # replicates, we can run only (num_replicates / 2) hpo runs and use
    # first half and second 100 half as 2 separate ideal runs.
    # This is possible since we are using random search.
    assert (num_experiments % 2) == 0
    assert num_simuls <= (num_experiments / 2)
    num_ideal = num_experiments // 2

    hpo = 'random_search'

    # TODO
    # for each repetition, vary all sources of variations
    # when one hpo is done, create all biased and simulations

    if fidelity is None:
        fidelity = Fidelity(1, 1, name='epoch').to_dict()

    client = new_client(uri, database)

    configs = generate_hpos(list(range(num_ideal)), [hpo], surrogate_budget,
                            fidelity, space, namespace, defaults)

    to_replicate = get_configs_to_replicate(configs, num_simuls)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, namespace)

    namespaces = register_hpos(client, namespace, function, configs,
                               defaults, hpo_stats, register=register)
    remainings = namespaces

    data_hpo = defaultdict(dict)
    all_replicates = dict(random_search=dict())
    while sum(remainings.values(), []):
        print_status(client, namespace, namespaces)
        hpos_ready, remainings = fetch_hpos_valid_curves(
            client, remainings, variable_names, data_hpo)

        ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

        replicates = generate_replicates(
            ready_configs, data_hpo, variables, objective, hpo_budget,
            num_replicates, early_stopping=False, rep_types=rep_types)

        # Fix: previously `registered_replicates` was only assigned when
        # register=True, raising NameError below otherwise.
        registered_replicates = False
        if register:
            registered_replicates = register_all_replicates(
                client, function, namespace, replicates)

        if replicates.get('random_search'):
            all_replicates['random_search'].update(
                replicates['random_search'])
        # Back off only if there is still work pending and nothing new was
        # just registered.
        if sum(remainings.values(), []) and not registered_replicates:
            time.sleep(sleep_time)

    wait(client, namespace, sleep=sleep_time)
    data_replicates = fetch_hpos_replicates(client, configs, all_replicates,
                                            variable_names, space, rep_types)

    # Save valid results
    data = consolidate_results(data_hpo, data_replicates, rep_types)
    save_results(namespace, data, save_dir)
def test_register_hpos_resume(client, monkeypatch):
    """Resuming registration only registers configs not yet in the stats."""
    namespace = 'test-hpo'
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]  # , 'hyperband', 'bayesopt']
    budget = 200
    fidelity = Fidelity(1, 10, name='d').to_dict()
    num_experiments = 2
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'd': 'uniform(-1, 1)'
    }
    defaults = {}
    stats = {}

    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)

    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=HPO_ITEM) == 0

    new_namespaces = register_hpos(client, namespace, foo, configs, defaults,
                                   stats)
    assert len(set(new_namespaces)) == len(configs)
    print(new_namespaces)

    # Mark every namespace from the first registration as already known.
    stats = {namespace: {}
             for namespace in sum(new_namespaces.values(), [])}

    more_configs = generate_hpos(range(num_experiments + 2), hpos, budget,
                                 fidelity, search_space, NAMESPACE, defaults)

    # Save new namespaces for test
    new_namespaces = defaultdict(list)

    def mock_register_hpo(client, namespace, function, config, defaults):
        new_namespaces[config['name']].append(namespace)
        return register_hpo(client, namespace, function, config, defaults)

    def flatten_configs(confs):
        return sum((list(configs.keys()) for configs in confs.values()), [])

    monkeypatch.setattr('olympus.studies.hpo.main.register_hpo',
                        mock_register_hpo)
    namespaces = register_hpos(client, namespace, foo, more_configs,
                               defaults, stats)

    # Only the configs absent from the first registration get registered.
    assert (len(set(sum(new_namespaces.values(), []))) ==
            len(flatten_configs(more_configs)) - len(flatten_configs(configs)))

    # Verify new registered configs
    for hpo, configs in more_configs.items():
        for hpo_namespace, config in configs.items():
            messages = client.monitor().messages(WORK_QUEUE, hpo_namespace,
                                                 mtype=HPO_ITEM)
            assert len(messages) == 1
            assert messages[0].message['hpo']['kwargs'] == config