def test_get_hpo_completed(client):
    """Run a one-trial HPO to completion and verify the fetched HPO state."""
    register_hpo(client, NAMESPACE, foo, CONFIG, {'e': 2})
    runner = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    runner.max_retry = 0
    runner.run()

    hpo, remote_call = get_hpo(client, NAMESPACE)
    assert len(hpo.trials) == 1

    state = hpo.state_dict(compressed=False)
    assert state['seed'] == CONFIG['seed']
    assert state['fidelity'] == CONFIG['fidelity']
    state['space'].pop('uid')
    assert state['space'] == CONFIG['space']

    # Verify default was passed properly
    assert remote_call['kwargs']['e'] == 2

    remote_call['kwargs'].update(dict(a=1, b=1, c=1, d=1, uid=0, client=client))

    # Verify that the remote_call is indeed callable.
    a, b, c, d, e = 1, 1, 1, 1, 2
    assert exec_remote_call(remote_call) == a + 2 * b - c**2 + d + e
def test_is_hpo_completed(client):
    """An HPO only reports completed after a worker has run all its trials."""
    assert not is_hpo_completed(client, NAMESPACE)

    # Registering alone must not mark the HPO as completed.
    register_hpo(client, NAMESPACE, foo, CONFIG, DEFAULTS)
    assert not is_hpo_completed(client, NAMESPACE)

    runner = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    runner.max_retry = 0
    runner.run()
    assert is_hpo_completed(client, NAMESPACE)
def test_plot(client):
    """Smoke-test plotting of HPO validation curves to a PNG file."""
    cfg = copy.deepcopy(CONFIG)
    cfg['count'] = 10
    cfg['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE, foo, cfg, {'e': 2})
    runner = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    runner.max_retry = 0
    runner.run()

    curves = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])
    plot(cfg['space'], 'obj', curves, 'test.png')
def test_save_load_results(client):
    """Results saved to disk can be loaded back."""
    cfg = copy.deepcopy(CONFIG)
    cfg['count'] = 2
    cfg['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE, foo, cfg, {'e': 2})
    runner = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    runner.max_retry = 0
    runner.run()

    curves = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])
    save_results(NAMESPACE, curves, '.')
    assert load_results(NAMESPACE, '.')
def test_fetch_hpo_valid_results_no_epochs(client):
    """Fetched curves have the expected coordinates with a single-epoch fidelity."""
    n = 5
    cfg = copy.deepcopy(CONFIG)
    cfg['count'] = n
    cfg['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE, foo, cfg, {'e': 2})
    runner = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    runner.max_retry = 0
    runner.run()

    data = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    assert data.attrs['namespace'] == NAMESPACE
    # Max fidelity of 1 yields only epochs 0 and 1.
    assert data.epoch.values.tolist() == [0, 1]
    assert data.order.values.tolist() == list(range(n))
    assert data.seed.values.tolist() == [1]
    assert data.params.values.tolist() == list('abcd')
    assert data.noise.values.tolist() == ['e']
    assert data.obj.shape == (2, n, 1)
    assert data.valid.shape == (2, n, 1)
def test_fetch_hpos_valid_results_first_time(client):
    """Fetching curves for several HPOs at once populates the data dict."""
    n = 5
    cfg = copy.deepcopy(CONFIG)
    cfg['count'] = n
    cfg['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE + '1', foo, cfg, {'e': 2})
    register_hpo(client, NAMESPACE + '2', foo, cfg, {'e': 2})

    runner = TrialWorker(URI, DATABASE, 0, None)
    runner.max_retry = 0
    runner.timeout = 1
    runner.run()

    namespaces = {'hpo' + str(i): [NAMESPACE + str(i)] for i in range(1, 3)}

    data = defaultdict(dict)
    _ = fetch_hpos_valid_curves(client, namespaces, ['e'], data)

    assert len(data) == 2
    assert len(data['hpo1']) == 1
    assert len(data['hpo2']) == 1

    namespace = f'{NAMESPACE}1'
    curves = data['hpo1'][namespace]
    assert curves.attrs['namespace'] == namespace
    assert curves.epoch.values.tolist() == [0, 1]
    assert curves.order.values.tolist() == list(range(n))
    assert curves.seed.values.tolist() == [1]
    assert curves.params.values.tolist() == list('abcd')
    assert curves.noise.values.tolist() == ['e']
    assert curves.obj.shape == (2, n, 1)
    assert curves.valid.shape == (2, n, 1)
    # Both HPOs used the same config, so their curves should compare equal.
    assert curves == data['hpo2'][f'{NAMESPACE}2']
def run_hpos(namespaces):
    """Register an HPO per namespace and drain them with a single worker."""
    # NOTE(review): relies on `client` and `config` from the enclosing scope.
    for ns in namespaces:
        register_hpo(client, ns, foo, config, {'e': 2})

    runner = TrialWorker(URI, DATABASE, 0, None)
    runner.max_retry = 0
    runner.timeout = 1
    runner.run()
def test_convert_xarray_to_scipy_results(client):
    """xarray HPO results convert to a scipy-style optimization result."""
    n = 10
    cfg = copy.deepcopy(CONFIG)
    cfg['count'] = n
    cfg['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE, foo, cfg, {'e': 2})
    runner = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    runner.max_retry = 0
    runner.run()

    data = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])
    scipy_results = xarray_to_scipy_results(cfg['space'], 'obj', data)

    # Index of the best trial according to the objective at the last epoch.
    min_idx = numpy.argmin(data.obj.values[1, :, 0])
    assert scipy_results.x[0] == data.a.values[min_idx, 0]
    assert scipy_results.x[1] == data.b.values[min_idx, 0]
    assert scipy_results.x[2] == data.c.values[min_idx, 0]
    # 'd' appears log-transformed in the scipy representation — presumably a
    # log-uniform dimension; verify against the space definition.
    assert scipy_results.x[3] == numpy.log(data.d.values[min_idx, 0])
    assert scipy_results.fun == data.obj.values[1, min_idx, 0]
    assert len(scipy_results.x_iters) == n
def test_fetch_hpo_valid_results(client):
    """Curves span the full fidelity range; objective grows by 1 per epoch."""
    n = 5
    cfg = copy.deepcopy(CONFIG)
    cfg['count'] = n

    register_hpo(client, NAMESPACE, foo, cfg, {'e': 2})
    runner = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    runner.max_retry = 0
    runner.run()

    data = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    max_epoch = cfg['fidelity']['max']
    assert data.attrs['namespace'] == NAMESPACE
    assert data.epoch.values.tolist() == list(range(max_epoch + 1))
    assert data.order.values.tolist() == list(range(n))
    assert data.seed.values.tolist() == [1]
    assert data.params.values.tolist() == list('abcd')
    assert data.noise.values.tolist() == ['e']
    assert data.obj.shape == (max_epoch + 1, n, 1)
    # The objective at epoch 10 differs from epoch 0 by exactly 10 everywhere.
    assert numpy.all(
        (data.obj.loc[dict(epoch=10)] - data.obj.loc[dict(epoch=0)])
        == (numpy.ones((n, 1)) * 10))
def test_register_hpo_is_actionable(client):
    """Test that the registered HPO have valid workitems and can be executed."""
    namespace = 'test-hpo'
    config = {
        'name': 'random_search',
        'seed': 1,
        'count': 1,
        'fidelity': Fidelity(1, 10, name='d').to_dict(),
        'space': {
            'a': 'uniform(-1, 1)',
            'b': 'uniform(-1, 1)',
            'c': 'uniform(-1, 1)',
            'd': 'uniform(-1, 1)',
        },
    }

    # No defaults for this HPO.
    register_hpo(client, namespace, foo, config, {})

    runner = TrialWorker(URI, DATABASE, 0, namespace)
    runner.max_retry = 0
    runner.run()

    assert client.monitor().read_count(WORK_QUEUE, namespace, mtype=WORK_ITEM) == 1
    assert client.monitor().read_count(WORK_QUEUE, namespace, mtype=HPO_ITEM) == 2

    messages = client.monitor().unread_messages(RESULT_QUEUE, namespace, mtype=HPO_ITEM)
    compressed_state = messages[0].message.get('hpo_state')
    assert compressed_state is not None

    state = decompress_dict(compressed_state)
    assert len(state['trials']) == 1
    assert state['trials'][0][1]['objectives'] == [10.715799430116764]
def run_hpos(namespaces):
    """Register one HPO per namespace, each with a distinct seed, then run them."""
    # NOTE(review): relies on `client` and `config` from the enclosing scope.
    for i, ns in enumerate(namespaces):
        config['seed'] = i
        register_hpo(client, ns, foo, config, {'e': 2})

    runner = TrialWorker(URI, DATABASE, 0, None)
    runner.max_retry = 0
    runner.timeout = 1
    runner.run()
def test_hpo_serializable(model_type):
    """A remote (serialized/resumed) HPO run must match a local single-run HPO.

    The remote worker serializes the HPO state and resumes it between branin
    calls; despite that, the resulting trials must be identical to a local
    run executed in one go.
    """
    namespace = 'test-robo-' + model_type
    n_init = 2
    count = 10

    # First run using a remote worker where serialization is necessary
    # and for which hpo is resumed between each braning call
    hpo = build_robo(model_type, n_init=n_init, count=count)

    namespace = 'test_hpo_serializable'
    hpo = {
        'hpo': make_remote_call(HPOptimizer, **hpo.kwargs),
        'hpo_state': None,
        'work': make_remote_call(branin),
        'experiment': namespace,
    }
    client = new_client(URI, DATABASE)
    client.push(WORK_QUEUE, namespace, message=hpo, mtype=HPO_ITEM)

    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 1
    worker.run()

    messages = client.monitor().unread_messages(RESULT_QUEUE, namespace)
    # BUGFIX: the original `for m in messages: ... break` left `m` unbound
    # (NameError) when `messages` was empty, hiding the intended failure
    # message; use next() so an empty queue fails the assertion cleanly.
    hpo_message = next((m for m in messages if m.mtype == HPO_ITEM), None)
    assert hpo_message is not None, 'HPO not completed'

    worker_hpo = build_robo(model_type)
    worker_hpo.load_state_dict(hpo_message.message['hpo_state'])
    assert len(worker_hpo.trials) == count

    # Then run locally where BO is not resumed
    local_hpo = build_robo(model_type, n_init=n_init, count=count)
    i = 0
    best = float('inf')
    while local_hpo.remaining() and i < local_hpo.hpo.count:
        samples = local_hpo.suggest()
        for sample in samples:
            z = branin(**sample)
            local_hpo.observe(sample['uid'], z)
            best = min(z, best)
        i += 1

    assert i == local_hpo.hpo.count

    # Although remote worker was resumed many times, it should give the same
    # results as the local one which was executed in a single run.
    assert worker_hpo.trials == local_hpo.trials
def test_fetch_results_all_completed(client):
    """Fetched results expose medians, noise, params and objective curves."""
    defaults = {'a': 1000, 'b': 1001}
    params = {'c': 2, 'd': 3, 'epoch': 5}
    defaults.update(params)
    medians = ['a']
    num_items = 2

    configs = generate(range(num_items), 'ab', defaults=defaults)
    namespace = 'test'
    register(client, foo, namespace, configs)
    print(configs)

    runner = TrialWorker(URI, DATABASE, 0, None)
    runner.max_retry = 0
    runner.timeout = 1
    runner.run()
    print(fetch_vars_stats(client, namespace))

    data = fetch_results(client, namespace, configs, medians, params, defaults)

    assert data.medians == ['a']
    assert data.noise.values.tolist() == ['a', 'b']
    assert data.params.values.tolist() == ['c', 'd']
    assert data.order.values.tolist() == [0, 1]
    assert data.epoch.values.tolist() == list(range(params['epoch'] + 1))
    assert data.uid.shape == (3, 2)
    assert data.seed.values.tolist() == data.noise.values.tolist() + ['reference']
    # Varied variable takes its index value; the others keep their defaults.
    assert data.a.values.tolist() == [[0, 1000, 1000], [1, 1000, 1000]]
    assert data.b.values.tolist() == [[1001, 0, 1001], [1001, 1, 1001]]
    assert data.c.values.tolist() == [[2, 2, 2], [2, 2, 2]]
    assert data.d.values.tolist() == [[3, 3, 3], [3, 3, 3]]
    assert (data.obj.loc[dict(order=0, seed='a')].values.tolist()
            == list(range(2002, 2002 + params['epoch'] + 1)))
def repair_hpo_lost_results(client, uri, database, namespace, test_only=False):
    """Detect (and when possible repair) HPO observations lost from the result queue.

    Compares the number of actioned results in the result queue against the
    number of observed trials in the HPO state.  When the HPO is still running
    and has lost observations, the corresponding result messages are reset to
    unread so a worker can re-observe them.  With ``test_only=True`` only the
    diagnostic is printed, no repair is attempted.
    """
    stats = get_hpo_status(client, namespace, WORK_QUEUE, HPO_ITEM)

    # presumably a one-off manual repair for this specific namespace — verify
    # before removing this special case.
    if namespace == 'sst2_hpo-bayesopt-s-10':
        client.db[RESULT_QUEUE].update_many(
            {
                'namespace': namespace,
                'mtype': RESULT_ITEM
            },
            {'$set': {
                'read': False,
                'actioned': False
            }})

    trials = client.db[RESULT_QUEUE].find(
        {
            'namespace': namespace,
            'mtype': RESULT_ITEM,
            'actioned': True
        },
        {'message.uid': 1})

    # Do this instead of count to avoid duplicates
    n_completed_and_observed = len(
        set(doc['message'][0]['uid'] for doc in trials))

    # Reset HPOs
    if stats['count'] == stats['completed']:
        # HPO finished: nothing can be repaired anymore, only report.
        hpo = get_hpo(client, namespace)[0]
        n_observed = sum(1 for trial in hpo.trials.values()
                         if trial.objective is not None)
        if n_observed < n_completed_and_observed:
            print('ERROR: HPO lost {} observations and we cannot restore!!!'.
                  format(n_completed_and_observed - n_observed))
        else:
            print('OK: No HPO observation lost')
    else:
        # HPO still in progress: a partial state can be repaired.
        hpo = get_hpo(client, namespace, partial=True)[0]
        n_observed = sum(1 for trial in hpo.trials.values()
                         if trial.objective is not None)
        if n_observed < n_completed_and_observed:
            print('ERROR: HPO lost {} observations'.format(
                n_completed_and_observed - n_observed))

            if test_only:
                return

            # Update trials
            client.db[RESULT_QUEUE].update_many(
                {
                    'namespace': namespace,
                    'mtype': RESULT_ITEM
                },
                {'$set': {
                    'read': False,
                    'actioned': False
                }})

            # Reset trial in work queue (if lost)

            # Run HPO to get it completed or in a repaired state where it can sample.
            worker = TrialWorker(uri,
                                 database,
                                 1,
                                 namespace,
                                 hpo_allowed=True,
                                 work_allowed=False)
            worker.timeout = 1

            # Make sure it only runs the HPO once
            def stop():
                worker.running = False

            threading.Timer(2, stop).start()
            worker.run()
        else:
            print('OK: No HPO observation lost')
def test_consolidate_results(client):
    """End-to-end check of consolidation across ideal/biased/simul variants.

    Runs surrogate HPOs, generates replicates, then verifies the shapes and
    the number of unique values of the sources of variation ('d') and the
    hyperparameters ('a') in each consolidated variant.
    """
    num_experiments = 5
    num_replicates = 10
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 5}
    defaults = {'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, NAMESPACE)

    namespaces = register_hpos(client, NAMESPACE, foo, configs, defaults,
                               hpo_stats)

    # Drain the surrogate HPOs.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    data = defaultdict(dict)
    hpos_ready, remainings = fetch_hpos_valid_curves(client, namespaces,
                                                     variable_names, data)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    replicates = generate_replicates(ready_configs,
                                     data,
                                     variables,
                                     objective,
                                     hpo_budget,
                                     num_replicates,
                                     early_stopping=False)
    register(client, foo, NAMESPACE, replicates)

    # Drain the replicate trials.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    print(fetch_vars_stats(client, NAMESPACE))

    data = fetch_hpos_replicates(client, configs, replicates, variable_names,
                                 space, data)
    data = consolidate_results(data)

    assert len(data) == 1
    assert len(data['random_search']) == 4

    hpo_reps = data['random_search']
    assert hpo_reps['ideal'].obj.shape == (6, surrogate_budget,
                                           num_experiments)
    assert hpo_reps['biased'].obj.shape == (6, num_replicates,
                                            num_experiments)
    assert hpo_reps['simul-fix'].obj.shape == (6, num_replicates,
                                               num_experiments)
    assert hpo_reps['simul-free'].obj.shape == (6, num_replicates,
                                                num_experiments)

    def count_unique(attr):
        # Number of distinct values across the whole array.
        return len(set(attr.values.reshape(-1).tolist()))

    # Test sources of variation
    # NOTE: In ideal, source of variation will vary across ideal after consolidation
    # but it stays fixed during the HPO itself
    assert count_unique(hpo_reps['ideal']['d']) == num_experiments
    assert count_unique(hpo_reps['biased']['d']) == (num_replicates *
                                                     num_experiments)
    assert count_unique(hpo_reps['simul-free']['d']) == (num_replicates *
                                                         num_experiments)
    assert count_unique(hpo_reps['simul-fix']['d']) == num_experiments

    # Test HPs
    assert count_unique(hpo_reps['ideal']['a']) == (num_experiments *
                                                    surrogate_budget)
    assert count_unique(hpo_reps['biased']['a']) == num_experiments
    assert count_unique(hpo_reps['simul-free']['a']) == (num_replicates *
                                                         num_experiments)
    assert count_unique(hpo_reps['simul-fix']['a']) == (num_replicates *
                                                        num_experiments)
    assert numpy.allclose(hpo_reps['simul-free']['a'].values,
                          hpo_reps['simul-fix']['a'].values)