Example #1
def run(uri,
        database,
        namespace,
        function,
        num_experiments,
        num_repro,
        objective,
        variables,
        defaults,
        params,
        resumable,
        sleep_time=60,
        save_dir='.'):

    client = new_client(uri, database)

    # Fold both the varied sources and the fixed parameters into the defaults.
    defaults.update(dict(list(variables.items()) + list(params.items())))

    configs = generate(num_experiments, num_repro, objective,
                       list(sorted(variables)), defaults, resumable)
    register(client, function, namespace, configs)

    # Block until workers have completed every registered trial.
    wait(client, namespace, sleep=sleep_time)

    data = fetch_results(client, namespace, configs, list(sorted(variables)),
                         params, defaults)

    save_results(namespace, data, save_dir)

    test(data, num_experiments, num_repro, objective, variables, resumable)
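run() above is the end-to-end driver: it merges the varied sources and fixed parameters into the defaults, registers one work item per configuration, blocks until workers drain the queue, then fetches, saves, and sanity-checks the results. A minimal invocation sketch; every concrete value below is hypothetical and only illustrates the expected argument shapes (foo is presumably the mock task function used by the tests below):

run('mongodb://localhost:27017',  # hypothetical message-queue/database URI
    'studies',                    # hypothetical database name
    'my-study',                   # namespace under which trials are registered
    foo,                          # task function, as in the register() calls below
    num_experiments=5,
    num_repro=10,
    objective='obj',
    variables={'a': 0, 'b': 1},   # sources of variation and their defaults
    defaults={'epoch': 5},
    params={'c': 2, 'd': 3},      # fixed parameters, folded into defaults
    resumable=True,
    sleep_time=10)
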
def test_register_uniques(client):
    defaults = {'a': 1000, 'b': 1001, 'c': 1002, 'd': 3}
    namespace = 'test'
    configs = generate(range(3), 'abc', defaults=defaults)
    variables = list('abc') + ['reference']
    namespaces = [env(namespace, v) for v in variables]

    assert fetch_registered(client, namespaces) == set()
    register(client, foo, namespace, configs)
    assert len(fetch_registered(client, namespaces)) == 4 * 3
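In this example the three varied sources 'a', 'b', 'c' plus the 'reference' configuration each get their own queue namespace, so 3 seeds yield 4 * 3 = 12 registered items. env() itself is not shown; judging from the f'test-{v}' lookup in test_remaining further down, it presumably just joins namespace and variable name. A sketch of that assumed behaviour, not the original implementation:

def env(namespace, variable):
    # Assumption inferred from trial_stats[f'test-{v}'] in test_remaining.
    return f'{namespace}-{variable}'
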
def test_fetch_results_non_completed(client):
    defaults = {'a': 0, 'b': 1}
    params = {'c': 2, 'd': 3}
    defaults.update({'epoch': 0})  # update, not rebind: keep 'a' and 'b' from above
    medians = ['a']
    configs = generate(range(2), 'ab', defaults=defaults)
    namespace = 'test'
    register(client, foo, namespace, configs)

    with pytest.raises(RuntimeError) as exc:
        fetch_results(client, namespace, configs, medians, params, defaults)

    assert exc.match('Not all trials are completed')
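No worker runs in this example, so every registered trial stays pending; fetch_results is expected to fail fast with the 'Not all trials are completed' RuntimeError instead of returning partial data.
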
def test_register_uniques(client):
    num_replicates = 10
    num_experiments = 5
    replicates = generate_mocked_replicates(num_replicates, num_experiments)

    def count_registered():
        status = fetch_vars_stats(client, NAMESPACE)
        return sum(status[key]['count'] for key in status.keys())

    assert count_registered() == 0
    register(client, foo, NAMESPACE, replicates)
    assert count_registered() == num_experiments * num_replicates * 3  # biased + simfree + simfixed
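The expected count is num_experiments * num_replicates * 3 = 5 * 10 * 3 = 150, since each experiment gets three replicate sets (biased, simul-free, simul-fixed).
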
def test_register_resume(client):
    defaults = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
    namespace = 'test'
    configs = generate(range(3), 'abc', defaults=defaults)
    variables = list('abc') + ['reference']
    namespaces = [env(namespace, v) for v in variables]

    assert fetch_registered(client, namespaces) == set()
    new_registered = register(client, foo, namespace, configs)
    assert len(fetch_registered(client, namespaces)) == 4 * 3
    assert fetch_registered(client, namespaces) == new_registered

    # Resume with 10 seeds per configs this time.
    configs = generate(range(10), 'abc', defaults=defaults)
    new_registered = register(client, foo, namespace, configs)
    assert len(fetch_registered(client, namespaces)) == 4 * 3 + 4 * 7
    assert fetch_registered(client, namespaces) != new_registered
    assert len(new_registered) == 4 * 7
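The resume arithmetic: the first pass registers 4 namespaces * 3 seeds = 12 items; re-registering with 10 seeds is idempotent for the first 3, so only 4 * 7 = 28 new items are added (40 total), and new_registered contains exactly those 28.
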
def test_fetch_results_corrupt_completed(client):
    defaults = {'a': 0, 'b': 1}
    params = {'c': 2, 'd': 3}
    defaults.update({'epoch': 0})  # update, not rebind: keep 'a' and 'b' from above
    medians = ['a']
    num_items = 2
    configs = generate(range(num_items), 'ab', defaults=defaults)
    namespace = 'test'
    register(client, foo, namespace, configs)

    for variable in configs.keys():
        for i in range(num_items):
            workitem = client.dequeue(WORK_QUEUE, env(namespace, variable))
            client.mark_actioned(WORK_QUEUE, workitem)

    with pytest.raises(RuntimeError) as exc:
        fetch_results(client, namespace, configs, medians, params, defaults)

    assert exc.match('Nothing found in result queue for trial')
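Here every work item is dequeued and marked actioned without any worker pushing a result, so the queues look complete while the result queue is empty; fetch_results is expected to detect the mismatch and raise 'Nothing found in result queue for trial'.
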
def test_register_resume(client):
    num_replicates = 10
    num_experiments = 5
    replicates = generate_mocked_replicates(num_replicates, num_experiments)

    def count_registered():
        status = fetch_vars_stats(client, NAMESPACE)
        return sum(status[key]['count'] for key in status.keys())

    assert count_registered() == 0
    register(client, foo, NAMESPACE, replicates)
    assert count_registered() == num_experiments * num_replicates * 3  # biased + simfree + simfixed

    # Resume with 10 more replicates per configs this time.
    replicates = generate_mocked_replicates(num_replicates + 10,
                                            num_experiments)
    register(client, foo, NAMESPACE, replicates)
    assert count_registered() == num_experiments * (num_replicates + 10) * 3
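After the resume the count is num_experiments * (num_replicates + 10) * 3 = 5 * 20 * 3 = 300; as in the registration tests above, the original replicates are not re-enqueued, only the 10 new ones per configuration.
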
def test_fetch_results_all_completed(client):
    defaults = {'a': 1000, 'b': 1001}
    params = {'c': 2, 'd': 3, 'epoch': 5}
    defaults.update(params)
    medians = ['a']
    num_items = 2
    configs = generate(range(num_items), 'ab', defaults=defaults)
    namespace = 'test'
    register(client, foo, namespace, configs)

    print(configs)

    # A single worker processes every registered trial to completion.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 1
    worker.run()

    print(fetch_vars_stats(client, namespace))

    data = fetch_results(client, namespace, configs, medians, params, defaults)

    assert data.medians == ['a']
    assert data.noise.values.tolist() == ['a', 'b']
    assert data.params.values.tolist() == ['c', 'd']
    assert data.order.values.tolist() == [0, 1]
    assert data.epoch.values.tolist() == list(range(params['epoch'] + 1))
    assert data.uid.shape == (3, 2)
    assert data.seed.values.tolist() == data.noise.values.tolist() + ['reference']
    assert data.a.values.tolist() == [[0, 1000, 1000], [1, 1000, 1000]]
    assert data.b.values.tolist() == [[1001, 0, 1001], [1001, 1, 1001]]
    assert data.c.values.tolist() == [[2, 2, 2], [2, 2, 2]]
    assert data.d.values.tolist() == [[3, 3, 3], [3, 3, 3]]

    assert (data.obj.loc[dict(order=0, seed='a')].values.tolist() == list(
        range(2002, 2002 + params['epoch'] + 1)))
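The asserts above show the shape of the fetched data: it behaves like an xarray-style Dataset with order, seed, and epoch dimensions, where seed spans the noise sources plus 'reference' and epoch runs from 0 to params['epoch'], hence 6 values per curve. A hypothetical follow-up slice under that same assumed interface:

# Assumed xarray-style access, mirroring the assert above.
curve = data.obj.loc[dict(order=1, seed='b')]
print(curve.values.tolist())  # one objective value per epoch 0..5
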
def test_remaining(client):
    defaults = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
    configs = generate(range(1), 'ab', defaults=defaults)
    namespace = 'test'
    register(client, foo, namespace, configs)

    def get_stats(variables):
        trial_stats = fetch_vars_stats(client, namespace)
        print(trial_stats)
        # Queue names follow the '<namespace>-<variable>' pattern, e.g. 'test-a'.
        return {v: trial_stats[f'test-{v}'] for v in variables}

    assert remaining(get_stats('ab'))

    assert remaining(get_stats('a'))
    workitem = client.dequeue(WORK_QUEUE, env(namespace, 'a'))
    assert remaining(get_stats('a'))
    client.mark_actioned(WORK_QUEUE, workitem)
    assert not remaining(get_stats('a'))

    assert remaining(get_stats('ab'))
    workitem = client.dequeue(WORK_QUEUE, env(namespace, 'b'))
    assert remaining(get_stats('ab'))
    client.mark_actioned(WORK_QUEUE, workitem)
    assert not remaining(get_stats('ab'))
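test_remaining pins down the contract of remaining(): it stays truthy while an item is merely dequeued (claimed but unfinished) and only becomes falsy once the item is marked actioned. A rough sketch of such a predicate; the 'actioned' field name is an assumption, only 'count' is visible in these tests:

def remaining(stats):
    # Assumed sketch: work remains while any queue has registered
    # items that were not yet marked actioned.
    return any(s['count'] > s.get('actioned', 0) for s in stats.values())
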
Example #10
def test_consolidate_results(client):
    num_experiments = 5
    num_replicates = 10
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 5}
    defaults = {'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, NAMESPACE)

    namespaces = register_hpos(client, NAMESPACE, foo, configs, defaults,
                               hpo_stats)

    # Run the surrogate HPOs to completion.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    data = defaultdict(dict)
    hpos_ready, remainings = fetch_hpos_valid_curves(client, namespaces,
                                                     variable_names, data)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    replicates = generate_replicates(ready_configs,
                                     data,
                                     variables,
                                     objective,
                                     hpo_budget,
                                     num_replicates,
                                     early_stopping=False)
    register(client, foo, NAMESPACE, replicates)

    # Run the registered replicates to completion.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()
    print(fetch_vars_stats(client, NAMESPACE))

    data = fetch_hpos_replicates(client, configs, replicates, variable_names,
                                 space, data)
    data = consolidate_results(data)

    assert len(data) == 1
    assert len(data['random_search']) == 4

    hpo_reps = data['random_search']
    assert hpo_reps['ideal'].obj.shape == (6, surrogate_budget,
                                           num_experiments)
    assert hpo_reps['biased'].obj.shape == (6, num_replicates, num_experiments)
    assert hpo_reps['simul-fix'].obj.shape == (6, num_replicates,
                                               num_experiments)
    assert hpo_reps['simul-free'].obj.shape == (6, num_replicates,
                                                num_experiments)

    def count_unique(attr):
        return len(set(attr.values.reshape(-1).tolist()))

    # Test sources of variation
    # NOTE: In ideal, source of variation will vary across ideal after consolidation
    #       but it stays fixed during the HPO itself
    assert count_unique(hpo_reps['ideal']['d']) == num_experiments
    assert count_unique(hpo_reps['biased']['d']) == (num_replicates *
                                                     num_experiments)
    assert count_unique(hpo_reps['simul-free']['d']) == (num_replicates *
                                                         num_experiments)
    assert count_unique(hpo_reps['simul-fix']['d']) == num_experiments

    # Test HPs
    assert count_unique(hpo_reps['ideal']['a']) == (num_experiments *
                                                    surrogate_budget)
    assert count_unique(hpo_reps['biased']['a']) == num_experiments
    assert count_unique(hpo_reps['simul-free']['a']) == (num_replicates *
                                                         num_experiments)
    assert count_unique(hpo_reps['simul-fix']['a']) == (num_replicates *
                                                        num_experiments)
    assert numpy.allclose(hpo_reps['simul-free']['a'].values,
                          hpo_reps['simul-fix']['a'].values)
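
The shape arithmetic: Fidelity(5, 5, name='epoch') yields epochs 0 through 5, hence the leading dimension of 6 in every obj array; 'ideal' keeps one curve per surrogate HPO trial (surrogate_budget = 10) per experiment, while the three replication strategies keep num_replicates = 10 curves per experiment. The final allclose confirms that simul-free and simul-fix reuse the same HPs ('a') and differ only in how the source of variation ('d') is resampled.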