Example #1
0
def test_fit_model():
    """Smoke test: fit a surrogate model on synthetic HPO data end to end.

    Builds a small random-search study, converts the synthetic data to an
    (X, y) regression problem and checks that ``fit_model`` completes
    without raising.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective)

    # No assertion needed: completing without an exception is the test.
    fit_model(X, y, space, seed=1)
Example #2
0
def test_generate_hpos():
    """generate_hpos builds one config set per HPO with deterministic
    namespaces and seeds.

    Grid searches are deterministic (one config each); stochastic HPOs get
    one namespaced config per experiment index.
    """
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]
    num_experiments = 10
    budget = 200
    fidelity = 'fidelity(1, 10)'
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'lr': 'uniform(-1, 1)'
    }
    defaults = {'d': 1, 'e': 2}

    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)

    assert len(configs) == len(hpos)
    # Deterministic searches only need a single run.
    assert len(configs['grid_search']) == 1
    assert len(configs['nudged_grid_search']) == 1
    assert configs['grid_search'][f'{NAMESPACE}-grid-search-p-4'][
        'name'] == 'grid_search'
    assert configs['grid_search'][f'{NAMESPACE}-grid-search-p-4'].get(
        'nudge') is None
    # The nudged variant is the same HPO with a 'nudge' factor set.
    assert (configs['nudged_grid_search']
            [f'{NAMESPACE}-grid-search-nudged-p-4']['name'] == 'grid_search')
    assert configs['nudged_grid_search'][
        f'{NAMESPACE}-grid-search-nudged-p-4']['nudge'] == 0.5
    # Stochastic HPOs: one namespace per experiment seed.
    for hpo in hpos[2:]:
        for i in range(num_experiments):
            if hpo == 'noisy_grid_search':
                namespace = '{}-noisy-grid-search-p-4-s-{}'.format(
                    NAMESPACE, i)
            else:
                namespace = '{}-{}-s-{}'.format(NAMESPACE,
                                                hpo.replace('_', '-'), i)
            if hpo == 'bayesopt':
                # bayesopt is backed by 'robo' and draws its four seeds in
                # order from a RandomState seeded with the experiment index.
                rng = numpy.random.RandomState(i)
                assert configs[hpo][namespace]['name'] == 'robo'
                assert configs[hpo][namespace]['model_seed'] == rng.randint(
                    2**30)
                assert configs[hpo][namespace]['prior_seed'] == rng.randint(
                    2**30)
                assert configs[hpo][namespace]['init_seed'] == rng.randint(
                    2**30)
                assert configs[hpo][namespace][
                    'maximizer_seed'] == rng.randint(2**30)
            else:
                assert configs[hpo][namespace]['name'] == hpo
                assert configs[hpo][namespace]['seed'] == i
Example #3
0
def test_generate_biased_replicates_last_epoch():
    """Biased replicates reuse the best trial's HPs and only re-sample the
    variables (sources of variation) from the config's seed."""
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    replicates = generate_biased_replicates(
        data,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0'],
        variables,
        objective,
        num_replicates,
        hpo_budget,
        early_stopping=False)

    # NOTE(review): 6 is presumably the index of the best trial in the data
    # produced by build_data with these settings — confirm if build_data
    # or the seeds change.
    best_trial_index = 6
    rng = numpy.random.RandomState(
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])
    for replicate in replicates:
        should_be = copy.deepcopy(defaults)
        # HPs must be copied verbatim from the best trial.
        for param in space.keys():
            assert replicate[param] == float(
                data.sel(order=best_trial_index)[param].values)
            should_be[param] = replicate[param]
        # Variables must be freshly sampled from the config's RandomState.
        for variable in variables:
            assert replicate[variable] == rng.randint(2**30)
            should_be[variable] = replicate[variable]

        # The uid must identify the full resulting configuration.
        assert replicate['uid'] == compute_identity(should_be, IDENTITY_SIZE)
Example #4
0
def test_generate_simulated_fix():
    """Simulated 'fix' replicates vary the HPs between replicates while
    keeping the (randomized) variables fixed to the config defaults."""
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    config = configs['random_search'][f'{NAMESPACE}-random-search-s-0']
    # Make sure the defaults have been replaced by randomized seeds
    assert config['defaults']['d'] != defaults['d']
    assert config['defaults']['e'] != defaults['e']

    replicates = generate_simulated_fix(data,
                                        config,
                                        variables,
                                        objective,
                                        hpo_budget,
                                        num_replicates,
                                        early_stopping=False)

    assert len(replicates) == num_replicates
    for i in range(1, num_replicates):
        # HPs and uid differ from the first replicate...
        assert replicates[i]['a'] != replicates[0]['a']
        assert replicates[i]['b'] != replicates[0]['b']
        assert replicates[i]['c'] != replicates[0]['c']
        assert replicates[i]['uid'] != replicates[0]['uid']
        # ...but the randomized variables stay fixed at the config defaults.
        assert replicates[i]['d'] == config['defaults']['d']
        assert replicates[i]['e'] == config['defaults']['e']
Example #5
0
def generate_mocked_replicates(num_replicates, num_experiments=5):
    """Build replicates from synthetic HPO data, bypassing the message queue.

    Marks every random-search HPO as ready, attaches synthetic result data
    for each, and returns whatever ``generate_replicates`` produces for
    ``num_replicates`` replicates over ``num_experiments`` runs.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 3}
    defaults = {'d': 1, 'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)
    randomize_seeds(configs['random_search'], variables, seed)

    # Only the namespaces are needed here, so iterate keys directly
    # instead of .items().
    hpos_ready = dict(random_search=[])
    data = dict(random_search=dict())
    for hpo_namespace in configs['random_search']:
        hpos_ready['random_search'].append(hpo_namespace)
        data['random_search'][hpo_namespace] = build_data(
            surrogate_budget, variables, defaults, space)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    return generate_replicates(ready_configs,
                               data,
                               variables,
                               objective,
                               hpo_budget,
                               num_replicates,
                               early_stopping=False)
Example #6
0
def test_simulate_hpo():
    """simulate_hpo on a fitted surrogate returns one sampled configuration
    covering exactly the dimensions of the search space."""
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective)

    model = fit_model(X, y, space, seed=1)

    # Simulate an HPO run against the surrogate; the sample must have one
    # value per search-space dimension.
    sample = simulate_hpo(
        model, space, hpo_budget,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])
    assert sample.keys() == space.keys()
Example #7
0
def test_reset_pool_size():
    """After reset_pool_size, every random-search config has pool_size None."""
    search_space = {
        name: 'uniform(lower=-1, upper=1)'
        for name in ('a', 'b', 'c', 'd')
    }
    defaults = {'d': 1, 'e': 2}
    n_experiments = 5
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(list(range(n_experiments)), ['random_search'],
                            budget=200,
                            fidelity=fidelity,
                            search_space=search_space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    reset_pool_size(configs['random_search'])

    assert all(config['pool_size'] is None
               for config in configs['random_search'].values())
Example #8
0
def test_randomize_seeds():
    """randomize_seeds fills each config's defaults with seeded values drawn
    from one shared RandomState; compute_id controls whether a uid is added."""
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = ['d', 'e']
    defaults = {}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=200,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed, compute_id=True)

    # Re-play the same RandomState: values must have been consumed in order
    # across all configs, and the uid must hash the remaining defaults.
    rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for variable in variables:
            assert config['defaults'][variable] == rng.randint(2**30)
        uid = config['defaults'].pop('uid')
        assert uid == compute_identity(config['defaults'], IDENTITY_SIZE)

    # With compute_id=False the same values are drawn but no uid is added.
    randomize_seeds(configs['random_search'],
                    variables,
                    seed,
                    compute_id=False)
    rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for variable in variables:
            assert config['defaults'][variable] == rng.randint(2**30)
        assert 'uid' not in config['defaults']
Example #9
0
def test_register_hpos(client):
    """register_hpos enqueues exactly one HPO_ITEM message per generated
    config, carrying the config as the HPO kwargs and defaults as work
    kwargs."""
    namespace = 'test-hpo'
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]
    budget = 200
    fidelity = Fidelity(1, 10, name='d').to_dict()
    num_experiments = 2
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'd': 'uniform(-1, 1)'
    }
    defaults = {'e': 2}

    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)

    stats = {}

    # The queue starts empty for this namespace.
    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=HPO_ITEM) == 0
    new_namespaces = register_hpos(client, namespace, foo, configs, defaults,
                                   stats)
    assert len(set(new_namespaces)) == len(configs)
    for hpo, hpo_namespaces in new_namespaces.items():
        for hpo_namespace in hpo_namespaces:
            messages = client.monitor().messages(WORK_QUEUE,
                                                 hpo_namespace,
                                                 mtype=HPO_ITEM)
            assert len(messages) == 1
            assert messages[0].message['hpo']['kwargs'] == configs[hpo][
                hpo_namespace]
            assert messages[0].message['work']['kwargs'] == defaults
Example #10
0
def test_convert_data_to_xy():
    """convert_data_to_xy flattens trial results into an (X, y) regression
    problem over the search-space dimensions."""
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective, early_stopping=False)

    # Columns follow the search-space dimensions; the loguniform dimension
    # ('c') is mapped into log-space.
    assert numpy.array_equal(X[:, 0], data['a'].values.reshape(-1))
    assert numpy.array_equal(X[:, 1], data['b'].values.reshape(-1))
    assert numpy.array_equal(X[:, 2], numpy.log(data['c'].values.reshape(-1)))
    # Without early stopping, the target is the objective at the last epoch.
    assert numpy.array_equal(y,
                             data[objective].isel(epoch=-1).values.reshape(-1))
    assert y.shape == (surrogate_budget, )
Example #11
0
def test_consolidate_results(client):
    """End-to-end: run HPOs and replicates through the worker, then check
    that consolidate_results produces one dataset per replication type with
    the expected shapes and sources of variation."""
    num_experiments = 5
    num_replicates = 10
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 5}
    defaults = {'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, NAMESPACE)

    namespaces = register_hpos(client, NAMESPACE, foo, configs, defaults,
                               hpo_stats)

    # First worker pass: execute the registered HPO trials. Short timeout
    # and no retries keep the test fast.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    data = defaultdict(dict)
    hpos_ready, remainings = fetch_hpos_valid_curves(client, namespaces,
                                                     variable_names, data)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    replicates = generate_replicates(ready_configs,
                                     data,
                                     variables,
                                     objective,
                                     hpo_budget,
                                     num_replicates,
                                     early_stopping=False)
    register(client, foo, NAMESPACE, replicates)

    # Second worker pass: execute the replicate trials.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()
    print(fetch_vars_stats(client, NAMESPACE))

    # NOTE(review): run() calls fetch_hpos_replicates(..., space, rep_types)
    # and consolidate_results(data_hpo, data_replicates, rep_types); the
    # signatures used here differ — confirm which API version this targets.
    data = fetch_hpos_replicates(client, configs, replicates, variable_names,
                                 space, data)
    data = consolidate_results(data)

    # One HPO, four replication types.
    assert len(data) == 1
    assert len(data['random_search']) == 4

    # obj axes are presumably (epoch, trial/replicate, experiment); the
    # epoch axis is 6 with Fidelity(5, 5) — confirm against build/fetch code.
    hpo_reps = data['random_search']
    assert hpo_reps['ideal'].obj.shape == (6, surrogate_budget,
                                           num_experiments)
    assert hpo_reps['biased'].obj.shape == (6, num_replicates, num_experiments)
    assert hpo_reps['simul-fix'].obj.shape == (6, num_replicates,
                                               num_experiments)
    assert hpo_reps['simul-free'].obj.shape == (6, num_replicates,
                                                num_experiments)

    def count_unique(attr):
        # Number of distinct values across the whole array.
        return len(set(attr.values.reshape(-1).tolist()))

    # Test sources of variation
    # NOTE: In ideal, source of variation will vary across ideal after consolidation
    #       but it stays fixed during the HPO itself
    assert count_unique(hpo_reps['ideal']['d']) == num_experiments
    assert count_unique(hpo_reps['biased']['d']) == (num_replicates *
                                                     num_experiments)
    assert count_unique(hpo_reps['simul-free']['d']) == (num_replicates *
                                                         num_experiments)
    assert count_unique(hpo_reps['simul-fix']['d']) == num_experiments

    # Test HPs
    assert count_unique(hpo_reps['ideal']['a']) == (num_experiments *
                                                    surrogate_budget)
    assert count_unique(hpo_reps['biased']['a']) == num_experiments
    assert count_unique(hpo_reps['simul-free']['a']) == (num_replicates *
                                                         num_experiments)
    assert count_unique(hpo_reps['simul-fix']['a']) == (num_replicates *
                                                        num_experiments)
    # simul-free and simul-fix share the same simulated HP samples.
    assert numpy.allclose(hpo_reps['simul-free']['a'].values,
                          hpo_reps['simul-fix']['a'].values)
Example #12
0
def run(uri,
        database,
        namespace,
        function,
        num_experiments,
        num_simuls,
        fidelity,
        space,
        objective,
        variables,
        defaults,
        num_replicates=None,
        sleep_time=60,
        do_full_train=False,
        save_dir='.',
        seed=1,
        register=True,
        rep_types=REP_TYPES):
    """Run the variance study end to end: register HPOs, generate replicates
    as HPO results become available, then fetch, consolidate and save.

    Parameters
    ----------
    uri, database: connection info passed to ``new_client``.
    namespace: base namespace for all registered HPOs and replicates.
    function: the work function registered with each HPO.
    num_experiments: number of ideal runs; must be even (two ideal runs are
        carved out of each surrogate run, see comment below).
    num_simuls: number of configs to replicate; must be <= num_experiments/2.
    fidelity: fidelity dict; defaults to a single epoch when None.
    space, objective, variables, defaults: study settings.
    num_replicates: defaults to ``num_experiments`` when None.
    sleep_time: polling interval (seconds) while HPOs are running.
    do_full_train: unused here; kept for interface compatibility.
    save_dir: directory passed to ``save_results``.
    seed: seed used to randomize the variables of each config.
    register: when False, nothing is pushed to the queue (dry run).
    rep_types: replication types to generate.
    """

    hpo_budget = 100
    surrogate_budget = 200

    if num_replicates is None:
        num_replicates = num_experiments

    # We use 200 trials to fit the surrogate models (surrogate_budget is 200)
    # but we only need 100 for the ideal (hpo_budget is 100)
    # therefore, since num_simuls is at least half smaller than number of
    # replicates, we can run only (num_replicates / 2) hpo runs and use
    # first half and second 100 half as 2 separate ideal runs.
    # This is possible since we are using random search.

    assert (num_experiments % 2) == 0
    assert num_simuls <= (num_experiments / 2)

    num_ideal = num_experiments // 2

    hpo = 'random_search'

    # TODO
    # for each repetition, vary all sources of variations
    # when one hpo is done, create all biased and simulations

    if fidelity is None:
        fidelity = Fidelity(1, 1, name='epoch').to_dict()

    client = new_client(uri, database)

    configs = generate_hpos(list(range(num_ideal)), [hpo], surrogate_budget,
                            fidelity, space, namespace, defaults)

    to_replicate = get_configs_to_replicate(configs, num_simuls)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, namespace)

    namespaces = register_hpos(client,
                               namespace,
                               function,
                               configs,
                               defaults,
                               hpo_stats,
                               register=register)
    remainings = namespaces

    data_hpo = defaultdict(dict)
    all_replicates = dict(random_search=dict())
    # Poll until every registered HPO namespace has completed.
    while sum(remainings.values(), []):
        print_status(client, namespace, namespaces)
        hpos_ready, remainings = fetch_hpos_valid_curves(
            client, remainings, variable_names, data_hpo)

        ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

        replicates = generate_replicates(ready_configs,
                                         data_hpo,
                                         variables,
                                         objective,
                                         hpo_budget,
                                         num_replicates,
                                         early_stopping=False,
                                         rep_types=rep_types)

        # Fix: registered_replicates was only bound when register is True,
        # raising UnboundLocalError below on dry runs; default to None so
        # the sleep branch is taken in that case.
        registered_replicates = None
        if register:
            registered_replicates = register_all_replicates(
                client, function, namespace, replicates)

        if replicates.get('random_search'):
            all_replicates['random_search'].update(replicates['random_search'])
        # Sleep only when there is still work pending and nothing new was
        # registered this round.
        if sum(remainings.values(), []) and not registered_replicates:
            time.sleep(sleep_time)

    wait(client, namespace, sleep=sleep_time)

    data_replicates = fetch_hpos_replicates(client, configs, all_replicates,
                                            variable_names, space, rep_types)

    # Save valid results
    data = consolidate_results(data_hpo, data_replicates, rep_types)
    save_results(namespace, data, save_dir)
Example #13
0
def test_register_hpos_resume(client, monkeypatch):
    """Resuming register_hpos only registers configs not already present in
    stats, and the new configs end up queued correctly."""
    namespace = 'test-hpo'
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]
    budget = 200
    fidelity = Fidelity(1, 10, name='d').to_dict()
    num_experiments = 2
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'd': 'uniform(-1, 1)'
    }
    defaults = {}
    stats = {}

    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)

    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=HPO_ITEM) == 0
    new_namespaces = register_hpos(client, namespace, foo, configs, defaults,
                                   stats)
    assert len(set(new_namespaces)) == len(configs)

    print(new_namespaces)

    # Mark everything registered so far as known, then build a superset of
    # configs to resume with.
    stats = {namespace: {} for namespace in sum(new_namespaces.values(), [])}

    more_configs = generate_hpos(range(num_experiments + 2), hpos, budget,
                                 fidelity, search_space, NAMESPACE, defaults)

    # Record which namespaces actually get (re-)registered.
    new_namespaces = defaultdict(list)

    def mock_register_hpo(client, namespace, function, config, defaults):
        new_namespaces[config['name']].append(namespace)
        return register_hpo(client, namespace, function, config, defaults)

    def flatten_configs(confs):
        # All hpo-namespace keys across every HPO.
        return sum((list(c.keys()) for c in confs.values()), [])

    monkeypatch.setattr('olympus.studies.hpo.main.register_hpo',
                        mock_register_hpo)
    register_hpos(client, namespace, foo, more_configs, defaults, stats)
    # Only the configs that were not in stats should have been registered.
    assert (len(set(sum(new_namespaces.values(),
                        []))) == len(flatten_configs(more_configs)) -
            len(flatten_configs(configs)))

    # Verify new registered configs
    for hpo, hpo_configs in more_configs.items():
        for hpo_namespace, config in hpo_configs.items():
            messages = client.monitor().messages(WORK_QUEUE,
                                                 hpo_namespace,
                                                 mtype=HPO_ITEM)
            assert len(messages) == 1
            assert messages[0].message['hpo']['kwargs'] == config