Ejemplo n.º 1
0
def test_fit_model():
    """Smoke test: ``fit_model`` runs on data built from a mixed search space.

    No assertion is made on the fitted model. The test only verifies that the
    whole pipeline (HPO config generation, seed randomization, data building,
    conversion to ``X``/``y`` and fitting) completes without raising.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective)

    fit_model(X, y, space, seed=1)
Ejemplo n.º 2
0
def main(**kwargs):
    """Run a classification experiment, searching hyper parameters if needed.

    The keyword arguments are displayed, wrapped in a ``Namespace`` and also
    forwarded as-is to ``classification_baseline``. When the task reports a
    non-empty search space, a hyperband ``HPOptimizer`` is fitted on
    ``validation_accuracy`` first; the final task is then initialised with the
    resulting parameters, trained for ``args.epochs`` and reported.
    """
    show_dict(kwargs)

    args = Namespace(**kwargs)
    set_verbose_level(args.verbose)

    device = fetch_device()
    experiment_name = args.experiment_name.format(**kwargs)

    # Partial results are saved in this storage
    storage_folder = option('state.storage', '/tmp/olympus/classification')
    state_storage = StateStorage(folder=storage_folder)

    def main_task():
        """Build a fresh task, attaching a metric logger when a URI is given."""
        task = classification_baseline(device=device,
                                       storage=state_storage,
                                       **kwargs)
        if args.uri is None:
            return task

        task.metrics.append(
            metric_logger(args.uri, args.database, experiment_name))
        return task

    space = main_task().get_space()

    # A non-empty space means some hyper parameters still have to be searched
    params = {}
    if not space:
        print('No hyper parameter missing, running the experiment...')
    else:
        show_dict(space)
        fidelity = Fidelity(args.min_epochs, args.epochs).to_dict()
        hpo = HPOptimizer('hyperband', space=space, fidelity=fidelity)

        hpo_task = HPO(hpo, main_task)
        hpo_task.metrics.append(ElapsedRealTime())

        trial = hpo_task.fit(objective='validation_accuracy')
        print(f'HPO is done, objective: {trial.objective}')
        params = trial.params

    # Run the experiment with the best hyper parameters
    # -------------------------------------------------
    if params is None:
        return

    # Train using train + valid for the final result
    final_task = classification_baseline(device=device,
                                         **kwargs,
                                         hpo_done=True)
    final_task.init(**params)
    final_task.fit(epochs=args.epochs)

    separator = '=' * 40
    print(separator)
    print('Final Trial Results')
    show_dict(flatten(params))
    final_task.report(pprint=True, print_fun=print)
    print(separator)
Ejemplo n.º 3
0
def test_fetch_hpos_valid_results_first_time(client):
    """Fetch the curves of two freshly executed HPOs in a single call.

    Two identical HPOs are registered under different namespaces and run by
    one worker. The fetched datasets must expose the expected coordinates and
    shapes, and both datasets must compare equal.
    """
    hpo_config = copy.deepcopy(CONFIG)
    num_trials = 5
    hpo_config['count'] = num_trials
    hpo_config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    for suffix in ('1', '2'):
        register_hpo(client, NAMESPACE + suffix, foo, hpo_config, {'e': 2})

    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 1
    worker.run()

    namespaces = {f'hpo{i}': [f'{NAMESPACE}{i}'] for i in range(1, 3)}

    data = defaultdict(dict)
    fetch_hpos_valid_curves(client, namespaces, ['e'], data)

    assert len(data) == 2
    assert len(data['hpo1']) == 1
    assert len(data['hpo2']) == 1

    namespace = f'{NAMESPACE}1'
    first = data['hpo1'][namespace]
    assert first.attrs['namespace'] == namespace
    assert first.epoch.values.tolist() == [0, 1]
    assert first.order.values.tolist() == list(range(num_trials))
    assert first.seed.values.tolist() == [1]
    assert first.params.values.tolist() == list('abcd')
    assert first.noise.values.tolist() == ['e']

    expected_shape = (2, num_trials, 1)
    assert first.obj.shape == expected_shape
    assert first.valid.shape == expected_shape

    assert data['hpo1'][namespace] == data['hpo2'][f'{NAMESPACE}2']
Ejemplo n.º 4
0
def test_generate_biased_replicates_last_epoch():
    """Check biased replicates generated with ``early_stopping=False``.

    Every replicate must carry the hyper parameters of the trial at order
    index 6 of the mocked data, seeds drawn in sequence from a ``RandomState``
    seeded with the HPO config seed, and a ``uid`` matching the identity of
    the resulting parameter set.
    """
    space = {name: 'uniform(lower=-1, upper=1)' for name in ('a', 'b', 'c')}
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    hpo_config = configs['random_search'][f'{NAMESPACE}-random-search-s-0']
    replicates = generate_biased_replicates(data,
                                            hpo_config,
                                            variables,
                                            objective,
                                            num_replicates,
                                            hpo_budget,
                                            early_stopping=False)

    best_trial_index = 6
    rng = numpy.random.RandomState(hpo_config['seed'])
    for replicate in replicates:
        expected = copy.deepcopy(defaults)

        # Hyper parameters are copied from the expected best trial
        for name in space:
            best_value = data.sel(order=best_trial_index)[name].values
            assert replicate[name] == float(best_value)
            expected[name] = replicate[name]

        # Sources of variation are re-drawn from the config-seeded generator
        for name in variables:
            assert replicate[name] == rng.randint(2**30)
            expected[name] = replicate[name]

        assert replicate['uid'] == compute_identity(expected, IDENTITY_SIZE)
Ejemplo n.º 5
0
def test_plot(client):
    """Smoke test: ``plot`` runs on the curves of a 10-trial HPO.

    The figure is written to ``test.png``; nothing is asserted on it.
    """
    trial_count = 10
    hpo_config = copy.deepcopy(CONFIG)
    hpo_config['count'] = trial_count
    hpo_config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE, foo, hpo_config, {'e': 2})

    # Execute the registered HPO
    trial_worker = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    trial_worker.max_retry = 0
    trial_worker.run()

    curves = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    plot(hpo_config['space'], 'obj', curves, 'test.png')
Ejemplo n.º 6
0
def test_generate_simulated_fix():
    """Check replicates produced by ``generate_simulated_fix``.

    The expected number of replicates must be generated. Each replicate after
    the first must differ from the first one in every hyper parameter and in
    its ``uid``, while keeping the randomized ``d``/``e`` defaults of the HPO
    config.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    config = configs['random_search'][f'{NAMESPACE}-random-search-s-0']
    randomized = config['defaults']
    # Make sure the defaults have been replaced by randomized seeds
    assert randomized['d'] != defaults['d']
    assert randomized['e'] != defaults['e']

    replicates = generate_simulated_fix(data,
                                        config,
                                        variables,
                                        objective,
                                        hpo_budget,
                                        num_replicates,
                                        early_stopping=False)

    assert len(replicates) == num_replicates
    for index in range(1, num_replicates):
        replicate = replicates[index]
        reference = replicates[0]
        for key in ('a', 'b', 'c', 'uid'):
            assert replicate[key] != reference[key]
        assert replicate['d'] == config['defaults']['d']
        assert replicate['e'] == config['defaults']['e']
Ejemplo n.º 7
0
def test_save_load_results(client):
    """Round-trip: curves saved with ``save_results`` can be loaded back.

    Results are written to the current directory and the loaded value only
    has to be truthy.
    """
    trial_count = 2
    hpo_config = copy.deepcopy(CONFIG)
    hpo_config['count'] = trial_count
    hpo_config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE, foo, hpo_config, {'e': 2})

    # Execute the registered HPO
    trial_worker = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    trial_worker.max_retry = 0
    trial_worker.run()

    curves = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    save_results(NAMESPACE, curves, '.')

    loaded = load_results(NAMESPACE, '.')
    assert loaded
Ejemplo n.º 8
0
def generate_mocked_replicates(num_replicates, num_experiments=5):
    """Generate replicates from mocked random-search data.

    Random-search HPO configs are generated for ``num_experiments``
    experiments, all of them are marked as ready with data produced by
    ``build_data``, and the result of ``generate_replicates`` (called with
    ``early_stopping=False``) is returned.
    """
    space = {name: 'uniform(lower=-1, upper=1)' for name in ('a', 'b', 'c')}
    variables = {'d': 3}
    defaults = {'d': 1, 'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)
    randomize_seeds(configs['random_search'], variables, seed)

    # Pretend every HPO is complete and attach mocked data to each of them
    ready_namespaces = []
    mocked_data = {}
    for hpo_namespace in configs['random_search']:
        ready_namespaces.append(hpo_namespace)
        mocked_data[hpo_namespace] = build_data(surrogate_budget, variables,
                                                defaults, space)

    hpos_ready = {'random_search': ready_namespaces}
    data = {'random_search': mocked_data}

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    return generate_replicates(ready_configs,
                               data,
                               variables,
                               objective,
                               hpo_budget,
                               num_replicates,
                               early_stopping=False)
Ejemplo n.º 9
0
def parallel_hpo(**kwargs):
    """Run one worker of a parallel hyperband search.

    ``kwargs`` seed an ``argparse.Namespace`` that is then completed from the
    command line (``--rank``, ``--uri``, ``--experiment``). Each config
    yielded by the ``ParallelHPO`` is trained with ``train`` and its result is
    reported back; the best trial, if any, is printed at the end.
    """
    args = argparse.Namespace(**kwargs)

    # Arguments required for the HPO workers to synchronize
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--rank', type=int,
        help='Worker rank, use to initialize the HPO')
    parser.add_argument(
        '--uri', type=str, default='cockroach://192.168.0.1:8123',
        help='Resource URI pointing to the database')
    parser.add_argument(
        '--experiment', type=str, default='classification',
        help='Database namespace to use for this experiment')

    # Parsed values are written into the namespace built from kwargs
    parser.parse_args(namespace=args)

    search_space = {
        'a': 'uniform(0, 1)',
        'b': 'uniform(0, 1)',
        'c': 'uniform(0, 1)',
        'lr': 'uniform(0, 1)'
    }

    fidelity = Fidelity(1, 30).to_dict()
    optimizer = HPOptimizer('hyperband', fidelity=fidelity, space=search_space)

    # Wrap your HPO into Olympus ParallelHPO
    hpo = ParallelHPO(optimizer,
                      rank=args.rank,
                      uri=args.uri,
                      experiment=args.experiment)

    # Iterate over your configs distributed across workers
    for config in hpo:
        print('Worker: ', args.rank, config)
        hpo.observe(config, train(**config))

    # get the result of the HPO
    print(f'Worker {args.rank} is done')
    best_trial = hpo.result()
    if best_trial is None:
        return

    print(best_trial.params, best_trial.objective)
Ejemplo n.º 10
0
def test_fetch_hpo_valid_results_no_epochs(client):
    """Check the dataset fetched for an HPO using a ``Fidelity(1, 1)``.

    The dataset must carry the namespace attribute, the expected
    epoch/order/seed/params/noise coordinates and objective arrays of shape
    ``(2, num_trials, 1)``.
    """
    trial_count = 5
    hpo_config = copy.deepcopy(CONFIG)
    hpo_config['count'] = trial_count
    hpo_config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE, foo, hpo_config, {'e': 2})

    # Execute the registered HPO
    trial_worker = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    trial_worker.max_retry = 0
    trial_worker.run()

    curves = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    assert curves.attrs['namespace'] == NAMESPACE
    assert curves.epoch.values.tolist() == [0, 1]
    assert curves.order.values.tolist() == list(range(trial_count))
    assert curves.seed.values.tolist() == [1]
    assert curves.params.values.tolist() == list('abcd')
    assert curves.noise.values.tolist() == ['e']

    expected_shape = (2, trial_count, 1)
    assert curves.obj.shape == expected_shape
    assert curves.valid.shape == expected_shape
Ejemplo n.º 11
0
def test_simulate_hpo():
    """Check that ``simulate_hpo`` returns one value per search-space dimension.

    A surrogate model is fitted on mocked data, then an HPO is simulated with
    the seed of the first random-search config. Only the keys of the returned
    sample are verified.
    """
    # fit a model
    # simulate
    # test what? ...
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective)

    model = fit_model(X, y, space, seed=1)

    sample = simulate_hpo(
        model, space, hpo_budget,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])
    assert sample.keys() == space.keys()
Ejemplo n.º 12
0
def test_reset_pool_size():
    """``reset_pool_size`` must set ``pool_size`` to None in every config."""
    space = {name: 'uniform(lower=-1, upper=1)' for name in 'abcd'}
    defaults = {'d': 1, 'e': 2}
    num_experiments = 5
    hpo = 'random_search'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=200,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    random_search_configs = configs['random_search']
    reset_pool_size(random_search_configs)

    assert all(config['pool_size'] is None
               for config in random_search_configs.values())
Ejemplo n.º 13
0
def test_randomize_seeds():
    """Check ``randomize_seeds`` with and without identity computation.

    The seeds written in each config's defaults must match, in order, the
    integers drawn from a ``RandomState`` created with the same seed. With
    ``compute_id=True`` a ``uid`` matching the identity of the other defaults
    is expected; with ``compute_id=False`` no ``uid`` may be present.
    """
    space = {name: 'uniform(lower=-1, upper=1)' for name in ('a', 'b', 'c')}
    variables = ['d', 'e']
    defaults = {}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=200,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)
    random_search_configs = configs['random_search']

    # First pass: seeds are randomized and an identity is computed
    randomize_seeds(random_search_configs, variables, seed, compute_id=True)

    reference_rng = numpy.random.RandomState(seed)
    for config in random_search_configs.values():
        config_defaults = config['defaults']
        for name in variables:
            assert config_defaults[name] == reference_rng.randint(2**30)
        # The uid is removed so the identity is computed on the rest only
        uid = config_defaults.pop('uid')
        assert uid == compute_identity(config_defaults, IDENTITY_SIZE)

    # Second pass: same seeds expected, but no identity this time
    randomize_seeds(random_search_configs, variables, seed, compute_id=False)

    reference_rng = numpy.random.RandomState(seed)
    for config in random_search_configs.values():
        config_defaults = config['defaults']
        for name in variables:
            assert config_defaults[name] == reference_rng.randint(2**30)
        assert 'uid' not in config_defaults
Ejemplo n.º 14
0
def test_register_hpos(client):
    """Check that ``register_hpos`` queues one HPO message per namespace.

    The work queue must hold no HPO item for the namespace before
    registration. Afterwards, every returned HPO namespace must hold exactly
    one HPO message whose HPO kwargs are the generated config and whose work
    kwargs are the defaults.
    """
    namespace = 'test-hpo'
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]
    budget = 200
    fidelity = Fidelity(1, 10, name='d').to_dict()
    num_experiments = 2
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'd': 'uniform(-1, 1)'
    }
    defaults = {'e': 2}

    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)

    stats = {}

    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=HPO_ITEM) == 0
    new_namespaces = register_hpos(client, namespace, foo, configs, defaults,
                                   stats)
    assert len(set(new_namespaces)) == len(configs)
    for hpo, hpo_namespaces in new_namespaces.items():
        for hpo_namespace in hpo_namespaces:
            messages = client.monitor().messages(WORK_QUEUE,
                                                 hpo_namespace,
                                                 mtype=HPO_ITEM)
            assert len(messages) == 1
            assert messages[0].message['hpo']['kwargs'] == configs[hpo][
                hpo_namespace]
            assert messages[0].message['work']['kwargs'] == defaults
Ejemplo n.º 15
0
def test_convert_xarray_to_scipy_results(client):
    """Check the conversion of fetched curves into a scipy-like result.

    The reported optimum must be the trial with the smallest objective at
    epoch index 1. ``a``, ``b`` and ``c`` are compared as-is while ``d`` is
    expected in log space. One iterate per trial is expected.
    """
    trial_count = 10
    hpo_config = copy.deepcopy(CONFIG)
    hpo_config['count'] = trial_count
    hpo_config['fidelity'] = Fidelity(1, 1, name='epoch').to_dict()

    register_hpo(client, NAMESPACE, foo, hpo_config, {'e': 2})

    # Execute the registered HPO
    trial_worker = TrialWorker(URI, DATABASE, 0, NAMESPACE)
    trial_worker.max_retry = 0
    trial_worker.run()

    curves = fetch_hpo_valid_curves(client, NAMESPACE, ['e'])

    results = xarray_to_scipy_results(hpo_config['space'], 'obj', curves)

    best = numpy.argmin(curves.obj.values[1, :, 0])

    for dim, name in enumerate('abc'):
        assert results.x[dim] == getattr(curves, name).values[best, 0]
    assert results.x[3] == numpy.log(curves.d.values[best, 0])
    assert results.fun == curves.obj.values[1, best, 0]
    assert len(results.x_iters) == trial_count
Ejemplo n.º 16
0
def test_convert_data_to_xy():
    """Check the ``X``/``y`` arrays built by ``convert_data_to_xy``.

    With ``early_stopping=False``, the columns of ``X`` must hold the
    flattened values of ``a`` and ``b`` and the log of ``c`` (the loguniform
    dimension), and ``y`` must hold the objective at the last epoch, one
    entry per trial of the surrogate budget.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective, early_stopping=False)

    assert numpy.array_equal(X[:, 0], data['a'].values.reshape(-1))
    assert numpy.array_equal(X[:, 1], data['b'].values.reshape(-1))
    assert numpy.array_equal(X[:, 2], numpy.log(data['c'].values.reshape(-1)))
    assert numpy.array_equal(y,
                             data[objective].isel(epoch=-1).values.reshape(-1))
    assert y.shape == (surrogate_budget, )
Ejemplo n.º 17
0
def test_register_hpo_is_actionable(client):
    """Test that the registered HPO have valid workitems and can be executed."""
    namespace = 'test-hpo'
    search_space = {name: 'uniform(-1, 1)' for name in 'abcd'}
    config = {
        'name': 'random_search',
        'seed': 1,
        'count': 1,
        'fidelity': Fidelity(1, 10, name='d').to_dict(),
        'space': search_space
    }

    register_hpo(client, namespace, foo, config, {})

    # Execute everything that was registered
    trial_worker = TrialWorker(URI, DATABASE, 0, namespace)
    trial_worker.max_retry = 0
    trial_worker.run()

    monitor_work_items = client.monitor().read_count(WORK_QUEUE,
                                                     namespace,
                                                     mtype=WORK_ITEM)
    assert monitor_work_items == 1
    monitor_hpo_items = client.monitor().read_count(WORK_QUEUE,
                                                    namespace,
                                                    mtype=HPO_ITEM)
    assert monitor_hpo_items == 2

    results = client.monitor().unread_messages(RESULT_QUEUE,
                                               namespace,
                                               mtype=HPO_ITEM)

    # The final HPO state is stored compressed in the first result message
    compressed_state = results[0].message.get('hpo_state')
    assert compressed_state is not None

    hpo_state = decompress_dict(compressed_state)
    trials = hpo_state['trials']
    assert len(trials) == 1
    assert trials[0][1]['objectives'] == [10.715799430116764]
Ejemplo n.º 18
0
def test_check_diversified_search():
    """Drive a ``DiversifiedSearch`` to completion on a toy objective.

    For each suggested config, the objective is observed once per epoch up to
    the requested epoch. The budget and the final results are printed;
    nothing is asserted.
    """
    space = {name: 'uniform(0, 1)' for name in ('a', 'b')}

    def add(uid, epoch, a, b):
        """Toy objective: only the two hyper parameters matter."""
        return a + b

    hpo = DiversifiedSearch(Fidelity(0, 30, name='epoch'), space)

    print(hpo.budget)

    while not hpo.is_done():
        for trial in hpo:
            target_epoch = trial['epoch']

            for step in range(1, target_epoch + 1):
                # Evaluate first, then record the epoch just completed
                objective = add(**trial)
                trial['epoch'] = step
                hpo.observe(trial, objective)

    for result in hpo.result():
        print(result)
Ejemplo n.º 19
0
def test_fetch_hpos_valid_results_update(client):
    """Check incremental fetching of curves as HPOs complete in waves.

    Four HPO namespaces (two per HPO key) are executed in three waves. After
    each wave, ``fetch_hpos_valid_curves`` must report only the newly
    completed namespaces as ready, return the namespaces still pending, and
    accumulate the fetched datasets in ``data``.
    """
    hpo_config = copy.deepcopy(CONFIG)
    trial_count = 5
    hpo_config['count'] = trial_count
    hpo_config['fidelity'] = Fidelity(0, 0, name='epoch').to_dict()

    namespaces = {}
    for i in range(1, 3):
        namespaces[f'hpo-{i}'] = [f'{NAMESPACE}-{i}-{j}' for j in range(1, 3)]

    ns_1_1 = f'{NAMESPACE}-1-1'
    ns_1_2 = f'{NAMESPACE}-1-2'
    ns_2_1 = f'{NAMESPACE}-2-1'
    ns_2_2 = f'{NAMESPACE}-2-2'

    def run_hpos(to_run):
        """Register the given namespaces and run a worker over all of them."""
        for hpo_namespace in to_run:
            register_hpo(client, hpo_namespace, foo, hpo_config, {'e': 2})

        trial_worker = TrialWorker(URI, DATABASE, 0, None)
        trial_worker.max_retry = 0
        trial_worker.timeout = 1
        trial_worker.run()

    # Wave 1: only the first namespace of hpo-1 is executed
    run_hpos([namespaces['hpo-1'][0]])

    data = defaultdict(dict)
    ready, pending = fetch_hpos_valid_curves(client, namespaces, ['e'], data)
    assert len(pending) == 2
    assert len(pending['hpo-1']) == 1
    assert len(pending['hpo-2']) == 2

    assert len(ready) == 1
    assert len(ready['hpo-1']) == 1
    assert ready['hpo-1'][0] == ns_1_1

    assert len(data) == 1
    assert len(data['hpo-1']) == 1

    assert data['hpo-1'][ns_1_1].attrs['namespace'] == ns_1_1

    # Wave 2: second namespace of hpo-1 and first namespace of hpo-2
    run_hpos([namespaces['hpo-1'][1], namespaces['hpo-2'][0]])

    ready, pending = fetch_hpos_valid_curves(client, pending, ['e'], data)
    assert len(pending) == 1
    assert len(pending['hpo-2']) == 1

    assert len(ready) == 2
    assert len(ready['hpo-1']) == 1
    assert ready['hpo-1'][0] == ns_1_2
    assert len(ready['hpo-2']) == 1
    assert ready['hpo-2'][0] == ns_2_1

    assert len(data) == 2
    assert len(data['hpo-1']) == 2
    assert len(data['hpo-2']) == 1

    assert data['hpo-1'][ns_1_2].attrs['namespace'] == ns_1_2
    assert data['hpo-2'][ns_2_1].attrs['namespace'] == ns_2_1

    # Wave 3: the last namespace of hpo-2
    run_hpos([namespaces['hpo-2'][1]])

    ready, pending = fetch_hpos_valid_curves(client, pending, ['e'], data)
    assert len(pending) == 0
    assert len(ready) == 1

    assert len(ready['hpo-2']) == 1
    assert ready['hpo-2'][0] == ns_2_2

    assert len(data) == 2
    assert len(data['hpo-1']) == 2
    assert len(data['hpo-2']) == 2

    assert data['hpo-2'][ns_2_2].attrs['namespace'] == ns_2_2
Ejemplo n.º 20
0
def run(uri, database, namespace, function, num_experiments, budget, fidelity, space, objective,
        variables, defaults, sleep_time=60, do_full_train=False, save_dir='.', partial=False,
        register=True):
    """Register a set of HPOs, wait for their curves and save the results.

    HPO configs are generated for every supported algorithm. With
    ``partial=True``, whatever is currently available is fetched, consolidated
    and saved, and the function returns. Otherwise the HPOs are registered
    and polled (sleeping ``sleep_time`` seconds whenever a poll returns no
    newly ready HPO) until none remains, then the results are saved.

    ``fidelity`` defaults to ``Fidelity(1, 1, name='epoch')`` and
    ``num_experiments`` to 2 when None is given.

    NOTE(review): the ``do_full_train`` path references names that are not
    defined in this function (``registered``, ``completed``, ...); it looks
    unfinished -- confirm before enabling it.
    """
    # TODO: Add hyperband
    hpos = ['grid_search', 'nudged_grid_search', 'noisy_grid_search', 'random_search',
            'bayesopt']

    if fidelity is None:
        fidelity = Fidelity(1, 1, name='epoch').to_dict()

    # TODO: Once hyperband is implemented, drop it from `hpos` when
    #       fidelity['min'] == fidelity['max']

    if num_experiments is None:
        num_experiments = 2

    client = new_client(uri, database)

    hpo_stats = fetch_all_hpo_stats(client, namespace)

    experiment_ids = list(range(num_experiments))
    configs = generate_hpos(experiment_ids, hpos, budget, fidelity, space, namespace, defaults)

    variable_names = list(sorted(variables.keys()))

    if partial:
        # Only collect what is already available; nothing is registered
        namespaces = defaultdict(list)
        for hpo, hpo_configs in configs.items():
            for hpo_namespace in hpo_configs:
                namespaces[hpo].append(hpo_namespace)

        data = defaultdict(dict)
        fetch_hpos_valid_curves(client, namespaces, variable_names, data, partial=True)

        save_results(namespace, consolidate_results(data), save_dir)
        return

    # Work kwargs: variables first, overridden by defaults on common keys
    work_kwargs = dict(list(variables.items()) + list(defaults.items()))
    namespaces = register_hpos(client, namespace, function, configs, work_kwargs,
                               hpo_stats, register)
    remainings = namespaces

    print_status(client, namespace, namespaces)
    data = defaultdict(dict)
    while sum(remainings.values(), []):
        hpos_ready, remainings = fetch_hpos_valid_curves(client, remainings, variable_names, data)

        # TODO: Implement full-train part
        if do_full_train:
            configs = generate_tests(data, defaults, registered)
            register_tests(client, namespace, function, configs)

        nothing_new = not sum(hpos_ready.values(), [])
        if nothing_new:
            print_status(client, namespace, namespaces)
            time.sleep(sleep_time)

    # Save valid results
    data = consolidate_results(data)
    save_results(namespace, data, save_dir)

    if do_full_train:
        # TODO: Implement full-train part
        wait(completed)  # take the sum of all hpo_namespaces

        # NOTE & TODO: This should follow the same format as valid results, but we need to
        #              make sure the mapping in order of trials is the same.
        data = fetch_results(client, namespace, namespaces)

        # Save test results
        save_results(namespace, data, save_dir)
Ejemplo n.º 21
0
from olympus.observers.msgtracker import METRIC_QUEUE, METRIC_ITEM
from olympus.hpo.parallel import (RESULT_QUEUE, WORK_QUEUE)
from olympus.hpo.worker import TrialWorker
from olympus.hpo import Fidelity
from studies import register_hpo, fetch_hpo_valid_curves
from studies import xarray_to_scipy_results, plot

# Database location and name used by the tests.
# NOTE(review): presumably a MongoDB instance must be reachable at this
# address for the tests to run -- confirm.
URI = 'mongo://127.0.0.1:27017'
DATABASE = 'olympus'

# Namespace of the test experiment; also the default ``experiment_name`` of
# ``foo``.
NAMESPACE = 'test-hpo'

# Base HPO configuration: a random search with a fixed seed, an ``epoch``
# fidelity built from ``Fidelity(1, 10)`` and a four-dimensional search space
# whose last dimension is log-uniform.
CONFIG = {
    'name': 'random_search',
    'seed': 0,
    'count': 1,
    'fidelity': Fidelity(1, 10, name='epoch').to_dict(),
    'space': {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)',
        'd': 'loguniform(lower=1, upper=10)',
    }
}
# Empty set of default arguments.
DEFAULTS = {}


def foo(uid, a, b, c, d, e=1, epoch=0, experiment_name=NAMESPACE, client=None):
    """Toy objective that pushes one metric message per epoch.

    The base value ``a + 2 * b - c**2 + d + e`` is shifted by the epoch index
    and pushed to the metric queue as both ``obj`` and ``valid`` for every
    epoch from 0 to ``epoch`` inclusive. Nothing is returned.

    ``client`` is used unconditionally, so a real client must be supplied
    despite the ``None`` default.
    """
    base = a + 2 * b - c**2 + d + e
    for step in range(epoch + 1):
        value = step + base
        metrics = {'obj': value, 'valid': value, 'uid': uid, 'epoch': step}
        client.push(METRIC_QUEUE, experiment_name, metrics, mtype=METRIC_ITEM)
Ejemplo n.º 22
0
def run(uri,
        database,
        namespace,
        function,
        num_experiments,
        num_simuls,
        fidelity,
        space,
        objective,
        variables,
        defaults,
        num_replicates=None,
        sleep_time=60,
        do_full_train=False,
        save_dir='.',
        seed=1,
        register=True,
        rep_types=REP_TYPES):
    """Run the random-search HPOs, generate their replicates and save results.

    ``num_experiments // 2`` random-search HPOs are generated with a budget
    of 200 trials and registered. While some HPOs remain, the available
    curves are fetched and replicates are generated (and registered when
    ``register`` is true) for the configs that are ready. Once everything
    has completed, the replicate results are fetched, consolidated with the
    HPO data and saved in ``save_dir``.

    ``num_replicates`` defaults to ``num_experiments`` and ``fidelity`` to
    ``Fidelity(1, 1, name='epoch')`` when None is given.

    Raises:
        AssertionError: if ``num_experiments`` is odd or ``num_simuls`` is
            larger than ``num_experiments / 2``.
    """
    hpo_budget = 100
    surrogate_budget = 200

    if num_replicates is None:
        num_replicates = num_experiments

    # We use 200 trials to fit the surrogate models (surrogate_budget is 200)
    # but we only need 100 for the ideal (hpo_budget is 100).
    # Therefore, since num_simuls is at most half the number of experiments,
    # we can run only (num_experiments / 2) hpo runs and use the first and
    # the second half of 100 trials as 2 separate ideal runs.
    # This is possible since we are using random search.

    assert (num_experiments % 2) == 0
    assert num_simuls <= (num_experiments / 2)

    num_ideal = num_experiments // 2

    hpo = 'random_search'

    # TODO
    # for each repetition, vary all sources of variations
    # when one hpo is done, create all biased and simulations

    if fidelity is None:
        fidelity = Fidelity(1, 1, name='epoch').to_dict()

    client = new_client(uri, database)

    configs = generate_hpos(list(range(num_ideal)), [hpo], surrogate_budget,
                            fidelity, space, namespace, defaults)

    to_replicate = get_configs_to_replicate(configs, num_simuls)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, namespace)

    namespaces = register_hpos(client,
                               namespace,
                               function,
                               configs,
                               defaults,
                               hpo_stats,
                               register=register)
    remainings = namespaces

    data_hpo = defaultdict(dict)
    all_replicates = dict(random_search=dict())
    while sum(remainings.values(), []):
        print_status(client, namespace, namespaces)
        hpos_ready, remainings = fetch_hpos_valid_curves(
            client, remainings, variable_names, data_hpo)

        ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

        replicates = generate_replicates(ready_configs,
                                         data_hpo,
                                         variables,
                                         objective,
                                         hpo_budget,
                                         num_replicates,
                                         early_stopping=False,
                                         rep_types=rep_types)

        # Bound on every iteration: without this, the sleep check below
        # raised UnboundLocalError whenever ``register`` was false and some
        # HPOs were still remaining.
        registered_replicates = None
        if register:
            registered_replicates = register_all_replicates(
                client, function, namespace, replicates)

        if replicates.get('random_search'):
            all_replicates['random_search'].update(replicates['random_search'])

        # Only wait when work remains and nothing new was registered
        if sum(remainings.values(), []) and not registered_replicates:
            time.sleep(sleep_time)

    wait(client, namespace, sleep=sleep_time)

    data_replicates = fetch_hpos_replicates(client, configs, all_replicates,
                                            variable_names, space, rep_types)

    # Save valid results
    data = consolidate_results(data_hpo, data_replicates, rep_types)
    save_results(namespace, data, save_dir)
Ejemplo n.º 23
0
    loader = DataLoader(splits, sampler_seed=1, batch_size=32)

    main_task = Classification(
        classifier=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=loader.train(),
        device=device,
        storage=StateStorage(folder=f'{base}/hpo_simple'))

    main_task.metrics.append(
        Accuracy(name='validation', loader=loader.valid(batch_size=64)))

    return main_task


# Build one task only to query the hyper parameter space to search
space = make_task().get_space()

# Hyperband optimizer over that space, using a Fidelity(1, 30)
hp_optimizer = HPOptimizer('hyperband',
                           fidelity=Fidelity(1, 30).to_dict(),
                           space=space)

# The HPO receives the task factory itself, not a task instance
hpo_task = HPO(hp_optimizer, make_task)

# Run the search on the validation accuracy
result = hpo_task.fit(objective='validation_accuracy')

# Report the objective and the parameters of the returned result
print('Best Params:')
print('-' * 40)
print(f'validation_accuracy: {result.objective}')
show_dict(result.params)
Ejemplo n.º 24
0
def test_consolidate_results(client):
    """Exercise the HPO -> replicates -> consolidation pipeline end to end.

    Registers random-search HPOs, runs a worker, generates and registers
    replicates from the ready configs, runs a worker again, then checks the
    shapes and the number of distinct values in the consolidated results.
    """
    hpo = 'random_search'
    objective = 'obj'
    seed = 2
    num_experiments = 5
    num_replicates = 10
    surrogate_budget = 10
    hpo_budget = 5

    # Three hyper-parameters sharing the same prior.
    space = {name: 'uniform(lower=-1, upper=1)' for name in ('a', 'b', 'c')}
    variables = {'d': 5}
    defaults = {'e': 2, 'epoch': 5}
    fidelity = Fidelity(5, 5, name='epoch').to_dict()

    def run_worker():
        """Run a fresh TrialWorker with no retries and a 0.02 timeout."""
        trial_worker = TrialWorker(URI, DATABASE, 0, None)
        trial_worker.max_retry = 0
        trial_worker.timeout = 0.02
        trial_worker.run()

    def count_unique(attr):
        """Count the distinct values in ``attr.values`` once flattened."""
        flat_values = attr.values.reshape(-1).tolist()
        return len(set(flat_values))

    configs = generate_hpos(
        list(range(num_experiments)), [hpo],
        budget=surrogate_budget,
        fidelity=fidelity,
        search_space=space,
        namespace=NAMESPACE,
        defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = sorted(variables)

    stats = fetch_all_hpo_stats(client, NAMESPACE)
    namespaces = register_hpos(client, NAMESPACE, foo, configs, defaults,
                               stats)

    # First pass: execute the registered HPOs.
    run_worker()

    curves = defaultdict(dict)
    hpos_ready, _remainings = fetch_hpos_valid_curves(
        client, namespaces, variable_names, curves)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    replicates = generate_replicates(
        ready_configs, curves, variables, objective, hpo_budget,
        num_replicates, early_stopping=False)
    register(client, foo, NAMESPACE, replicates)

    # Second pass: execute the registered replicates.
    run_worker()
    print(fetch_vars_stats(client, NAMESPACE))

    replicate_data = fetch_hpos_replicates(
        client, configs, replicates, variable_names, space, curves)
    consolidated = consolidate_results(replicate_data)

    assert len(consolidated) == 1
    assert len(consolidated['random_search']) == 4

    hpo_reps = consolidated['random_search']

    # Expected length of the second axis of `obj` per replicate type.
    # NOTE(review): the leading 6 is taken as-is from the expected shape; what
    # that axis represents is not visible from this test.
    expected_rows = (
        ('ideal', surrogate_budget),
        ('biased', num_replicates),
        ('simul-fix', num_replicates),
        ('simul-free', num_replicates),
    )
    for rep_type, rows in expected_rows:
        assert hpo_reps[rep_type].obj.shape == (6, rows, num_experiments)

    every_replicate = num_replicates * num_experiments

    # Test sources of variation
    # NOTE: In ideal, source of variation will vary across ideal after
    #       consolidation but it stays fixed during the HPO itself
    assert count_unique(hpo_reps['ideal']['d']) == num_experiments
    assert count_unique(hpo_reps['biased']['d']) == every_replicate
    assert count_unique(hpo_reps['simul-free']['d']) == every_replicate
    assert count_unique(hpo_reps['simul-fix']['d']) == num_experiments

    # Test HPs
    assert count_unique(
        hpo_reps['ideal']['a']) == num_experiments * surrogate_budget
    assert count_unique(hpo_reps['biased']['a']) == num_experiments
    assert count_unique(hpo_reps['simul-free']['a']) == every_replicate
    assert count_unique(hpo_reps['simul-fix']['a']) == every_replicate
    assert numpy.allclose(hpo_reps['simul-free']['a'].values,
                          hpo_reps['simul-fix']['a'].values)
Ejemplo n.º 25
0
def test_register_hpos_resume(client, monkeypatch):
    """Check that re-registering HPOs only registers the ones not yet known.

    A first batch of configs is registered with empty ``stats``. ``stats`` is
    then rebuilt with one entry per namespace returned by that first call, a
    larger batch is generated, and ``register_hpo`` is patched to record the
    namespaces it is called with. The test asserts that the number of
    recorded namespaces equals the number of configs added by the larger
    batch, and that every config of the larger batch has exactly one HPO
    message in the work queue with matching kwargs.
    """
    namespace = 'test-hpo'
    hpos = [
        'grid_search', 'nudged_grid_search', 'noisy_grid_search',
        'random_search', 'bayesopt'
    ]
    budget = 200
    fidelity = Fidelity(1, 10, name='d').to_dict()
    num_experiments = 2
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'd': 'uniform(-1, 1)'
    }
    defaults = {}
    stats = {}

    # NOTE(review): configs are generated with the module-level NAMESPACE
    # while registration uses the local `namespace` -- confirm this is
    # intended.
    configs = generate_hpos(range(num_experiments), hpos, budget, fidelity,
                            search_space, NAMESPACE, defaults)

    # Nothing must be queued before the first registration.
    assert client.monitor().read_count(WORK_QUEUE, namespace,
                                       mtype=HPO_ITEM) == 0
    new_namespaces = register_hpos(client, namespace, foo, configs, defaults,
                                   stats)
    assert len(set(new_namespaces)) == len(configs)

    print(new_namespaces)

    # One (empty) stats entry per namespace returned by the first call.
    stats = {
        registered: {}
        for registered in sum(new_namespaces.values(), [])
    }

    more_configs = generate_hpos(range(num_experiments + 2), hpos, budget,
                                 fidelity, search_space, NAMESPACE, defaults)

    # Save new namespaces for test
    new_namespaces = defaultdict(list)

    def mock_register_hpo(client, namespace, function, config, defaults):
        """Record the namespace, then delegate to the real register_hpo."""
        new_namespaces[config['name']].append(namespace)
        return register_hpo(client, namespace, function, config, defaults)

    def flatten_configs(confs):
        """Return every inner key of a dict of dicts as one flat list."""
        return [
            hpo_namespace for hpo_configs in confs.values()
            for hpo_namespace in hpo_configs
        ]

    monkeypatch.setattr('olympus.studies.hpo.main.register_hpo',
                        mock_register_hpo)
    register_hpos(client, namespace, foo, more_configs, defaults, stats)

    # Only the configs added by the second batch may reach register_hpo.
    newly_registered = set(sum(new_namespaces.values(), []))
    expected_new = (len(flatten_configs(more_configs)) -
                    len(flatten_configs(configs)))
    assert len(newly_registered) == expected_new

    # Verify new registered configs
    for hpo_configs in more_configs.values():
        for hpo_namespace, config in hpo_configs.items():
            messages = client.monitor().messages(WORK_QUEUE,
                                                 hpo_namespace,
                                                 mtype=HPO_ITEM)
            assert len(messages) == 1
            assert messages[0].message['hpo']['kwargs'] == config