def test_fit_model():
    """Smoke-test ``fit_model`` on data produced by ``build_data``.

    The test generates the random-search HPO configs, randomizes their
    seeds, builds the data, converts it to ``(X, y)`` and fits a model on
    it. It passes when the fit completes without raising and returns a
    model object.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective)

    model = fit_model(X, y, space, seed=1)

    # Without this check the test could only fail on an exception; a fit
    # that silently returned nothing would go unnoticed.
    assert model is not None
def test_generate_biased_replicates_last_epoch():
    """Check ``generate_biased_replicates`` with early stopping disabled.

    Every replicate is expected to:

    * reuse the hyperparameter values of the trial at ``order=6`` of the data,
    * hold, for each variable, the next value drawn from a ``RandomState``
      seeded with the seed of the HPO config,
    * carry a ``uid`` equal to the identity of the defaults updated with
      those hyperparameters and variables.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    surrogate_budget = 10
    hpo_budget = 5
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    # Configuration of the first random-search HPO; it is both the input of
    # the generator and the source of the seed for the expected draws.
    hpo_config = configs['random_search'][f'{NAMESPACE}-random-search-s-0']

    replicates = generate_biased_replicates(data,
                                            hpo_config,
                                            variables,
                                            objective,
                                            num_replicates,
                                            hpo_budget,
                                            early_stopping=False)

    best_trial_index = 6
    rng = numpy.random.RandomState(hpo_config['seed'])
    for replicate in replicates:
        expected_params = copy.deepcopy(defaults)

        for name in space:
            assert replicate[name] == float(
                data.sel(order=best_trial_index)[name].values)
            expected_params[name] = replicate[name]

        for name in variables:
            # Draw outside of the assert so the RNG stream is advanced
            # explicitly, once per variable and replicate.
            expected_seed = rng.randint(2**30)
            assert replicate[name] == expected_seed
            expected_params[name] = replicate[name]

        assert replicate['uid'] == compute_identity(expected_params,
                                                    IDENTITY_SIZE)
def test_generate_simulated_fix():
    """Check ``generate_simulated_fix`` with early stopping disabled.

    All replicates must keep the sources of variation (``d`` and ``e``) at
    the randomized defaults of the HPO config, while their hyperparameters
    (``a``, ``b``, ``c``) and ``uid`` must differ from those of the first
    replicate.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    config = configs['random_search'][f'{NAMESPACE}-random-search-s-0']
    # Make sure the defaults have been replaced by randomized seeds
    assert config['defaults']['d'] != defaults['d']
    assert config['defaults']['e'] != defaults['e']

    replicates = generate_simulated_fix(data,
                                        config,
                                        variables,
                                        objective,
                                        hpo_budget,
                                        num_replicates,
                                        early_stopping=False)

    assert len(replicates) == num_replicates

    # The sources of variation are fixed for *every* replicate. The first
    # one is included here; it used to be skipped because the checks were
    # done inside a loop starting at index 1.
    for replicate in replicates:
        assert replicate['d'] == config['defaults']['d']
        assert replicate['e'] == config['defaults']['e']

    # Hyperparameters, and therefore the uid, differ from the first replicate.
    first = replicates[0]
    for other in replicates[1:]:
        assert other['a'] != first['a']
        assert other['b'] != first['b']
        assert other['c'] != first['c']
        assert other['uid'] != first['uid']
def generate_mocked_replicates(num_replicates, num_experiments=5):
    """Generate replicates for random-search HPOs backed by ``build_data``.

    Every HPO namespace produced by ``generate_hpos`` is marked as ready and
    is given its own ``build_data`` output as HPO data, then
    ``generate_replicates`` is called with early stopping disabled.

    Parameters
    ----------
    num_replicates:
        Number of replicates, forwarded to ``generate_replicates``.
    num_experiments:
        Number of HPO configs to generate and to replicate. Defaults to 5.

    Returns
    -------
    Whatever ``generate_replicates`` returns for the ready configs.
    """
    search_space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 3}
    defaults = {'d': 1, 'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    surrogate_budget = 10
    hpo_budget = 5
    fidelity = Fidelity(5, 5, name='epoch').to_dict()

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=search_space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    # Selection of configs to replicate happens before seeds are randomized.
    to_replicate = get_configs_to_replicate(configs, num_experiments)
    randomize_seeds(configs['random_search'], variables, seed)

    # All HPOs are considered ready, each with its own freshly built data.
    hpo_namespaces = list(configs['random_search'])
    hpos_ready = {'random_search': hpo_namespaces}
    data = {
        'random_search': {
            hpo_namespace: build_data(surrogate_budget, variables, defaults,
                                      search_space)
            for hpo_namespace in hpo_namespaces
        }
    }

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    return generate_replicates(ready_configs,
                               data,
                               variables,
                               objective,
                               hpo_budget,
                               num_replicates,
                               early_stopping=False)
def test_simulate_hpo():
    """Check that ``simulate_hpo`` returns one value per hyperparameter.

    A model is fitted on the ``(X, y)`` conversion of the ``build_data``
    output, then an HPO of ``hpo_budget`` is simulated with the seed of the
    first random-search config. Only the keys of the returned sample are
    verified: they must be exactly the dimensions of the search space.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    # Fit the surrogate model that the simulation samples from.
    X, y = convert_data_to_xy(data, space, objective)

    model = fit_model(X, y, space, seed=1)

    sample = simulate_hpo(
        model, space, hpo_budget,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])
    assert sample.keys() == space.keys()
def test_randomize_seeds():
    """Check ``randomize_seeds`` with and without identity computation.

    In both modes, the defaults of each config must hold, for every
    variable, the successive draws of a ``RandomState`` seeded with ``seed``.
    With ``compute_id=True`` the defaults additionally contain a ``uid``
    equal to the identity of the remaining defaults; with
    ``compute_id=False`` no ``uid`` must be present.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = ['d', 'e']
    defaults = {}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=200,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    hpo_configs = configs['random_search']

    # First pass with the uid, second pass without. The uid is popped during
    # the first pass, so the second pass starts from uid-free defaults.
    for compute_id in (True, False):
        randomize_seeds(hpo_configs, variables, seed, compute_id=compute_id)

        # Fresh generator for each pass: both must reproduce the same draws.
        rng = numpy.random.RandomState(seed)
        for config in hpo_configs.values():
            randomized = config['defaults']
            for name in variables:
                expected_seed = rng.randint(2**30)
                assert randomized[name] == expected_seed

            if compute_id:
                uid = randomized.pop('uid')
                assert uid == compute_identity(randomized, IDENTITY_SIZE)
            else:
                assert 'uid' not in randomized
def test_convert_data_to_xy():
    """Check the ``(X, y)`` conversion of HPO data without early stopping.

    The columns of ``X`` must follow the order of the search space
    (``a``, ``b``, ``c``), with the ``loguniform`` dimension ``c`` given in
    log scale. ``y`` must be the objective at the last epoch, with one value
    per trial of the surrogate budget.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective, early_stopping=False)

    assert numpy.array_equal(X[:, 0], data['a'].values.reshape(-1))
    assert numpy.array_equal(X[:, 1], data['b'].values.reshape(-1))
    # 'c' is the loguniform dimension: it is compared in log scale.
    assert numpy.array_equal(X[:, 2], numpy.log(data['c'].values.reshape(-1)))
    assert numpy.array_equal(y,
                             data[objective].isel(epoch=-1).values.reshape(-1))
    assert y.shape == (surrogate_budget, )
def test_consolidate_results(client):
    """End-to-end check of ``consolidate_results`` for random search.

    Steps, in order: register the HPOs and run a ``TrialWorker``; fetch the
    valid curves; generate and register the replicates and run a second
    ``TrialWorker``; fetch the replicates and consolidate everything.

    The consolidated data must contain the four replication types
    (``ideal``, ``biased``, ``simul-fix``, ``simul-free``) for
    ``random_search``, with the expected objective shapes and the expected
    number of distinct values for the source of variation ``d`` and the
    hyperparameter ``a``.

    ``client`` is supplied by the caller (presumably a pytest fixture).
    """
    num_experiments = 5
    num_replicates = 10
    surrogate_budget = 10
    hpo_budget = 5
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 5}
    defaults = {'e': 2, 'epoch': 5}
    fidelity = Fidelity(5, 5, name='epoch').to_dict()

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = sorted(variables)

    # Phase 1: register the HPOs and let a worker execute them.
    hpo_stats = fetch_all_hpo_stats(client, NAMESPACE)
    namespaces = register_hpos(client, NAMESPACE, foo, configs, defaults,
                               hpo_stats)

    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    # Phase 2: collect the curves and register the replicates built on them.
    hpo_data = defaultdict(dict)
    hpos_ready, _remainings = fetch_hpos_valid_curves(client, namespaces,
                                                      variable_names,
                                                      hpo_data)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    replicates = generate_replicates(ready_configs,
                                     hpo_data,
                                     variables,
                                     objective,
                                     hpo_budget,
                                     num_replicates,
                                     early_stopping=False)
    register(client, foo, NAMESPACE, replicates)

    # Phase 3: a second worker executes the replicates.
    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()
    print(fetch_vars_stats(client, NAMESPACE))

    # Phase 4: fetch the replicates and consolidate.
    fetched = fetch_hpos_replicates(client, configs, replicates,
                                    variable_names, space, hpo_data)
    consolidated = consolidate_results(fetched)

    assert len(consolidated) == 1
    assert len(consolidated['random_search']) == 4

    hpo_reps = consolidated['random_search']

    # Expected length of the second axis of the objective per replication type
    # (first axis is 6 for all of them; NOTE(review): presumably epochs 0..5
    # given the fidelity of 5 -- confirm).
    second_axis = [
        ('ideal', surrogate_budget),
        ('biased', num_replicates),
        ('simul-fix', num_replicates),
        ('simul-free', num_replicates),
    ]
    for rep_type, size in second_axis:
        assert hpo_reps[rep_type].obj.shape == (6, size, num_experiments)

    def count_unique(attr):
        flat_values = attr.values.reshape(-1)
        return len(set(flat_values.tolist()))

    all_replicates = num_replicates * num_experiments

    # Test sources of variation
    # NOTE: In ideal, source of variation will vary across ideal after consolidation
    #       but it stays fixed during the HPO itself
    unique_sources = [
        ('ideal', num_experiments),
        ('biased', all_replicates),
        ('simul-free', all_replicates),
        ('simul-fix', num_experiments),
    ]
    for rep_type, expected in unique_sources:
        assert count_unique(hpo_reps[rep_type]['d']) == expected

    # Test HPs
    unique_hps = [
        ('ideal', num_experiments * surrogate_budget),
        ('biased', num_experiments),
        ('simul-free', all_replicates),
        ('simul-fix', all_replicates),
    ]
    for rep_type, expected in unique_hps:
        assert count_unique(hpo_reps[rep_type]['a']) == expected

    assert numpy.allclose(hpo_reps['simul-free']['a'].values,
                          hpo_reps['simul-fix']['a'].values)