Example #1
0
def generate(seeds, variables, defaults, add_reference=True):
    """Build per-variable config lists whose uids depend on the varied name.

    Each config is a shallow copy of ``defaults`` with one entry pinned to a
    seed; the uid is hashed while a transient '_variable' marker is present,
    so identical kwargs under different variables still get distinct uids.
    """
    def _make(marker_items, overrides):
        # Copy defaults, apply overrides, hash with the transient markers,
        # then strip the markers and record the uid (same insertion order
        # as the hand-written version, so identities are unchanged).
        doc = copy.copy(defaults)
        doc.update(overrides)
        for key, value in marker_items:
            doc[key] = value
        uid = compute_identity(doc, IDENTITY_SIZE)
        for key, _ in marker_items:
            doc.pop(key)
        doc['uid'] = uid
        return doc

    configs = dict()
    for name in variables:
        configs[name] = [
            _make([('_variable', name)], {name: int(s)}) for s in seeds
        ]

    if add_reference:
        # Reference runs vary only a transient repetition counter.
        configs['reference'] = [
            _make([('_repetition', idx), ('_variable', 'reference')], {})
            for idx in range(len(seeds))
        ]

    return configs
Example #2
0
def generate_simulated_fix(data,
                           config,
                           variables,
                           objective,
                           hpo_budget,
                           num_replicates,
                           early_stopping=True):
    """Sample simulated HPO replicates and stamp each with a fresh uid."""
    # One extra seed up front: seeds[0] fits the surrogate model, the
    # remaining ones drive the simulated HPO runs.
    rng = numpy.random.RandomState(config['seed'])
    seeds = rng.randint(2**30, size=num_replicates + 1)
    space = config['space']

    features, targets = convert_data_to_xy(data, space, objective,
                                           early_stopping)
    features, targets = cutoff(features, targets, percentile=0.85)
    surrogate = fit_model(features, targets, space, seed=seeds[0])

    configs = []
    for sim_seed in seeds[1:]:
        params = simulate_hpo(surrogate, space, hpo_budget, sim_seed)
        replicate = copy.deepcopy(config['defaults'])
        replicate.update(params)
        # Drop any stale uid before hashing so the identity reflects the
        # newly sampled hyper-parameters only.
        replicate.pop('uid', None)
        replicate['uid'] = compute_identity(replicate, IDENTITY_SIZE)
        configs.append(replicate)

    return configs
 def test_doc(name, i):
     """Expected doc: defaults plus one varied entry, uid keyed on the
     transient '_variable' marker."""
     expected = copy.copy(defaults)
     expected[name] = i
     expected['_variable'] = name
     # uid is hashed while the marker is still present; the marker is
     # then stripped from the returned doc.
     expected['uid'] = compute_identity(expected, 16)
     expected.pop('_variable')
     return expected
Example #4
0
def main(study='lin_reg'):
    """Queue tiny_task runs over a small hyper-parameter grid."""
    batch_sizes = [32, 64, 128, 256]
    lrs = [0.005, 0.1, 0.05, 0.01]
    seeds = [0, 1, 2, 3, 4, 5]
    epochs = [30]

    def arguments():
        # Same nesting order as the original grid: batch size, lr, epochs, seed.
        for batch_size in batch_sizes:
            for lr in lrs:
                for epoch in epochs:
                    for seed in seeds:
                        yield dict(epochs=epoch, batch_size=batch_size,
                                   lr=lr, seed=seed)

    with HPOWorkGroup('mongo://127.0.0.1:27017', 'olympus', None) as group:
        group.launch_workers(2)
        group.clear_queue()
        group.client.monitor().clear('OLYMETRIC', group.experiment)

        for i, kwargs in enumerate(arguments()):
            namespace = f"{study}-{kwargs['batch_size']}-{kwargs['lr']}"

            # 10 repetitions; 'rs' only participates in the hash so each
            # repetition gets a distinct identity.  NOTE(review): from the
            # second repetition on, kwargs still carries the previous 'uid'
            # when the new one is hashed -- confirm that is intentional.
            for repetition in range(10):
                kwargs['rs'] = repetition
                kwargs['uid'] = compute_identity(kwargs, 16)
                kwargs.pop('rs')
                group.queue_work(tiny_task, namespace=namespace, **kwargs)

        group.wait()
Example #5
0
def test_generate_bayesopt():
    """generate_bayesopt: one config per seed, rng-derived seeds, valid uid."""
    defaults = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
    budget = 200
    fidelity = 'fidelity(1, 10)'
    num_experiments = 10
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'lr': 'uniform(-1, 1)'
    }
    configs = generate_bayesopt(budget, fidelity, search_space,
                                range(num_experiments))

    assert len(configs) == num_experiments

    for seed, config in enumerate(configs):
        # The four sub-seeds must be drawn in this exact order from a
        # fresh per-seed rng.
        rng = numpy.random.RandomState(seed)
        assert config['name'] == 'robo'
        assert config['space'] == search_space
        assert config['n_init'] == 20
        assert config['count'] == budget
        assert config['model_seed'] == rng.randint(2**30)
        assert config['prior_seed'] == rng.randint(2**30)
        assert config['init_seed'] == rng.randint(2**30)
        assert config['maximizer_seed'] == rng.randint(2**30)
        assert config['namespace'] == f'bayesopt-s-{seed}'
        # uid must hash the config without its own uid entry.
        assert config.pop('uid') == compute_identity(config, 16)
Example #6
0
def fetch_registered(client, namespace, hpo, seed):
    """Return the identity set of HPO work items already in the queue."""
    messages = client.monitor().messages(
        WORK_QUEUE, env(namespace, hpo, seed), mtype=HPO_ITEM)
    return {
        compute_identity(message.message['kwargs'], IDENTITY_SIZE)
        for message in messages
    }
Example #7
0
def generate_nudged_grid_search(budget, fidelity, search_space, seeds):
    """Build a single nudged grid-search config derived from grid search.

    Takes the deterministic grid-search config, renames its namespace,
    adds a 0.5 nudge factor, and recomputes its uid.  `seeds` is accepted
    for signature parity with the other generators but unused.
    """
    configs = generate_grid_search(budget, fidelity, search_space, [])
    config = configs[0]

    config['namespace'] = f'grid-search-nudged-p-{config["n_points"]}'
    config['nudge'] = 0.5
    # Drop the uid inherited from generate_grid_search before hashing;
    # otherwise the stale uid leaks into the new identity.  This matches
    # how generate_noisy_grid_search and limit_to_var recompute uids.
    config.pop('uid', None)
    config['uid'] = compute_identity(config, IDENTITY_SIZE)

    return [config]
Example #8
0
 def test_doc(name, i, j):
     """Expected doc for one (variable, repetition) pair."""
     expected = copy.copy(defaults)
     expected[name] = int(i)
     expected['variable'] = name
     expected['repetition'] = j
     # Hash while the transient markers are present, then strip them.
     expected['uid'] = compute_identity(expected, 16)
     expected.pop('repetition')
     expected.pop('variable')
     return expected
Example #9
0
    def init(self, model, uid=None):
        """Initialize the wrapped model and announce the new trial.

        Parameters
        ----------
        model: dict
            Keyword arguments forwarded to ``self.model.init``.
        uid: str, optional
            Identifier for this configuration; derived from ``model``
            when not supplied.
        """
        self.model.init(**model)

        # Fall back to a hash of the configuration when no uid was given.
        uid = uid if uid is not None else compute_identity(model, size=16)

        # Broadcast that the model is ready so logging, data, etc. can be
        # set up by the listeners.
        self.metrics.new_trial(model, uid)
Example #10
0
def generate_hyperband(budget, fidelity, search_space, seeds):
    """Build hyperband configs (currently disabled: always returns [])."""
    # TODO: Compute budget based on cumulative number of epochs.
    #       Let infinite repetitions and stop when reaching corresponding budget.
    configs = []
    for seed in seeds:
        config = {'name': 'hyperband', 'seed': seed}
        # NOTE(review): uid is hashed before the namespace is added, unlike
        # the other generators -- harmless while the return stays disabled.
        config['uid'] = compute_identity(config, IDENTITY_SIZE)
        config['namespace'] = f'hyperband-s-{seed}'
        configs.append(config)

    # Deliberately disabled: hyperband is excluded from the study for now.
    return []  # configs
 def test_doc(name, i):
     """Expected doc for either a varied entry or a 'reference' repetition."""
     expected = copy.copy(defaults)
     is_reference = name == 'reference'
     # Reference docs vary a transient repetition marker instead of a value.
     key = '_repetition' if is_reference else name
     expected[key] = i
     expected['_variable'] = name
     expected['uid'] = compute_identity(expected, 16)
     # Strip the transient markers after hashing.
     if is_reference:
         expected.pop('_repetition')
     expected.pop('_variable')
     return expected
Example #12
0
 def test_doc(name, i, j, interupt):
     """Expected doc for one repetition, optionally marked interrupted."""
     expected = copy.copy(defaults)
     expected[name] = i
     if interupt:
         expected['_interrupt'] = True
     expected['variable'] = name
     expected['repetition'] = j
     # Any stale uid must not leak into the new identity.
     expected.pop('uid', None)
     expected['uid'] = compute_identity(expected, 16)
     expected.pop('repetition')
     expected.pop('variable')
     return expected
Example #13
0
def test_generate_biased_replicates_last_epoch():
    """Biased replicates reuse the best trial's HPs and re-draw variables.

    NOTE(review): the rng draws in the assertion loop must replay the exact
    per-variable draw order used by generate_biased_replicates -- do not
    reorder these loops.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=surrogate_budget,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    # Give each config random seeds for the variables before building data.
    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    replicates = generate_biased_replicates(
        data,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0'],
        variables,
        objective,
        num_replicates,
        hpo_budget,
        early_stopping=False)

    # Presumably the best trial under build_data's fixed shuffle (seed 0)
    # is at order 6 -- verify against build_data if its seeding changes.
    best_trial_index = 6
    rng = numpy.random.RandomState(
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])
    for replicate in replicates:
        should_be = copy.deepcopy(defaults)
        for param in space.keys():
            # HPs must be copied verbatim from the best trial's curve data.
            assert replicate[param] == float(
                data.sel(order=best_trial_index)[param].values)
            should_be[param] = replicate[param]
        for variable in variables:
            # Variables are re-drawn from the config's seed, in order.
            assert replicate[variable] == rng.randint(2**30)
            should_be[variable] = replicate[variable]

        # The replicate's uid must hash its full content (HPs + variables).
        assert replicate['uid'] == compute_identity(should_be, IDENTITY_SIZE)
Example #14
0
def randomize_seeds(configs, variables, seed, compute_id=False):
    """Draw a fresh random seed for each variable of every config, in place.

    One shared rng is consumed across configs in iteration order, so the
    drawn values depend on both the config order and the variable order.
    """
    rng = numpy.random.RandomState(seed)

    for config in configs.values():
        config.setdefault('defaults', {})
        config_defaults = config['defaults']
        for variable in variables:
            config_defaults[variable] = rng.randint(2**30)

        if compute_id:
            # Recompute the uid from scratch so it reflects the new seeds
            # and never includes a stale uid entry.
            config_defaults.pop('uid', None)
            config_defaults['uid'] = compute_identity(
                config_defaults, IDENTITY_SIZE)
Example #15
0
def generate(num_experiments, num_repro, objective, variables, defaults,
             resumable):
    """Build per-variable configs over (seed, repetition) pairs.

    The uid is hashed while transient 'variable'/'repetition' markers are
    present so repetitions of identical kwargs do not collide (important
    for checkpoints); the markers are stripped before the config is stored.
    """
    # TODO: Add a resume test as well
    # Run 5 times full
    # Run 5 times half stopped then resumed
    # NOTE: set the checkpointer buffer to 0 so checkpoints always happen;
    # tasks without checkpoints should not get resume tests.

    def _seal(doc, variable, repetition):
        # Hash with the transient markers, then strip them and store the
        # uid -- identical key-insertion order to the inline version.
        doc['variable'] = variable
        doc['repetition'] = repetition
        uid = compute_identity(doc, IDENTITY_SIZE)
        doc.pop('repetition')
        doc.pop('variable')
        doc['uid'] = uid
        return doc

    configs = dict()
    for variable in variables:
        configs[variable] = []
        entries = configs[variable]
        for seed in range(1, num_experiments + 1):
            for repetition in range(1, num_repro + 1):
                doc = copy.copy(defaults)
                doc[variable] = int(seed)
                entries.append(_seal(doc, variable, repetition))
                if resumable:
                    # Interrupted twin: same config plus an interrupt flag,
                    # re-hashed without the previous uid.
                    doc = copy.copy(doc)
                    doc['_interrupt'] = True
                    doc.pop('uid')
                    entries.append(_seal(doc, variable, repetition))

    return configs
Example #16
0
def generate_random_search(budget, fidelity, search_space, seeds):
    """Build one random-search config per seed, each with its own uid."""
    configs = []
    for seed in seeds:
        config = {
            'name': 'random_search',
            'seed': seed,
            'pool_size': 20,
            'namespace': f'random-search-s-{seed}',
            'count': budget,
            'fidelity': fidelity,
            'space': search_space,
        }
        # uid hashes everything above; added last so it is not part of
        # its own hash.
        config['uid'] = compute_identity(config, IDENTITY_SIZE)
        configs.append(config)

    return configs
Example #17
0
def generate_hpos(seeds, hpos, budget, fidelity, search_space, namespace, defaults):
    """Build, namespace and re-identify configs for every requested HPO.

    Returns a dict of dicts: ``{hpo_name: {full_namespace: config}}``.
    """
    configs = dict()
    for hpo in hpos:
        configs[hpo] = dict()
        for config in generate_hpo_configs[hpo](budget, fidelity,
                                                search_space, seeds):
            config['namespace'] = env(namespace, config['namespace'])
            config['defaults'] = copy.deepcopy(defaults)
            # Replace the generator's uid with one that also covers the
            # full namespace and the defaults.
            config.pop('uid')
            config['uid'] = compute_identity(config, IDENTITY_SIZE)
            configs[hpo][config['namespace']] = config

    return configs
Example #18
0
def generate_noisy_grid_search(budget, fidelity, search_space, seeds):
    """Rebrand grid-search configs as seeded noisy-grid-search runs."""
    configs = []
    for seed in seeds:
        for config in generate_grid_search(budget, fidelity, search_space, []):
            # 'name' and 'seed' already exist in the grid config, so they
            # are overwritten in place; 'count' is appended.
            config.update(name='noisy_grid_search', seed=seed, count=budget)
            config['namespace'] = (
                f'noisy-grid-search-p-{config["n_points"]}-s-{seed}')
            # Re-hash without the inherited grid-search uid.
            config.pop('uid')
            config['uid'] = compute_identity(config, IDENTITY_SIZE)
            configs.append(config)

    return configs
Example #19
0
def limit_to_var(configs, ref_config, var):
    """Copy configs, pin all variables to ref_config, keep `var` varying."""
    limited = []
    for original in configs:
        # Start from the config's own HPs ...
        config = copy.deepcopy(original)
        # ... overwrite every variable with the reference values ...
        config.update(ref_config)
        # ... then restore the single variable under study.
        config[var] = original[var]
        # The uid must reflect the edited content, not the old one.
        config.pop('uid', None)
        config['uid'] = compute_identity(config, IDENTITY_SIZE)
        limited.append(config)

    return limited
Example #20
0
def generate_grid_search(budget, fidelity, search_space, seeds):
    """Build the single grid-search config covering at least `budget` points.

    `seeds` is accepted for signature parity with the other generators but
    grid search is deterministic, so it is unused.
    """
    dim = len(search_space)
    # Smallest per-dimension resolution whose full grid reaches the budget.
    n_points = 2
    while n_points ** dim < budget:
        n_points += 1

    config = {
        'name': 'grid_search',
        'n_points': n_points,
        'seed': 1,
        'pool_size': 20,
        'namespace': f'grid-search-p-{n_points}',
        'space': search_space,
        'fidelity': fidelity,
    }
    config['uid'] = compute_identity(config, IDENTITY_SIZE)

    return [config]
Example #21
0
    def create_and_register_new_point(self, point, **variables):
        """Turn a raw optimizer point into a registered Trial sample."""
        sample = dict()
        for index, (name, dim) in enumerate(self.orion_space.items()):
            value = point[index]
            if dim.prior_name == 'reciprocal':
                # Reciprocal dims are optimized in log space; map back.
                value = numpy.exp(value)
            sample[name] = value

        # Caller-fixed variables override/extend the optimized dimensions.
        sample.update(variables)
        sample = unflatten(sample)
        sample[self.identity] = compute_identity(sample,
                                                 self.space._identity_size)

        trial = Trial(sample)
        self.trials[sample[self.identity]] = trial

        return sample, trial
Example #22
0
def test_generate_simulated_replicates():
    """Replicates keep HPs verbatim but re-draw every variable."""
    num_replicates = 10
    variables = ['d', 'e']
    fake_simulated_replicates = []
    for i in range(num_replicates):
        replicate = {'a': i, 'b': i, 'c': i, 'd': 1, 'e': 1}
        replicate['uid'] = compute_identity(replicate, IDENTITY_SIZE)
        fake_simulated_replicates.append(replicate)

    replicates = generate_simulated_replicates(fake_simulated_replicates,
                                               {'seed': 1}, variables)

    assert len(replicates) == num_replicates
    for fix_replicate, var_replicate in zip(fake_simulated_replicates,
                                            replicates):
        # Hyper-parameters must be carried over unchanged ...
        for key in ('a', 'b', 'c'):
            assert fix_replicate[key] == var_replicate[key]
        # ... while every variable must have been re-drawn.
        for key in variables:
            assert fix_replicate[key] != var_replicate[key]
Example #23
0
def test_generate_random_search():
    """generate_random_search: one config per seed with a valid uid."""
    budget = 200
    fidelity = 'fidelity(1, 10)'
    num_experiments = 10
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'lr': 'uniform(-1, 1)'
    }
    configs = generate_random_search(budget, fidelity, search_space,
                                     range(num_experiments))

    assert len(configs) == num_experiments

    for seed, config in enumerate(configs):
        assert config['name'] == 'random_search'
        assert config['space'] == search_space
        assert config['seed'] == seed
        assert config['namespace'] == f'random-search-s-{seed}'
        # uid must hash the config without its own uid entry.
        assert config.pop('uid') == compute_identity(config, 16)
Example #24
0
def test_generate_grid_search():
    """grid search ignores seeds; budget 200 over 4 dims needs 4 points."""
    budget = 200
    fidelity = 'fidelity(1, 10)'
    num_experiments = 10
    search_space = {
        'a': 'uniform(-1, 1)',
        'b': 'uniform(-1, 1)',
        'c': 'uniform(-1, 1)',
        'd': 'uniform(-1, 1)'
    }
    configs = generate_grid_search(budget, fidelity, search_space,
                                   range(num_experiments))

    # A single deterministic config regardless of how many seeds were given.
    assert len(configs) == 1

    config = configs[0]
    assert config['name'] == 'grid_search'
    # 4 points per dim: 4**4 = 256 >= 200 while 3**4 = 81 < 200.
    assert config['n_points'] == 4
    assert config['space'] == search_space
    assert config['seed'] == 1
    assert config['namespace'] == 'grid-search-p-4'
    assert config.pop('uid') == compute_identity(config, 16)
Example #25
0
def build_data(budget, variables, defaults, space):
    """Build a synthetic xarray of training curves for `budget` trials.

    Objectives are a deterministic shuffle of 0..budget*(epochs+1)-1 so
    tests can rely on exact values; trial params are sampled from `space`
    with a fixed seed.  (Removed the unused `n_vars` and `data` locals and
    the dead commented-out sample values.)
    """
    epochs = 5

    # Deterministic but scrambled objectives, shaped (epoch, trial, metric).
    objectives = numpy.arange(budget * (epochs + 1))
    numpy.random.RandomState(0).shuffle(objectives)
    objectives = objectives.reshape((epochs + 1, budget, 1))

    params = Space.from_dict(space).sample(budget, seed=1)

    trials = OrderedDict()
    for trial_params in params:
        # Variables first, then defaults, then the sampled params on top.
        config = copy.deepcopy(
            dict(list(variables.items()) + list(defaults.items())))
        config.update(trial_params)
        config['uid'] = compute_identity(config, IDENTITY_SIZE)
        # NOTE: We don't need objectives on the Trial itself.
        trials[config['uid']] = Trial(config)

    # One metric series per trial: epoch index plus its scrambled objective.
    metrics = dict()
    for trial_i, trial_uid in enumerate(trials.keys()):
        metrics[trial_uid] = [{
            'epoch': i,
            'obj': objectives[i, trial_i, 0]
        } for i in range(epochs + 1)]

    param_names = list(sorted(space.keys()))

    return create_valid_curves_xarray(trials,
                                      metrics,
                                      sorted(variables.keys()),
                                      epochs,
                                      param_names,
                                      seed=1)
Example #26
0
def test_randomize_seeds():
    """randomize_seeds draws per-variable seeds in config order and only
    recomputes uids when compute_id=True.

    NOTE(review): both assertion loops replay a fresh rng in the exact
    order randomize_seeds consumes it (config order, then variable order)
    -- do not reorder them.
    """
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = ['d', 'e']
    defaults = {}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(list(range(num_experiments)), [hpo],
                            budget=200,
                            fidelity=fidelity,
                            search_space=space,
                            namespace=NAMESPACE,
                            defaults=defaults)

    # First pass: seeds are drawn AND a uid is computed for each config.
    randomize_seeds(configs['random_search'], variables, seed, compute_id=True)

    rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for variable in variables:
            assert config['defaults'][variable] == rng.randint(2**30)
        # uid must hash the defaults without the uid entry itself.
        uid = config['defaults'].pop('uid')
        assert uid == compute_identity(config['defaults'], IDENTITY_SIZE)

    # Second pass with compute_id=False: identical seeds are re-drawn, but
    # no uid is added (the previous uid was popped above).
    randomize_seeds(configs['random_search'],
                    variables,
                    seed,
                    compute_id=False)
    rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for variable in variables:
            assert config['defaults'][variable] == rng.randint(2**30)
        assert 'uid' not in config['defaults']
Example #27
0
def generate_bayesopt(budget, fidelity, search_space, seeds):
    """Build one RoBO bayes-opt config per seed.

    Four independent sub-seeds (model, prior, init, maximizer) are drawn
    in a fixed order from a per-seed rng so configs are reproducible.
    """
    configs = []
    for seed in seeds:
        rng = numpy.random.RandomState(seed)
        config = {
            'name': 'robo',
            'model_type': 'gp_mcmc',
            'maximizer': 'random',
            'n_init': 20,
            'count': budget,
            'acquisition_func': 'log_ei',
            # Draw order matters: tests replay the same rng sequence.
            'model_seed': rng.randint(2**30),
            'prior_seed': rng.randint(2**30),
            'init_seed': rng.randint(2**30),
            'maximizer_seed': rng.randint(2**30),
            'fidelity': fidelity,
            'namespace': f'bayesopt-s-{seed}',
            'space': search_space,
        }
        # uid hashes everything above; added last.
        config['uid'] = compute_identity(config, IDENTITY_SIZE)
        configs.append(config)

    return configs
Example #28
0
    def sample(self, count=1, **variables):
        """Take the next `count` grid points and register them as trials."""
        # Resume the grid where previous registrations left off.
        start = len(self.trials)

        samples = []
        for point in self.grid[start:start + count]:
            # Grid point -> named dict, plus the caller's fixed variables.
            sample = dict(zip(self.orion_space.keys(), point))
            sample.update(variables)
            sample = unflatten(sample)
            sample[self.identity] = compute_identity(
                sample, self.space._identity_size)
            samples.append(sample)

        self.seed_time += 1

        # Register every sample as a Trial before announcing them.
        trials = []
        for sample in samples:
            trial = Trial(sample)
            trials.append(trial)
            self.trials[sample[self.identity]] = trial

        self.new_trials(trials)

        return samples
Example #29
0
def reset_pool_size(configs):
    """Disable pooling on every config and refresh the uids in place."""
    for config in configs.values():
        config['pool_size'] = None
        # The new uid must not include the previous uid value.
        config.pop('uid', None)
        config['uid'] = compute_identity(config, IDENTITY_SIZE)
Example #30
0
def main():
    """Run the finance baseline on a fixed 30-ticker portfolio and return
    the final validation loss."""
    from sspace.space import compute_identity

    args = arguments()
    # 30 fixed tickers; the comment row numbers groups of ten.
    tickers = [
        # 1     2      3     4      5       6     7     8      9    10
        'MO',
        'AEP',
        'BA',
        'BMY',
        'CPB',
        'CAT',
        'CVX',
        'KO',
        'CL',
        'COP',  # 1
        'ED',
        'CVS',
        'DHI',
        'DHR',
        'DRI',
        'DE',
        'D',
        'DTE',
        'ETN',
        'EBAY',  # 2
        'F',
        'BEN',
        'HSY',
        'HBAN',
        'IBM',
        'K',
        'GIS',
        'MSI',
        'NSC',
        'TXN'
    ]
    start, end = '2000-01-01', '2019-05-10'

    device = fetch_device()

    task = finance_baseline(tickers, start, end, args.optimizer,
                            args.batch_size, device, args.window)

    # The uid covers the experiment inputs so distinct runs (different
    # window/epochs/lr) get distinct identities.
    lr = 1e-8
    uid = compute_identity(
        dict(tickers=tickers,
             start=start,
             end=end,
             window=args.window,
             lr=lr,
             epochs=args.epochs), 16)

    if args.uri is not None:
        # Stream metrics to the configured backend, namespaced by uid.
        logger = metric_logger(args.uri, args.database,
                               f'{DEFAULT_EXP_NAME}_{uid}')
        task.metrics.append(logger)

    if args.storage is not None:
        # NOTE(review): the checkpoint folder comes from the
        # 'state.storage' option, not from args.storage itself -- confirm
        # this is intentional.
        storage = StateStorage(
            folder=option('state.storage', '/home/setepenre/zshare/tmp'))
        task.metrics.append(
            CheckPointer(storage=storage,
                         time_buffer=5,
                         keep_best='validation_loss',
                         save_init=True))

    # Override the optimizer's default learning rate with the fixed lr.
    optimizer = task.optimizer.defaults
    optimizer['lr'] = lr

    task.init(optimizer=optimizer, uid=uid)
    task.fit(args.epochs)

    stats = task.metrics.value()
    print(stats)
    return float(stats['validation_loss'])