Example 1
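Random search baseline run through HpBandSter's nameserver/worker machinery; each run dumps its options to YAML and its results to a per-run CSV. A minimal import preamble for this sketch (HpBandSter paths are standard; `make_benchmark`, `make_name`, `BenchmarkWorker`, and `HpBandSterLogs` are assumed project-local helpers, not library code):

from pathlib import Path

import hpbandster.core.nameserver as hpns
import yaml
from hpbandster.optimizers import RandomSearch

# Assumed project-local helpers (import from wherever they live in this codebase):
# make_benchmark, make_name, BenchmarkWorker, HpBandSterLogs
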
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations, eta, min_budget, max_budget, input_dir,
         output_dir):

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict(eta=eta, min_budget=min_budget, max_budget=max_budget)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    for run_id in range(run_start, num_runs):

        NS = hpns.NameServer(run_id=run_id, host='localhost', port=0)
        ns_host, ns_port = NS.start()

        num_workers = 1

        workers = []
        for worker_id in range(num_workers):
            w = BenchmarkWorker(benchmark=benchmark,
                                nameserver=ns_host,
                                nameserver_port=ns_port,
                                run_id=run_id,
                                id=worker_id)
            w.run(background=True)
            workers.append(w)

        rs = RandomSearch(configspace=benchmark.get_config_space(),
                          run_id=run_id,
                          nameserver=ns_host,
                          nameserver_port=ns_port,
                          ping_interval=10,
                          **options)

        results = rs.run(num_iterations, min_n_workers=num_workers)

        rs.shutdown(shutdown_workers=True)
        NS.shutdown()

        data = HpBandSterLogs(results).to_frame()
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
Example 2
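The same harness driven by SMAC3's SMAC4HPO facade. Import sketch (module paths as in SMAC3 1.x; `make_benchmark`, `make_name`, and `SMACLogs` are assumed project-local):

from pathlib import Path

import numpy as np
import yaml
from smac.facade.smac_hpo_facade import SMAC4HPO
from smac.runhistory.runhistory import RunHistory
from smac.scenario.scenario import Scenario

# Assumed project-local helpers: make_benchmark, make_name, SMACLogs
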
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations, input_dir, output_dir):

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict()
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    def objective(config, seed):
        return benchmark.evaluate(config).value

    for run_id in range(run_start, num_runs):

        random_state = np.random.RandomState(run_id)
        scenario = Scenario({
            "run_obj": "quality",
            "runcount-limit": num_iterations,
            "cs": benchmark.get_config_space(),
            "deterministic": "true",
            "output_dir": "foo/"
        })
        run_history = RunHistory()

        smac = SMAC4HPO(scenario=scenario,
                        tae_runner=objective,
                        runhistory=run_history,
                        rng=random_state)
        smac.optimize()

        data = SMACLogs(run_history).to_frame()
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
Example 3
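Tree-structured Parzen Estimator (TPE) via Hyperopt. Import sketch (`make_benchmark`, `make_name`, and `HyperOptLogs` are assumed project-local):

from pathlib import Path

import yaml
from hyperopt import STATUS_OK, Trials, fmin, tpe

# Assumed project-local helpers: make_benchmark, make_name, HyperOptLogs
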
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations, input_dir, output_dir):

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    space = benchmark.get_search_space()
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict()
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    def objective(kws):
        evaluation = benchmark.evaluate(kws)
        return dict(loss=evaluation.value,
                    status=STATUS_OK,
                    info=evaluation.duration)

    for run_id in range(run_start, num_runs):

        trials = Trials()
        best = fmin(objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=num_iterations,
                    trials=trials)

        data = HyperOptLogs(trials).to_frame()
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
Example 4
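Batch Bayesian optimization with BoTorch: propose q points per iteration with q-EI or q-KG and record adjusted wall-clock timings per evaluation. Import sketch, assuming a BoTorch 0.x-era API (where `fit_gpytorch_model` and the sampler's `num_samples` argument exist, as used below); `DenseConfigurationSpace`, `create_bounds`, and `dict_from_tensor` are assumed project-local:

from datetime import datetime
from pathlib import Path

import pandas as pd
import torch
import yaml
from botorch.acquisition import qExpectedImprovement, qKnowledgeGradient
from botorch.fit import fit_gpytorch_model
from botorch.models import FixedNoiseGP
from botorch.optim import optimize_acqf
from botorch.sampling import SobolQMCNormalSampler
from botorch.utils.transforms import standardize
from gpytorch.mlls import ExactMarginalLogLikelihood
from tqdm import trange

# Assumed project-local helpers: make_benchmark, make_name,
# DenseConfigurationSpace, create_bounds, dict_from_tensor
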
def main(
        benchmark_name,
        dataset_name,
        dimensions,
        method_name,
        num_runs,
        run_start,
        num_iterations,
        acquisition_name,
        # acquisition_optimizer_name,
        gamma,
        num_random_init,
        mc_samples,
        batch_size,
        num_fantasies,
        num_restarts,
        raw_samples,
        noise_variance_init,
        # use_ard,
        # use_input_warping,
        standardize_targets,
        input_dir,
        output_dir):

    # TODO(LT): Turn into options
    # device = "cpu"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.double

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict(gamma=gamma,
                   num_random_init=num_random_init,
                   acquisition_name=acquisition_name,
                   mc_samples=mc_samples,
                   batch_size=batch_size,
                   num_restarts=num_restarts,
                   raw_samples=raw_samples,
                   num_fantasies=num_fantasies,
                   noise_variance_init=noise_variance_init,
                   standardize_targets=standardize_targets)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    config_space = DenseConfigurationSpace(benchmark.get_config_space())
    bounds = create_bounds(config_space.get_bounds(),
                           device=device,
                           dtype=dtype)
    input_dim = config_space.get_dimensions()

    def func(tensor, *args, **kwargs):
        """
        Wrapper that receives and returns torch.Tensor
        """
        config = dict_from_tensor(tensor, cs=config_space)
        # turn into maximization problem
        res = -benchmark.evaluate(config).value
        return torch.tensor(res, device=device, dtype=dtype)

    for run_id in trange(run_start, num_runs, unit="run"):

        run_begin_t = batch_end_t_adj = batch_end_t = datetime.now()

        frames = []

        features = []
        targets = []

        noise_variance = torch.tensor(noise_variance_init,
                                      device=device,
                                      dtype=dtype)
        state_dict = None

        with trange(num_iterations) as iterations:

            for batch in iterations:

                if len(targets) < num_random_init:
                    # click.echo(f"Completed {i}/{num_random_init} initial runs. "
                    #            "Suggesting random candidate...")
                    # TODO(LT): support random seed
                    X_batch = torch.rand(size=(batch_size, input_dim),
                                         device=device,
                                         dtype=dtype)
                else:

                    # construct dataset
                    X = torch.vstack(features)
                    y = torch.hstack(targets).unsqueeze(dim=-1)
                    y = standardize(y) if standardize_targets else y

                    # construct model
                    # model = FixedNoiseGP(X, standardize(y), noise_variance.expand_as(y),
                    model = FixedNoiseGP(X,
                                         y,
                                         noise_variance.expand_as(y),
                                         input_transform=None).to(X)
                    mll = ExactMarginalLogLikelihood(model.likelihood, model)

                    if state_dict is not None:
                        model.load_state_dict(state_dict)

                    # update model
                    fit_gpytorch_model(mll)

                    # construct acquisition function
                    tau = torch.quantile(y, q=1 - gamma)
                    iterations.set_postfix(tau=tau.item())

                    if acquisition_name == "q-KG":
                        assert num_fantasies is not None and num_fantasies > 0
                        acq = qKnowledgeGradient(model,
                                                 num_fantasies=num_fantasies)
                    elif acquisition_name == "q-EI":
                        assert mc_samples is not None and mc_samples > 0
                        qmc_sampler = SobolQMCNormalSampler(
                            num_samples=mc_samples)
                        acq = qExpectedImprovement(model=model,
                                                   best_f=tau,
                                                   sampler=qmc_sampler)

                    # optimize acquisition function (the second return value,
                    # the acquisition values of the candidates, is unused here)
                    X_batch, _ = optimize_acqf(acq_function=acq,
                                               bounds=bounds,
                                               q=batch_size,
                                               num_restarts=num_restarts,
                                               raw_samples=raw_samples,
                                               options=dict(batch_limit=5,
                                                            maxiter=200))

                    state_dict = model.state_dict()

                # begin batch evaluation; the time elapsed since the previous
                # batch finished is the decision overhead (model fitting plus
                # acquisition optimization)
                batch_begin_t = datetime.now()
                decision_duration = batch_begin_t - batch_end_t
                batch_begin_t_adj = batch_end_t_adj + decision_duration

                eval_end_times = []

                # TODO(LT): Deliberately not doing broadcasting for now since
                # batch sizes are so small anyway. Can revisit later if there
                # is a compelling reason to do it.
                rows = []
                for j, x_next in enumerate(X_batch):

                    # eval begin time
                    eval_begin_t = datetime.now()

                    # evaluate blackbox objective
                    y_next = func(x_next)

                    # eval end time
                    eval_end_t = datetime.now()

                    # eval duration
                    eval_duration = eval_end_t - eval_begin_t

                    # adjusted eval end time is the duration added to the
                    # time at which batch eval was started
                    eval_end_t_adj = batch_begin_t_adj + eval_duration

                    eval_end_times.append(eval_end_t_adj)
                    elapsed = eval_end_t_adj - run_begin_t

                    # update dataset
                    features.append(x_next)
                    targets.append(y_next)

                    row = dict_from_tensor(x_next, cs=config_space)
                    row["loss"] = -y_next.item()
                    row["cost_eval"] = eval_duration.total_seconds()
                    row["finished"] = elapsed.total_seconds()
                    rows.append(row)

                batch_end_t = datetime.now()
                batch_end_t_adj = max(eval_end_times)

                frame = pd.DataFrame(data=rows) \
                          .assign(batch=batch,
                                  cost_decision=decision_duration.total_seconds())
                frames.append(frame)

        data = pd.concat(frames, axis="index", ignore_index=True)
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
Example 5
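A BORE-style loop in the flat (non-multi-fidelity) setting: label observations by the gamma-quantile split, fit a classifier to that binary problem, and maximize its output to propose the next batch. Import sketch (scipy >= 1.7 for LatinHypercube; all other undefined names are assumed project-local):

from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import yaml
from scipy.stats.qmc import LatinHypercube
from tqdm import trange

# Assumed project-local helpers: make_benchmark, make_name,
# DenseConfigurationSpace, create_model, Record, dict_from_array,
# ceil_divide, steps_per_epoch
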
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations, eta, min_budget, max_budget, gamma,
         num_random_init, init_method, batch_size,
         batch_size_training, num_steps_per_iter, num_epochs, optimizer,
         num_layers, num_units, activation, transform, max_iter,
         step_size, length_scale, tau, lambd, input_dir, output_dir):

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict(eta=eta, min_budget=min_budget, max_budget=max_budget,
                   gamma=gamma, num_random_init=num_random_init,
                   batch_size=batch_size,
                   batch_size_training=batch_size_training,
                   num_steps_per_iter=num_steps_per_iter,
                   num_epochs=num_epochs, optimizer=optimizer,
                   num_layers=num_layers, num_units=num_units,
                   activation=activation, transform=transform,
                   max_iter=max_iter, step_size=step_size, tau=tau,
                   lambd=lambd, length_scale=length_scale,
                   init_method=init_method)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    # number of initial design points, rounded up to a whole number of batches
    num_init_points = ceil_divide(num_random_init, batch_size) * batch_size

    for run_id in trange(run_start, num_runs, unit="run"):

        run_begin_t = batch_end_t_adj = batch_end_t = datetime.now()

        frames = []

        random_state = np.random.RandomState(seed=run_id)

        config_space = DenseConfigurationSpace(benchmark.get_config_space(),
                                               seed=run_id)
        input_dim = config_space.get_dimensions(sparse=False)
        bounds = config_space.get_bounds()

        model = create_model(input_dim, num_units, num_layers, activation,
                             optimizer)

        record = Record()

        if init_method == "latin_hypercube":
            sampler = LatinHypercube(d=input_dim, seed=run_id)
            X_init = (bounds.ub - bounds.lb) * sampler.random(num_init_points) + bounds.lb
        else:
            X_init = random_state.uniform(low=bounds.lb,
                                          high=bounds.ub,
                                          size=(num_init_points, input_dim))

        with trange(num_iterations) as iterations:

            for batch in iterations:

                if record.size() < num_random_init:
                    # config_batch = config_space.sample_configuration(size=batch_size)
                    # X_batch = [config.to_array() for config in config_batch]
                    a = batch * batch_size
                    b = a + batch_size
                    X_batch = X_init[a:b]
                else:
                    # construct binary classification problem
                    X, z = record.load_classification_data(gamma)

                    if num_epochs is None:
                        # no fixed epoch count given: derive it from the
                        # per-iteration step budget (recomputed each iteration,
                        # since the dataset keeps growing)
                        dataset_size = record.size()
                        num_steps = steps_per_epoch(batch_size_training, dataset_size)
                        epochs = num_steps_per_iter // num_steps
                    else:
                        epochs = num_epochs

                    # update classifier
                    model.fit(X, z, epochs=epochs,
                              batch_size=batch_size_training, verbose=False)

                    X_batch = model.argmax_batch(batch_size=batch_size,
                                                 n_iter=max_iter,
                                                 length_scale=length_scale,
                                                 step_size=step_size,
                                                 bounds=bounds,
                                                 tau=tau, lambd=lambd,
                                                 # print_fn=click.echo,
                                                 random_state=random_state)

                # begin batch evaluation; the time elapsed since the previous
                # batch finished is the decision overhead (model fitting plus
                # candidate selection)
                batch_begin_t = datetime.now()
                decision_duration = batch_begin_t - batch_end_t
                batch_begin_t_adj = batch_end_t_adj + decision_duration

                eval_end_times = []

                # TODO(LT): Deliberately not doing broadcasting for now since
                # batch sizes are so small anyway. Can revisit later if there
                # is a compelling reason to do it.
                rows = []
                for j, x_next in enumerate(X_batch):

                    config = dict_from_array(config_space, x_next)

                    # eval begin time
                    eval_begin_t = datetime.now()

                    # evaluate blackbox objective
                    y_next = benchmark.evaluate(config).value

                    # eval end time
                    eval_end_t = datetime.now()

                    # eval duration
                    eval_duration = eval_end_t - eval_begin_t

                    # adjusted eval end time is the duration added to the
                    # time at which batch eval was started
                    eval_end_t_adj = batch_begin_t_adj + eval_duration

                    eval_end_times.append(eval_end_t_adj)
                    elapsed = eval_end_t_adj - run_begin_t

                    # update dataset
                    record.append(x=x_next, y=y_next)

                    row = dict(config)
                    row["loss"] = y_next
                    row["cost_eval"] = eval_duration.total_seconds()
                    row["finished"] = elapsed.total_seconds()
                    rows.append(row)

                batch_end_t = datetime.now()
                batch_end_t_adj = max(eval_end_times)

                frame = pd.DataFrame(data=rows) \
                          .assign(batch=batch,
                                  cost_decision=decision_duration.total_seconds())
                frames.append(frame)

        data = pd.concat(frames, axis="index", ignore_index=True)
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
Example 6
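BORE run through the same HpBandSter scaffolding as Example 1, with the BORE optimizer swapped in for random search. Import sketch (the import path for the hpbandster-compatible BORE optimizer is an assumption; check your installation):

from pathlib import Path

import hpbandster.core.nameserver as hpns
import yaml

# Assumed import path for the hpbandster-compatible optimizer:
# from bore.plugins.hpbandster import BORE
# Assumed project-local helpers: make_benchmark, make_name,
# BenchmarkWorker, HpBandSterLogs
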
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations, eta, min_budget, max_budget, gamma,
         num_random_init, random_rate, retrain, num_starts, num_samples,
         batch_size, num_steps_per_iter, num_epochs_per_iter, optimizer,
         num_layers, num_units, activation, l2_factor, transform, method,
         max_iter, ftol, distortion, input_dir, output_dir):

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict(eta=eta,
                   min_budget=min_budget,
                   max_budget=max_budget,
                   gamma=gamma,
                   num_random_init=num_random_init,
                   random_rate=random_rate,
                   retrain=retrain,
                   num_starts=num_starts,
                   num_samples=num_samples,
                   batch_size=batch_size,
                   num_steps_per_iter=num_steps_per_iter,
                   num_epochs_per_iter=num_epochs_per_iter,
                   optimizer=optimizer,
                   num_layers=num_layers,
                   num_units=num_units,
                   activation=activation,
                   l2_factor=l2_factor,
                   transform=transform,
                   method=method,
                   max_iter=max_iter,
                   ftol=ftol,
                   distortion=distortion)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    for run_id in range(run_start, num_runs):

        NS = hpns.NameServer(run_id=run_id, host='localhost', port=0)
        ns_host, ns_port = NS.start()

        num_workers = 1

        workers = []
        for worker_id in range(num_workers):
            w = BenchmarkWorker(benchmark=benchmark,
                                nameserver=ns_host,
                                nameserver_port=ns_port,
                                run_id=run_id,
                                id=worker_id)
            w.run(background=True)
            workers.append(w)

        bore = BORE(config_space=benchmark.get_config_space(),
                    run_id=run_id,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    ping_interval=10,
                    seed=run_id,
                    **options)

        results = bore.run(num_iterations, min_n_workers=num_workers)

        bore.shutdown(shutdown_workers=True)
        NS.shutdown()

        data = HpBandSterLogs(results).to_frame()
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
Example 7
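Sequential (one point per iteration) BoTorch Bayesian optimization with analytic Expected Improvement; otherwise the same scaffolding and API-version assumptions as Example 4, with `DenseConfigurationSpace`, `create_bounds`, and `dict_from_tensor` again assumed project-local:

from datetime import datetime
from pathlib import Path

import pandas as pd
import torch
import yaml
from botorch.acquisition import ExpectedImprovement
from botorch.fit import fit_gpytorch_model
from botorch.models import FixedNoiseGP
from botorch.optim import optimize_acqf
from botorch.utils.transforms import standardize
from gpytorch.mlls import ExactMarginalLogLikelihood
from tqdm import trange

# Assumed project-local helpers: make_benchmark, make_name,
# DenseConfigurationSpace, create_bounds, dict_from_tensor
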
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations,
         # acquisition_name,
         # acquisition_optimizer_name,
         gamma, num_random_init,
         num_restarts, raw_samples, noise_variance_init,
         # use_ard,
         # use_input_warping,
         standardize_targets,
         input_dir, output_dir):

    # TODO(LT): Turn into options
    # device = "cpu"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.double

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict(gamma=gamma, num_random_init=num_random_init,
                   num_restarts=num_restarts, raw_samples=raw_samples,
                   noise_variance_init=noise_variance_init,
                   standardize_targets=standardize_targets)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    config_space = DenseConfigurationSpace(benchmark.get_config_space())
    bounds = create_bounds(config_space.get_bounds(), device=device, dtype=dtype)
    input_dim = config_space.get_dimensions()

    def func(tensor, *args, **kwargs):
        """
        Wrapper that receives and returns torch.Tensor
        """
        config = dict_from_tensor(tensor, cs=config_space)
        # turn into maximization problem
        res = - benchmark.evaluate(config).value
        return torch.tensor(res, device=device, dtype=dtype)

    for run_id in trange(run_start, num_runs, unit="run"):

        t_start = datetime.now()

        rows = []

        features = []
        targets = []

        noise_variance = torch.tensor(noise_variance_init, device=device, dtype=dtype)
        state_dict = None

        with trange(num_iterations) as iterations:

            for i in iterations:

                if len(targets) < num_random_init:
                    # click.echo(f"Completed {i}/{num_random_init} initial runs. "
                    #            "Suggesting random candidate...")
                    # TODO(LT): support random seed
                    x_new = torch.rand(size=(input_dim,), device=device, dtype=dtype)
                else:

                    # construct dataset
                    X = torch.vstack(features)
                    y = torch.hstack(targets).unsqueeze(dim=-1)
                    y = standardize(y) if standardize_targets else y

                    # construct model
                    # model = FixedNoiseGP(X, standardize(y), noise_variance.expand_as(y),
                    model = FixedNoiseGP(X, y, noise_variance.expand_as(y),
                                         input_transform=None).to(X)
                    mll = ExactMarginalLogLikelihood(model.likelihood, model)

                    if state_dict is not None:
                        model.load_state_dict(state_dict)

                    # update model
                    fit_gpytorch_model(mll)

                    # construct acquisition function
                    tau = torch.quantile(y, q=1-gamma)
                    iterations.set_postfix(tau=tau.item())

                    ei = ExpectedImprovement(model=model, best_f=tau)

                    # optimize acquisition function (the second return value,
                    # the acquisition value of the candidate, is unused here)
                    X_batch, _ = optimize_acqf(acq_function=ei, bounds=bounds,
                                               q=1,
                                               num_restarts=num_restarts,
                                               raw_samples=raw_samples,
                                               options=dict(batch_limit=5,
                                                            maxiter=200))
                    x_new = X_batch.squeeze(dim=0)

                    state_dict = model.state_dict()

                # evaluate blackbox objective
                y_new = func(x_new)
                t1 = datetime.now()

                delta = t1 - t_start

                # update dataset
                features.append(x_new)
                targets.append(y_new)

                row = dict_from_tensor(x_new, cs=config_space)
                row["loss"] = - y_new.item()
                row["finished"] = delta.total_seconds()
                rows.append(row)

        data = pd.DataFrame(data=rows)
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
Example 8
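GP-based Bayesian optimization via GPyOpt with local-penalization batch selection. Import sketch (`make_benchmark`, `make_name`, `dict_from_array`, and `ceil_divide` are assumed project-local; see the note below on the non-standard `cost_*` attributes):

from pathlib import Path

import GPyOpt
import numpy as np
import pandas as pd
import yaml
from tqdm import trange

# Assumed project-local helpers: make_benchmark, make_name,
# dict_from_array, ceil_divide
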
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations, acquisition_name,
         acquisition_optimizer_name, num_random_init, batch_size, use_ard,
         use_input_warping, standardize_targets, input_dir, output_dir):

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict(batch_size=batch_size,
                   acquisition_name=acquisition_name,
                   acquisition_optimizer_name=acquisition_optimizer_name,
                   use_ard=use_ard,
                   use_input_warping=use_input_warping,
                   standardize_targets=standardize_targets)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    space = benchmark.get_domain()
    config_space = benchmark.get_config_space()

    def func(array, *args, **kwargs):
        kws = dict_from_array(array, cs=config_space)
        return benchmark.evaluate(kws).value

    model_type = "input_warped_GP" if use_input_warping else "GP"

    initial_design_numdata = ceil_divide(num_random_init,
                                         batch_size) * batch_size

    for run_id in trange(run_start, num_runs, unit="run"):

        BO = GPyOpt.methods.BayesianOptimization(
            f=func,
            domain=space,
            initial_design_numdata=initial_design_numdata,
            model_type=model_type,
            ARD=use_ard,
            normalize_Y=standardize_targets,
            exact_feval=False,
            acquisition_type=acquisition_name,
            acquisition_optimizer_type=acquisition_optimizer_name,
            batch_size=batch_size,
            evaluator_type="local_penalization")
        BO.run_optimization(
            num_iterations,
            # evaluations_file="bar/evaluations.csv",
            # models_file="bar/models.csv",
            verbosity=True)

        # NOTE: `cost_decision` and `cost_eval` are not attributes of stock
        # GPyOpt; this assumes a build patched to record per-batch decision
        # times and per-point evaluation times.
        cost_decision_arr = np.hstack(BO.cost_decision)
        cost_eval_arr = np.hstack(BO.cost_eval)

        data = pd.DataFrame(data=BO.X, columns=[d["name"] for d in space]) \
                 .assign(loss=BO.Y, batch=lambda df: df.index // batch_size,
                         cost_decision=lambda df: cost_decision_arr[df.batch],
                         cost_eval=cost_eval_arr)
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0