def test_run_with_noise(random_state):
    def func(x):
        return (np.sin(x) + random_state.randn()).item(), 1.0

    opt = Optimizer(dimensions=[(-2.0, 2.0)], n_initial_points=1)
    opt.run(func, n_iter=2, n_samples=1, gp_burnin=0)
    assert_almost_equal(opt.gp.alpha, np.ones(2))
def test_probability_of_optimality(random_state, input, expected):
    opt = Optimizer(
        dimensions=[(-2.0, 2.0)], n_initial_points=0, random_state=random_state
    )
    opt.tell(
        [[-2.0], [-1.0], [0.0], [1.0], [2.0]],
        [2.0, 0.0, -2.0, 0.0, 2.0],
        gp_burnin=10,
    )
    prob = opt.probability_of_optimality(
        threshold=input["threshold"],
        n_random_starts=20,
        random_state=random_state,
        normalized_scores=input["normalized_scores"],
    )
    np.testing.assert_almost_equal(prob, expected, decimal=2)
def test_initial_points_countdown():
    # Variant without an explicit init_strategy (cf. test_initial_points below).
    opt = Optimizer(dimensions=[(-2.0, 2.0)], n_initial_points=5)
    x = opt.ask()
    opt.tell([x], [0.0])
    assert opt._n_initial_points == opt.n_initial_points_ - 1
    opt.tell([x], [0.0])
    assert opt._n_initial_points == opt.n_initial_points_ - 2
    opt.tell([[0.1], [0.2], [0.3]], [0.0, 0.1, 0.2], replace=True)
    assert opt._n_initial_points == opt.n_initial_points_ - 3
def _make_optimizer(self, params_space):
    """Instantiate bask Optimizer class.

    Parameters
    ----------
    params_space : dict
        Represents parameter search space. The keys are parameter
        names (strings) and values are skopt.space.Dimension instances,
        one of Real, Integer or Categorical.

    Returns
    -------
    optimizer : Instance of the `Optimizer` class used for search in
        some parameter space.
    """
    kwargs = self.optimizer_kwargs_.copy()
    kwargs["dimensions"] = dimensions_aslist(params_space)
    # Here we replace skopt's Optimizer:
    optimizer = Optimizer(**kwargs)
    # dimensions_aslist orders the dimensions by sorted key, so each unnamed
    # dimension receives the matching parameter name:
    for i, dimension in enumerate(optimizer.space.dimensions):
        if dimension.name is not None:
            continue
        dimension.name = sorted(params_space.keys())[i]
    return optimizer
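# A minimal usage sketch for the pattern above (illustrative only; the
# search-space names and bounds below are assumptions, not part of the
# library):
def _example_make_optimizer_usage():
    from skopt.space import Integer, Real
    from skopt.utils import dimensions_aslist

    params_space = {
        "learning_rate": Real(1e-4, 1e-1, prior="log-uniform"),
        "max_depth": Integer(2, 10),
    }
    # dimensions_aslist sorts by key, which is what makes the naming loop
    # in _make_optimizer line up with sorted(params_space.keys()):
    opt = Optimizer(dimensions=dimensions_aslist(params_space), n_initial_points=5)
    for name, dim in zip(sorted(params_space), opt.space.dimensions):
        if dim.name is None:
            dim.name = name
    return opt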
def test_expected_optimality_gap(random_state, input, expected):
    opt = Optimizer(
        dimensions=[(-2.0, 2.0)], n_initial_points=0, random_state=random_state
    )
    opt.tell(
        [[-2.0], [-1.0], [0.0], [1.0], [2.0]],
        [2.0, 0.0, -2.0, 0.0, 2.0],
        gp_burnin=10,
    )
    gap = opt.expected_optimality_gap(
        random_state=random_state,
        n_probabilities=10,
        n_space_samples=100,
        n_gp_samples=100,
        n_random_starts=10,
        tol=0.1,
        use_mean_gp=input["use_mean_gp"],
        normalized_scores=input["normalized_scores"],
    )
    np.testing.assert_almost_equal(gap, expected, decimal=2)
def test_noise_vector():
    opt = Optimizer(dimensions=[(-2.0, 2.0)], n_initial_points=5)
    opt.tell(
        [[-1.0], [0.0], [1.0], [0.5]],
        [0.0, -1.0, 0.0, -1.0],
        noise_vector=[1.0, 1.0, 1.0, 0.0],
    )
    x = opt.ask()
    opt.tell([x], [0.0])

    # Test that the less noisy optimum (at 0.5) had a stronger impact on the
    # mean process than the noisy optimum (at 0.0):
    y_noisy, y = opt.gp.predict([[0.5], [0.625]])
    assert y_noisy > y

    # Check that passing a single point works correctly:
    x = opt.ask()
    opt.tell(x, 0.0, noise_vector=0.5)
def test_multiple_asks():
    # Calling ask() multiple times without a tell() in between should
    # be a "no op":
    opt = Optimizer(dimensions=[(-2.0, 2.0)], n_initial_points=1)

    opt.run(bench1, n_iter=3, gp_burnin=0, n_samples=1)
    # tell() computes the next point ready for the next call to ask(),
    # hence there are three points after three iterations:
    assert_equal(len(opt.Xi), 3)
    opt.ask()
    assert_equal(len(opt.Xi), 3)
    assert_equal(opt.ask(), opt.ask())
def test_initial_points(init_strategy):
    opt = Optimizer(
        dimensions=[(-2.0, 2.0)], n_initial_points=3, init_strategy=init_strategy
    )
    x = opt.ask()
    assert not isinstance(x[0], list)
    opt.tell([x], [0.0])
    assert opt._n_initial_points == opt.n_initial_points_ - 1
    opt.tell([x], [0.0])
    assert opt._n_initial_points == opt.n_initial_points_ - 2
    # The GP is not sampled while initial points are still outstanding:
    assert opt.gp.chain_ is None
    opt.tell([[0.1], [0.2], [0.3]], [0.0, 0.1, 0.2], replace=True)
    assert opt._n_initial_points == opt.n_initial_points_ - 3
    assert opt.gp.chain_ is not None
def test_optimum_intervals():
    opt = Optimizer(
        dimensions=[(0.0, 1.0)], random_state=0, acq_func="mean", n_points=100
    )
    x = np.linspace(0, 1, num=20)[:, None]
    y = np.cos(np.pi * 4 * x).flatten() + opt.rng.randn(20) * 0.1
    opt.tell(x.tolist(), y.tolist(), gp_burnin=20, progress=False, n_samples=1)
    intervals = opt.optimum_intervals(random_state=0, space_samples=100)
    assert len(intervals) == 1
    assert len(intervals[0]) >= 2
    assert len(intervals[0][0]) == 2
    intervals = opt.optimum_intervals(
        random_state=0, space_samples=100, multimodal=False
    )
    assert len(intervals) == 1
    assert len(intervals[0]) == 2
def local(  # noqa: C901
    tuning_config,
    acq_function="mes",
    acq_function_samples=1,
    confidence=0.9,
    data_path=None,
    gp_burnin=5,
    gp_samples=300,
    gp_initial_burnin=100,
    gp_initial_samples=300,
    logfile="log.txt",
    n_initial_points=30,
    n_points=500,
    plot_every=5,
    plot_path="plots",
    random_seed=0,
    result_every=5,
    resume=True,
    verbose=False,
):
    """Run a local tune.

    Parameters defined in the `tuning_config` file always take precedence.
    """
    json_dict = json.load(tuning_config)
    settings, commands, fixed_params, param_ranges = load_tuning_config(json_dict)
    log_level = logging.DEBUG if verbose else logging.INFO
    log_format = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    root_logger = logging.getLogger()
    root_logger.setLevel(log_level)
    file_logger = logging.FileHandler(settings.get("logfile", logfile))
    file_logger.setFormatter(log_format)
    root_logger.addHandler(file_logger)
    console_logger = logging.StreamHandler(sys.stdout)
    console_logger.setFormatter(log_format)
    root_logger.addHandler(console_logger)
    logging.debug(f"Got the following tuning settings:\n{json_dict}")

    # 1. Create seed sequence
    ss = np.random.SeedSequence(settings.get("random_seed", random_seed))
    # 2. Create kernel
    # 3. Create optimizer
    random_state = np.random.RandomState(np.random.MT19937(ss.spawn(1)[0]))
    opt = Optimizer(
        dimensions=list(param_ranges.values()),
        n_points=settings.get("n_points", n_points),
        n_initial_points=settings.get("n_initial_points", n_initial_points),
        # gp_kernel=kernel,  # TODO: Let user pass in different kernels
        gp_kwargs=dict(normalize_y=True),
        # gp_priors=priors,  # TODO: Let user pass in priors
        acq_func=settings.get("acq_function", acq_function),
        acq_func_kwargs=dict(alpha="inf", n_thompson=20),
        random_state=random_state,
    )
    X = []
    y = []
    noise = []
    iteration = 0

    # 3.1 Resume from existing data:
    if data_path is None:
        data_path = "data.npz"
    if resume:
        path = pathlib.Path(data_path)
        if path.exists():
            with np.load(path) as importa:
                X = importa["arr_0"].tolist()
                y = importa["arr_1"].tolist()
                noise = importa["arr_2"].tolist()
            if len(X[0]) != opt.space.n_dims:
                logging.error(
                    "The number of parameters does not match the number of "
                    "dimensions. Rename the existing data file or ensure that "
                    "the parameter ranges are correct."
                )
                sys.exit(1)
            reduction_needed, X_reduced, y_reduced, noise_reduced = reduce_ranges(
                X, y, noise, opt.space
            )
            if reduction_needed:
                backup_path = path.parent / (
                    path.stem + f"_backup_{int(time.time())}" + path.suffix
                )
                logging.warning(
                    f"The parameter ranges are smaller than the existing data. "
                    f"Some points will have to be discarded. "
                    f"The original {len(X)} data points will be saved to "
                    f"{backup_path}"
                )
                np.savez_compressed(
                    backup_path, np.array(X), np.array(y), np.array(noise)
                )
                X = X_reduced
                y = y_reduced
                noise = noise_reduced

            iteration = len(X)
            logging.info(
                f"Importing {iteration} existing datapoints. "
                f"This could take a while..."
            )
            opt.tell(
                X,
                y,
                noise_vector=noise,
                gp_burnin=settings.get("gp_initial_burnin", gp_initial_burnin),
                gp_samples=settings.get("gp_initial_samples", gp_initial_samples),
                n_samples=settings.get("n_samples", 1),
                progress=True,
            )
            logging.info("Importing finished.")

    # 4. Main optimization loop:
    while True:
        logging.info("Starting iteration {}".format(iteration))
        result_every_n = settings.get("result_every", result_every)
        if (
            result_every_n > 0
            and iteration % result_every_n == 0
            and opt.gp.chain_ is not None
        ):
            result_object = create_result(Xi=X, yi=y, space=opt.space, models=[opt.gp])
            try:
                best_point, best_value = expected_ucb(result_object, alpha=0.0)
                best_point_dict = dict(zip(param_ranges.keys(), best_point))
                logging.info(f"Current optimum:\n{best_point_dict}")
                logging.info(f"Estimated value: {best_value}")
                confidence_val = settings.get("confidence", confidence)
                confidence_out = confidence_intervals(
                    optimizer=opt,
                    param_names=list(param_ranges.keys()),
                    hdi_prob=confidence_val,
                    opt_samples=1000,
                    multimodal=False,
                )
                logging.info(
                    f"{confidence_val * 100}% confidence intervals:\n{confidence_out}"
                )
            except ValueError:
                logging.info(
                    "Computing current optimum was not successful. "
                    "This can happen in rare cases and running the "
                    "tuner again usually works."
                )
        plot_every_n = settings.get("plot_every", plot_every)
        if (
            plot_every_n > 0
            and iteration % plot_every_n == 0
            and opt.gp.chain_ is not None
        ):
            logging.getLogger("matplotlib.font_manager").disabled = True
            if opt.space.n_dims == 1:
                logging.warning("Plotting for only 1 parameter is not supported yet.")
            else:
                logging.debug("Starting to compute the next plot.")
                result_object = create_result(
                    Xi=X, yi=y, space=opt.space, models=[opt.gp]
                )
                plt.style.use("dark_background")
                fig, ax = plt.subplots(
                    nrows=opt.space.n_dims,
                    ncols=opt.space.n_dims,
                    figsize=(3 * opt.space.n_dims, 3 * opt.space.n_dims),
                )
                fig.patch.set_facecolor("#36393f")
                for i in range(opt.space.n_dims):
                    for j in range(opt.space.n_dims):
                        ax[i, j].set_facecolor("#36393f")
                timestr = time.strftime("%Y%m%d-%H%M%S")
                plot_objective(
                    result_object, dimensions=list(param_ranges.keys()), fig=fig, ax=ax
                )
                plotpath = pathlib.Path(settings.get("plot_path", plot_path))
                plotpath.mkdir(parents=True, exist_ok=True)
                full_plotpath = plotpath / f"{timestr}-{iteration}.png"
                plt.savefig(
                    full_plotpath,
                    pad_inches=0.1,
                    dpi=300,
                    bbox_inches="tight",
                    facecolor="#36393f",
                )
                logging.info(f"Saving a plot to {full_plotpath}.")
                plt.close(fig)
        point = opt.ask()
        point_dict = dict(zip(param_ranges.keys(), point))
        logging.info("Testing {}".format(point_dict))
        engine_json = prepare_engines_json(commands=commands, fixed_params=fixed_params)
        logging.debug(f"engines.json is prepared:\n{engine_json}")
        write_engines_json(engine_json, point_dict)
        logging.info("Start experiment")
        now = datetime.now()
        out_exp, out_exp_err = run_match(**settings)
        later = datetime.now()
        difference = (later - now).total_seconds()
        logging.info(f"Experiment finished ({difference}s elapsed).")
        logging.debug(f"Raw result:\n{out_exp}\n{out_exp_err}")
        score, error = parse_experiment_result(out_exp, **settings)
        logging.info("Got score: {} +- {}".format(score, error))
        logging.info("Updating model")
        while True:
            try:
                now = datetime.now()
                # We fetch kwargs manually here to avoid collisions:
                n_samples = settings.get("acq_function_samples", acq_function_samples)
                gp_burnin = settings.get("gp_burnin", gp_burnin)
                gp_samples = settings.get("gp_samples", gp_samples)
                if opt.gp.chain_ is None:
                    # Burn in the GP more thoroughly on the very first fit:
                    gp_burnin = settings.get("gp_initial_burnin", gp_initial_burnin)
                    gp_samples = settings.get("gp_initial_samples", gp_initial_samples)
                opt.tell(
                    point,
                    score,
                    n_samples=n_samples,
                    gp_samples=gp_samples,
                    gp_burnin=gp_burnin,
                )
                later = datetime.now()
                difference = (later - now).total_seconds()
                logging.info(f"GP sampling finished ({difference}s)")
                logging.debug(f"GP kernel: {opt.gp.kernel_}")
            except ValueError:
                logging.warning(
                    "Error encountered during fitting. Trying to sample chain a bit. "
                    "If this problem persists, restart the tuner to reinitialize."
                )
                opt.gp.sample(n_burnin=5, priors=opt.gp_priors)
            else:
                break
        X.append(point)
        y.append(score)
        noise.append(error)
        iteration = len(X)

        with AtomicWriter(data_path, mode="wb", overwrite=True).open() as f:
            np.savez_compressed(f, np.array(X), np.array(y), np.array(noise))
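# Hedged sketch of the on-disk format used by the resume logic above:
# np.savez_compressed with positional arrays stores them under the keys
# "arr_0" (X), "arr_1" (y) and "arr_2" (noise), which is what the resume
# branch reads back. The file name below is illustrative.
def _example_data_roundtrip(path="data_example.npz"):
    import numpy as np

    X, y, noise = [[0.1], [0.2]], [0.5, 0.3], [0.05, 0.04]
    np.savez_compressed(path, np.array(X), np.array(y), np.array(noise))
    with np.load(path) as data:
        assert data["arr_0"].tolist() == X
        assert data["arr_1"].tolist() == y
        assert data["arr_2"].tolist() == noise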
def test_error_on_invalid_priors():
    opt = Optimizer(dimensions=[(-2.0, 2.0)], gp_priors=[], n_initial_points=0)
    with pytest.raises(ValueError):
        opt.tell([(0.0,)], 0.0)
def test_no_error_on_unknown_kwargs():
    Optimizer(dimensions=[(-2.0, 2.0)], n_initial_points=5, unknown_argument=42)