def test_optimizer_base_estimator_string_smoke_njobs():
    """Smoke test: the "GBRT" estimator string runs with ``n_jobs=-1``."""
    def square(x):
        # one-dimensional quadratic objective
        return x[0]**2

    optimizer = Optimizer(
        [(-2.0, 2.0)],
        base_estimator="GBRT",
        n_initial_points=1,
        acq_func="EI",
        n_jobs=-1,
    )
    optimizer.run(func=square, n_iter=3)
def test_dict_list_space_representation():
    """
    Check that a point converted from its list form to its dictionary form
    and back again compares equal to the point we started with.
    """
    main_ingredients = [
        'cheese', 'cherimoya', 'chicken', 'chard', 'chocolate', 'chicory'
    ]
    secondary_ingredients = ['love', 'passion', 'dedication']
    chef_space = {
        'Cooking time': (0, 1200),  # in minutes
        'Main ingredient': main_ingredients,
        'Secondary ingredient': secondary_ingredients,
        'Cooking temperature': (-273.16, 10000.0)  # in Celsius
    }

    optimizer = Optimizer(dimensions=dimensions_aslist(chef_space))
    sampled = optimizer.ask()

    # list -> dict -> list must give back an equivalent point
    as_mapping = point_asdict(chef_space, sampled)
    round_tripped = point_aslist(chef_space, as_mapping)
    assert_equal(sampled, round_tripped)
def test_dimension_checking_1D():
    """A two-component point told to a 1-D optimizer raises ValueError."""
    lower, upper = -2, 2
    optimizer = Optimizer([(lower, upper)])

    # each coordinate is within bounds, but there is one coordinate too many
    inside = lower+1
    with pytest.raises(ValueError) as excinfo:
        optimizer.tell([inside, inside], 2.)

    assert "Dimensions of point " in str(excinfo.value)
def test_dimension_checking_1D():
    """Telling a 1-D optimizer a 2-D point must fail with ValueError."""
    bounds = (-2, 2)
    optimizer = Optimizer([bounds])

    # within bounds but one dimension too high
    too_many_coords = [bounds[0] + 1, bounds[0] + 1]
    with pytest.raises(ValueError) as excinfo:
        optimizer.tell(too_many_coords, 2.)

    message = str(excinfo.value)
    assert "Dimensions of point " in message
def test_returns_result_object():
    """``tell`` returns an ``OptimizeResult`` whose fields are consistent."""
    regressor = ExtraTreesRegressor(random_state=2)
    optimizer = Optimizer([(-2.0, 2.0)], regressor, n_initial_points=1,
                          acq_optimizer="sampling")

    outcome = optimizer.tell([1.5], 2.)

    assert isinstance(outcome, OptimizeResult)
    # one objective value per evaluated point
    n_points = len(outcome.x_iters)
    n_values = len(outcome.func_vals)
    assert_equal(n_points, n_values)
    # the reported optimum is the smallest observed objective value
    smallest = np.min(outcome.func_vals)
    assert_equal(smallest, outcome.fun)
def test_dump_and_load_optimizer():
    """Round-trip an optimizer through ``dump`` and ``load``.

    The optimizer is run for three iterations first so that the dumped
    object carries evaluated points. The test passes if neither ``dump``
    nor ``load`` raises.
    """
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_random_starts=1,
                    acq_optimizer="sampling")

    opt.run(bench1, n_iter=3)

    with tempfile.TemporaryFile() as f:
        dump(opt, f)
        # Writing advances the file position to the end of the data, so
        # rewind before reading; otherwise load() starts at end-of-file.
        f.seek(0)
        load(f)
def test_exhaust_initial_calls():
    """Models appear only once ``n_initial_points`` results have been told.

    With ``n_initial_points=2`` the first ``tell`` must leave the model list
    empty, the second must produce one model, and the third a second one.
    """
    regressor = ExtraTreesRegressor(random_state=2)
    optimizer = Optimizer([(-2.0, 2.0)], regressor, n_initial_points=2,
                          acq_optimizer="sampling", random_state=1)

    # two asks before any tell(): both random, and different from each other
    first_ask = optimizer.ask()
    second_ask = optimizer.ask()
    assert first_ask != second_ask

    # first call to tell(): no model yet
    after_first = optimizer.tell(second_ask, 3.)
    assert len(after_first.models) == 0
    third_ask = optimizer.ask()  # still a random point
    assert second_ask != third_ask

    # second call to tell(): the first model is fitted
    after_second = optimizer.tell(third_ask, 4.)
    assert len(after_second.models) == 1

    # this is the first non-random point
    model_ask = optimizer.ask()
    assert third_ask != model_ask
    # no new information was added so the suggestion should be the same
    repeated_ask = optimizer.ask()
    assert model_ask == repeated_ask

    after_third = optimizer.tell(model_ask, 1.)
    assert len(after_third.models) == 2
def test_exhaust_initial_calls(base_estimator):
    """Models appear only once ``n_initial_points`` results have been told.

    ``base_estimator`` is an estimator name (it is lower-cased here). For
    the 'dummy' estimator the test expects no model to ever be fitted and
    every suggestion to keep changing.
    """
    optimizer = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=2,
                          acq_optimizer="sampling", random_state=1)

    # two asks before any tell(): both random, and different from each other
    first_ask = optimizer.ask()
    second_ask = optimizer.ask()
    assert first_ask != second_ask

    # first call to tell(): no model yet
    after_first = optimizer.tell(second_ask, 3.)
    assert len(after_first.models) == 0
    third_ask = optimizer.ask()  # still a random point
    assert second_ask != third_ask

    # second call to tell()
    after_second = optimizer.tell(third_ask, 4.)
    uses_dummy = base_estimator.lower() == 'dummy'
    models_after_second = 0 if uses_dummy else 1
    assert len(after_second.models) == models_after_second

    # this is the first non-random point
    model_ask = optimizer.ask()
    assert third_ask != model_ask
    repeated_ask = optimizer.ask()
    after_third = optimizer.tell(model_ask, 1.)

    # Between the two asks no new information was added, so the suggestion
    # should repeat -- unless the dummy estimator is in use, which keeps
    # returning random points and never fits a model.
    if uses_dummy:
        assert model_ask != repeated_ask
        assert len(after_third.models) == 0
    else:
        assert model_ask == repeated_ask
        assert len(after_third.models) == 2
def test_optimizer_copy(acq_func):
    """Check the estimator wrapping of a copied optimizer.

    For acquisition functions whose name contains "ps" the copy's
    ``base_estimator_`` must be a ``MultiOutputRegressor`` wrapped exactly
    once; for all others it must not be wrapped at all.

    Plain ``assert`` statements are used instead of the nose-style
    ``assert_true``/``assert_false`` helpers, so the test does not depend
    on those helpers being importable.
    """
    opt = Optimizer([(-2.0, 2.0)], acq_func=acq_func)
    opt_copy = opt.copy()
    base_est = opt_copy.base_estimator_

    if "ps" in acq_func:
        assert isinstance(base_est, MultiOutputRegressor)
        # check that the base_estimator is not wrapped multiple times
        assert not isinstance(base_est.estimator, MultiOutputRegressor)
    else:
        assert not isinstance(base_est, MultiOutputRegressor)
def test_dimension_checking_2D_multiple_points():
    """A batch with one wrongly-sized point is rejected by a 2-D optimizer."""
    lower, upper = -2, 2
    optimizer = Optimizer([(lower, upper), (lower, upper)])

    # All values are within bounds; only the first point has the wrong
    # length: one coordinate too few, then one coordinate too many.
    malformed_first_points = (
        [lower + 1, ],
        [lower + 1, lower + 1, lower + 1],
    )
    for bad_point in malformed_first_points:
        batch = [bad_point, [lower + 1, lower + 2], [lower + 1, lower + 3]]
        with pytest.raises(ValueError) as excinfo:
            optimizer.tell(batch, 2.)
        assert "dimensions as the space" in str(excinfo.value)
def test_defaults_are_equivalent():
    """A default ``Optimizer`` driven by hand should match ``gp_minimize``.

    Both are given the same space, seed and number of evaluations of
    ``branin``; the sampled points and the optimum are compared with a
    small absolute tolerance.
    """
    space = [(-5., 10.), (0., 15.)]
    n_evaluations = 15

    optimizer = Optimizer(space, random_state=1)
    for _ in range(n_evaluations):
        candidate = optimizer.ask()
        res_opt = optimizer.tell(candidate, branin(candidate))

    res_min = gp_minimize(branin, space, n_calls=n_evaluations,
                          random_state=1)

    assert res_min.space == res_opt.space
    # tolerate small differences in the points sampled
    for field in ("x_iters", "x"):
        expected = getattr(res_min, field)
        observed = getattr(res_opt, field)
        assert np.allclose(expected, observed, atol=1e-5)
def test_acq_optimizer(base_estimator):
    """Constructing with ``acq_optimizer='lbfgs'`` must raise ValueError.

    ``base_estimator`` is supplied by the caller (presumably a pytest
    parametrization that is not visible here). The error message is
    expected to mention 'The tree-based regressor'.
    """
    with pytest.raises(ValueError) as e:
        # The constructor is expected to raise, so its result is not bound
        # to a name.
        Optimizer([(-2.0, 2.0)], base_estimator=base_estimator,
                  n_random_starts=1, acq_optimizer='lbfgs')
    assert 'The tree-based regressor' in str(e.value)
def test_invalid_tell_arguments():
    """A single point paired with several objective values is rejected."""
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        ExtraTreesRegressor(random_state=2),
        n_initial_points=1,
        acq_optimizer="sampling",
    )

    # can't have single point and multiple values for y
    single_point = [1.]
    several_values = [1., 1.]
    assert_raises(ValueError, optimizer.tell, single_point, several_values)
def test_acq_optimizer(base_estimator):
    """Constructing with ``acq_optimizer='lbfgs'`` must raise ValueError.

    The error message is expected to point the user at
    ``acq_optimizer='sampling'``.
    """
    expected_hint = "should run with acq_optimizer='sampling'"
    with pytest.raises(ValueError) as excinfo:
        Optimizer(
            [(-2.0, 2.0)],
            base_estimator=base_estimator,
            n_initial_points=1,
            acq_optimizer='lbfgs',
        )
    assert expected_hint in str(excinfo.value)
def test_dimensions_names():
    """Dimension names given at construction show up in the result space."""
    from skopt.space import Real, Categorical, Integer

    # create search space and optimizer
    space = [
        Real(0, 1, name='real'),
        Categorical(['a', 'b', 'c'], name='cat'),
        Integer(0, 1, name='int'),
    ]
    optimizer = Optimizer(space, n_initial_points=1)

    # the result of tell() must not be missing the dimension names
    result = optimizer.tell([(0.5, 'a', 0.5)], [3])
    names = [dimension.name for dimension in result.space.dimensions]

    assert len(names) == 3
    for expected_name in ("real", "cat", "int"):
        assert expected_name in names
    assert None not in names
def test_invalid_tell_arguments_list():
    """A batch whose objective values include ``None`` is rejected."""
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        ExtraTreesRegressor(random_state=2),
        n_initial_points=1,
        acq_optimizer="sampling",
    )

    points = [[1.], [2.]]
    values_with_none = [1., None]
    assert_raises(ValueError, optimizer.tell, points, values_with_none)
def test_optimizer_copy(acq_func):
    """``copy`` must preserve the estimator wrapping and the observations.

    The optimizer is run for three iterations first so that there are
    points (``Xi``) and objective values (``yi``) to compare.
    """
    regressor = ExtraTreesRegressor(random_state=2)
    original = Optimizer([(-2.0, 2.0)], regressor, acq_func=acq_func,
                         n_initial_points=1, acq_optimizer="sampling")

    # acquisition functions with "ps" in their name get the objective that
    # comes with a time component
    has_ps = "ps" in acq_func
    objective = bench1_with_time if has_ps else bench1
    original.run(objective, n_iter=3)

    clone = original.copy()
    cloned_estimator = clone.base_estimator_
    is_wrapped = isinstance(cloned_estimator, MultiOutputRegressor)

    if has_ps:
        assert is_wrapped
        # check that the base_estimator is not wrapped multiple times
        inner = cloned_estimator.estimator
        assert not isinstance(inner, MultiOutputRegressor)
    else:
        assert not is_wrapped

    assert_array_equal(clone.Xi, original.Xi)
    assert_array_equal(clone.yi, original.yi)
def test_bounds_checking_1D():
    """Points outside the 1-D bounds are rejected by ``tell``."""
    lower, upper = -2., 2.
    regressor = ExtraTreesRegressor(random_state=2)
    optimizer = Optimizer([(lower, upper)], regressor, n_initial_points=1,
                          acq_optimizer="sampling")

    above = upper + 0.5
    below = lower - 0.5

    # a single out-of-range point
    for outside in (above, below):
        assert_raises(ValueError, optimizer.tell, [outside], 2.)

    # feed two points to tell() at once
    for outside in (above, below):
        assert_raises(ValueError, optimizer.tell, [outside, upper], (2., 3.))
def test_optimizer_copy(acq_func):
    """``copy`` must preserve the estimator wrapping and the observations.

    Checks that the base estimator, the evaluated points (``Xi``) and the
    objective values (``yi``) are copied correctly.

    Plain ``assert`` statements are used instead of the nose-style
    ``assert_true``/``assert_false`` helpers, so the test does not depend
    on those helpers being importable.
    """
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, acq_func=acq_func,
                    n_initial_points=1, acq_optimizer="sampling")

    # run three iterations so that we have some points and objective values
    if "ps" in acq_func:
        opt.run(bench1_with_time, n_iter=3)
    else:
        opt.run(bench1, n_iter=3)

    opt_copy = opt.copy()
    copied_estimator = opt_copy.base_estimator_

    if "ps" in acq_func:
        assert isinstance(copied_estimator, MultiOutputRegressor)
        # check that the base_estimator is not wrapped multiple times
        is_multi = isinstance(copied_estimator.estimator,
                              MultiOutputRegressor)
        assert not is_multi
    else:
        assert not isinstance(copied_estimator, MultiOutputRegressor)

    assert_array_equal(opt_copy.Xi, opt.Xi)
    assert_array_equal(opt_copy.yi, opt.yi)
def test_defaults_are_equivalent():
    """A default ``Optimizer`` driven by hand should match ``gp_minimize``.

    Both the result returned by the last ``tell`` and the one returned by
    ``get_result`` are compared against ``gp_minimize``, using the default
    tolerances of ``np.allclose``.
    """
    space = [(-5., 10.), (0., 15.)]
    n_evaluations = 12

    optimizer = Optimizer(space, random_state=1)
    for _ in range(n_evaluations):
        candidate = optimizer.ask()
        res_opt = optimizer.tell(candidate, branin(candidate))

    res_min = gp_minimize(branin, space, n_calls=n_evaluations,
                          random_state=1)

    assert res_min.space == res_opt.space

    # result handed back by the final tell()
    assert np.allclose(res_min.x_iters, res_opt.x_iters)
    assert np.allclose(res_min.x, res_opt.x)

    # result fetched explicitly afterwards
    res_opt2 = optimizer.get_result()
    assert np.allclose(res_min.x_iters, res_opt2.x_iters)
    assert np.allclose(res_min.x, res_opt2.x)
def test_bounds_checking_2D():
    """Points outside the 2-D bounds are rejected by ``tell``."""
    lower, upper = -2., 2.
    regressor = ExtraTreesRegressor(random_state=2)
    space = [(lower, upper), (lower+4, upper+4)]
    optimizer = Optimizer(space, regressor, n_initial_points=1,
                          acq_optimizer="sampling")

    out_of_bounds_points = [
        # both components out
        [upper + 0.5, upper + 4.5],
        [lower - 0.5, lower - 4.5],
        # first out, second in
        [upper + 0.5, upper + 0.5],
        [lower - 0.5, upper + 0.5],
    ]
    for point in out_of_bounds_points:
        assert_raises(ValueError, optimizer.tell, point, 2.)
def test_model_queue_size():
    """``model_queue_size`` caps how many models the optimizer keeps."""
    queue_limit = 2
    n_iterations = 3
    regressor = ExtraTreesRegressor(random_state=2)
    optimizer = Optimizer([(-2.0, 2.0)], regressor, n_initial_points=1,
                          acq_optimizer="sampling",
                          model_queue_size=queue_limit)

    optimizer.run(bench1, n_iter=n_iterations)
    # three points were evaluated, but only ``queue_limit`` models are kept
    assert_equal(len(optimizer.models), 2)
    assert_equal(len(optimizer.Xi), 3)

    # asking again must change neither the models nor the points
    optimizer.ask()
    assert_equal(len(optimizer.models), 2)
    assert_equal(len(optimizer.Xi), 3)

    # two asks with no tell() in between give the same suggestion
    assert_equal(optimizer.ask(), optimizer.ask())
def test_categorical_only2():
    """Batched ask/tell works on a space of only categorical dimensions."""
    from numpy import linalg
    from skopt.space import Categorical
    from skopt.learning import GaussianProcessRegressor

    batch_size = 4
    space = [Categorical([1, 2, 3]), Categorical([4, 5, 6])]
    optimizer = Optimizer(space,
                          base_estimator=GaussianProcessRegressor(alpha=1e-7),
                          acq_optimizer='lbfgs',
                          n_initial_points=10,
                          n_jobs=2)

    # two full ask/tell rounds, scoring every point by its norm
    for _ in range(2):
        batch = optimizer.ask(n_points=batch_size)
        assert len(batch) == 4
        optimizer.tell(batch, [linalg.norm(point) for point in batch])

    # a third batch can still be requested afterwards
    batch = optimizer.ask(n_points=batch_size)
    assert len(batch) == 4
def test_bounds_checking_2D_multiple_points():
    """A batch of out-of-bounds 2-D points is rejected by ``tell``."""
    lower, upper = -2., 2.
    regressor = ExtraTreesRegressor(random_state=2)
    space = [(lower, upper), (lower + 4, upper + 4)]
    optimizer = Optimizer(space, regressor, n_initial_points=1,
                          acq_optimizer="sampling")

    # first component out, second in
    second_component = upper + 0.5
    for first_component in (upper + 0.5, lower - 0.5):
        point = (first_component, second_component)
        assert_raises(ValueError, optimizer.tell,
                      [point, point], [2., 3.])
def test_multiple_asks():
    """Repeated ``ask`` calls with no ``tell`` in between change nothing."""
    regressor = ExtraTreesRegressor(random_state=2)
    optimizer = Optimizer([(-2.0, 2.0)], regressor, n_random_starts=1,
                          acq_optimizer="sampling")

    optimizer.run(bench1, n_iter=3)
    # after three iterations there are three models and three points
    assert_equal(len(optimizer.models), 3)
    assert_equal(len(optimizer.Xi), 3)

    # an extra ask() must leave both counts untouched
    optimizer.ask()
    assert_equal(len(optimizer.models), 3)
    assert_equal(len(optimizer.Xi), 3)

    # and consecutive asks return the same suggestion
    assert_equal(optimizer.ask(), optimizer.ask())
def test_categorical_only():
    """ask/tell works on purely categorical spaces.

    The same scenario is run twice: once with integer categories 2..11 and
    once with the same categories given as strings.
    """
    from skopt.space import Categorical

    integer_categories = list(range(2, 12))
    string_categories = [str(value) for value in integer_categories]

    for categories in (integer_categories, string_categories):
        # two dimensions over the same categories, each with its own list
        first_dim = Categorical(list(categories))
        second_dim = Categorical(list(categories))
        optimizer = Optimizer([first_dim, second_dim])

        for step in range(15):
            candidate = optimizer.ask()
            res = optimizer.tell(candidate, 12 * step)
        assert len(res.x_iters) == 15

        batch = optimizer.ask(n_points=4)
        assert len(batch) == 4
def test_acq_optimizer_with_time_api(base_estimator, acq_func):
    """Exercise ``tell`` with ``(value, time)`` tuples as observations.

    After two such observations (``n_initial_points=2``) the result must
    hold one model, and ``func_vals`` and ``log_time`` must each have shape
    ``(2,)``. Telling a bare objective value afterwards must raise
    ``TypeError``.
    """
    opt = Optimizer([(-2.0, 2.0)], base_estimator=base_estimator,
                    acq_func=acq_func,
                    acq_optimizer="sampling", n_initial_points=2)
    x1 = opt.ask()
    opt.tell(x1, (bench1(x1), 1.0))
    x2 = opt.ask()
    res = opt.tell(x2, (bench1(x2), 2.0))

    # x1 and x2 are random.
    assert x1 != x2

    assert len(res.models) == 1
    assert_array_equal(res.func_vals.shape, (2,))
    assert_array_equal(res.log_time.shape, (2,))

    # A bare objective value (no time component) must be rejected; the
    # exception itself is not inspected, so it is not bound to a name.
    with pytest.raises(TypeError):
        opt.tell(x2, bench1(x2))
def test_multiple_asks():
    """Calling ``ask`` repeatedly without a ``tell`` should be a no-op."""
    n_iterations = 3
    regressor = ExtraTreesRegressor(random_state=2)
    optimizer = Optimizer([(-2.0, 2.0)], regressor, n_initial_points=1,
                          acq_optimizer="sampling")
    optimizer.run(bench1, n_iter=n_iterations)

    def check_counts():
        # three models and three points are expected after three iterations
        assert_equal(len(optimizer.models), 3)
        assert_equal(len(optimizer.Xi), 3)

    check_counts()
    optimizer.ask()
    # the extra ask() must not have added a model or a point
    check_counts()

    # consecutive asks return the same suggestion
    assert_equal(optimizer.ask(), optimizer.ask())
def test_acq_optimizer_with_time_api(base_estimator, acq_func):
    """Exercise ``tell`` with ``(value, time)`` tuples as observations.

    After two such observations (``n_initial_points=2``) the result must
    hold one model, and ``func_vals`` and ``log_time`` must each have shape
    ``(2,)``. Telling a bare objective value afterwards must raise
    ``TypeError``.

    Plain ``assert`` statements are used instead of the nose-style
    ``assert_true`` helper, so the test does not depend on that helper
    being importable.
    """
    opt = Optimizer([(-2.0, 2.0)], base_estimator=base_estimator,
                    acq_func=acq_func,
                    acq_optimizer="sampling", n_initial_points=2)
    x1 = opt.ask()
    opt.tell(x1, (bench1(x1), 1.0))
    x2 = opt.ask()
    res = opt.tell(x2, (bench1(x2), 2.0))

    # x1 and x2 are random.
    assert x1 != x2

    assert len(res.models) == 1
    assert_array_equal(res.func_vals.shape, (2,))
    assert_array_equal(res.log_time.shape, (2,))

    # A bare objective value (no time component) must be rejected; the
    # exception itself is not inspected, so it is not bound to a name.
    with pytest.raises(TypeError):
        opt.tell(x2, bench1(x2))
def _make_optimizer(self, dimensions):
    """Return a 'gp' ``Optimizer`` over the values of ``dimensions``.

    ``dimensions`` is a mapping; only its values are passed on, in the
    mapping's iteration order. The optimizer uses 10 initial points.
    """
    search_space = list(dimensions.values())
    return Optimizer(search_space, n_initial_points=10, base_estimator='gp')
def base_computation_graph(estimator, cross_validator, group_key=None,
                           dimensions=None, base_estimator=None,
                           n_calls=100, n_initial_points=10,
                           initial_point_generator="random", acq_func="EI",
                           acq_optimizer="lbfgs", x0=None, y0=None,
                           random_state=None, verbose=False, callback=None,
                           n_points=10000, n_restarts_optimizer=5, xi=0.01,
                           kappa=1.96, n_jobs=1, model_queue_size=None):
    """Build a dask graph of ``n_calls`` ask / evaluate / tell rounds.

    Each round asks the optimizer for a point, evaluates it on every one
    of the ``cross_validator.K`` folds, averages the fold results and
    tells that average back to the optimizer. All steps are wrapped in
    ``dask.delayed``; nothing is computed here.

    Parameters
    ----------
    estimator, group_key
        Passed through to ``cross_validator.evaluate_fold``.
    cross_validator
        Object exposing ``K`` and ``evaluate_fold(estimator, k, x,
        group_key)``.
    dimensions, base_estimator, n_initial_points, acq_func, acq_optimizer,
    random_state, model_queue_size
        Forwarded to ``Optimizer``.
    n_points, n_restarts_optimizer, n_jobs
        Forwarded to ``Optimizer`` inside ``acq_optimizer_kwargs``.
    xi, kappa
        Forwarded to ``Optimizer`` inside ``acq_func_kwargs``.
    n_calls
        Number of rounds added to the graph.
    initial_point_generator, x0, y0, verbose, callback
        Accepted but not used by this function.

    Returns
    -------
    The plain ``Optimizer`` when ``n_calls`` is 0; otherwise the delayed
    result of the final ``stateful_object_mutator`` call (presumably the
    updated optimizer -- confirm against that helper).
    """
    acq_optimizer_kwargs = {
        "n_points": n_points,
        "n_restarts_optimizer": n_restarts_optimizer,
        "n_jobs": n_jobs,
    }
    acq_func_kwargs = {"xi": xi, "kappa": kappa}

    # NOTE: initial_point_generator and n_jobs are deliberately not passed
    # to the constructor here, exactly as in the original code.
    optimizer = Optimizer(
        dimensions, base_estimator,
        n_initial_points=n_initial_points,
        acq_func=acq_func, acq_optimizer=acq_optimizer,
        random_state=random_state, model_queue_size=model_queue_size,
        acq_optimizer_kwargs=acq_optimizer_kwargs,
        acq_func_kwargs=acq_func_kwargs)

    K = cross_validator.K
    for _ in range(n_calls):
        next_x = dask.delayed(optimizer.ask)()

        evaluation_results = [
            dask.delayed(cross_validator.evaluate_fold)(
                estimator, k, next_x, group_key)
            for k in range(K)
        ]

        # Average over the number of fold results. The previous code
        # divided by the loop variable ``k``, which is K - 1 after the
        # loop (and 0 when K == 1), so the "average" was wrong.
        avg_evaluation_result = dask.delayed(_mean_of_fold_results)(
            *evaluation_results)

        optimizer = dask.delayed(stateful_object_mutator)(
            optimizer, "tell", x=next_x, y=avg_evaluation_result)

    return optimizer


def _mean_of_fold_results(*fold_results):
    """Return the arithmetic mean of the per-fold evaluation results."""
    return sum(fold_results) / len(fold_results)
def main(args):
    """Run a hyperparameter search with Ray Tune and a skopt optimizer.

    Steps: import the architecture and problem modules named in ``args``,
    initialise Ray, build the search space, then call ``tune.run``.

    ``args`` attributes read here: ``arch_module``, ``prob_modules``,
    ``ray_connect``, ``ray_ncpus``, ``job_ncpus``, ``max_par_trials`` and
    ``work_dir``.
    """
    # 1. load config
    print('Importing architecture from %s' % args.arch_module)
    arch_mod = import_module(args.arch_module)
    prob_mods = []
    for prob_module_path in args.prob_modules:
        print('Importing problem from %s' % prob_module_path)
        prob_mods.append(import_module(prob_module_path))

    # 2. spool up Ray
    ray_kwargs = {}
    if args.ray_connect is not None:
        # connecting to an existing cluster
        ray_kwargs["redis_address"] = args.ray_connect
        assert args.ray_ncpus is None, \
            "can't provide --ray-ncpus and --ray-connect"
    elif args.ray_ncpus is not None:
        # starting a new cluster with an explicit CPU count
        assert args.job_ncpus is None \
            or args.job_ncpus <= args.ray_ncpus, \
            "must have --job-ncpus <= --ray-ncpus if both given"
        ray_kwargs["num_cpus"] = args.ray_ncpus
    ray.init(**ray_kwargs)

    max_par_trials = args.max_par_trials
    if max_par_trials is None:
        # leave some room for hyperthread-caused over-counting of CPUs (a /2
        # factor), and for running eval trials in parallel
        max_par_trials = max(1, multiprocessing.cpu_count() // 5)

    # Search space; insertion order matters because the values and the keys
    # are handed to the optimizer and the search algorithm separately.
    sk_space = OrderedDict([
        # author's note: this was once split between 2 and 3, but 3 was
        # judged too slow on some problems (even though exbw seemed to
        # benefit from it)
        ('num_layers', [2]),
        ('hidden_size', (12, 20)),
        # empty list; no steps down, just a single fixed learning rate
        ('learning_rate_steps', [()]),
        ('supervised_learning_rate', (1e-4, 1e-2, 'log-uniform')),
        # author's note: these ranges are similar to the original config,
        # which seemed to work okay
        ('supervised_batch_size', (48, 128)),
        ('opt_batch_per_epoch', (300, 1200)),
        # categorical variables are used so that "switched off entirely" is
        # an option (as opposed to just "turned down very low")
        ('dropout', [0, 0.1, 0.25]),
        ('l1_reg', [0.0]),
        ('l2_reg', (1e-5, 1e-2, 'log-uniform')),
        ('target_rollouts_per_epoch', (30, 150)),
    ])
    if arch_mod.TEACHER_PLANNER == 'ssipp':
        # only relevant for SSiPP
        # (author's note: lm-cut was also an option at first but did not
        # seem to help much, so only h-add is left)
        sk_space['ssipp_teacher_heuristic'] = ['h-add']

    # using random forest b/c we have lots of discrete params, & a few
    # categorical
    space_values = list(sk_space.values())
    sk_optimiser = Optimizer(space_values, base_estimator='RF')
    algo = SkOptSearch(
        sk_optimiser,
        sk_space.keys(),
        max_concurrent=max_par_trials,
        metric='coverage',
        mode='max')

    perform_trial = make_perform_trial(arch_mod, prob_mods)
    tune.run(
        perform_trial,
        search_alg=algo,
        local_dir=args.work_dir,
        resources_per_trial={"cpu": 0},
        num_samples=1000)
def test_optimizer_base_estimator_string_smoke(base_estimator):
    """Smoke test: the optimizer runs with the given ``base_estimator``."""
    def square(x):
        # one-dimensional quadratic objective
        return x[0]**2

    optimizer = Optimizer(
        [(-2.0, 2.0)],
        base_estimator=base_estimator,
        n_initial_points=1,
        acq_func="EI",
    )
    optimizer.run(func=square, n_iter=3)
def test_optimizer_base_estimator_string_invalid():
    """An unknown estimator string raises ValueError listing valid names."""
    expected_listing = "'RF', 'ET', 'GP', 'GBRT' or 'DUMMY'"
    with pytest.raises(ValueError) as excinfo:
        Optimizer(
            [(-2.0, 2.0)],
            base_estimator="rtr",
            n_initial_points=1,
        )
    assert expected_listing in str(excinfo.value)