def test_reproducible_runs(strategy, surrogate):
    """
    Tests whether two optimizers created with the same seeds propose
    exactly the same points at every step.

    Parameters
    ----------
    * `strategy` [string]:
        Passed to `Optimizer.ask` together with `n_points`.

    * `surrogate` [callable]:
        Called with `random_state=1`; the result is used as base estimator.
    """

    def make_optimizer():
        # Surrogate and optimizer are seeded with the same value on each call.
        return Optimizer(
            base_estimator=surrogate(random_state=1),
            dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
            acq_optimizer='sampling',
            random_state=1,
        )

    # First run: remember every batch that was asked for.
    first_run = make_optimizer()
    points = []
    for _ in range(n_steps):
        batch = first_run.ask(n_points, strategy)
        points.append(batch)
        first_run.tell(batch, [branin(p) for p in batch])

    # Second run: each batch has to be exactly the one recorded above.
    second_run = make_optimizer()
    for expected in points:
        batch = second_run.ask(n_points, strategy)
        assert expected == batch
        second_run.tell(batch, [branin(p) for p in batch])
def test_reproducible_runs(strategy, surrogate):
    """
    Runs the same seeded optimization twice and checks that the second run
    asks for the points recorded during the first one.

    Parameters
    ----------
    * `strategy` [string]:
        Second positional argument given to `Optimizer.ask`.

    * `surrogate` [callable]:
        Factory called with `random_state`; its result is the base estimator.
    """
    seed = 1
    bounds = ((-5.0, 10.0), (0.0, 15.0))

    def build():
        # Fresh estimator and fresh dimension objects for every optimizer.
        return Optimizer(
            base_estimator=surrogate(random_state=seed),
            dimensions=[Real(low, high) for low, high in bounds],
            acq_optimizer='sampling',
            random_state=seed
        )

    def objective_values(candidates):
        return [branin(candidate) for candidate in candidates]

    # reference run
    optimizer = build()
    points = []
    step = 0
    while step < n_steps:
        x = optimizer.ask(n_points, strategy)
        points.append(x)
        optimizer.tell(x, objective_values(x))
        step += 1

    # replay: the x's should be exactly as they are in `points`
    optimizer = build()
    step = 0
    while step < n_steps:
        x = optimizer.ask(n_points, strategy)
        assert points[step] == x
        optimizer.tell(x, objective_values(x))
        step += 1
def create_guassian_process():
    """Run a batched GP-based search and pickle the resulting optimizer.

    Three fixed starting configurations are evaluated in parallel and
    reported to the optimizer.  Then, for 20 rounds, three new points are
    asked for, evaluated in parallel with `fitness` and reported back.
    The optimizer object is finally pickled to ``df_gp_res.pickle``.
    """
    # The three starting points differ only in their fourth entry.
    x0 = [
        [1e-3, 3, 200, "block", 10, 'relu', 64],
        [1e-3, 3, 200, "bowtie", 10, 'relu', 64],
        [1e-3, 3, 200, "diamond", 10, 'relu', 64],
    ]

    optimizer = Optimizer(
        dimensions=dimensions,
        random_state=1,
        n_initial_points=3,
        base_estimator='gp',
    )

    y = Parallel(n_jobs=3)(delayed(fitness)(v) for v in x0)
    optimizer.tell(x0, y)

    for _ in range(20):
        x = optimizer.ask(n_points=3)
        y = Parallel(n_jobs=3)(delayed(fitness)(v) for v in x)
        # Join the values before printing: handing print() a bare generator
        # expression only shows the generator object, not the scores.
        print(" ".join(str(val) for val in y))
        optimizer.tell(x, y)

    # The context manager guarantees the pickle file is flushed and closed.
    with open("df_gp_res.pickle", "wb") as results_file:
        pickle.dump(optimizer, results_file)
def test_same_set_of_points_ask(strategy, surrogate):
    """
    Checks that, without an intermediate `tell`, two successive `ask`
    calls with the same `n_points` give equal sets of points.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.
    """
    estimator = surrogate()
    search_space = [Real(-5.0, 10.0), Real(0.0, 15.0)]
    optimizer = Optimizer(
        base_estimator=estimator,
        dimensions=search_space,
        acq_optimizer='sampling',
        random_state=2,
    )

    for _ in range(n_steps):
        first = optimizer.ask(n_points, strategy)
        second = optimizer.ask(n_points, strategy)
        optimizer.tell(first, [branin(p) for p in first])
        # both requests must have produced the same points
        assert_equal(first, second)
def test_constant_liar_runs(strategy, surrogate, acq_func):
    """
    Tests whether the optimizer runs properly during the random
    initialization phase and beyond

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.

    * `acq_func` [string]:
        Name of the acquisition function. When it contains "ps", every
        observation is reported as a pair `[branin(v), 1.1]`.
    """
    optimizer = Optimizer(
        base_estimator=surrogate(),
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_func=acq_func,
        acq_optimizer='sampling',
        random_state=0,
    )

    # test arguments check
    # The bad values are passed as keyword arguments so that each call hits
    # the check it is meant for; a dict handed over positionally would be
    # bound to `n_points` as a whole.  A valid `n_points` accompanies the bad
    # strategy because, per skopt's `Optimizer.ask`, the strategy is only
    # validated when `n_points` is given (confirm for the installed version).
    assert_raises(ValueError, optimizer.ask,
                  n_points=n_points, strategy="cl_maen")
    assert_raises(ValueError, optimizer.ask, n_points="0")
    assert_raises(ValueError, optimizer.ask, n_points=0)

    for _ in range(n_steps):
        x = optimizer.ask(n_points=n_points, strategy=strategy)
        # check if actually n_points was generated
        assert_equal(len(x), n_points)

        if "ps" in acq_func:
            optimizer.tell(x, [[branin(v), 1.1] for v in x])
        else:
            optimizer.tell(x, [branin(v) for v in x])
class BayesianOptimizer(BaseAlgorithm):
    """Algorithm that delegates suggestions to an skopt `Optimizer`."""

    def __init__(self, space, **kwargs):
        """Build the wrapped optimizer.

        `space` is handed to the base class and converted with
        `convert_orion_space_to_skopt_space`; `kwargs` are forwarded to
        `GaussianProcessRegressor`.
        """
        super(BayesianOptimizer, self).__init__(space)

        regressor = GaussianProcessRegressor(**kwargs)
        skopt_dimensions = convert_orion_space_to_skopt_space(space)
        self.optimizer = Optimizer(base_estimator=regressor,
                                   dimensions=skopt_dimensions)

        self.strategy = "cl_min"

    def suggest(self, num=1):
        """Return `num` new points asked from the wrapped optimizer.

        The request uses the strategy stored in `self.strategy`.
        """
        return self.optimizer.ask(n_points=num, strategy=self.strategy)

    def observe(self, points, results):
        """Report evaluated `points` to the wrapped optimizer.

        Only the `'objective'` entry of each item in `results` is passed on.
        """
        objectives = [result['objective'] for result in results]
        self.optimizer.tell(points, objectives)

    @property
    def is_done(self):
        """Terminating condition; this implementation always reports False."""
        return False
def test_all_points_different(strategy, surrogate):
    """
    Checks that every batch returned by the parallel optimizer consists of
    pairwise distinct points.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.
    """
    estimator = surrogate()
    optimizer = Optimizer(
        base_estimator=estimator,
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_optimizer='sampling',
        random_state=1,
    )

    # every pairwise distance has to exceed this for points to count as distinct
    tolerance = 1e-3

    for _ in range(n_steps):
        batch = optimizer.ask(n_points, strategy)
        optimizer.tell(batch, [branin(p) for p in batch])
        pairwise = pdist(batch)
        assert all(pairwise > tolerance)
def optimize(cfgFilename):
    """Ask the optimizer for one new point and append it to ``tempOpt.txt``.

    The optimizer is configured from the JSON configuration `cfgFilename`.
    When ``tempOpt.txt`` exists, previously evaluated points and their scores
    are obtained through `importFromFile` and reported to the optimizer
    first.  `shouldIContinue` is then called with the history and the
    stopping settings; its return value is not used here.  The new point is
    written as space-separated values, without a trailing newline.
    """
    params = importJsonCfg(cfgFilename)
    BO_params = unpackVariables(params)

    opt = Optimizer(
        BO_params["Variables"],
        base_estimator=params["BaseEstimator"],
        acq_func=params["AcquisitionFunction"],
        acq_optimizer=params["AcquisitionOptimizer"],
    )
    optMaxIter = int(params["maxOptIter"])

    # History stays empty when there is no file to resume from.
    pointLst, auprcs = [], []
    if os.path.isfile("tempOpt.txt"):
        pointLst, auprcs = importFromFile(BO_params["VariableNames"],
                                          BO_params["FixedVars"])
        for idx in range(len(auprcs)):
            opt.tell(pointLst[idx], auprcs[idx])

    nBest = int(params["EarlyStoppingNBest"])
    delta = float(params["EarlyStoppingDelta"])
    shouldIContinue(pointLst, auprcs, optMaxIter, nBest, delta)

    pt = convertPoint(opt.ask(), BO_params["VariableNames"],
                      BO_params["FixedVars"])
    line = " ".join([str(x) for x in pt])
    with open("tempOpt.txt", "a") as fout:
        fout.write(line)
def test_same_set_of_points_ask(strategy, surrogate):
    """
    For n_points not None, verifies that asking twice in a row (no `tell`
    in between) yields the same sets of points.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.
    """
    optimizer_settings = dict(
        base_estimator=surrogate(),
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_optimizer='sampling',
        random_state=2,
    )
    optimizer = Optimizer(**optimizer_settings)

    remaining = n_steps
    while remaining > 0:
        xa = optimizer.ask(n_points, strategy)
        xb = optimizer.ask(n_points, strategy)
        observed = [branin(v) for v in xa]
        optimizer.tell(xa, observed)
        # the two consecutive requests must agree
        assert_equal(xa, xb)
        remaining -= 1
def job(loss):
    """Run 128 ask/tell iterations for one `loss` and dump the scorer result.

    A `SklearnScorer` is used as the objective.  After the loop,
    `scorer.result` is written as JSON to ``<loss>.json``.

    Parameters
    ----------
    loss : str
        Used as the only choice of the ``'loss'`` dimension and as the stem
        of the output file name.
    """
    import json

    scorer = SklearnScorer(
        X, y, words, postfx, rules_apply=0.8, max_endings=75
    )

    space = {
        'alpha': (0.0001, 1.0, 'log-uniform'),
        'l1_ratio': (0.001, 0.999),
        'loss': [loss],
        'epsilon': (0.001, 10.0, 'log-uniform'),
        'threshold': (0.00001, 0.001, 'log-uniform'),
    }

    # `point_aslist(space, space)` lists the dimension definitions themselves,
    # in the ordering that `point_aslist` / `point_asdict` use for points.
    opt = Optimizer(point_aslist(space, space))

    for i in range(128):
        p = point_asdict(space, opt.ask())
        f = scorer(p)
        opt.tell(point_aslist(space, p), f)
        print(f)
        print(i, scorer.best_obj, scorer.best_params)

    # Context manager so the result file is flushed and closed.
    with open(loss + '.json', 'w') as result_file:
        json.dump(scorer.result, result_file, indent=2, sort_keys=True)
def test_constant_liar_runs(strategy, surrogate, acq_func):
    """
    Tests whether the optimizer runs properly during the random
    initialization phase and beyond

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.

    * `acq_func` [string]:
        Acquisition function name given to the Optimizer. If the name
        contains "ps", observations are told as `[branin(v), 1.1]` pairs.
    """
    optimizer = Optimizer(
        base_estimator=surrogate(),
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_func=acq_func,
        acq_optimizer='sampling',
        random_state=0
    )

    # test arguments check
    # Each invalid value goes in by keyword. Passing a dict positionally
    # would make the dict itself the `n_points` argument, so the strategy
    # check would never be reached. The invalid strategy is combined with a
    # valid `n_points` because skopt's `Optimizer.ask` is understood to
    # validate the strategy only when `n_points` is set (verify against the
    # installed release).
    invalid_calls = (
        {"n_points": n_points, "strategy": "cl_maen"},
        {"n_points": "0"},
        {"n_points": 0},
    )
    for bad_kwargs in invalid_calls:
        assert_raises(ValueError, optimizer.ask, **bad_kwargs)

    for _ in range(n_steps):
        x = optimizer.ask(n_points=n_points, strategy=strategy)
        # check if actually n_points was generated
        assert_equal(len(x), n_points)

        if "ps" in acq_func:
            optimizer.tell(x, [[branin(v), 1.1] for v in x])
        else:
            optimizer.tell(x, [branin(v) for v in x])
def evaluate_optimizer(surrogate, model, dataset, n_calls, random_state):
    """
    Tunes the parameters of `model` on `dataset` with an Optimizer driven by
    `surrogate`, using at most `n_calls` model evaluations.

    Parameters
    ----------
    * `surrogate`:
        Estimator class to use for optimization; instantiated with
        `random_state`.

    * `model`: scikit-learn estimator.
        sklearn estimator used for parameter tuning.

    * `dataset`: str
        Name of dataset to train ML model on.

    * `n_calls`: int
        Budget of evaluations

    * `random_state`: seed
        Set the random number generator in numpy.

    Returns
    -------
    * `trace`: list of tuples
        (p, f(p), best), where p is a dictionary of the form
        "param name":value, f(p) is the negated value returned by the
        benchmark for configuration p and best is the lowest f(p) seen up
        to that index.
    """
    # Seed numpy explicitly: processes forked at the same time would
    # otherwise generate the same random numbers.
    np.random.seed(random_state)

    problem = MLBench(model, dataset)
    space = problem.space

    # set up the surrogate and the search dimensions (sorted by name)
    estimator = surrogate(random_state=random_state)
    dimensions_names = sorted(space)
    dimensions = [space[name][0] for name in dimensions_names]

    solver = Optimizer(dimensions, estimator, random_state=random_state)

    trace = []
    best_y = np.inf

    for call_number in range(1, n_calls + 1):
        point_list = solver.ask()

        # pair each dimension name with the value proposed for it
        point_dct = {
            name: value for name, value in zip(dimensions_names, point_list)
        }

        # "evaluate" returns accuracy / r^2 (higher is better), hence the sign
        objective_at_point = -problem.evaluate(point_dct)

        best_y = objective_at_point if best_y > objective_at_point else best_y

        # remember the point, objective, running-best triple
        trace.append((point_dct, objective_at_point, best_y))

        print("Evaluation no. " + str(call_number))

        solver.tell(point_list, objective_at_point)

    return trace
def test_dict_list_space_representation():
    """
    Checks that a point survives the round trip list -> dict -> list when
    converted with `point_asdict` and `point_aslist` for a dict-defined
    search space.
    """
    main_ingredients = [
        'cheese', 'cherimoya', 'chicken', 'chard', 'chocolate', 'chicory'
    ]
    secondary_ingredients = ['love', 'passion', 'dedication']

    chef_space = {
        'Cooking time': (0, 1200),  # in minutes
        'Main ingredient': main_ingredients,
        'Secondary ingredient': secondary_ingredients,
        'Cooking temperature': (-273.16, 10000.0)  # in Celsius
    }

    opt = Optimizer(dimensions=dimensions_aslist(chef_space))
    point = opt.ask()

    # convert to the dictionary form and back again
    as_dict = point_asdict(chef_space, point)
    round_tripped = point_aslist(chef_space, as_dict)

    assert_equal(point, round_tripped)
def test_all_points_different(strategy, surrogate):
    """
    Tests that the points asked for in one parallel batch are never
    (nearly) identical to each other.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.
    """
    # points closer than this are treated as the same point
    tolerance = 1e-3

    optimizer = Optimizer(
        base_estimator=surrogate(),
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_optimizer='sampling',
        random_state=1
    )

    step = 0
    while step < n_steps:
        x = optimizer.ask(n_points, strategy)
        values = [branin(v) for v in x]
        optimizer.tell(x, values)

        distances = pdist(x)
        assert all(distances > tolerance)
        step += 1
class GPOptimizer(HyperParamSearch):
    """Hyper-parameter search backed by an skopt `Optimizer` with a GP model.

    `ask` hands out at most `samples` points and raises `StopIteration`
    afterwards; `tell` reports the negated ``"ll_mean"`` validation value to
    the optimizer.
    """

    def __init__(self, space, samples, random_state=1):
        """Create the search.

        Parameters
        ----------
        space :
            Search dimensions; given to the base class and to the optimizer.
        samples : int
            Number of points `ask` may hand out.
        random_state : int
            Seed for the optimizer.  It is now actually forwarded; it used
            to be ignored in favour of a hard-coded 1, which is still the
            default, so existing callers see no change.
        """
        super().__init__(space)
        self._num_samples = samples
        self.optimizer = Optimizer(
            dimensions=space,
            random_state=random_state,
            base_estimator="GP",
            acq_optimizer="auto",
            n_initial_points=10,
        )
        # number of points requested so far
        self.asked = 0

    def tell(self, args, train_eval, validation_eval, test_eval, model_dir):
        """Record an evaluation and report it to the optimizer.

        The value told for `args` is ``-validation_eval["ll_mean"]``.
        """
        super().tell(args, train_eval, validation_eval, test_eval, model_dir)
        self.optimizer.tell(args, -validation_eval["ll_mean"])

    def ask(self):
        """Return the next point to evaluate.

        Raises
        ------
        StopIteration
            Once more than `num_samples` points have been requested.
        """
        self.asked += 1
        if self.asked > self._num_samples:
            raise StopIteration
        return self.optimizer.ask()

    @property
    def num_samples(self):
        """Number of points this search hands out."""
        return self._num_samples
def do_bayesian_optimization(fitness, dimensions, default_parameters,
                             timesteps, n_seq):
    """Run 11 ask/tell evaluations of `fitness` and return the last result.

    The first evaluation uses `default_parameters`; later ones use points
    asked from the optimizer.  A candidate is only evaluated once
    `verifyConditions(candidate, timesteps, n_seq)` holds; until then a new
    point is requested from the optimizer.

    Parameters
    ----------
    fitness : callable
        Called with a list of parameter values; its return value is passed
        unchanged to `Optimizer.tell`.
    dimensions : list
        Search space given to the optimizer.
    default_parameters : list
        Candidate for the first evaluation.
    timesteps, n_seq :
        Passed through to `verifyConditions`.

    Returns
    -------
    The value returned by the last `Optimizer.tell` call.
    """
    print("START BAYESIAN OPTIMIZATION")
    print("fitness", fitness)
    print("dimensions", len(dimensions))
    print("default parameters", len(default_parameters))

    opt = Optimizer(dimensions=dimensions, acq_func='EIps')
    n_calls = 11
    res = []

    for call in range(n_calls):
        next_x = default_parameters if call == 0 else opt.ask()

        # Keep asking until the candidate is acceptable.  Once this loop
        # exits the condition is known to hold, so it is not checked again.
        while not verifyConditions(next_x, timesteps, n_seq):
            next_x = opt.ask()

        f_val = fitness(next_x)
        res = opt.tell(next_x, f_val)

    clear_session()
    return res
class Optimizer:
    """Model tuning helper around an skopt optimizer (currently disabled).

    The constructor raises `NotImplementedError` unconditionally, so no
    instance can be created through it at the moment.
    """

    def __init__(self, model, featureset, target, validator,
                 goal='maximize', search_spaces=None):
        """Always raises `NotImplementedError`.

        The statements after the `raise` are unreachable; they are kept so
        the intended set-up stays visible for when the class is re-enabled.
        """
        raise NotImplementedError
        if search_spaces is None:
            self.search_spaces = type(model).search_spaces
        else:
            self.search_spaces = search_spaces
        self.parameter_names = list(self.search_spaces.keys())
        self.value_spaces = list(self.search_spaces.values())
        self.opt = SKOptimizer(self.value_spaces)
        self.model = model
        self.featureset = featureset
        self.target = target
        self.validator = validator
        self.goal = goal
        # the wrapped optimizer is fed `coeff * score`
        if self.goal == 'maximize':
            self.coeff = -1
        elif self.goal == 'minimize':
            self.coeff = 1
        else:
            raise ValueError(
                'Goal should be either to maximize or minimize objective.')

    def optimize(self, n_iters):
        """Run `n_iters` ask/score/tell rounds, plotting convergence each time."""
        for _ in range(n_iters):
            pt = self.opt.ask()
            self.model.params = dict(zip(self.parameter_names, pt))
            val = self.coeff * self.validator.score(self.model,
                                                    self.featureset)
            clear_output(True)
            plot_convergence(self.opt.tell(pt, val))
            plt.show()

    def _current_result(self):
        """Build a result object from the wrapped optimizer's current state."""
        return create_result(Xi=self.opt.Xi,
                             yi=self.opt.yi,
                             space=self.opt.space,
                             rng=self.opt.rng,
                             models=self.opt.models)

    def plot_objective(self):
        """Show skopt's objective plot for the evaluations made so far."""
        plot_objective(self._current_result(),
                       dimensions=self.parameter_names)
        plt.show()

    def plot_evaluations(self):
        """Show skopt's evaluations plot for the evaluations made so far."""
        # This method used to call `plot_objective`, duplicating the method
        # above.  The function is imported locally because this block cannot
        # tell whether the module already imports it.
        from skopt.plots import plot_evaluations as _plot_evaluations

        _plot_evaluations(self._current_result(),
                          dimensions=self.parameter_names)
        plt.show()
def test_purely_categorical_space():
    """Regression test for #908: ask/tell on a space made only of categoricals."""
    lower_case = Categorical(['a', 'b', 'c'])
    upper_case = Categorical(['A', 'B', 'C'])
    optimizer = Optimizer(
        [lower_case, upper_case],
        n_initial_points=1,
        random_state=3,
    )

    point = optimizer.ask()
    # this is the call that raised an exception before the fix
    optimizer.tell(point, 1.)
class SKOptTuner(Tuner):
    """Bayesian Optimizer."""

    def __init__(self, pipeline_hyperparameter_ranges, random_state=0):
        """Init SkOptTuner

        Arguments:
            pipeline_hyperparameter_ranges (dict): A set of hyperparameter ranges corresponding to a pipeline's parameters
            random_state (int): The random state. Defaults to 0.
        """
        super().__init__(pipeline_hyperparameter_ranges,
                         random_state=random_state)
        self.opt = Optimizer(self._search_space_ranges,
                             "ET",
                             acq_optimizer="sampling",
                             random_state=random_state)

    def add(self, pipeline_parameters, score):
        """Add score to sample

        Arguments:
            pipeline_parameters (dict): A dict of the parameters used to evaluate a pipeline
            score (float): The score obtained by evaluating the pipeline with the provided parameters

        Returns:
            None

        Raises:
            ParameterError: If the optimizer rejects the point with the
                int/None comparison error checked for below.
            Exception: Any other error raised by the optimizer is re-raised
                unchanged.
        """
        # skip adding nan scores
        if pd.isnull(score):
            return
        flat_parameter_values = self._convert_to_flat_parameters(
            pipeline_parameters)
        try:
            self.opt.tell(flat_parameter_values, score)
        except Exception as e:
            # Arguments are listed in the order of the placeholders: score
            # first, then the parameters (they used to be swapped).
            logger.debug(
                'SKOpt tuner received error during add. Score: {}\nParameters: {}\nFlat parameter values: {}\nError: {}'
                .format(score, pipeline_parameters, flat_parameter_values, e))
            if str(e) == "'<=' not supported between instances of 'int' and 'NoneType'":
                msg = "Invalid parameters specified to SKOptTuner.add: parameters {} error {}" \
                    .format(pipeline_parameters, str(e))
                logger.error(msg)
                raise ParameterError(msg)
            # bare raise re-raises the caught exception with its traceback
            raise

    def propose(self):
        """Returns a suggested set of parameters to train and score a pipeline with, based off the search space dimensions and prior samples.

        Returns:
            dict: Proposed pipeline parameters
        """
        # warnings emitted inside this block are suppressed
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            if not len(self._search_space_ranges):
                # nothing to search over: convert an empty parameter set
                return self._convert_to_pipeline_parameters({})
            flat_parameters = self.opt.ask()
            return self._convert_to_pipeline_parameters(flat_parameters)
def runOptimizer(self):
    """Run the ask/tell optimization until `self.budget` evaluations are used.

    The points in `self.points_to_evaluate` (after `convert_points`) are
    evaluated first.  Afterwards the optimizer proposes points; a point that
    was already evaluated is not evaluated again, its stored objective value
    is told to the optimizer instead and it does not count against the
    budget.  The best value and its configuration are printed.

    Returns
    -------
    The object read from `self.trialsFile` by `pickleRead`.
    """
    opt = Optimizer(self.domain,
                    base_estimator=self.optimizer,
                    n_random_starts=self.initial_samples,
                    acq_optimizer="sampling",
                    acq_func=self.acquisition_method,
                    acq_func_kwargs=self.acq_kwargs)

    count = 0
    trails = []     # evaluated points, parallel to `results`
    results = []    # objective value of each evaluated point
    min_x = []
    # Start from +inf rather than a fixed number so that the first evaluated
    # point always becomes the incumbent, whatever the objective's scale.
    min_val = float("inf")

    # initial points
    for point in self.convert_points(self.points_to_evaluate):
        f_val = self.getObjectiveValue(point)
        count += 1
        if f_val < min_val:
            min_val = f_val
            min_x = point
        trails.append(point)
        results.append(f_val)
        opt.tell(point, f_val)

    # optimization runs
    while count < self.budget:
        next_x = opt.ask()
        if next_x not in trails:
            f_val = self.getObjectiveValue(next_x)
            count += 1
            if f_val < min_val:
                min_val = f_val
                min_x = next_x
            trails.append(next_x)
            results.append(f_val)
        else:
            # repeated proposal: reuse the stored value
            f_val = results[trails.index(next_x)]
        opt.tell(next_x, f_val)

    best_parameters = dict()
    best_parameters['type'], best_parameters['size'], best_parameters[
        'num'] = self.convertToConfig(min_x)
    print(min_val, best_parameters)

    trials = pickleRead(self.trialsFile)
    return trials
def _step(
    self,
    optimizer: Optimizer,
    data: Dataset,
    metrics: List[str],
    cv: Any,
    n_jobs: int,
    verbose: int,
) -> Result:
    """
    Runs one ask / evaluate / tell round of the Bayesian optimization.

    Parameters
    ----------
    optimizer: Optimizer
        An instance of skopt's Optimizer
    data: Dataset
        Instance of data to train on
    metrics: List of str
        List of metrics to calculate results for
    cv: Any
        Either a CV object from sklearn or an int to specify number of folds
    n_jobs
        Number of jobs to calculate in parallel
    verbose
        Verbosity level of the method

    Returns
    -------
    Result
        The result computed for the proposed parameters. The negated score
        of its first metric is what gets reported to the optimizer.
    """
    proposed = optimizer.ask()
    # unwrap every proposed value through numpy's `.item()`
    params = [np.array(value).item() for value in proposed]

    # name the values according to the parameter grid and configure a clone
    named_params = point_asdict(self.param_grid, params)
    candidate = clone(self.estimator)
    estimator = candidate.set_params(**named_params)

    logger.info("Fitting estimator...")
    logger.debug("Fitting estimator %s", estimator)

    result = Result.from_estimator(
        estimator=estimator,
        data=data,
        metrics=metrics,
        cv=cv,
        n_jobs=n_jobs,
        verbose=verbose,
    )

    logger.info("Result: %s", result)

    objective = -result.metrics[0].score
    optimizer.tell([params], [objective])
    return result
def evaluate_optimizer(surrogate, model, dataset, n_calls, random_state):
    """
    Evaluates some estimator for the task of optimization of parameters of some
    model, given limited number of model evaluations.

    :param surrogate: Estimator to use for optimization.
    :param model: str, name of the ML model class to be used for parameter tuning
    :param dataset: str, name of dataset to train ML model on
    :param n_calls: a budget of evaluations
    :param random_state: random seed, used to set the random number generator in numpy
    :return: a list of triples (p, f(p), best), where p is a dictionary of the
        form "param name":value, f(p) is the negated performance measure
        achieved by the model for configuration p, and best is the lowest
        f(p) seen up to and including that evaluation. The list is the
        history of the optimization run.
    """
    # below seed is necessary for processes which fork at the same time
    # so that random numbers generated in processes are different
    np.random.seed(random_state)

    problem = MLBench(model, dataset, random_state=random_state)
    space = problem.model_description[MODEL_PARAMETERS]

    # initialization
    estimator = surrogate(random_state=random_state)
    dimensions_names = space.keys()
    dimensions = [space[k][0] for k in dimensions_names]

    solver = Optimizer(dimensions, estimator, random_state=random_state)

    trace = []
    best_y = np.inf

    # optimization loop
    for i in range(n_calls):
        point_list = solver.ask()

        # convert list of dimension values to dictionary
        point_dct = dict(zip(dimensions_names, point_list))

        # the result of "evaluate" is accuracy / r^2, which is the more the
        # better, so it is negated for the minimizing solver
        objective_at_point = -problem.evaluate(point_dct)

        if best_y > objective_at_point:
            best_y = objective_at_point

        # remember the point, objective and running best
        trace.append((point_dct, objective_at_point, best_y))
        print("Eval. #" + str(i))

        solver.tell(point_list, objective_at_point)

    return trace
class LeafOptimizer:
    """Optimizer over the combined dimensions of a list of nodes.

    Each call to `execscript` asks the wrapped skopt optimizer for one point,
    runs the scripts the nodes compile from it and tells the optimizer the
    resulting objective, or `FAIL` when a script segment does not succeed.
    """

    # objective value reported for a failed run
    FAIL = sys.float_info.max

    def __init__(self, nlist: List[TNode], rfsearch):
        """Collect the dimensions of all nodes and create the optimizer.

        `self.path` is the concatenation of the nodes' hashes and
        `self.space` the product of all values returned by their
        `get_dims()`.
        """
        self.nlist = nlist[:]
        self.path = ""
        self.minimum = self.FAIL
        odims = list()
        self.space = 1
        for n in nlist:
            self.path += n.hash()
            odims += n.get_odims()
            for i in n.get_dims():
                self.space = self.space * i
        optparam = {'kappa': 1.96}
        self.opt = Optimizer(odims,
                             base_estimator='RF',
                             acq_optimizer='sampling',
                             acq_func='LCB',
                             acq_func_kwargs=optparam)
        self.lv = self.FAIL
        self.l = 1
        self.exp = 0
        self.rfsearch = rfsearch
        self.reporter = rfsearch.reporter

    def execscript(self):
        """Evaluate one proposed point.

        Returns
        -------
        bool
            True when the objective of this run is lower than every previous
            one (and `self.minimum` was updated), False otherwise.
        """
        psel = self.opt.ask()
        # Convert numpy scalars to plain Python values.  `.item()` is the
        # documented replacement for `np.asscalar`, which has been removed
        # from NumPy.
        for i, value in enumerate(psel):
            if isinstance(value, np.generic):
                psel[i] = value.item()
        print(self.path, psel)

        pstate = {'global': dict(), 'local': dict()}
        b = 0  # offset of the current node's values inside psel
        try:
            for n in self.nlist:
                pl = len(n.get_dims())
                scr = n.compile_val(psel[b:(b + pl)])
                b += pl
                if not runseg(self.reporter, scr, pstate):
                    raise ScriptExit()
            self.lv = self.rfsearch.obj(self.reporter.metrics)
            self.rfsearch.update(self.nlist, psel)
            self.reporter.finalize(self.path, psel)
        except ScriptExit:
            self.lv = self.FAIL

        self.opt.tell(psel, self.lv)
        self.reporter.clear()

        if self.lv < self.minimum:
            self.minimum = self.lv
            return True
        return False
def bayesian_optimization(X, y, k_folds, random_state, model_space,
                          model_creator, metric, evaluation_handler,
                          args_handler=None, post_evaluation_handler=None,
                          n_iterations=1000, extra_columns=None,
                          n_parallel=1):
    """Search `model_space` with an skopt Optimizer and tabulate every evaluation.

    Points are asked for in batches of `n_parallel`, evaluated in parallel
    with `evaluate_single_point` and reported to the optimizer as
    `metric.rank(mean_score)`.

    Parameters
    ----------
    X, y, k_folds, random_state, model_creator, evaluation_handler, args_handler :
        Passed through to `evaluate_single_point`; `k_folds` also sets the
        number of ``score<i>`` columns and `random_state` seeds the
        optimizer.
    model_space : dict
        Search space; its keys become columns of the result.
    metric :
        Object whose `rank(mean_score)` gives the value to minimize.
    post_evaluation_handler : callable, optional
        Called as ``handler(model_idx, data, mean_score)`` after each
        evaluated model and once more as ``handler(-model_idx, data, None)``
        at the end.
    n_iterations : int
        Evaluation budget; batches start at 0, n_parallel, 2*n_parallel, ...
        below this number.
    extra_columns : iterable of str, optional
        Additional result columns, filled from the extra values of each
        evaluation.  ``None`` (the default) means no extra columns.
    n_parallel : int
        Batch size and number of parallel jobs.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated model.
    """
    # None instead of a mutable [] default; treated as "no extra columns".
    if extra_columns is None:
        extra_columns = []

    dim_names, dims = model_space_to_dims(model_space)
    opt = Optimizer(dims, random_state=random_state)

    data = {column: [] for column in model_space.keys()}
    data['mean_score'] = []
    for fold in range(k_folds):
        data['score%d' % fold] = []
    for column in extra_columns:
        data[column] = []

    model_idx = 0
    for _ in range(0, n_iterations, n_parallel):
        suggested = opt.ask(n_points=n_parallel)
        results = Parallel(n_jobs=n_parallel)(
            delayed(evaluate_single_point)(
                X, y, k_folds, random_state, point, model_creator,
                evaluation_handler, args_handler, dim_names,
                model_idx + parallel_offset)
            for parallel_offset, point in enumerate(suggested)
        )

        # save scores
        for (model_args, model_mean_score, model_scores,
             results_model_idx, extra_values) in results:
            for arg_name, column_values in data.items():
                if arg_name in model_args:
                    column_values.append(model_args[arg_name])
            for column, value in extra_values.items():
                data[column].append(value)
            data['mean_score'].append(model_mean_score)
            # separate loop variable: the batch loop is not shadowed here
            for fold in range(k_folds):
                data['score%d' % fold].append(model_scores[fold])
            if post_evaluation_handler is not None:
                post_evaluation_handler(
                    results_model_idx, data, model_mean_score)

        opt.tell(suggested,
                 [metric.rank(result[1]) for result in results])
        model_idx += n_parallel

    if post_evaluation_handler is not None:
        post_evaluation_handler(-model_idx, data, None)

    return pd.DataFrame(data=data)
def run_bayopt(conf):
    '''
    Run a bayesian optimization for every algorithm listed in the configuration
        --------
        conf: dict
            Configuration dictionary. The optional 'optimize' section may
            provide 'iterations' (default 100), 'iterations_skip'
            (default 0) and 'initial_points' (default 10).
    '''

    def optimize_setting(key, default):
        # value from conf['optimize'] when both the section and key exist
        if 'optimize' in conf and key in conf['optimize']:
            return conf['optimize'][key]
        return default

    iterations = optimize_setting('iterations', 100)
    start = optimize_setting('iterations_skip', 0)

    print('run opt with {} iterations starting at {}'.format(
        iterations, start))

    # state shared with run_bayopt_single; 'current' is read back from it
    shared_state = {'results': []}

    for entry in conf['algorithms']:
        # generate space for algorithm
        space_dict = generate_space(entry)
        opt = Optimizer(
            list(space_dict.values()),
            n_initial_points=optimize_setting('initial_points', 10),
        )

        for i in range(start, iterations):
            print('start bayesian test ', str(i))
            suggested = opt.ask()
            params = dict(zip(space_dict.keys(), suggested))
            algo_instance = create_algorithm_dict(entry, params)
            run_bayopt_single(conf, algo_instance, i, shared_state)
            res = shared_state['current']
            opt.tell(suggested, -1 * res)

    # merge all per-run result dicts; later entries overwrite earlier ones
    global_results = {
        key: value
        for results in shared_state['results']
        for key, value in results.items()
    }

    write_results_csv(global_results, conf)
def test_names_dimensions():
    """Run two ask/tell steps on a 1-d space and plot the resulting objective."""

    def objective(x, noise_level=0.1):
        # deterministic part plus gaussian noise scaled by noise_level
        signal = np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2))
        return signal + np.random.randn() * noise_level

    # one search dimension, a single initial point
    opt = Optimizer([(-2.0, 2.0)], n_initial_points=1)

    for _ in range(2):
        candidate = opt.ask()
        observed = objective(candidate)
        res = opt.tell(candidate, observed)

    # plot the result returned by the last tell
    plots.plot_objective(res)
class SkoptBackend(Backend):
    """Black box optimization backend built on scikit-optimize."""
    backend_name = "scikit-optimize"
    implemented_funcs = (
        "choice",
        "randrange",
        "uniform",
    )

    def __init__(self, examples, params, base_estimator="GP", **options):
        self.init_fallback_backend()

        if params:
            if isinstance(base_estimator, str):
                base_estimator = py_str(base_estimator)

            dimensions = create_dimensions(params)
            self.optimizer = Optimizer(dimensions, base_estimator, **options)

            if examples:
                self.tell_examples(examples, params)
                return

        # no parameters, or nothing told yet: no current values
        self.current_values = {}

    def tell_examples(self, new_examples, params):
        """Feed new examples to the optimizer and refresh the current values."""
        data_points, losses = split_examples(new_examples, params)
        self.result = self.optimizer.tell(data_points, losses)

        next_point = self.optimizer.ask()
        self.current_values = make_values(params, next_point)

    @property
    def space(self):
        """The optimizer's search space."""
        return self.optimizer.space

    @property
    def model(self):
        """The last entry of the optimizer's list of models."""
        return self.optimizer.models[-1]
def __init__(self, examples, params, base_estimator="gp", **options):
    """Set `current_values` from an optimizer that was told all `examples`.

    Without examples, `current_values` is an empty dict and no optimizer is
    created.
    """
    self.init_fallback_backend()

    if examples:
        data_points, losses = split_examples(examples, params)

        # one dimension per parameter; the kwargs part is not used here
        dimensions = []
        for name, (func, args, kwargs) in sorted_items(params):
            dimensions.append(create_dimension(name, func, *args))

        if isinstance(base_estimator, str):
            base_estimator = py_str(base_estimator)

        optimizer = Optimizer(dimensions, base_estimator, **options)
        optimizer.tell(data_points, losses)

        self.current_values = make_values(params, optimizer.ask())
    else:
        self.current_values = {}
def get_next_candidate(self, n_points):
    """Ask a freshly built skopt optimizer for the next candidates.

    A new GP/EI optimizer is created, told all stored `x_values` with their
    negated `y_values`, and asked for `n_points` points.

    Args:
        n_points (int): Number of candidates desired

    Returns:
        The asked points converted by `self._to_dict_list`
    """
    # skopt minimizes, so the stored values are negated to maximize them
    negated_targets = [-value for value in self.y_values]

    known_points = len(self.x_values)
    optimizer = Optimizer(
        dimensions=self.search_space,
        base_estimator='gp',
        n_initial_points=known_points,
        acq_func='EI',
    )

    # TODO Does this fit the values???
    optimizer.tell(self.x_values, negated_targets)

    return self._to_dict_list(optimizer.ask(n_points=n_points))
class SkoptBackend(StandardBackend):
    """Black box optimization backend built on scikit-optimize."""
    backend_name = "scikit-optimize"
    implemented_funcs = ("choice", "randrange", "uniform")

    @override
    def setup_backend(self, params, base_estimator="GP",
                      n_initial_points=None, **options):
        """Create the optimizer for `params`.

        When `n_initial_points` is None it is obtained from
        `guess_n_initial_points(params)`.
        """
        self.params = params

        if isinstance(base_estimator, str):
            base_estimator = py_str(base_estimator)

        initial_points = n_initial_points
        if initial_points is None:
            initial_points = guess_n_initial_points(params)

        dimensions = create_dimensions(params)
        self.optimizer = Optimizer(
            dimensions,
            base_estimator,
            n_initial_points=initial_points,
            **options
        )

    @override
    def tell_examples(self, new_examples):
        """Feed new examples to the optimizer and refresh the current values."""
        data_points, losses = split_examples(new_examples, self.params)
        self.result = self.optimizer.tell(data_points, losses)

        next_point = self.optimizer.ask()
        self.current_values = make_values(self.params, next_point)

    @property
    def space(self):
        """The optimizer's search space."""
        return self.optimizer.space

    @property
    def model(self):
        """The last entry of the optimizer's list of models."""
        return self.optimizer.models[-1]
def skopt_main():
    """Run three parallel ask/tell rounds and save the optimizer to disk.

    An optimizer previously dumped to ``optimizer-exp-pendulum-4.pkl`` is
    loaded and given the current search space; if that fails for any
    ordinary reason a new optimizer is created.  Every round asks for
    `n_jobs` points, evaluates `f` on them in a process pool, tells the
    results and prints the best value so far.  The optimizer is dumped back
    to the same file afterwards.
    """
    from skopt import Optimizer, dump, load, Space
    from skopt.learning import GaussianProcessRegressor
    from skopt.space import Real, Integer

    fname = 'optimizer-exp-pendulum-4.pkl'
    dims = [Integer(15, 500), Real(0.025, 0.1, prior="log-uniform")]

    try:
        optimizer = load(fname)
        optimizer.space = Space(dims)
    except Exception:
        # Best-effort resume: fall back to a fresh optimizer when the saved
        # one cannot be loaded.  `Exception` (instead of a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        optimizer = Optimizer(dimensions=dims, random_state=1)

    n_jobs = 2
    for i in range(3):
        pool = Pool(n_jobs, initializer=mute)
        try:
            x = optimizer.ask(n_points=n_jobs)  # x is a list of n_points points
            print(x)
            y = pool.map(f, x)
        finally:
            # release the worker processes even when the evaluation fails
            pool.close()
            pool.join()
        optimizer.tell(x, y)
        print('Iteration %d. Best yi %.2f' % (i, min(optimizer.yi)))

    dump(optimizer, fname)
def submit(n, optimizer: Optimizer, opt_param_names, current_configs,
           param_space: ParamSpace, queue: Queue):
    """ Create n new configurations and put them on a queue.

    The optimizer is asked for n points; each point is paired with the
    parameter names and turned into a configuration through
    `values_to_params`. Every configuration gets an "id" that continues the
    numbering of `current_configs`, is put on the queue, and the new
    configurations are appended to `current_configs` in place.

    Args:
        n: the number of configurations to be generated
        optimizer: the optimiser object from skopt with the model used for the suggested points to explore
        opt_param_names: the names for the parameters using the same order of the dimensions in the optimizer
        current_configs: current list of configurations (updated with the newly generated ones)
        param_space: parameter space which we can use to convert optimizer points to fully specified configurations
        queue: multiprocessing queue in which we put the new configurations
    """
    suggested_points = optimizer.ask(n_points=n)

    new_configs = []
    for point in suggested_points:
        named_values = dict(zip(opt_param_names, point))
        new_configs.append(values_to_params(named_values, param_space))

    # ids continue after the configurations that already exist
    for config_id, config in enumerate(new_configs, len(current_configs)):
        config["id"] = config_id
        queue.put(config)

    current_configs += new_configs
class BayesianOptimizedExperimentQueue(ExperimentQueue):
    """Experiment queue whose experiments are proposed by a skopt ``Optimizer``.

    Dimensions are read from a JSON file. ``lease_new`` asks the optimizer
    for the next point (or an alternative one if that point is already
    leased) and ``complete`` tells the observed result back to the optimizer.
    """

    def __init__(self, dimensions_file: str, min_num_results_to_fit: int=8, lease_timout='2 days'):
        """Load the dimensions and build the GP-backed optimizer.

        :param dimensions_file: path of a JSON file holding a list of
            dimension specs (see ``__load_dimensions``).
        :param min_num_results_to_fit: number of results the optimizer must
            already hold before ``complete`` starts fitting the model.
        :param lease_timout: lease duration, parsed with ``pd.to_timedelta``.
        """
        self.__all_experiments = pd.DataFrame()
        self.__all_experiments['status'] = [self.WAITING] * len(self.__all_experiments)
        self.__all_experiments['last_update'] = pd.Series(pd.Timestamp(float('NaN')))
        self.__all_experiments['client'] = [""] * len(self.__all_experiments)
        self.__lease_duration = pd.to_timedelta(lease_timout)
        self.__leased_experiments = []

        dims = self.__load_dimensions(dimensions_file)
        self.__dimension_names = list(dims.keys())
        self.__dimensions = list(dims.values())
        self.__min_num_results_to_fit = min_num_results_to_fit

        # Initialize
        dim_types = [check_dimension(d) for d in self.__dimensions]
        is_cat = all(isinstance(check_dimension(d), Categorical) for d in dim_types)
        if is_cat:
            transformed_dims = [check_dimension(d, transform="identity") for d in self.__dimensions]
        else:
            transformed_dims = []
            for dim_type, dim in zip(dim_types, self.__dimensions):
                if isinstance(dim_type, Categorical):
                    transformed_dims.append(check_dimension(dim, transform="onehot"))
                else:
                    # To make sure that GP operates in the [0, 1] space
                    transformed_dims.append(check_dimension(dim, transform="normalize"))

        # This space is only used to size the kernel length scales below.
        space = Space(transformed_dims)

        # Default GP
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(space.transformed_n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(space.transformed_n_dims),
                length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims,
                nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, random_state=None, alpha=0.0, noise='gaussian',
            n_restarts_optimizer=2)

        self.__opt = Optimizer(self.__dimensions, base_estimator, acq_optimizer="lbfgs",
                               n_random_starts=100, acq_optimizer_kwargs=dict(n_points=10000))

    @property
    def all_experiments(self) -> pd.DataFrame:
        """
        :return: The PandasFrame containing the details for all the experiments in the queue.
        """
        return self.__all_experiments

    @property
    def completed_percent(self) -> float:
        """Always ``0.``; completion is not tracked by this queue."""
        return 0.

    @property
    def leased_percent(self) -> float:
        """Always ``0``; leases are not tracked as a percentage by this queue."""
        return 0

    @property
    def experiment_parameters(self) -> List:
        """Names of the dimensions, in the order they were loaded."""
        return self.__dimension_names

    def lease_new(self, client_name: str) -> Tuple[int, Dict]:
        """
        Lease a new experiment proposed by the optimizer.

        The optimizer is asked for its next point; if that exact point is
        already leased, an alternative point is computed instead. The chosen
        point is remembered as leased until ``complete`` is called for it.

        NOTE(review): the value actually returned is ``(parameters, -1)``,
        i.e. the parameter dict first and a placeholder id second, which does
        not match the ``Tuple[int, Dict]`` annotation - confirm against
        callers before changing either.

        :param client_name: The name of the leasing client (currently unused)
        :return: a tuple (parameters, -1)
        """
        experiment_params = self.__opt.ask()
        if experiment_params in self.__leased_experiments:
            experiment_params = self.__compute_alternative_params()
        self.__leased_experiments.append(experiment_params)
        # TODO: Add to all experiments, use Ids

        def parse_dim_val(value, dim_type):
            # Convert to plain Python numbers for Real / Integer dimensions.
            if isinstance(dim_type, Real):
                return float(value)
            elif isinstance(dim_type, Integer):
                return int(value)
            return value

        return {name: parse_dim_val(value, dim_type)
                for name, dim_type, value
                in zip(self.__dimension_names, self.__dimensions, experiment_params)}, -1

    def __compute_alternative_params(self):
        """Pick a point whose acquisition value is discounted near leased points.

        A clone of the optimizer's estimator is fitted on the results so far,
        random candidates are scored with the EI acquisition, and the scores
        are discounted around already-leased points. The discount width is
        shrunk until some candidate has a sufficiently negative value or the
        width becomes too small.
        """
        # Copied directly from skopt
        transformed_bounds = np.array(self.__opt.space.transformed_bounds)
        est = clone(self.__opt.base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(self.__opt.space.transform(self.__opt.Xi), self.__opt.yi)

        X = self.__opt.space.transform(self.__opt.space.rvs(
            n_samples=self.__opt.n_points, random_state=self.__opt.rng))

        # The undiscounted acquisition depends only on X, the fitted model
        # and y_opt, so it is computed once and reused for every width.
        raw_values = _gaussian_acquisition(X=X, model=est, y_opt=np.min(self.__opt.yi),
                                           acq_func='EI',
                                           acq_func_kwargs=dict(n_points=10000))
        print('original point ei: %s' % np.min(raw_values))

        discount_width = .5
        values = self.__discount_leased_params(X, raw_values, discount_width)
        while np.min(values) > -1e-5 and discount_width > 1e-2:
            discount_width *= .9
            values = self.__discount_leased_params(X, raw_values, discount_width)

        next_x = X[np.argmin(values)]
        print('new point ei: %s' % np.min(values))

        if not self.__opt.space.is_categorical:
            next_x = np.clip(next_x, transformed_bounds[:, 0], transformed_bounds[:, 1])

        return self.__opt.space.inverse_transform(next_x.reshape((1, -1)))[0]

    @staticmethod
    def leased_discount(center, width, x_values):
        """Triangular (cone) discount

        Returns, for every row of ``x_values``, ``1 - distance / width``
        (Euclidean distance to ``center``), clipped below at 0.
        """
        distance_from_center = np.linalg.norm(x_values - center, 2, axis=1)
        discount = -distance_from_center / width + 1
        discount[discount < 0] = 0
        return discount

    def __discount_leased_params(self, X, values, discount_width_size):
        """Scale ``values`` by ``1 - product of the leased-point cones``.

        ``X`` holds candidates in the optimizer's transformed space, so the
        leased points are transformed into that same space before distances
        are measured.

        NOTE(review): because the cone discounts are multiplied together, a
        candidate is only discounted when it lies inside the cone of every
        leased point - confirm this is intended when several experiments are
        leased at once.
        """
        transformed_leased_params = self.__opt.space.transform(np.array(self.__leased_experiments))
        discount_factor = reduce(lambda x, y: x * y,
                                 (self.leased_discount(p, discount_width_size, X)
                                  for p in transformed_leased_params),
                                 np.ones(values.shape[0]))
        out_vals = values * (1. - discount_factor)
        return out_vals

    def complete(self, experiment_id: int, parameters: Dict, client: str, result: float = 0) -> None:
        """
        Declare an experiment to be completed.

        The parameters are removed from the leased list (if present) and the
        result is told to the optimizer. The model is only fitted once the
        optimizer already holds at least ``min_num_results_to_fit`` results.

        :param experiment_id: the id of the experiment or -1 if unknown
        :param parameters: mapping from dimension name to the value used
        :param client: the client id
        :param result: the output results of the experiment. This may be used in optimizing queues.
        """
        point = [parameters[n] for n in self.__dimension_names]
        if point in self.__leased_experiments:
            self.__leased_experiments.remove(point)

        do_fit_model = len(self.__opt.yi) >= self.__min_num_results_to_fit
        # Unfortunate hack: this depends on the internals.
        if do_fit_model:
            self.__opt._n_random_starts = 0  # Since we have adequately many results, stop using random
        self.__opt.tell(point, result, fit=do_fit_model)

    def __load_dimensions(self, dimensions_file:str)->Dict:
        """Read the JSON spec list and return an ordered name -> dimension map.

        Each spec needs ``type`` (``Real``, ``Integer`` or ``Categorical``)
        and ``name``, plus ``low``/``high`` or ``categories`` respectively.

        :raises ValueError: if a spec has an unrecognized ``type``.
        """
        with open(dimensions_file) as f:
            dimensions = json.load(f)

        def parse_dimension(specs: Dict[str, Any]):
            if specs['type'] == 'Real':
                return specs['name'], Real(specs['low'], specs['high'])
            elif specs['type'] == 'Integer':
                return specs['name'], Integer(specs['low'], specs['high'])
            elif specs['type'] == 'Categorical':
                return specs['name'], Categorical(specs['categories'])
            else:
                raise ValueError('Unrecognized dimension type %s' % specs['type'])

        return OrderedDict([parse_dimension(d) for d in dimensions])
class SkOptOptimizer(PhotonBaseOptimizer):
    """PHOTON optimizer that proposes configurations with a skopt ``Optimizer``.

    ``prepare`` converts the pipeline hyperparameters into a skopt space,
    ``ask`` is a generator of configuration dicts and ``tell`` reports the
    achieved performance back to skopt.
    """

    def __init__(
            self,
            n_configurations: int = 20,
            acq_func: str = "gp_hedge",
            acq_func_kwargs: dict = None,
    ):
        """
        :param n_configurations: number of configurations the ask generator yields
        :param acq_func: acquisition function name forwarded to skopt
        :param acq_func_kwargs: extra acquisition arguments forwarded to skopt
        """
        self.optimizer = None
        self.hyperparameter_list = []
        self.metric_to_optimize = ""
        self.ask = self.ask_generator()
        self.n_configurations = n_configurations
        self.acq_func = acq_func
        self.acq_func_kwargs = acq_func_kwargs
        self.maximize_metric = True
        self.constant_dictionary = {}

    def prepare(self, pipeline_elements: list, maximize_metric: bool):
        """Build the skopt space from the pipeline elements and reset ``ask``.

        Hyperparameters with fewer than two candidate values are stored in
        ``constant_dictionary`` instead of being optimized. If nothing is
        left to optimize, ``self.optimizer`` stays ``None``.
        """
        self.hyperparameter_list = []
        self.maximize_metric = maximize_metric

        # build space
        space = []
        for pipe_element in pipeline_elements:
            if hasattr(pipe_element, "hyperparameters"):
                for name, value in pipe_element.hyperparameters.items():
                    # if we only have one value we do not need to optimize
                    if isinstance(value, list) and len(value) < 2:
                        self.constant_dictionary[name] = value[0]
                        continue
                    if isinstance(value, PhotonCategorical) and len(value.values) < 2:
                        self.constant_dictionary[name] = value.values[0]
                        continue
                    skopt_param = self._convert_PHOTON_to_skopt_space(value, name)
                    if skopt_param is not None:
                        space.append(skopt_param)

        if len(space) == 0:
            logger.warn(
                "Did not find any hyperparameters to convert into skopt space")
            self.optimizer = None
        else:
            self.optimizer = Optimizer(
                space,
                "ET",
                acq_func=self.acq_func,
                acq_func_kwargs=self.acq_func_kwargs,
            )
        self.ask = self.ask_generator()

    def _convert_PHOTON_to_skopt_space(self, hyperparam: object, name: str):
        """Translate one PHOTON hyperparameter into a skopt dimension.

        Returns ``None`` for falsy or unsupported hyperparameters. The name
        is recorded in ``hyperparameter_list`` only when a dimension is
        actually produced, so the list stays aligned with the dimensions the
        caller adds to the space.
        """
        if not hyperparam:
            return None

        skopt_param = None
        if isinstance(hyperparam, PhotonCategorical):
            skopt_param = skoptCategorical(hyperparam.values, name=name)
        elif isinstance(hyperparam, list):
            skopt_param = skoptCategorical(hyperparam, name=name)
        elif isinstance(hyperparam, FloatRange):
            if hyperparam.range_type == "linspace":
                skopt_param = Real(hyperparam.start, hyperparam.stop,
                                   name=name, prior="uniform")
            elif hyperparam.range_type == "logspace":
                skopt_param = Real(hyperparam.start, hyperparam.stop,
                                   name=name, prior="log-uniform")
            else:
                skopt_param = Real(hyperparam.start, hyperparam.stop, name=name)
        elif isinstance(hyperparam, IntegerRange):
            skopt_param = Integer(hyperparam.start, hyperparam.stop, name=name)

        if skopt_param is not None:
            self.hyperparameter_list.append(name)
        return skopt_param

    def ask_generator(self):
        """Yield configuration dicts: one empty dict without an optimizer,
        otherwise ``n_configurations`` suggestions from skopt."""
        if self.optimizer is None:
            yield {}
        else:
            for i in range(self.n_configurations):
                next_config_list = self.optimizer.ask()
                next_config_dict = {
                    self.hyperparameter_list[number]: self._convert_to_native(value)
                    for number, value in enumerate(next_config_list)
                }
                yield next_config_dict

    def _convert_to_native(self, obj):
        """Convert a numpy object to the equivalent Python native value."""
        # check if we have a numpy object, if so convert it to python native
        if type(obj).__module__ == np.__name__:
            # ``.item()`` replaces ``np.asscalar``, which newer NumPy
            # releases no longer provide.
            return obj.item()
        else:
            return obj

    def tell(self, config, performance):
        """Report the performance of ``config`` to the optimizer.

        ``performance[1]`` is used as the metric value. When maximizing, a
        list value is reduced to its first element and the value is negated
        before it is passed to skopt. Does nothing without an optimizer.
        """
        # convert dictionary to list in correct order
        if self.optimizer is not None:
            config_values = [config[name] for name in self.hyperparameter_list]
            best_config_metric_performance = performance[1]
            if self.maximize_metric:
                if isinstance(best_config_metric_performance, list):
                    print("BEST CONFIG METRIC PERFORMANCE: "
                          + str(best_config_metric_performance))
                    best_config_metric_performance = best_config_metric_performance[0]
                best_config_metric_performance = -best_config_metric_performance
            self.optimizer.tell(config_values, best_config_metric_performance)

    def plot_evaluations(self):
        """Plot the evaluated points via skopt's ``plot_evaluations``.

        NOTE(review): ``results.x_iters`` is the optimizer's own ``Xi`` list
        and the conversion below writes into it in place - confirm that
        plotting is only done once optimization has finished.
        """
        results = SkoptResults()
        results.space = self.optimizer.space
        results.x_iters = self.optimizer.Xi
        results = self._convert_categorical_hyperparameters(results)
        results.x = results.x_iters[np.argmin(self.optimizer.yi)]
        plt.figure(figsize=(10, 10))
        return plot_evaluations(results)

    def plot_objective(self):
        """Plot the objective via skopt's ``plot_objective``.

        NOTE(review): ``results.x_iters`` is the optimizer's own ``Xi`` list
        and the conversion below writes into it in place - confirm that
        plotting is only done once optimization has finished.
        """
        results = SkoptResults()
        results.space = self.optimizer.space
        results.x_iters = self.optimizer.Xi
        results = self._convert_categorical_hyperparameters(results)
        results.x = results.x_iters[np.argmin(self.optimizer.yi)]
        results.models = self.optimizer.models
        plt.figure(figsize=(10, 10))
        return plot_objective(results)

    def _convert_categorical_hyperparameters(self, results):
        """Replace categorical values in ``results`` by their transformed form.

        For each categorical dimension, ``categories`` is overwritten with
        the dimension's ``transformed_bounds`` and every matching entry of
        ``results.x_iters`` is replaced by the transformer's output. Both
        changes are made in place on the objects held by ``results``.
        """
        parameter_types = list()

        for i, dim in enumerate(results.space.dimensions):
            if isinstance(dim, skoptCategorical):
                parameter_types.append(dim.transformer)
                setattr(results.space.dimensions[i], "categories",
                        dim.transformed_bounds)
            else:
                # False marks dimensions that need no conversion.
                parameter_types.append(False)

        for i, xs in enumerate(results.x_iters):
            for k, xsk in enumerate(xs):
                if parameter_types[k]:
                    results.x_iters[i][k] = parameter_types[k].transform([xsk])
        return results