def _get_gp_est(space, **kwargs):
    """Build a Gaussian-process regressor for the given search space.

    The space is wrapped in a ``Space``, its dimensions are normalized, and a
    kernel is chosen from the result: a Hamming kernel when the space reports
    itself as categorical, a Matern (nu=2.5) kernel otherwise. The chosen
    kernel is multiplied by a constant amplitude kernel.

    Args:
        space: search-space specification accepted by ``Space``.
        **kwargs: forwarded to ``set_params`` on the finished estimator, so
            they override the defaults set here.

    Returns:
        The configured ``GaussianProcessRegressor``.
    """
    from skopt.utils import Space
    from skopt.utils import normalize_dimensions
    from skopt.utils import ConstantKernel, HammingKernel, Matern
    from skopt.learning import GaussianProcessRegressor

    # Wrap first, then rebuild the space from the normalized dimensions.
    normalized = Space(normalize_dimensions(Space(space).dimensions))
    width = normalized.transformed_n_dims

    amplitude = ConstantKernel(1.0, (0.01, 1000.0))

    # The Hamming kernel is only used when every dimension is categorical.
    if not normalized.is_categorical:
        shape_kernel = Matern(
            length_scale=np.ones(width),
            length_scale_bounds=[(0.01, 100)] * width,
            nu=2.5,
        )
    else:
        shape_kernel = HammingKernel(length_scale=np.ones(width))

    estimator = GaussianProcessRegressor(
        kernel=amplitude * shape_kernel,
        normalize_y=True,
        noise="gaussian",
        n_restarts_optimizer=2,
    )
    estimator.set_params(**kwargs)
    return estimator
def prepareScoring(
    self,
    spaceSpec: typing.Tuple["skopt.space.space.Real", "skopt.space.space.Integer", "skopt.space.space.Categorical"]
) -> typing.Tuple[int, str, "skopt.Optimizer"]:
    """Create the scikit-optimize optimizer used for scoring.

    The search space is normalized, a base estimator is cooked for the
    class-level ``skoptAlgo``, and an ``skopt.Optimizer`` is configured from
    the acquisition settings stored on the instance.

    Args:
        spaceSpec: the dimensions of the search space.

    Returns:
        A 3-tuple ``(self.iters, solver label, optimizer)``. The third element
        is the optimizer instance itself; the return annotation previously
        described it as a tuple of dimensions, which did not match the code.
    """
    from skopt.utils import cook_estimator, normalize_dimensions

    algo = self.__class__.skoptAlgo
    normalized = normalize_dimensions(spaceSpec)
    base_estimator = cook_estimator(algo, space=normalized, random_state=None)

    optimizer = skopt.Optimizer(
        normalized,
        base_estimator,
        # No random warm-up points are requested from the optimizer.
        n_initial_points=0,
        acq_func=self.acquisitionType,
        acq_optimizer=self.acquisitionOptimizerType,
        acq_optimizer_kwargs={
            "n_points": self.iters,
            "n_restarts_optimizer": self.nRestartsOptimizer,
            "n_jobs": self.jobs,
        },
        acq_func_kwargs={
            "xi": self.chi,
            "kappa": self.kappa,
        },
    )
    return (self.iters, "SKOpt (" + algo + ")", optimizer)
def setup_tuner(self):
    """Configure the optimizer from the ``"tuner"`` section of the experiment.

    Stores the tuner configuration, the parameter names, the parsed
    dimensions, the normalized space, the parsed priors, the GP kernel
    (constant kernel times Matern, nu=2.5) and the ``Optimizer`` on the
    instance. Every optional setting falls back to the default given here
    when it is absent from the configuration.
    """
    cfg = self.experiment["tuner"]
    self.tunecfg = cfg

    param_cfg = cfg["parameters"]
    self.parameters = list(param_cfg.keys())
    self.dimensions = self.parse_dimensions(param_cfg)
    self.space = normalize_dimensions(self.dimensions)
    self.priors = self.parse_priors(cfg["priors"])

    amplitude = ConstantKernel(
        constant_value=cfg.get("variance_value", 0.1**2),
        constant_value_bounds=tuple(
            cfg.get("variance_bounds", (0.01**2, 0.5**2))),
    )
    smoothness = Matern(
        length_scale=cfg.get("length_scale_value", 0.3),
        length_scale_bounds=tuple(
            cfg.get("length_scale_bounds", (0.2, 0.8))),
        nu=2.5,
    )
    self.kernel = amplitude * smoothness

    self.opt = Optimizer(
        dimensions=self.dimensions,
        n_points=cfg.get("n_points", 1000),
        n_initial_points=cfg.get("n_initial_points",
                                 5 * len(self.dimensions)),
        gp_kernel=self.kernel,
        gp_kwargs=dict(normalize_y=True),
        gp_priors=self.priors,
        acq_func=cfg.get("acq_func", "ts"),
        # TODO: Check if this works for all parameters
        acq_func_kwargs=cfg.get("acq_func_kwargs", None),
        # A fresh integer seed is drawn from the instance RNG.
        random_state=self.rng.randint(0, np.iinfo(np.int32).max),
    )
def __init__(
    self,
    dimensions,
    n_points=500,
    n_initial_points=10,
    init_strategy="r2",
    gp_kernel=None,
    gp_kwargs=None,
    gp_priors=None,
    acq_func="pvrs",
    acq_func_kwargs=None,
    random_state=None,
    **kwargs
):
    """Set up the optimizer state, the search space and the GP surrogate.

    Args:
        dimensions: search-space specification, passed to
            ``normalize_dimensions``.
        n_points: stored as ``self.n_points``.
        n_initial_points: number of initial points; also the length of the
            R2 sequence when ``init_strategy == "r2"``.
        init_strategy: only ``"r2"`` triggers any work here (pre-computing
            ``self._initial_points``).
        gp_kernel: kernel for ``BayesGPR``; a default kernel is constructed
            when None.
        gp_kwargs: extra keyword arguments for ``BayesGPR``.
        gp_priors: priors for the GP; guessed from the GP's kernel when None.
        acq_func: a callable, or a key into ``ACQUISITION_FUNC``.
        acq_func_kwargs: keyword arguments stored for the acquisition
            function.
        random_state: seed or random state accepted by ``check_random_state``.
        **kwargs: accepted but not used in this constructor.
    """
    self.rng = check_random_state(random_state)

    self.acq_func = acq_func if callable(acq_func) else ACQUISITION_FUNC[acq_func]
    self.acq_func_kwargs = dict() if acq_func_kwargs is None else acq_func_kwargs

    self.space = normalize_dimensions(dimensions)
    self._n_initial_points = n_initial_points
    self.n_initial_points_ = n_initial_points
    self.init_strategy = init_strategy
    if init_strategy == "r2":
        unit_points = r2_sequence(n=n_initial_points, d=self.space.n_dims)
        self._initial_points = self.space.inverse_transform(unit_points)
    self.n_points = n_points

    extra_gp_args = dict() if gp_kwargs is None else gp_kwargs
    if gp_kernel is None:
        # For now the default kernel is not adapted to the dimensions,
        # which is why a simple list is passed:
        gp_kernel = construct_default_kernel(
            list(range(self.space.transformed_n_dims))
        )

    gp_seed = self.rng.randint(0, np.iinfo(np.int32).max)
    self.gp = BayesGPR(kernel=gp_kernel, random_state=gp_seed, **extra_gp_args)

    # We are only able to guess priors now, since BayesGPR can add
    # another WhiteKernel, when noise is set to "gaussian":
    self.gp_priors = guess_priors(self.gp.kernel) if gp_priors is None else gp_priors

    # Observation history; starts out empty.
    self.Xi, self.yi, self.noisei = [], [], []
    self._next_x = None
def __init__(
    self,
    dimensions,
    n_points=500,
    n_initial_points=10,
    init_strategy="sb",
    gp_kernel=None,
    gp_kwargs=None,
    gp_priors=None,
    acq_func="pvrs",
    acq_func_kwargs=None,
    random_state=None,
    **kwargs,
):
    """Set up the optimizer state, the search space and the GP surrogate.

    Args:
        dimensions: search-space specification, passed to
            ``normalize_dimensions``.
        n_points: stored as ``self.n_points``.
        n_initial_points: number of initial points; also the length of the
            R2 sequence when ``init_strategy == "r2"``.
        init_strategy: ``"r2"`` pre-computes ``self._initial_points``;
            ``"sb"`` creates a dedicated ``self._init_rng`` seeded from the
            main RNG. Any other value prepares nothing here.
        gp_kernel: kernel for ``BayesGPR``; a default kernel is constructed
            when None.
        gp_kwargs: extra keyword arguments for ``BayesGPR``.
        gp_priors: stored as given (may be None).
        acq_func: a callable, or a key into ``ACQUISITION_FUNC``.
        acq_func_kwargs: keyword arguments stored for the acquisition
            function.
        random_state: seed or random state accepted by ``check_random_state``.
        **kwargs: accepted but not used in this constructor.
    """
    self.rng = check_random_state(random_state)

    self.acq_func = acq_func if callable(acq_func) else ACQUISITION_FUNC[acq_func]
    self.acq_func_kwargs = {} if acq_func_kwargs is None else acq_func_kwargs

    self.space = normalize_dimensions(dimensions)
    self._n_initial_points = n_initial_points
    self.n_initial_points_ = n_initial_points
    self.init_strategy = init_strategy
    if init_strategy == "r2":
        unit_points = r2_sequence(n=n_initial_points, d=self.space.n_dims)
        self._initial_points = self.space.inverse_transform(unit_points)
    elif init_strategy == "sb":
        # This draw happens before the GP seed is drawn below; the order of
        # the two draws determines the seeds each component receives.
        init_seed = self.rng.randint(2 ** 31)
        self._init_rng = np.random.RandomState(init_seed)
    self.n_points = n_points

    extra_gp_args = {} if gp_kwargs is None else gp_kwargs
    if gp_kernel is None:
        # For now the default kernel is not adapted to the dimensions,
        # which is why a simple list is passed:
        gp_kernel = construct_default_kernel(
            list(range(self.space.transformed_n_dims))
        )

    gp_seed = self.rng.randint(0, np.iinfo(np.int32).max)
    self.gp = BayesGPR(kernel=gp_kernel, random_state=gp_seed, **extra_gp_args)
    self.gp_priors = gp_priors

    # Observation history; starts out empty.
    self.Xi, self.yi, self.noisei = [], [], []
    self._next_x = None
def test_reduce_ranges():
    """Only the first and last sample survive the reduction to the space."""
    space = normalize_dimensions([(0.0, 1.0), ("a", "b", "c")])

    # One row per observation: (point, target, noise).
    rows = (
        ((0.0, "a"), 0.0, 0.1),
        ((1.01, "a"), 1.0, 0.2),
        ((0.5, "d"), 2.0, 0.3),
        ((1.0, "c"), 3.0, 0.4),
    )
    x, y, noise = zip(*rows)

    reduction_needed, x_new, y_new, noise_new = reduce_ranges(x, y, noise, space)

    assert reduction_needed
    assert tuple(x_new) == ((0.0, "a"), (1.0, "c"))
    assert tuple(y_new) == (0.0, 3.0)
    assert tuple(noise_new) == (0.1, 0.4)
def test_normalize_bounds():
    """Round-trip normalized values and check both sides stay within limits.

    Two sweeps are used, one below and one above the midpoint of the unit
    interval.
    """
    low, high = -999, 189000
    space = Space(normalize_dimensions([(low, high), Categorical((True, False))]))

    sweeps = (
        np.linspace(1e-9, 0.4999, 1000),
        np.linspace(0.50001, 1e-9 + 1., 1000),
    )
    for sweep in sweeps:
        for a in sweep:
            original = space.inverse_transform([[a, a]])
            check_limits(original[0][0], low, high)
            normalized = space.transform(original)
            check_limits(normalized, 0., 1.)
def create_opt(lines, ranker_name):
    """Create an ``Optimizer`` with a GP surrogate for the named ranker.

    Args:
        lines: passed to ``get_seed`` to obtain the GP seed and the optimizer
            seed.
        ranker_name: key into ``object_rankers``.

    Returns:
        The configured ``Optimizer``.
    """
    gp_seed, opt_seed = get_seed(lines)

    ranker_cls = object_rankers[ranker_name]
    # Note: this sets the flag on the class object returned by the lookup.
    ranker_cls._use_early_stopping = True
    param_ranges = ranker_cls.set_tunable_parameter_ranges({})

    checked = [check_dimension(param) for param in param_ranges]
    surrogate = cook_estimator(
        "GP",
        space=normalize_dimensions(checked),
        random_state=gp_seed,
        noise="gaussian",
    )
    return Optimizer(
        dimensions=param_ranges,
        random_state=opt_seed,
        base_estimator=surrogate,
    )
def set_optimizer(self, n_iter, opt_seed, acq_func, gp_seed, **kwargs):
    """Restore a stored optimizer, or build a new one, and set ``self.opt``.

    A stored optimizer at ``self.optimizer_path`` is reused when it loads and
    has at least one recorded result; the number of remaining iterations is
    then reduced by the ones already done (never below zero). Otherwise a new
    ``Optimizer`` is created, with an "RF" base estimator when every
    normalized dimension is categorical and a "GP" one otherwise.

    Args:
        n_iter: requested number of iterations.
        opt_seed: random state for a newly created optimizer.
        acq_func: acquisition function for a newly created optimizer.
        gp_seed: random state for the base estimator.
        **kwargs: forwarded to ``Optimizer``.

    Returns:
        The number of iterations still to run.
    """
    self.logger.info('Retrieving model stored at: {}'.format(self.optimizer_path))
    try:
        optimizer = load(self.optimizer_path)
        self.logger.info('Loading model stored at: {}'.format(self.optimizer_path))
        finished_iter = np.array(optimizer.yi).shape[0]
        if finished_iter == 0:
            optimizer = None
            self.logger.info('Optimizer did not finish any iterations so setting optimizer to null')
    except (KeyError, ValueError):
        self.logger.error('Cannot open the file {}'.format(self.optimizer_path))
        optimizer = None
    except FileNotFoundError:
        self.logger.error('No such file or directory: {}'.format(self.optimizer_path))
        optimizer = None

    if optimizer is None:
        # Nothing usable on disk: start from scratch.
        checked = [check_dimension(param) for param in self.parameter_ranges]
        self.logger.info("Parameter Space: {}".format(checked))
        norm_space = normalize_dimensions(checked)
        self.logger.info("Parameter Space after transformation: {}".format(norm_space))
        categorical_space = np.array([isinstance(s, Categorical) for s in norm_space])
        self.logger.info("categorical_space: {}".format(categorical_space))
        if np.all(categorical_space):
            base_estimator = cook_estimator("RF", space=norm_space, random_state=gp_seed)
        else:
            base_estimator = cook_estimator("GP", space=norm_space, random_state=gp_seed, noise="gaussian")
        self.opt = Optimizer(
            dimensions=self.parameter_ranges,
            random_state=opt_seed,
            base_estimator=base_estimator,
            acq_func=acq_func,
            **kwargs
        )
        return n_iter

    # Resume: only the iterations not yet done remain.
    n_iter = max(n_iter - finished_iter, 0)
    self.logger.info('Iterations already done: {} and running iterations {}'.format(finished_iter, n_iter))
    self.opt = optimizer
    self.logger.debug('Setting the provided optimizer')
    self.log_best_params()
    return n_iter
def get_x0(flat_base_config, search_space):
    """
    Build a default starting point from the base configuration.

    One value is looked up per search-space key (the string 'None' is used
    when the cleaned key is missing from the configuration). Any value that
    is not contained in its normalized dimension is replaced by a random
    sample drawn from that dimension, and the replacement is printed and
    logged.

    Returns the list of values, in search-space order.
    """
    x0 = [flat_base_config.get(clean_nested_key(k), 'None') for k in search_space]

    # Check x0 is in the space before running it
    # and coerce it into the search space with random samples where invalid
    space = Space(normalize_dimensions(list(search_space.values())))
    for i, (p, d) in enumerate(zip(x0, space.dimensions)):
        if p in d:
            continue
        # NOTE(review): rvs() is called with its default sample count; confirm
        # it yields a scalar rather than a length-1 sequence for this dimension.
        sample = d.rvs()
        message = f"{p} not in dimension: {d} with name:{d.name}, setting to sample:{sample}"
        print(message)
        logger.info(message)
        x0[i] = sample

    print('x0', x0)
    print('space', space)
    return x0
def _patched_gp_base_estimator(dimensions, random_state, noise):
    """Return a GP estimator with ``normalize_y`` switched off.

    The estimator is cooked for the normalized dimensions with an integer
    seed drawn from ``random_state``.
    """
    import numpy as np
    from sklearn.utils import check_random_state
    from skopt.utils import normalize_dimensions

    normalized = normalize_dimensions(dimensions)
    seed = check_random_state(random_state).randint(0, np.iinfo(np.int32).max)

    gp = skopt.utils.cook_estimator(
        "GP",
        space=normalized,
        random_state=seed,
        noise=noise,
    )
    # The whole purpose of this function: disable y-normalization.
    gp.normalize_y = False
    return gp
def __init__(self, hyper_param_conf, command, expdir, exp_recipe_dir, recipe, computing, exp_proposal_watch_dir=None):
    """Read the hyper-parameter configuration and initialise the optimizer.

    The configuration file's ``info`` section supplies the parameter names
    and the run settings; each named parameter has its own section giving
    its ``type`` (``Integer``, ``Real`` or ``Categorical``) and bounds or
    categories. The resulting dimensions are handed to the parent
    constructor.

    Args:
        hyper_param_conf: path of the hyper-parameter configuration file.
        command, expdir, exp_recipe_dir, recipe, computing: stored on the
            instance; ``expdir`` containing ``'debug'`` selects a smaller
            number of acquisition points.
        exp_proposal_watch_dir: stored on the instance.

    Raises:
        ValueError: when a parameter section has an unknown ``type``.
    """
    # Hard-coded estimator choice; the 'boundedGP' branch below is only taken
    # if this value is changed.
    base_estimator = 'GP'

    self.hyper_param_conf = hyper_param_conf
    self.command = command
    self.expdir = expdir
    self.exp_recipe_dir = exp_recipe_dir
    self.recipe = recipe
    self.computing = computing

    # read the hyper parameter file
    config = configparser.ConfigParser()
    config.read(hyper_param_conf)
    info = dict(config.items('info'))

    self.hyper_param_names = info['hyper_params'].split(' ')
    self.num_iters = int(info['num_iters'])
    self.n_initial_points = int(info['n_initial_points'])
    self.n_initial_points_to_start = int(info['n_initial_points_to_start'])
    self.max_parallel_jobs = int(info['max_parallel_jobs'])
    self.selected_segment_length = info['segment_length']
    self.selected_task = info['task']

    if 'adapt_hyper_param' not in info:
        self.adapt_param = None
    else:
        self.adapt_param = {
            'param_name': info['adapt_hyper_param'],
            'param_thr': int(info['param_thr']),
            'par_cnt_scheme': info['par_cnt_scheme'],
        }

    # One skopt dimension per configured hyper parameter.
    hyper_param_dict = {}
    skopt_dims = []
    for par_name in self.hyper_param_names:
        par_dict = dict(config.items(par_name))
        par_type = par_dict['type']
        if par_type == 'Integer':
            skopt_dim = skopt_space.Integer(
                low=int(par_dict['min']), high=int(par_dict['max']), name=par_name)
        elif par_type == 'Real':
            skopt_dim = skopt_space.Real(
                low=float(par_dict['min']), high=float(par_dict['max']), name=par_name)
        elif par_type == 'Categorical':
            skopt_dim = skopt_space.Categorical(
                categories=par_dict['categories'].split(' '), name=par_name)
        else:
            raise ValueError('Type %s is not a valid parameter type' % par_type)
        hyper_param_dict[par_name] = par_dict
        skopt_dims.append(skopt_dim)

    self.hyper_param_dict = hyper_param_dict
    self.skopt_dims = skopt_dims

    # Bookkeeping state, all starting out empty or zero.
    self.last_result = None
    self.start_new_run_flag = True
    self.iter_ind = 0
    self.watch_list = {}
    self.all_dim_values = []
    self.all_losses = {}
    self.n_job_running = 0
    self.n_initial_points_started = 0
    self.n_unsuitable_points_for_estimator = 0
    self.max_n_unsuitable_points_for_estimator = 10000
    self.unsuitable_runs = []
    self.lost_runs = []
    self.exp_proposal_watch_dir = exp_proposal_watch_dir
    self.use_proposal_run = False
    self.proposed_loss_runs = []

    # only 0.25% of the points sampled in the hyper space are wanted (since
    # they lead to roughly the wanted amount of trainable parameters)
    n_acq_points = 40000 if 'debug' in expdir else 4000000
    self.acq_optimizer_kwargs = {'n_points': n_acq_points}

    if base_estimator == 'boundedGP':
        # Make own estimator based on Gaussian Process Regressor.
        if skopt_dims is None:
            raise ValueError("Expected a Space instance, not None.")
        space = Space(normalize_dimensions(Space(skopt_dims).dimensions))
        n_dims = space.transformed_n_dims
        is_cat = space.is_categorical

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims,
                nu=2.5)
        base_estimator = BoundedGaussianProcessRegressor(
            space, self.hyper_param_names, self.adapt_param,
            kernel=cov_amplitude * other_kernel,
            normalize_y=True,
            noise="gaussian",
            n_restarts_optimizer=2)

    super(HyperParamOptimizer, self).__init__(
        skopt_dims,
        base_estimator=base_estimator,
        n_initial_points=self.n_initial_points,
        acq_optimizer_kwargs=self.acq_optimizer_kwargs)
def test_normalize_dimensions_all_categorical():
    """A space built only from lists of strings reports itself as categorical."""
    letters = ['a', 'b', 'c']
    digits = ['1', '2', '3']
    normalized = normalize_dimensions((letters, digits))
    assert normalized.is_categorical
def fit(self, X, Y, total_duration=6e7, n_iter=100, cv_iter=None, optimizer=None, acq_func='gp_hedge', **kwargs):
    """Tune the ranker's parameters with an ask/tell optimizer, then refit.

    Each iteration asks the optimizer for a point, evaluates it on every
    pre-computed train/test split via ``self._fit_ranker``, and tells the
    optimizer the mean result. The optimizer is dumped to
    ``self.optimizer_path`` after each iteration. The loop stops early when
    the remaining time budget is smaller than the longest evaluation seen so
    far. If the loop is not interrupted, a copy of the ranker with the best
    point's parameters is fitted on the complete data and stored as
    ``self.model``.

    Parameters
    ----------
    X, Y : array-like or dict
        Training data. When ``X`` is a dict, splits are computed per key and
        ``Y`` is indexed with the same keys.
    total_duration : number
        Time budget. Presumably in microseconds, given the
        ``microsec_to_time`` helper used for logging (TODO confirm).
    n_iter : int
        Maximum number of optimization iterations.
    cv_iter : splitter or None
        Cross-validation splitter; a 3-split ``ShuffleSplit`` with 10% test
        size is used when None.
    optimizer : object or None
        Existing optimizer to continue with; a new one with a GP base
        estimator is created when None.
    acq_func : str
        Acquisition function for a newly created optimizer.
    **kwargs
        Forwarded to ``Optimizer`` when a new one is created.
    """
    start = datetime.now()

    def splitter(itr):
        # Yield train/test arrays for each (train_idx, test_idx) pair.
        for train_idx, test_idx in itr:
            yield X[train_idx], Y[train_idx], X[test_idx], Y[test_idx]

    def splitter_dict(itr_dict):
        # Dict variant: the i-th split of every key is combined into one
        # set of train/test dicts. The split count is taken from the first key.
        n_splits = len(list(itr_dict.values())[0])
        for i in range(n_splits):
            X_train = dict()
            Y_train = dict()
            X_test = dict()
            Y_test = dict()
            for n_obj, itr in itr_dict.items():
                train_idx = itr[i][0]
                test_idx = itr[i][1]
                X_train[n_obj] = np.copy(X[n_obj][train_idx])
                X_test[n_obj] = np.copy(X[n_obj][test_idx])
                Y_train[n_obj] = np.copy(Y[n_obj][train_idx])
                Y_test[n_obj] = np.copy(Y[n_obj][test_idx])
            yield X_train, Y_train, X_test, Y_test

    if cv_iter is None:
        cv_iter = ShuffleSplit(n_splits=3, test_size=0.1, random_state=self.random_state)
    if isinstance(X, dict):
        splits = dict()
        for n_obj, arr in X.items():
            if arr.shape[0] == 1:
                # A single instance cannot be split: it is used for both
                # training and testing in every split.
                splits[n_obj] = [([0], [0]) for i in range(cv_iter.n_splits)]
            else:
                splits[n_obj] = list(cv_iter.split(arr))
    else:
        splits = list(cv_iter.split(X))
    # Pre-compute splits for reuse
    # Here we fix a random seed for all simulations to correlate the random
    # streams:
    # Note: within this method, `seed` is only logged; the three draws are
    # made in this order from self.random_state.
    seed = self.random_state.randint(2**32, dtype='uint32')
    self.logger.debug(
        'Random seed for the ranking algorithm: {}'.format(seed))
    opt_seed = self.random_state.randint(2**32, dtype='uint32')
    self.logger.debug('Random seed for the optimizer: {}'.format(opt_seed))
    gp_seed = self.random_state.randint(2**32, dtype='uint32')
    self.logger.debug(
        'Random seed for the GP surrogate: {}'.format(gp_seed))

    if optimizer is not None:
        opt = optimizer
        self.logger.debug('Setting the provided optimizer')
        self.log_best_params(opt)
    else:
        transformed = []
        for param in self.parameter_ranges:
            transformed.append(check_dimension(param))
        self.logger.info("Parameter Space: {}".format(transformed))
        space = normalize_dimensions(transformed)
        self.logger.info(
            "Parameter Space after transformation: {}".format(space))

        # Todo: Make this passable
        base_estimator = cook_estimator("GP", space=space, random_state=gp_seed, noise="gaussian")
        opt = Optimizer(dimensions=self.parameter_ranges, random_state=opt_seed, base_estimator=base_estimator, acq_func=acq_func, **kwargs)
    self._callbacks_set_optimizer(opt)
    self._callbacks_on_optimization_begin()
    # The setup time above is charged against the budget.
    time_taken = duration_tillnow(start)
    total_duration -= time_taken
    # Start below any possible sum of running times so the first iteration
    # always updates it.
    max_fit_duration = -10000
    self.logger.info('Time left for {} iterations is {}'.format(
        n_iter, microsec_to_time(total_duration)))

    try:
        for t in range(n_iter):
            start = datetime.now()
            self._callbacks_on_iteration_begin(t)
            self.logger.info(
                'Starting optimization iteration: {}'.format(t))
            if t > 0:
                self.log_best_params(opt)
            next_point = opt.ask()
            self.logger.info('Next parameters:\n{}'.format(next_point))
            results = []
            running_times = []
            if isinstance(X, dict):
                for X_train, Y_train, X_test, Y_test in splitter_dict(
                        splits):
                    result, time_taken = self._fit_ranker(
                        X_train, Y_train, X_test, Y_test, next_point)
                    running_times.append(time_taken)
                    results.append(result)
            else:
                for X_train, Y_train, X_test, Y_test in splitter(splits):
                    result, time_taken = self._fit_ranker(
                        X_train, Y_train, X_test, Y_test, next_point)
                    running_times.append(time_taken)
                    results.append(result)
            results = np.array(results)
            running_times = np.array(running_times)
            mean_result = np.mean(results)
            mean_fitting_duration = np.mean(running_times)

            # Storing the maximum time to run the splitting model and adding the time for out of sample evaluation
            if max_fit_duration < np.sum(running_times):
                max_fit_duration = np.sum(running_times)

            self.logger.info(
                'Validation error for the parameters is {:.4f}'.format(
                    mean_result))
            self.logger.info('Time taken for the parameters is {}'.format(
                microsec_to_time(np.sum(running_times))))
            # Acquisition functions whose name contains "ps" are told the
            # mean fitting time alongside the result.
            if "ps" in opt.acq_func:
                opt.tell(next_point, [mean_result, mean_fitting_duration])
            else:
                opt.tell(next_point, mean_result)
            self._callbacks_on_iteration_end(t)

            self.logger.info(
                "Main optimizer iterations done {} and saving the model".format(np.array(opt.yi).shape[0]))
            # Persist after every iteration.
            dump(opt, self.optimizer_path)

            time_taken = duration_tillnow(start)
            total_duration -= time_taken
            self.logger.info('Time left for simulations is {} '.format(
                microsec_to_time(total_duration)))

            # Stop when the remaining budget cannot cover another evaluation
            # as long as the longest one observed.
            if (total_duration - max_fit_duration) < 0:
                self.logger.info(
                    'At iteration {} maximum time required by model to validate a parameter values'
                    .format(microsec_to_time(max_fit_duration)))
                self.logger.info(
                    'At iteration {} simulation stops, due to time deficiency'
                    .format(t))
                break

    except KeyboardInterrupt:
        # On interrupt the final refit in the else-branch is skipped.
        self.logger.debug(
            'Optimizer interrupted saving the model at {}'.format(
                self.optimizer_path))
        self.log_best_params(opt)
    else:
        self.logger.debug(
            'Finally, fit a model on the complete training set and storing the model at {}'
            .format(self.optimizer_path))
        # Keep a caller-provided epoch count; otherwise default to 1000.
        self._fit_params["epochs"] = self._fit_params.get("epochs", 1000)
        if "ps" in opt.acq_func:
            # Results are [result, duration] pairs here; minimise the result.
            best_point = opt.Xi[np.argmin(np.array(opt.yi)[:, 0])]
        else:
            best_point = opt.Xi[np.argmin(opt.yi)]
        self._set_new_parameters(best_point)
        self.model = copy.copy(self.ranker)
        self.model.fit(X, Y, **self._fit_params)
    finally:
        self._callbacks_on_optimization_end()
        self.optimizer = opt
        # Only persist an optimizer that has at least one recorded result.
        if np.array(opt.yi).shape[0] != 0:
            dump(opt, self.optimizer_path)
def test_categoricals_mixed_types():
    """A point with int, str and bool categories survives a round trip."""
    point = [1, 'a', True]
    space = normalize_dimensions([[1, 2, 3, 4], ['a', 'b', 'c'], [True, False]])
    round_trip = space.inverse_transform(space.transform([point]))
    assert (round_trip == [point])
def run(self):
    """start the tuning process

    Dispatches on ``self.engine``:

    * ``random`` / ``forest`` / ``extraTrees`` / ``gbrt`` / ``bayes``: run an
      ask/tell loop, optionally seeded with reference points.
    * ``abtest`` / ``gridsearch`` / ``lhs``: delegate to the matching manager.
    * ``tpe`` / ``traverse``: delegate, send the final message and return
      immediately.

    The result is sent over ``self.child_conn`` as a dict containing
    ``"param"`` and ``"finished"`` (plus ``"rank"`` when feature selection is
    enabled). On an exception inside the engine dispatch, the exception is
    sent over the connection instead and None is returned.

    Returns:
        The optimized parameters, or None on error.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally so the block stays self-contained.
    from collections.abc import Iterable

    def objective(var):
        """objective method receive the benchmark result and send the next parameters"""
        iter_result = {}
        option = []
        for i, knob in enumerate(self.knobs):
            params[knob['name']] = var[i]
            # String knobs are recorded by the index of the chosen option.
            if knob['dtype'] == 'string':
                option.append(knob['options'].index(var[i]))
            else:
                option.append(var[i])

        iter_result["param"] = params
        self.child_conn.send(iter_result)
        result = self.child_conn.recv()
        # The reply is a comma-separated list of numbers; their sum is the score.
        x_num = 0.0
        eval_list = result.split(',')
        for value in eval_list:
            num = float(value)
            x_num = x_num + num
        options.append(option)
        performance.append(x_num)
        return x_num

    params = {}
    options = []
    performance = []
    labels = []
    estimator = None

    try:
        if self.engine in ('random', 'forest', 'gbrt', 'bayes', 'extraTrees'):
            params_space = self.build_space()
            ref_x, ref_y = self.transfer()
            if len(ref_x) == 0:
                if len(self.ref) == 0:
                    ref_x = None
                else:
                    ref_x = self.ref
                ref_y = None
            # A single reference point is wrapped into a list of points.
            if ref_x is not None and not isinstance(
                    ref_x[0], (list, tuple)):
                ref_x = [ref_x]
            LOGGER.info('x0: %s', ref_x)
            LOGGER.info('y0: %s', ref_y)

            if ref_x is not None and isinstance(ref_x[0], (list, tuple)):
                self._n_random_starts = 0 if len(ref_x) >= self._n_random_starts \
                    else self._n_random_starts - len(ref_x) + 1

            LOGGER.info('n_random_starts parameter is: %d',
                        self._n_random_starts)
            LOGGER.info("Running performance evaluation.......")
            if self.engine == 'random':
                estimator = 'dummy'
            elif self.engine == 'forest':
                estimator = 'RF'
            elif self.engine == 'extraTrees':
                estimator = 'ET'
            elif self.engine == 'gbrt':
                estimator = 'GBRT'
            elif self.engine == 'bayes':
                params_space = normalize_dimensions(params_space)
                estimator = cook_estimator("GP",
                                           space=params_space,
                                           noise=self.noise)

            LOGGER.info("base_estimator is: %s", estimator)
            optimizer = baseOpt(dimensions=params_space,
                                n_random_starts=self._n_random_starts,
                                random_state=1,
                                base_estimator=estimator)
            n_calls = self.max_eval
            # User suggested points at which to evaluate the objective first
            if ref_x and ref_y is None:
                ref_y = list(map(objective, ref_x))
                LOGGER.info("ref_y is: %s", ref_y)

            # Pass user suggested initialisation points to the optimizer
            if ref_x:
                if not isinstance(ref_y, (Iterable, numbers.Number)):
                    raise ValueError(
                        "`ref_y` should be an iterable or a scalar, "
                        "got %s" % type(ref_y))
                if len(ref_x) != len(ref_y):
                    raise ValueError("`ref_x` and `ref_y` should "
                                     "have the same length")
                LOGGER.info("ref_x: %s", ref_x)
                LOGGER.info("ref_y: %s", ref_y)
                n_calls -= len(ref_y)
                ret = optimizer.tell(ref_x, ref_y)

            # NOTE(review): if this loop runs zero times and no reference
            # points were told, `ret` stays unbound and the read further down
            # fails - confirm whether that combination can occur.
            for i in range(n_calls):
                next_x = optimizer.ask()
                LOGGER.info("next_x: %s", next_x)
                LOGGER.info("Running performance evaluation.......")
                next_y = objective(next_x)
                LOGGER.info("next_y: %s", next_y)
                ret = optimizer.tell(next_x, next_y)
                LOGGER.info("finish (ref_x, ref_y) tell")
        elif self.engine == 'abtest':
            abtuning_manager = ABtestTuningManager(self.knobs,
                                                   self.child_conn,
                                                   self.split_count)
            options, performance = abtuning_manager.do_abtest_tuning_abtest(
            )
            params = abtuning_manager.get_best_params()
            # convert string option into index
            options = abtuning_manager.get_options_index(options)
        elif self.engine == 'gridsearch':
            num_done = 0
            if self.y_ref is not None:
                num_done = len(self.y_ref)
            gstuning_manager = GridSearchTuningManager(
                self.knobs, self.child_conn)
            options, performance = gstuning_manager.do_gridsearch(num_done)
            params, labels = gstuning_manager.get_best_params()
            # convert string option into index
            options = gstuning_manager.get_options_index(options)
        elif self.engine == 'lhs':
            from analysis.optimizer.knob_sampling_manager import KnobSamplingManager
            knobsampling_manager = KnobSamplingManager(
                self.knobs, self.child_conn, self.max_eval,
                self.split_count)
            options = knobsampling_manager.get_knob_samples()
            performance = knobsampling_manager.do_knob_sampling_test(
                options)
            params = knobsampling_manager.get_best_params(
                options, performance)
            options = knobsampling_manager.get_options_index(options)
        elif self.engine == 'tpe':
            from analysis.optimizer.tpe_optimizer import TPEOptimizer
            tpe_opt = TPEOptimizer(self.knobs, self.child_conn,
                                   self.max_eval)
            best_params = tpe_opt.tpe_minimize_tuning()
            final_param = {}
            final_param["finished"] = True
            final_param["param"] = best_params
            self.child_conn.send(final_param)
            return best_params
        elif self.engine == 'traverse':
            from analysis.optimizer.knob_traverse_manager import KnobTraverseManager
            default_values = [
                p_nob['ref'] for _, p_nob in enumerate(self.knobs)
            ]
            knobtraverse_manager = KnobTraverseManager(
                self.knobs, self.child_conn, default_values)
            traverse_list = knobtraverse_manager.get_traverse_list()
            performance = knobtraverse_manager.get_traverse_performance(
                traverse_list)
            rank = knobtraverse_manager.get_traverse_rank(performance)
            final_param = {
                "rank": rank,
                "param": knobtraverse_manager.get_default_values(),
                "finished": True
            }
            self.child_conn.send(final_param)
            return final_param["param"]

        LOGGER.info("Minimization procedure has been completed.")
    except ValueError as value_error:
        LOGGER.error('Value Error: %s', repr(value_error))
        self.child_conn.send(value_error)
        return None
    except RuntimeError as runtime_error:
        LOGGER.error('Runtime Error: %s', repr(runtime_error))
        self.child_conn.send(runtime_error)
        return None
    except Exception as err:
        LOGGER.error('Unexpected Error: %s', repr(err))
        self.child_conn.send(Exception("Unexpected Error:", repr(err)))
        return None

    for i, knob in enumerate(self.knobs):
        # Only the ask/tell engines set `estimator`; for them the best point
        # is read from the last optimizer result.
        if estimator is not None:
            params[knob['name']] = ret.x[i]
        # The grid-search manager already supplied its own labels.
        if self.engine != 'gridsearch':
            labels.append(knob['name'])

    LOGGER.info("Optimized result: %s", params)
    LOGGER.info("The optimized profile has been generated.")
    final_param = {}
    if self.sel_feature is True:
        # NOTE(review): `rank` is only assigned for the "wefs" and "vrfs"
        # selectors; any other value fails on the read below - confirm the
        # selector is validated upstream.
        if self.feature_selector == "wefs":
            wefs = WeightedEnsembleFeatureSelector()
            rank = wefs.get_ensemble_feature_importance(
                options, performance, labels)
        elif self.feature_selector == "vrfs":
            vrfs = VarianceReductionFeatureSelector()
            rank = vrfs.get_ensemble_feature_importance(
                options, performance, labels)
        final_param["rank"] = rank
        LOGGER.info(
            "The feature importances of current evaluation are: %s", rank)

    final_param["param"] = params
    final_param["finished"] = True
    self.child_conn.send(final_param)
    return params
def initialize_data(
    parameter_ranges: Sequence[Union[Sequence, Dimension]],
    data_path: Optional[str] = None,
    intermediate_data_path: Optional[str] = None,
    resume: bool = True,
) -> Tuple[list, list, list, int, Union[int, np.ndarray], np.ndarray, list]:
    """Initialize data structures needed for tuning. Either empty or resumed from disk.

    Parameters
    ----------
    parameter_ranges : Sequence of Dimension objects or tuples
        Parameter range specifications as expected by scikit-optimize.
    data_path : str or None, default=None
        Path to the file containing the data structures used for resuming.
        If None, no resuming will be performed.
    intermediate_data_path : str or None, default=None
        Path to the file containing the data structures used for resuming an
        unfinished experiment. If None, the intermediate state is left at its
        empty defaults.
    resume : bool, default=True
        If True, fill the data structures with the the data from the given
        data_path. Otherwise return empty data structures.

    Returns
    -------
    tuple
        ``(X, y, noise, iteration, round, counts_array, point)``. ``X``, ``y``
        and ``noise`` are lists, ``iteration`` is ``len(X)`` after loading.
        ``round``, ``counts_array`` and ``point`` are the three arrays stored
        in the intermediate file (``point`` converted to a list), or ``0``, a
        zero array of length 5 and ``[]`` when nothing was loaded.

    Raises
    ------
    ValueError
        If the number of specified parameters is not matching the existing
        number of parameters in the data.
    """
    logger = logging.getLogger()
    X = []
    y = []
    noise = []
    point = []
    iteration = 0
    # Trailing underscore avoids shadowing the builtin `round`.
    round_ = 0
    counts_array = np.array([0, 0, 0, 0, 0])

    if data_path is None or not resume:
        return X, y, noise, iteration, round_, counts_array, point

    space = normalize_dimensions(parameter_ranges)

    # The intermediate file is optional: pathlib.Path(None) raises TypeError,
    # so the documented default of None has to be guarded explicitly.
    if intermediate_data_path is not None:
        intermediate_path = pathlib.Path(intermediate_data_path)
        if intermediate_path.exists():
            with np.load(intermediate_path) as importa:
                round_ = importa["arr_0"]
                counts_array = importa["arr_1"]
                point = importa["arr_2"].tolist()

    path = pathlib.Path(data_path)
    if path.exists():
        with np.load(path) as importa:
            X = importa["arr_0"].tolist()
            y = importa["arr_1"].tolist()
            noise = importa["arr_2"].tolist()
        # An empty data file has nothing to validate or reduce; indexing
        # X[0] would raise IndexError.
        if X:
            if len(X[0]) != space.n_dims:
                raise ValueError(
                    f"Number of parameters ({len(X[0])}) are not matching "
                    f"the number of dimensions ({space.n_dims})."
                )
            reduction_needed, X_reduced, y_reduced, noise_reduced = reduce_ranges(
                X, y, noise, space
            )
            if reduction_needed:
                # Keep the full data set next to the original file before
                # dropping the points that fall outside the new ranges.
                backup_path = path.parent / (
                    path.stem + f"_backup_{int(time.time())}" + path.suffix
                )
                logger.warning(
                    f"The parameter ranges are smaller than the existing data. "
                    f"Some points will have to be discarded. "
                    f"The original {len(X)} data points will be saved to "
                    f"{backup_path}"
                )
                np.savez_compressed(
                    backup_path, np.array(X), np.array(y), np.array(noise)
                )
                X = X_reduced
                y = y_reduced
                noise = noise_reduced
        iteration = len(X)
    return X, y, noise, iteration, round_, counts_array, point
def test_normalize_dimensions(dimension, name):
    """The first normalized dimension carries the expected name."""
    normalized = normalize_dimensions([dimension])
    first = normalized.dimensions[0]
    assert first.name == name
def test_normalize_dimensions(dimensions, normalizations):
    """Each normalized dimension reports the expected ``transform_``."""
    pairs = zip(normalize_dimensions(dimensions), normalizations)
    for dim, expected in pairs:
        assert dim.transform_ == expected