def _create_acquisition_function(self, name, model, **kwargs): if not name in ["UCB", "EntropySearch", "MinimalRegretSearch", "GREEDY", "RANDOM"]: raise ValueError("%s acquisition function not supported." % name) return create_acquisition_function(name, model, **kwargs)
def init(self, dimension):
    self.dimension = dimension

    # Create surrogate model, acquisition function and Bayesian optimizer
    self.model = GaussianProcessModel(random_state=self.rng,
                                      **self.gp_kwargs)
    self.acquisition_function = self._create_acquisition_function(
        self.acquisition_function, self.model, **self.acq_fct_kwargs)

    # A single (min, max) pair is replicated for all dimensions
    if len(self.boundaries) == 1:
        self.boundaries = np.array(self.boundaries * self.dimension)
    elif len(self.boundaries) == self.dimension:
        self.boundaries = np.array(self.boundaries)
    else:
        raise ValueError("Boundaries not specified for all dimensions")

    # Select the optimizer class: plain Bayesian optimization or a
    # random-embedding (REMBO) variant for high-dimensional spaces
    if self.bo_type == "bo":
        BoClass = BayesianOptimizer
    elif self.bo_type == "rembo":
        BoClass = REMBOOptimizer
    elif self.bo_type == "interleaved_rembo":
        BoClass = InterleavedREMBOOptimizer
    else:
        raise ValueError("Unknown bo_type %s" % self.bo_type)
    self.bayes_opt = BoClass(
        model=self.model,
        acquisition_function=self.acquisition_function,
        optimizer=self.optimizer, n_dims=self.dimension,
        data_space=self.boundaries, maxf=self.kwargs.pop("maxf", 100),
        random_state=self.rng, **self.kwargs)
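# Usage sketch (hypothetical: the constructor arguments and class name below
# are assumptions, as the surrounding class definition is not shown here).
# A single (min, max) pair passed as `boundaries` is broadcast to all
# dimensions by init().
#
#   >>> opt = SomeBayesOptBehaviorSearch(boundaries=[(-1.0, 1.0)],
#   ...                                  bo_type="bo",
#   ...                                  acquisition_function="UCB")
#   >>> opt.init(dimension=5)   # self.boundaries becomes a (5, 2) array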
def _create_acquisition_function(self, name, model, **kwargs): if not name in [ "UCB", "EntropySearch", "MinimalRegretSearch", "GREEDY", "RANDOM" ]: raise ValueError("%s acquisition function not supported." % name) return create_acquisition_function(name, model, **kwargs)
def best_policy(self, maxfun=15000, variance=0.01,
                training=["model-free", "model-based"]):
    """Returns the best (greedy) policy learned so far.

    Parameters
    ----------
    maxfun : int (default: 15000)
        How many function evaluations are used for model-based policy
        training. Only relevant if policy is not None.

    variance : float, optional (default: 0.01)
        The initial exploration variance of CMA-ES in the model-based
        policy training. Only relevant if policy is not None.

    training : list (default: ["model-free", "model-based"])
        How the policy is trained from data. If "model-free" is in the
        list, a C-REPS-based training is performed. If "model-based" is in
        the list, a model-based training is performed in the model by
        simulating rollouts. If both are in the list, model-free training
        is performed first, followed by model-based fine-tuning.
    """
    if self.policy is not None and training != []:
        if self.policy_fitted:  # return the already learned policy
            return self.policy
        assert "model-free" in training or "model-based" in training, \
            "training must contain either 'model-free' or 'model-based'"

        # Split the collected samples into contexts, parameters and returns
        X = np.asarray(self.bayes_opt.X_)
        contexts = X[:, :self.context_dims]
        parameters = X[:, self.context_dims:]
        returns = np.asarray(self.bayes_opt.y_)

        # Perform training
        if "model-free" in training:
            self.policy = model_free_policy_training(
                self.policy, contexts, parameters, returns,
                epsilon=1.0, min_eta=1e-6)
        if "model-based" in training:
            self.policy = model_based_policy_training(
                self.policy, contexts, parameters, returns,
                boundaries=self.boundaries,
                policy_initialized="model-free" in training,
                maxfun=maxfun, variance=variance,
                model_conf=self.gp_kwargs)
        self.policy_fitted = True
        return self.policy
    else:  # TODO: return UpperLevelPolicy object
        # Fall back to a non-parametric greedy policy that queries the
        # surrogate model for the best parameters in a given context
        greedy_optimizer = deepcopy(self.bayes_opt)
        greedy_optimizer.acquisition_function = \
            create_acquisition_function("GREEDY", self.model)

        def non_parametric_policy(c, explore):
            return self._determine_next_query_point(c, greedy_optimizer)
        return non_parametric_policy
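# Usage sketch (hypothetical; `opt` and `context` are placeholders): once the
# optimizer has collected rollouts in self.bayes_opt.X_ and self.bayes_opt.y_,
# the learned upper-level policy can be retrieved and queried for a given
# context. Restricting `training` to ["model-free"] skips the more expensive
# model-based fine-tuning.
#
#   >>> policy = opt.best_policy(training=["model-free"])
#   >>> params = policy(context, explore=False)  # context: context_dims entries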
def _create_acquisition_function(self, name, model, **kwargs): if not name in ["ContextualEntropySearch", "ContextualEntropySearchLocal"]: raise ValueError("%s acquisition function not supported." % name) return create_acquisition_function(name, model, **kwargs)
def _create_acquisition_function(self, name, model, **kwargs): if not name in [ "ContextualEntropySearch", "ContextualEntropySearchLocal" ]: raise ValueError("%s acquisition function not supported." % name) return create_acquisition_function(name, model, **kwargs)