Example 1
0
    def _create_acquisition_function(self, name, model, **kwargs):
        if name not in ["UCB", "EntropySearch", "MinimalRegretSearch",
                        "GREEDY", "RANDOM"]:
            raise ValueError("%s acquisition function not supported."
                             % name)

        return create_acquisition_function(name, model, **kwargs)
Example 2
0
    def init(self, dimension):
        self.dimension = dimension

        # Create surrogate model, acquisition function and Bayesian optimizer
        self.model = \
            GaussianProcessModel(random_state=self.rng, **self.gp_kwargs)

        self.acquisition_function = \
            create_acquisition_function(self.acquisition_function, self.model,
                                        **self.acq_fct_kwargs)

        if len(self.boundaries) == 1:
            self.boundaries = np.array(self.boundaries * self.dimension)
        elif len(self.boundaries) == self.dimension:
            self.boundaries = np.array(self.boundaries)
        else:
            raise Exception("Boundaries not specified for all dimensions")

        if self.bo_type == "bo":
            BoClass = BayesianOptimizer
        elif self.bo_type == "rembo":
            BoClass = REMBOOptimizer
        elif self.bo_type == "interleaved_rembo":
            BoClass = InterleavedREMBOOptimizer
        else:
            raise Exception("Unknown bo_type %s" % self.bo_type)
        self.bayes_opt = BoClass(
            model=self.model, acquisition_function=self.acquisition_function,
            optimizer=self.optimizer, n_dims=self.dimension,
            data_space=self.boundaries, maxf=self.kwargs.pop("maxf", 100),
            random_state=self.rng, **self.kwargs)
Example 3
0
    def init(self, dimension):
        self.dimension = dimension

        # Create surrogate model, acquisition function and Bayesian optimizer
        self.model = \
            GaussianProcessModel(random_state=self.rng, **self.gp_kwargs)

        self.acquisition_function = \
            create_acquisition_function(self.acquisition_function, self.model,
                                        **self.acq_fct_kwargs)

        if len(self.boundaries) == 1:
            self.boundaries = np.array(self.boundaries * self.dimension)
        elif len(self.boundaries) == self.dimension:
            self.boundaries = np.array(self.boundaries)
        else:
            raise Exception("Boundaries not specified for all dimensions")

        if self.bo_type == "bo":
            BoClass = BayesianOptimizer
        elif self.bo_type == "rembo":
            BoClass = REMBOOptimizer
        elif self.bo_type == "interleaved_rembo":
            BoClass = InterleavedREMBOOptimizer
        else:
            raise Exception("Unknown bo_type %s" % self.bo_type)
        self.bayes_opt = BoClass(
            model=self.model,
            acquisition_function=self.acquisition_function,
            optimizer=self.optimizer,
            n_dims=self.dimension,
            data_space=self.boundaries,
            maxf=self.kwargs.pop("maxf", 100),
            random_state=self.rng,
            **self.kwargs)
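
For reference, the following is a minimal, self-contained sketch of the boundary broadcasting performed in init() above; the concrete values (a dimension of 3 and a single (-1.0, 1.0) pair) are assumptions chosen only for illustration.

    import numpy as np

    dimension = 3
    boundaries = [(-1.0, 1.0)]  # one (min, max) pair shared by all dimensions

    if len(boundaries) == 1:
        # A single pair is replicated so every dimension gets the same range.
        boundaries = np.array(boundaries * dimension)
    elif len(boundaries) == dimension:
        boundaries = np.array(boundaries)
    else:
        raise Exception("Boundaries not specified for all dimensions")

    print(boundaries.shape)  # (3, 2)
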
Example 4
0
    def _create_acquisition_function(self, name, model, **kwargs):
        if name not in [
                "UCB", "EntropySearch", "MinimalRegretSearch", "GREEDY",
                "RANDOM"
        ]:
            raise ValueError("%s acquisition function not supported." % name)

        return create_acquisition_function(name, model, **kwargs)
Example 5
0
    def best_policy(self, maxfun=15000, variance=0.01,
                    training=["model-free", "model_based"]):
        """Returns the best (greedy) policy learned so far.

        Parameters
        ----------
        maxfun : int (default: 15000)
            How many function evaluations are used for model-based policy
            training. Only relevant if policy is not None.

        variance : float, optional (default: 0.01)
            The initial exploration variance of CMA-ES in the model-based
            policy training. Only relevant if policy is not None.

        training : list (default: ["model-free", "model-based"])
            How the policy is trained from data. If "model-free" is in the
            list, a C-REPS-based training is performed. If "model-based" is in
            the list, a model-based training is performed on the learned model
            by simulating rollouts. If both are in the list, model-free
            training is performed first, followed by model-based fine-tuning.
        """
        if self.policy is not None and training != []:
            if self.policy_fitted:  # return already learned policy
                return self.policy
            assert "model-free" in training or "model-based" in training, \
                "training must contain either 'model-free' or 'model-based'"
            X = np.asarray(self.bayes_opt.X_)
            contexts = X[:, :self.context_dims]
            parameters = X[:, self.context_dims:]
            returns = np.asarray(self.bayes_opt.y_)
            # Perform training
            if "model-free" in training:
                self.policy = model_free_policy_training(
                    self.policy, contexts, parameters, returns,
                    epsilon=1.0, min_eta=1e-6)
            if "model-based" in training:
                self.policy = model_based_policy_training(
                    self.policy, contexts, parameters, returns,
                    boundaries=self.boundaries,
                    policy_initialized="model-free" in training,
                    maxfun=maxfun, variance=variance,
                    model_conf=self.gp_kwargs)
            self.policy_fitted = True
            return self.policy
        else:
            # TODO return UpperLevelPolicy object
            greedy_optimizer = deepcopy(self.bayes_opt)
            greedy_optimizer.acquisition_function = \
                create_acquisition_function("GREEDY", self.model)
            def non_parametric_policy(c, explore):
                return self._determine_next_query_point(c, greedy_optimizer)
            return non_parametric_policy
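
A hypothetical usage sketch of best_policy(); the optimizer instance opt and the argument values are assumptions for illustration, not part of the example above.

    # `opt` is assumed to be an already-fitted optimizer that exposes the
    # best_policy() method shown above.

    # Model-free (C-REPS) training only.
    policy = opt.best_policy(training=["model-free"])

    # Model-free training followed by model-based fine-tuning, here with a
    # smaller CMA-ES budget and a wider initial exploration variance.
    policy = opt.best_policy(maxfun=5000, variance=0.1,
                             training=["model-free", "model-based"])

    # With an empty training list (or when policy is None), a non-parametric
    # greedy policy backed by the surrogate model is returned instead.
    greedy_policy = opt.best_policy(training=[])
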
Example 6
0
 def _create_acquisition_function(self, name, model, **kwargs):
     if name not in ["ContextualEntropySearch",
                     "ContextualEntropySearchLocal"]:
         raise ValueError("%s acquisition function not supported."
                          % name)
     return create_acquisition_function(name, model, **kwargs)
Example 7
0
 def _create_acquisition_function(self, name, model, **kwargs):
     if name not in [
             "ContextualEntropySearch", "ContextualEntropySearchLocal"
     ]:
         raise ValueError("%s acquisition function not supported." % name)
     return create_acquisition_function(name, model, **kwargs)
Example 8
0
    def best_policy(self,
                    maxfun=15000,
                    variance=0.01,
                    training=["model-free", "model-based"]):
        """Returns the best (greedy) policy learned so far.

        Parameters
        ----------
        maxfun : int (default: 15000)
            How many function evaluations are used for model-based policy
            training. Only relevant if policy is not None.

        variance : float, optional (default: 0.01)
            The initial exploration variance of CMA-ES in the model-based
            policy training. Only relevant if policy is not None.

        training : list (default: ["model-free", "model-based"])
            How the policy is trained from data. If "model-free" is in the
            list, a C-REPS-based training is performed. If "model-based" is in
            the list, a model-based training is performed on the learned model
            by simulating rollouts. If both are in the list, model-free
            training is performed first, followed by model-based fine-tuning.
        """
        if self.policy is not None and training != []:
            if self.policy_fitted:  # return already learned policy
                return self.policy
            assert "model-free" in training or "model-based" in training, \
                "training must contain either 'model-free' or 'model-based'"
            X = np.asarray(self.bayes_opt.X_)
            contexts = X[:, :self.context_dims]
            parameters = X[:, self.context_dims:]
            returns = np.asarray(self.bayes_opt.y_)
            # Perform training
            if "model-free" in training:
                self.policy = model_free_policy_training(self.policy,
                                                         contexts,
                                                         parameters,
                                                         returns,
                                                         epsilon=1.0,
                                                         min_eta=1e-6)
            if "model-based" in training:
                self.policy = model_based_policy_training(
                    self.policy,
                    contexts,
                    parameters,
                    returns,
                    boundaries=self.boundaries,
                    policy_initialized="model-free" in training,
                    maxfun=maxfun,
                    variance=variance,
                    model_conf=self.gp_kwargs)
            self.policy_fitted = True
            return self.policy
        else:
            # TODO return UpperLevelPolicy object
            greedy_optimizer = deepcopy(self.bayes_opt)
            greedy_optimizer.acquisition_function = \
                create_acquisition_function("GREEDY", self.model)

            def non_parametric_policy(c, explore):
                return self._determine_next_query_point(c, greedy_optimizer)

            return non_parametric_policy