def __init__(self, optimizer_arguments=None):
        """
        Initializes a bayesian optimizer.

        Parameters
        ----------
        optimizer_arguments: dict of string keys
            Sets the possible arguments for this optimizer. Available are:
            "initial_random_runs" : int, optional
                The number of initial random runs before using the GP. Default
                is 10.
            "random_state" : scipy random state, optional
                The scipy random state or object to initialize one. Default is
                None.
            "acquisition_hyperparameters" : dict, optional
                dictionary of acquisition-function hyperparameters
            "num_gp_restarts" : int
                GPy's optimization requires restarts to find a good solution.
                This parameter controls this. Default is 10.
            "acquisition" : AcquisitionFunction
                The acquisition function to use. Default is
                ExpectedImprovement.
            "num_precomputed" : int
                The number of points that should be kept precomputed for faster
                multiple workers.
        """
        self.logger = get_logger(self)
        if optimizer_arguments is None:
            optimizer_arguments = {}
        self.initial_random_runs = optimizer_arguments.get(
            'initial_random_runs', self.initial_random_runs)
        self.random_state = check_random_state(
            optimizer_arguments.get('random_state', None))
        self.acquisition_hyperparams = optimizer_arguments.get(
            'acquisition_hyperparams', None)
        self.num_gp_restarts = optimizer_arguments.get('num_gp_restarts',
                                                       self.num_gp_restarts)
        if not isinstance(optimizer_arguments.get('acquisition'),
                          AcquisitionFunction):
            self.acquisition_function = optimizer_arguments.get(
                'acquisition',
                ExpectedImprovement)(self.acquisition_hyperparams)
        else:
            self.acquisition_function = optimizer_arguments.get("acquisition")
        self.kernel_params = optimizer_arguments.get("kernel_params", {})
        self.kernel = optimizer_arguments.get("kernel", "matern52")
        self.random_searcher = RandomSearch(
            {"random_state": self.random_state})

        if mcmc_imported:
            self.mcmc = optimizer_arguments.get("mcmc", False)
        else:
            self.mcmc = False

        self.num_precomputed = optimizer_arguments.get('num_precomputed', 10)
        self.logger.info("Bayesian optimization initialized.")
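
# A minimal usage sketch (an assumption, not part of the source), taking the
# __init__ above to belong to apsis' SimpleBayesianOptimizer as shown below.
# Every key in optimizer_arguments is optional and falls back to the class
# defaults documented above.
optimizer = SimpleBayesianOptimizer({
    "initial_random_runs": 5,            # random exploration before the GP
    "random_state": 42,                  # an int; check_random_state builds the state
    "acquisition": ExpectedImprovement,  # a class; instantiated internally
    "acquisition_hyperparams": None,     # passed through to the acquisition function
    "num_gp_restarts": 20,
    "num_precomputed": 3,
    "kernel": "rbf",                     # or the default "matern52"
    "kernel_params": {"ARD": True},
})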
Example #2
def test_get_next_candidate(self):
    opt = RandomSearch({"initial_random_runs": 3})
    exp = Experiment("test", {"x": MinMaxNumericParamDef(0, 1),
                              "name": NominalParamDef(["A", "B", "C"])})
    for i in range(5):
        cand = opt.get_next_candidates(exp)[0]
        assert_true(isinstance(cand, Candidate))
        cand.result = 2
        exp.add_finished(cand)
    cands = opt.get_next_candidates(exp, num_candidates=3)
    assert_equal(len(cands), 3)
class SimpleBayesianOptimizer(Optimizer):
    """
    This implements a simple bayesian optimizer.

    It is simple because it only implements the simplest form - no freeze-thaw,
    (currently) no multiple workers, only numeric parameters.

    Attributes
    ----------
    SUPPORTED_PARAM_TYPES : list of ParamDefs
        The supported parameter types. Currently only numberic and position.
    kernel : GPy Kernel
        The Kernel to be used with the gp.
    acquisition_function : acquisition_function
        The acquisition function to use
    acquisition_hyperparams :
        The acquisition hyperparameters.
    random_state : scipy random_state or int.
        The scipy random state or object to initialize one. For reproduction.
    random_searcher : RandomSearch
        The random search instance used to generate the first
        initial_random_runs candidates.
    gp : GPy gaussian process
        The gaussian process used here.
    initial_random_runs : int
        The number of initial random runs before using the GP. Default is 10.
    num_gp_restarts : int
        GPy's optimization requires restarts to find a good solution. This
        parameter controls this. Default is 10.
    logger: logger
        The logger instance for this object.
    """
    SUPPORTED_PARAM_TYPES = [NumericParamDef, PositionParamDef]

    kernel = None
    kernel_params = None
    acquisition_function = None
    acquisition_hyperparams = None

    random_state = None
    random_searcher = None

    gp = None
    mcmc = False
    initial_random_runs = 10
    num_gp_restarts = 10

    num_precomputed = None

    logger = None

    def __init__(self, optimizer_arguments=None):
        """
        Initializes a bayesian optimizer.

        Parameters
        ----------
        optimizer_arguments: dict of string keys
            Sets the possible arguments for this optimizer. Available are:
            "initial_random_runs" : int, optional
                The number of initial random runs before using the GP. Default
                is 10.
            "random_state" : scipy random state, optional
                The scipy random state or object to initialize one. Default is
                None.
            "acquisition_hyperparameters" : dict, optional
                dictionary of acquisition-function hyperparameters
            "num_gp_restarts" : int
                GPy's optimization requires restarts to find a good solution.
                This parameter controls this. Default is 10.
            "acquisition" : AcquisitionFunction
                The acquisition function to use. Default is
                ExpectedImprovement.
            "num_precomputed" : int
                The number of points that should be kept precomputed for faster
                multiple workers.
        """
        self.logger = get_logger(self)
        if optimizer_arguments is None:
            optimizer_arguments = {}
        self.initial_random_runs = optimizer_arguments.get(
            'initial_random_runs', self.initial_random_runs)
        self.random_state = check_random_state(
            optimizer_arguments.get('random_state', None))
        self.acquisition_hyperparams = optimizer_arguments.get(
            'acquisition_hyperparams', None)
        self.num_gp_restarts = optimizer_arguments.get('num_gp_restarts',
                                                       self.num_gp_restarts)
        if not isinstance(optimizer_arguments.get('acquisition'),
                          AcquisitionFunction):
            self.acquisition_function = optimizer_arguments.get(
                'acquisition',
                ExpectedImprovement)(self.acquisition_hyperparams)
        else:
            self.acquisition_function = optimizer_arguments.get("acquisition")
        self.kernel_params = optimizer_arguments.get("kernel_params", {})
        self.kernel = optimizer_arguments.get("kernel", "matern52")
        self.random_searcher = RandomSearch(
            {"random_state": self.random_state})

        if mcmc_imported:
            self.mcmc = optimizer_arguments.get("mcmc", False)
        else:
            self.mcmc = False

        self.num_precomputed = optimizer_arguments.get('num_precomputed', 10)
        self.logger.info("Bayesian optimization initialized.")

    def get_next_candidates(self, experiment, num_candidates=None):
        if num_candidates is None:
            num_candidates = self.num_precomputed
        # Check whether a random search is still necessary.
        if len(experiment.candidates_finished) < self.initial_random_runs:
            return self.random_searcher.get_next_candidates(
                experiment, num_candidates)

        self._refit(experiment)
        # TODO: refitted must be set, too.
        candidates = []
        new_candidate_points = self.acquisition_function.compute_proposals(
            self.gp, experiment, number_proposals=num_candidates)

        for point_and_value in new_candidate_points:
            # Get the candidate point, which is the first entry in the tuple.
            point_candidate = Candidate(
                experiment.warp_pt_out(point_and_value[0]))
            candidates.append(point_candidate)
        return candidates

    def _refit(self, experiment):
        """
        Refits the GP with the data from experiment.

        Parameters
        ----------
        experiment : Experiment
            The experiment on which to refit this GP.
        """
        candidate_matrix = np.zeros((len(experiment.candidates_finished),
                                     len(experiment.parameter_definitions)))
        results_vector = np.zeros((len(experiment.candidates_finished), 1))

        param_names = sorted(experiment.parameter_definitions.keys())
        self.kernel = self._check_kernel(self.kernel,
                                         len(param_names),
                                         kernel_params=self.kernel_params)
        for i, c in enumerate(experiment.candidates_finished):
            warped_in = experiment.warp_pt_in(c.params)
            param_values = []
            for pn in param_names:
                param_values.append(warped_in[pn])
            candidate_matrix[i, :] = param_values
            results_vector[i] = c.result

        self.logger.debug("Refitting gp with cand %s and results %s" %
                          (candidate_matrix, results_vector))
        self.gp = GPy.models.GPRegression(candidate_matrix, results_vector,
                                          self.kernel)

        if self.mcmc:
            proposal = pm.MALAProposal(dt=1.)
            mcmc = pm.MetropolisHastings(self.gp,
                                         proposal=proposal,
                                         db_filename='apsis.h5')

            mcmc.sample(
                100000,  # Number of MCMC steps
                num_thin=100,  # Number of steps to skip
                num_burn=1000,  # Number of steps to burn initially
                verbose=True)

        else:
            self.gp.constrain_positive("*")
            self.gp.constrain_bounded(0.1, 1, warning=False)
            self.gp.optimize_restarts(num_restarts=self.num_gp_restarts,
                                      verbose=False)

    def _check_kernel(self, kernel, dimension, kernel_params):
        """
        Checks and initializes a kernel.

        Parameters
        ----------
        kernel : kernel or string representation
            The kernel to use. If it is already a kernel, it is returned
            unchanged. Otherwise, a new kernel is initialized with the
            respective parameters.
        dimension : int
            The dimensions of the new kernel.
        kernel_params : dict
            The dictionary of kernel parameters. Currently supported:
            "ARD" : bool, optional
                Whether to use ARD. Default is True.

        Returns
        -------
        kernel : GPy.kern
            A GPy kernel.
        """
        if isinstance(kernel, GPy.kern.Kern):
            return kernel
        translation_dict = {"matern52": GPy.kern.Matern52, "rbf": GPy.kern.RBF}

        if isinstance(kernel, str) and kernel in translation_dict:
            if kernel_params.get('ARD', None) is None:
                kernel_params['ARD'] = True

            constructed_kernel = translation_dict[kernel](dimension,
                                                          **kernel_params)
            return constructed_kernel

        raise ValueError("%s is not a kernel or string representing one!" %
                         kernel)
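
# A minimal end-to-end sketch of driving SimpleBayesianOptimizer; the toy
# objective and parameter definition are assumptions, not taken from the
# source.
exp = Experiment("sketch", {"x": MinMaxNumericParamDef(0, 1)})
opt = SimpleBayesianOptimizer({"initial_random_runs": 3, "num_precomputed": 1})
for _ in range(10):
    cand = opt.get_next_candidates(exp)[0]        # random first, then GP + EI
    cand.result = (cand.params["x"] - 0.5) ** 2   # toy objective to minimize
    exp.add_finished(cand)                        # consumed by the next _refit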
Example #6
class BayesianOptimizer(Optimizer):
    """
    This is a bayesian optimizer class.

    It is a subclass of Optimizer, and internally uses GPy.
    Currently, it supports Numeric and PositionParamDefs, with support for
    NominalParamDef needing to be integrated.

    Attributes
    ----------
    SUPPORTED_PARAM_TYPES : list of ParamDefs
        The supported parameter types. Currently only numeric and position.
    kernel : GPy Kernel
        The Kernel to be used with the gp.
    acquisition_function : acquisition_function
        The acquisition function to use
    acquisition_hyperparams :
        The acquisition hyperparameters.
    random_state : scipy random_state or int.
        The scipy random state or object to initialize one. For reproduction.
    random_searcher : RandomSearch
        The random search instance used to generate the first
        initial_random_runs candidates.
    gp : GPy gaussian process
        The gaussian process used here.
    initial_random_runs : int
        The number of initial random runs before using the GP. Default is 10.
    num_gp_restarts : int
        GPy's optimization requires restarts to find a good solution. This
        parameter controls this. Default is 10.
    logger: logger
        The logger instance for this object.
    """
    SUPPORTED_PARAM_TYPES = [NumericParamDef, NominalParamDef]

    kernel = None
    kernel_params = None
    acquisition_function = None
    acquisition_hyperparams = None

    random_state = None
    random_searcher = None

    gp = None
    initial_random_runs = 10
    num_gp_restarts = 10

    name = "BayOpt"
    return_max = True

    def __init__(self, experiment, optimizer_params=None):
        """
        Initializes a bayesian optimizer.
        Parameters
        ----------
        experiment : Experiment
            The experiment for which to optimize.
        optimizer_arguments: dict of string keys
            Sets the possible arguments for this optimizer. Available are:
            "initial_random_runs" : int, optional
                The number of initial random runs before using the GP. Default
                is 10.
            "random_state" : scipy random state, optional
                The scipy random state or object to initialize one. Default is
                None.
            "acquisition_hyperparameters" : dict, optional
                dictionary of acquisition-function hyperparameters
            "num_gp_restarts" : int
                GPy's optimization requires restarts to find a good solution.
                This parameter controls this. Default is 10.
            "acquisition" : AcquisitionFunction
                The acquisition function to use. Default is
                ExpectedImprovement.
            "num_precomputed" : int
                The number of points that should be kept precomputed for faster
                multiple workers.
        """
        self._logger = get_logger(self)
        self._logger.debug(
            "Initializing bayesian optimizer. Experiment is %s,"
            " optimizer_params %s", experiment, optimizer_params)
        if optimizer_params is None:
            optimizer_params = {}

        self.random_state = optimizer_params.get("random_state", None)
        self.initial_random_runs = optimizer_params.get(
            'initial_random_runs', self.initial_random_runs)
        self.random_state = check_random_state(
            optimizer_params.get('random_state', None))
        self.acquisition_hyperparams = optimizer_params.get(
            'acquisition_hyperparams', None)
        self.num_gp_restarts = optimizer_params.get('num_gp_restarts',
                                                    self.num_gp_restarts)

        self._logger.debug(
            "Initialized relevant parameters. "
            "initial_random_runs is %s, random_state is %s, "
            "acquisition_hyperparams %s, num_gp_restarts %s",
            self.initial_random_runs, self.random_state,
            self.acquisition_hyperparams, self.num_gp_restarts)

        if not isinstance(optimizer_params.get('acquisition'),
                          AcquisitionFunction):
            self.acquisition_function = optimizer_params.get(
                "acquisition", ExpectedImprovement)
            self.acquisition_function = check_acquisition(
                acquisition=self.acquisition_function,
                acquisition_params=self.acquisition_hyperparams)
            self._logger.debug(
                "acquisition is not an AcquisitionFunction instance. Set "
                "it to %s", self.acquisition_function)
        else:
            self.acquisition_function = optimizer_params.get("acquisition")
            self._logger.debug(
                "Loaded acquisition function from "
                "optimizer_params. Is %s", self.acquisition_function)
        self.kernel_params = optimizer_params.get("kernel_params", {})
        self.kernel = optimizer_params.get("kernel", "matern52")

        self._logger.debug("Kernel details: Kernel is %s, kernel_params %s",
                           self.kernel, self.kernel_params)

        self.random_searcher = RandomSearch(experiment, optimizer_params)
        self._logger.debug("Initialized required RandomSearcher; is %s",
                           self.random_searcher)
        Optimizer.__init__(self, experiment, optimizer_params)
        self._logger.debug("Finished initializing bayOpt.")

    def get_next_candidates(self, num_candidates=1):
        self._logger.debug("Returning next %s candidates", num_candidates)
        if (len(self._experiment.candidates_finished) <
                self.initial_random_runs):
            # we do a random search.
            random_candidates = self.random_searcher.get_next_candidates(
                num_candidates)
            self._logger.debug("Still in the random run phase. Returning %s",
                               random_candidates)
            return random_candidates
        candidates = []
        if self.gp is None:
            self._logger.debug("No gp available. Updating with %s",
                               self._experiment)
            self.update(self._experiment)

        new_candidate_points = self.acquisition_function.compute_proposals(
            self.gp,
            self._experiment,
            number_proposals=num_candidates,
            return_max=self.return_max)
        self._logger.debug("Generated new candidate points. Are %s",
                           new_candidate_points)
        self.return_max = False

        for point_and_value in new_candidate_points:
            # Get the candidate point, which is the first entry in the tuple.
            point_candidate = Candidate(
                self._experiment.warp_pt_out(point_and_value[0]))
            candidates.append(point_candidate)
        self._logger.debug("Candidates extracted. Returning %s", candidates)
        return candidates

    def update(self, experiment):
        self._logger.debug("Updating bayOpt with %s", experiment)
        self._experiment = experiment
        if (len(self._experiment.candidates_finished) <
                self.initial_random_runs):
            self._logger.debug("Less than initial_random_runs. No refit "
                               "necessary.")
            return

        self.return_max = True

        candidate_matrix, results_vector = acq_utils.create_cand_matrix_vector(
            experiment, self.treat_failed)

        self.kernel = self._check_kernel(self.kernel,
                                         candidate_matrix.shape[1],
                                         kernel_params=self.kernel_params)
        self._logger.debug("Checked kernel. Kernel is %s", self.kernel)

        self._logger.log(
            5, "Refitting gp with cand %s and results %s",
            candidate_matrix, results_vector)
        self.gp = GPy.models.GPRegression(candidate_matrix, results_vector,
                                          self.kernel)
        self.gp.constrain_positive("*")
        self.gp.constrain_bounded(0.1, 1, warning=False)
        self._logger.debug("Starting gp optimize.")
        self.gp.optimize_restarts(num_restarts=self.num_gp_restarts,
                                  verbose=False)
        self._logger.debug("gp optimize finished.")

    def _check_kernel(self, kernel, dimension, kernel_params):
        """
        Checks and initializes a kernel.

        Parameters
        ----------
        kernel : kernel or string representation
            The kernel to use. If it is already a kernel, it is returned
            unchanged. Otherwise, a new kernel is initialized with the
            respective parameters.
        dimension : int
            The dimensions of the new kernel.
        kernel_params : dict
            The dictionary of kernel parameters. Currently supported:
            "ARD" : bool, optional
                Whether to use ARD. Default is True.

        Returns
        -------
        kernel : GPy.kern
            A GPy kernel.
        """
        self._logger.debug(
            "Checking kernel. Kernel is %s, dimension %s, "
            "kernel_params %s", kernel, dimension, kernel_params)
        if isinstance(kernel, GPy.kern.Kern):
            self._logger.debug("Already instance. No changes.")
            return kernel

        translation_dict = {"matern52": GPy.kern.Matern52, "rbf": GPy.kern.RBF}

        if not isinstance(kernel, str):
            # On Python 2, kernel names may arrive as unicode; normalize.
            kernel = str(kernel)

        if isinstance(kernel, str) and kernel in translation_dict:
            self._logger.debug("Is string and can be translated. Kernel is %s",
                               kernel)
            if kernel_params.get('ARD', None) is None:
                self._logger.debug("ARD unknown, setting to True.")
                kernel_params['ARD'] = True

            constructed_kernel = translation_dict[kernel](dimension,
                                                          **kernel_params)
            self._logger.debug("Constructed kernel. Is %s", constructed_kernel)
            return constructed_kernel

        raise ValueError("%s is not a kernel or string representing one!" %
                         kernel)
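
# A sketch of the update / get_next_candidates contract above, under assumed
# setup: update() refits the GP from finished candidates, after which
# get_next_candidates() proposes points via the acquisition function.
exp = Experiment("sketch", {"x": MinMaxNumericParamDef(0, 1)})
opt = BayesianOptimizer(exp, {"initial_random_runs": 2})
for _ in range(4):
    cand = opt.get_next_candidates(num_candidates=1)[0]
    cand.result = abs(cand.params["x"] - 0.3)  # toy objective
    exp.add_finished(cand)
    opt.update(exp)  # early-returns until initial_random_runs are finished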
Example #7
def test_init(self):
    # Test initialization.
    opt = RandomSearch(None)
Example #8
class BayesianOptimizer(Optimizer):
    """
    This is a bayesian optimizer class.

    It is a subclass of Optimizer, and internally uses GPy.
    Currently, it supports Numeric and PositionParamDefs, with support for
    NominalParamDef needing to be integrated.

    Attributes
    ----------
    SUPPORTED_PARAM_TYPES : list of ParamDefs
        The supported parameter types. Currently only numeric and position.
    kernel : GPy Kernel
        The Kernel to be used with the gp.
    acquisition_function : acquisition_function
        The acquisition function to use
    acquisition_hyperparams :
        The acquisition hyperparameters.
    random_state : scipy random_state or int.
        The scipy random state or object to initialize one. For reproduction.
    random_searcher : RandomSearch
        The random search instance used to generate the first
        initial_random_runs candidates.
    gp : GPy gaussian process
        The gaussian process used here.
    initial_random_runs : int
        The number of initial random runs before using the GP. Default is 10.
    num_gp_restarts : int
        GPy's optimization requires restarts to find a good solution. This
        parameter controls this. Default is 10.
    logger: logger
        The logger instance for this object.
    """
    SUPPORTED_PARAM_TYPES = [NumericParamDef, NominalParamDef]

    kernel = None
    kernel_params = None
    acquisition_function = None
    acquisition_hyperparams = None

    random_state = None
    random_searcher = None

    gp = None
    initial_random_runs = 10
    num_gp_restarts = 10

    return_max = True

    _logger = None

    def __init__(self, experiment, optimizer_params=None):
        """
        Initializes a bayesian optimizer.
        Parameters
        ----------
        experiment : Experiment
            The experiment for which to optimize.
        optimizer_arguments: dict of string keys
            Sets the possible arguments for this optimizer. Available are:
            "initial_random_runs" : int, optional
                The number of initial random runs before using the GP. Default
                is 10.
            "random_state" : scipy random state, optional
                The scipy random state or object to initialize one. Default is
                None.
            "acquisition_hyperparameters" : dict, optional
                dictionary of acquisition-function hyperparameters
            "num_gp_restarts" : int
                GPy's optimization requires restarts to find a good solution.
                This parameter controls this. Default is 10.
            "acquisition" : AcquisitionFunction
                The acquisition function to use. Default is
                ExpectedImprovement.
            "num_precomputed" : int
                The number of points that should be kept precomputed for faster
                multiple workers.
        """
        self._logger = get_logger(self)
        if optimizer_params is None:
            optimizer_params = {}
        self.initial_random_runs = optimizer_params.get(
            'initial_random_runs', self.initial_random_runs)
        self.random_state = check_random_state(
            optimizer_params.get('random_state', None))
        self.acquisition_hyperparams = optimizer_params.get(
            'acquisition_hyperparams', None)
        self.num_gp_restarts = optimizer_params.get(
            'num_gp_restarts', self.num_gp_restarts)
        if not isinstance(optimizer_params.get('acquisition'), AcquisitionFunction):
            self.acquisition_function = optimizer_params.get(
                'acquisition', ExpectedImprovement)(self.acquisition_hyperparams)
        else:
            self.acquisition_function = optimizer_params.get("acquisition")
        self.kernel_params = optimizer_params.get("kernel_params", {})
        self.kernel = optimizer_params.get("kernel", "matern52")
        self.random_searcher = RandomSearch(experiment, optimizer_params)
        Optimizer.__init__(self, experiment, optimizer_params)

    def get_next_candidates(self, num_candidates=1):
        if len(self._experiment.candidates_finished) < self.initial_random_runs:
            # we do a random search.
            return self.random_searcher.get_next_candidates(num_candidates)
        candidates = []
        new_candidate_points = self.acquisition_function.compute_proposals(
            self.gp, self._experiment, number_proposals=num_candidates,
            return_max=self.return_max
        )
        self.return_max = False
        for point_and_value in new_candidate_points:
            # Get the candidate point, which is the first entry in the tuple.
            point_candidate = Candidate(self._experiment.warp_pt_out(point_and_value[0]))
            candidates.append(point_candidate)
        return candidates

    def update(self, experiment):
        self._experiment = experiment
        if len(self._experiment.candidates_finished) < self.initial_random_runs:
            return
        self.return_max = True

        parameter_warped_size = 0
        for p in experiment.parameter_definitions.values():
            parameter_warped_size += p.warped_size()

        candidate_matrix = np.zeros((len(experiment.candidates_finished),
                                     parameter_warped_size))
        results_vector = np.zeros((len(experiment.candidates_finished), 1))

        param_names = sorted(experiment.parameter_definitions.keys())
        # The kernel dimension must match the width of the warped candidate
        # matrix; a parameter may occupy several warped columns.
        self.kernel = self._check_kernel(self.kernel, parameter_warped_size,
                                         kernel_params=self.kernel_params)

        for i, c in enumerate(self._experiment.candidates_finished):
            warped_in = self._experiment.warp_pt_in(c.params)
            param_values = []
            for pn in param_names:
                param_values.extend(warped_in[pn])
            candidate_matrix[i, :] = param_values
            results_vector[i] = c.result

        self._logger.debug("Refitting gp with cand %s and results %s",
                           candidate_matrix, results_vector)
        self.gp = GPy.models.GPRegression(candidate_matrix, results_vector,
                                          self.kernel)
        self.gp.constrain_positive("*")
        self.gp.constrain_bounded(0.1, 1, warning=False)
        self.gp.optimize_restarts(num_restarts=self.num_gp_restarts,
                                  verbose=False)

    def _check_kernel(self, kernel, dimension, kernel_params):
        """
        Checks and initializes a kernel.

        Parameters
        ----------
        kernel : kernel or string representation
            The kernel to use. If it is already a kernel, it is returned
            unchanged. Otherwise, a new kernel is initialized with the
            respective parameters.
        dimension : int
            The dimensions of the new kernel.
        kernel_params : dict
            The dictionary of kernel parameters. Currently supported:
            "ARD" : bool, optional
                Whether to use ARD. Default is True.

        Returns
        -------
        kernel : GPy.kern
            A GPy kernel.
        """
        if isinstance(kernel, GPy.kern.Kern):
            return kernel
        translation_dict = {
            "matern52": GPy.kern.Matern52,
            "rbf": GPy.kern.RBF
        }

        if not isinstance(kernel, str):
            # On Python 2, kernel names may arrive as unicode; normalize.
            kernel = str(kernel)

        if isinstance(kernel, str) and kernel in translation_dict:
            if kernel_params.get('ARD', None) is None:
                kernel_params['ARD'] = True

            constructed_kernel = translation_dict[kernel](dimension, **kernel_params)
            return constructed_kernel

        raise ValueError("%s is not a kernel or string representing one!" %
                         kernel)
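
# A standalone sketch of the string-to-kernel translation that _check_kernel
# performs above; runnable with GPy installed. ARD=True gives one lengthscale
# per input dimension.
import GPy

translation = {"matern52": GPy.kern.Matern52, "rbf": GPy.kern.RBF}
kern = translation["rbf"](3, ARD=True)  # a 3-dimensional RBF kernel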
Example #9
def test_init(self):
    # Test initialization.
    exp = Experiment("test", {"x": MinMaxNumericParamDef(0, 1),
                              "name": NominalParamDef(["A", "B", "C"])})
    opt = RandomSearch(exp)
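
# A hedged continuation (an assumption, not part of the original test): with
# the experiment-based API, candidates are drawn directly from the parameter
# definitions, including the nominal one.
cand = opt.get_next_candidates(num_candidates=1)[0]
print(cand.params)  # e.g. {"x": 0.42, "name": "B"}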