Example #1
    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Probability of Improvement is defined as
        :math:`PI(X) := \mathbb{P}\left( f(\mathbf{X^+}) - f_{t+1}(\mathbf{X}) > \xi\right)`, where
        :math:`f(X^+)` is the best function value found so far.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
                 - getCurrentBestX().
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0
        """
        super(PI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)
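
The effect of par is visible directly in the closed form norm.cdf((eta - m - par) / s) used by the compute() method shown in Example 13: a larger margin (the xi in the formula above) demands a bigger improvement before a point scores well. A minimal sketch with made-up posterior values, independent of RoBO:

import numpy as np
from scipy.stats import norm

eta = 0.0          # incumbent value (best observation so far)
m, s = -0.1, 0.5   # hypothetical posterior mean and std at a candidate x

for par in [0.0, 0.1, 0.5]:
    z = (eta - m - par) / s
    print("par=%.1f  PI=%.4f" % (par, norm.cdf(z)))
# Increasing par lowers PI near the incumbent, shifting the balance from
# exploitation towards exploration.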
Example #2
    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Computes for a given x the expected improvement as
        acquisition value.
        :math:`EI(X) :=
            \mathbb{E}\left[ \max\left\{0, f(\mathbf{X^+}) -
                f_{t+1}(\mathbf{X}) - \xi\right\} \right]`, with
        :math:`f(X^+)` as the incumbent.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
                 - getCurrentBestX().
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0
        """

        super(EI, self).__init__(model, X_lower, X_upper)
        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)
Example #3
    def test(self):
        X_lower = np.array([0])
        X_upper = np.array([1])
        X = init_random_uniform(X_lower, X_upper, 10)
        Y = np.sin(X)

        kernel = george.kernels.Matern52Kernel(np.ones([1]),
                                               ndim=1)

        prior = TophatPrior(-2, 2)
        model = GaussianProcess(kernel, prior=prior)
        model.train(X, Y)

        rec = BestObservation(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 10)
        inc, inc_val = rec.estimate_incumbent(startpoints)

        # Check shapes
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_lower.shape[0]

        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        # Check if incumbent is in the bounds
        assert not np.any([np.any(inc[:, i] < X_lower[i])
                           for i in range(X_lower.shape[0])])
        assert not np.any([np.any(inc[:, i] > X_upper[i])
                           for i in range(X_upper.shape[0])])
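
The test pins down the expected shapes; what BestObservation actually computes is suggested by the comment in Example 14 ("the incumbent is just the best observation we have seen so far"). A minimal stand-in with the same output shapes, illustrative only and not RoBO's implementation:

import numpy as np

def best_observation(X, Y):
    # X: np.ndarray (N, D) observed points, Y: np.ndarray (N, 1) their values
    # Returns inc with shape (1, D) and inc_val with shape (1, 1)
    best = np.argmin(Y)
    return X[best][np.newaxis, :], Y[best][np.newaxis, :]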
Example #4
    def __init__(self, model, X_lower, X_upper, par=0.01, **kwargs):
        r"""
        Computes for a given x the logarithm of the expected improvement as
        acquisition value.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.01
        """

        super(LogEI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)
Example #5
    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """

        super(PI, self).update(model)
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)
Example #6
    def __init__(self, task=None, save_dir=None, num_save=1, rng=None):
        """
        Random Search [1] simply evaluates random points. Since we do not
        have any priors, we sample points uniformly at random.

        [1] J. Bergstra and Y. Bengio.
            Random search for hyper-parameter optimization.
            JMLR, 2012.

        Parameters
        ----------
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space.
        num_save: int
            Defines after how many iterations the output is saved.
        save_dir: String
            Output path
        rng: numpy.random.RandomState
            Random number generator used to sample new points.

        """

        if rng is None:
            self.rng = np.random.RandomState(42)
        else:
            self.rng = rng

        self.task = task
        self.save_dir = save_dir

        self.X = None
        self.Y = None

        # The solver itself is passed in place of a model: it stores the
        # observed X and Y, which is all a best-observation recommender needs.
        self.estimator = BestObservation(self,
                                         self.task.X_lower,
                                         self.task.X_upper)
        self.time_func_eval = None
        self.time_overhead = None

        self.num_save = num_save

        self.model_untrained = True

        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.runtime = []
        if self.save_dir is not None:
            self.create_save_dir()
Example #7
File: pi.py Project: aaronkl/RoBO
    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Probability of Improvement is defined as
        :math:`PI(X) := \mathbb{P}\left( f(\mathbf{X^+}) - f_{t+1}(\mathbf{X}) > \xi\right)`, where
        :math:`f(X^+)` is the best function value found so far.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
                 - getCurrentBestX().
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0
        """
        super(PI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model,
                                   self.X_lower,
                                   self.X_upper)
Example #8
    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Computes for a given x the logarithm of the expected improvement as
        acquisition value.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0
        """

        super(LogEI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)
Example #9
File: ei.py Project: aaronkl/RoBO
    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """

        super(EI, self).update(model)
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)
Example #10
File: ei.py Project: aaronkl/RoBO
    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Computes for a given x the expected improvement as
        acquisition value.
        :math:`EI(X) :=
            \mathbb{E}\left[ \max\left\{0, f(\mathbf{X^+}) -
                f_{t+1}(\mathbf{X}) - \xi\right\} \right]`, with
        :math:`f(X^+)` as the incumbent.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
                 - getCurrentBestX().
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0
        """

        super(EI, self).__init__(model, X_lower, X_upper)
        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)
Example #11
File: ei.py Project: aaronkl/RoBO
class EI(BaseAcquisitionFunction):

    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Computes for a given x the expected improvement as
        acquisition value.
        :math:`EI(X) :=
            \mathbb{E}\left[ \max\left\{0, f(\mathbf{X^+}) -
                f_{t+1}(\mathbf{X}) - \xi\right\} \right]`, with
        :math:`f(X^+)` as the incumbent.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
                 - getCurrentBestX().
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0
        """

        super(EI, self).__init__(model, X_lower, X_upper)
        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """

        super(EI, self).update(model)
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the EI value and its derivatives.

        Parameters
        ----------
        X: np.ndarray(1, D)
            The input point where the acquisition function
            should be evaluated. Only a single test point
            (one row with D dimensions) is supported.

        derivative: Boolean
            If set to true, the derivative of the acquisition
            function at X is returned as well

        Returns
        -------
        np.ndarray(1,1)
            Expected Improvement of X
        np.ndarray(1,D)
            Derivative of Expected Improvement at X (only if derivative=True)
        """

        if X.shape[0] > 1:
            raise ValueError("EI is only for single test points")

        if len(X.shape) == 1:
            X = X[:, np.newaxis]

        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            if derivative:
                f = np.array([[0]])
                df = np.zeros((1, X.shape[1]))
                return f, df
            else:
                return np.array([[0]])

        m, v = self.model.predict(X)

        # Use the best seen observation as incumbent
        _, eta = self.rec.estimate_incumbent(None)

        s = np.sqrt(v)

        if (s == 0).any():
            f = np.array([[0]])
            df = np.zeros((1, X.shape[1]))

        else:
            z = (eta - m - self.par) / s
            f = s * (z * norm.cdf(z) + norm.pdf(z))

            if derivative:
                dmdx, ds2dx = self.model.predictive_gradients(X)
                dmdx = dmdx[0]
                ds2dx = ds2dx[0][:, None]
                dsdx = ds2dx / (2 * s)
                df = (-dmdx * norm.cdf(z) + (dsdx * norm.pdf(z))).T
            if (f < 0).any():
                logger.error("Expected Improvement is smaller than 0!")
                raise ValueError

        if derivative:
            return f, df
        else:
            return f
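
A quick numerical sanity check of the closed form in compute(): s * (z * Phi(z) + phi(z)) should match a Monte Carlo estimate of E[max(0, eta - par - f)] for f ~ N(m, s^2). The numbers below are made up for illustration:

import numpy as np
from scipy.stats import norm

m, s, eta, par = 0.3, 0.6, 0.1, 0.0
z = (eta - m - par) / s
ei_closed = s * (z * norm.cdf(z) + norm.pdf(z))

samples = np.random.RandomState(0).normal(m, s, size=1000000)
ei_mc = np.maximum(0.0, eta - par - samples).mean()

print(ei_closed, ei_mc)  # should agree to about three decimal places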
Example #12
    def test(self):
        X_lower = np.array([0])
        X_upper = np.array([1])
        X = init_random_uniform(X_lower, X_upper, 10)
        Y = np.sin(X)
        kernel = GPy.kern.Matern52(input_dim=1)
        model = GPyModel(kernel)
        model.train(X, Y)

        x_test = init_random_uniform(X_lower, X_upper, 3)

        # Shape matching predict
        m, v = model.predict(x_test, full_cov=True)
        assert len(m.shape) == 2
        assert m.shape[0] == x_test.shape[0]
        assert m.shape[1] == 1
        assert len(v.shape) == 2
        assert v.shape[0] == x_test.shape[0]
        assert v.shape[1] == x_test.shape[0]

        # Check gradients
        dm, dv = model.predictive_gradients(x_test)
        assert len(dm.shape) == 2
        assert dm.shape[0] == x_test.shape[0]
        assert dm.shape[1] == x_test.shape[1]
        assert len(dv.shape) == 2
        assert dv.shape[0] == x_test.shape[0]
        assert dv.shape[1] == 1

        # Shape matching function sampling
        x_ = np.linspace(X_lower, X_upper, 10)
        x_ = x_[:, np.newaxis]
        funcs = model.sample_functions(x_, n_funcs=2)
        assert len(funcs.shape) == 2
        assert funcs.shape[0] == 2
        assert funcs.shape[1] == x_.shape[0]

        # Shape matching predict variance
        x_test2 = np.array([np.random.rand(1)])
        x_test1 = np.random.rand(10)[:, np.newaxis]
        var = model.predict_variance(x_test1, x_test2)
        assert len(var.shape) == 2
        assert var.shape[0] == x_test1.shape[0]
        assert var.shape[1] == 1

        # Check compatibility with all acquisition functions
        acq_func = EI(model,
                     X_upper=X_upper,
                     X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = PI(model,
                     X_upper=X_upper,
                     X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = LCB(model,
                     X_upper=X_upper,
                     X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = InformationGain(model,
                     X_upper=X_upper,
                     X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        # Check compatibility with all incumbent estimation methods
        rec = BestObservation(model, X_lower, X_upper)
        inc, inc_val = rec.estimate_incumbent(None)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1
Example #13
File: pi.py Project: aaronkl/RoBO
class PI(BaseAcquisitionFunction):

    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Probability of Improvement is defined as
        :math:`PI(X) := \mathbb{P}\left( f(\mathbf{X^+}) - f_{t+1}(\mathbf{X}) > \xi\right)`, where
        :math:`f(X^+)` is the best function value found so far.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
                 - getCurrentBestX().
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0
        """
        super(PI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model,
                                   self.X_lower,
                                   self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """

        super(PI, self).update(model)
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the PI value and its derivatives.

        Parameters
        ----------
        X: np.ndarray(1, D)
            The input point where the acquisition function
            should be evaluated. Only a single test point
            (one row with D dimensions) is supported.

        derivative: Boolean
            If set to true, the derivative of the acquisition
            function at X is returned as well

        Returns
        -------
        np.ndarray(1,1)
            Probability of Improvement of X
        np.ndarray(1,D)
            Derivative of Probability of Improvement at X
            (only if derivative=True)
        """
        if X.shape[0] > 1:
            raise ValueError("PI is only for single test points")
        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            if derivative:
                f = np.array([[0]])
                df = np.zeros((1, X.shape[1]))
                return f, df
            else:
                return np.array([[0]])

        m, v = self.model.predict(X)
        _, eta = self.rec.estimate_incumbent(None)

        s = np.sqrt(v)
        z = (eta - m - self.par) / s
        f = norm.cdf(z)
        if derivative:
            dmdx, ds2dx = self.model.predictive_gradients(X)
            dmdx = dmdx[0]
            ds2dx = ds2dx[0][:, None]
            dsdx = ds2dx / (2 * s)
            # dPI/dx = -phi(z) / s * (dmdx + z * dsdx)
            df = (-norm.pdf(z) / s * (dmdx + dsdx * z)).T
            return f, df
        else:
            return f
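
The analogous check for PI: norm.cdf(z) should equal the probability that a posterior sample improves on the incumbent by more than par (again, made-up numbers):

import numpy as np
from scipy.stats import norm

m, s, eta, par = 0.3, 0.6, 0.1, 0.05
z = (eta - m - par) / s
pi_closed = norm.cdf(z)

samples = np.random.RandomState(1).normal(m, s, size=1000000)
pi_mc = np.mean(eta - samples > par)

print(pi_closed, pi_mc)  # should agree to about three decimal places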
Example #14
class RandomSearch(BaseSolver):

    def __init__(self, task=None, save_dir=None, num_save=1, rng=None):
        """
        Random Search [1] simply evaluates random points. Since we do not
        have any priors, we sample points uniformly at random.

        [1] J. Bergstra and Y. Bengio.
            Random search for hyper-parameter optimization.
            JMLR, 2012.

        Parameters
        ----------
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space.
        num_save: int
            Defines after how many iterations the output is saved.
        save_dir: String
            Output path
        rng: numpy.random.RandomState
            Random number generator used to sample new points.

        """

        if rng is None:
            self.rng = np.random.RandomState(42)
        else:
            self.rng = rng

        self.task = task
        self.save_dir = save_dir

        self.X = None
        self.Y = None

        self.estimator = BestObservation(self,
                                         self.task.X_lower,
                                         self.task.X_upper)
        self.time_func_eval = None
        self.time_overhead = None

        self.num_save = num_save

        self.model_untrained = True

        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.runtime = []
        if self.save_dir is not None:
            self.create_save_dir()

    def run(self, num_iterations=10):
        """
        The main optimization loop

        Parameters
        ----------
        num_iterations: int
            The number of iterations

        Returns
        -------
        np.ndarray(1,D)
            Incumbent
        np.ndarray(1,1)
            (Estimated) function value of the incumbent
        """
        self.time_start = time.time()

        for it in range(num_iterations):
            logger.info("Start iteration %d ... ", it)

            start_time = time.time()
            # Choose next point to evaluate

            new_x = self.choose_next()

            time_overhead = time.time() - start_time
            self.time_overhead = np.append(self.time_overhead,
                                           np.array([time_overhead]))

            logger.info("Optimization overhead was %f seconds" %
                            (self.time_overhead[-1]))

            logger.info("Evaluate candidate %s" % (str(new_x)))
            start_time = time.time()
            new_y = self.task.evaluate(new_x)
            time_func_eval = time.time() - start_time
            self.time_func_eval = np.append(self.time_func_eval,
                                            np.array([time_func_eval]))

            logger.info("Configuration achieved a performance of %f " %
                        (new_y[0, 0]))

            logger.info("Evaluation of this configuration took %f seconds" %
                        (self.time_func_eval[-1]))

            self.runtime.append(time.time() - self.time_start)

            # Update the data
            if self.X is None and self.Y is None:
                self.X = new_x
                self.Y = new_y
            else:
                self.X = np.append(self.X, new_x, axis=0)
                self.Y = np.append(self.Y, new_y, axis=0)

            # The incumbent is just the best observation we have seen so far
            start_time_inc = time.time()

            self.incumbent, self.incumbent_value = \
                    self.estimator.estimate_incumbent(None)

            self.incumbents.append(self.incumbent)
            self.incumbent_values.append(self.incumbent_value)

            logger.info("New incumbent %s found in %f seconds with "
                        "estimated performance %f",
                        str(self.incumbent), time.time() - start_time_inc,
                        self.incumbent_value)

            if self.save_dir is not None and (it) % self.num_save == 0:
                self.save_iteration(it)

        logger.info("Return %s as incumbent with predicted performance %f" %
                    (str(self.incumbent), self.incumbent_value))

        return self.incumbent, self.incumbent_value

    def choose_next(self):
        """
        Sample a new point uniformly at random.

        Returns
        -------
        np.ndarray(1,D)
            Suggested point to evaluate
        """
        return self.rng.uniform(self.task.X_lower,
                                 self.task.X_upper)[np.newaxis, :]
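
Stripped of logging and bookkeeping, the whole solver reduces to a few lines. A self-contained sketch on a toy objective (no RoBO dependencies; all names are illustrative):

import numpy as np

def random_search(objective, x_lower, x_upper, num_iterations=10, seed=42):
    rng = np.random.RandomState(seed)
    X, Y = [], []
    for _ in range(num_iterations):
        x = rng.uniform(x_lower, x_upper)[np.newaxis, :]  # shape (1, D)
        X.append(x)
        Y.append(objective(x))
    X, Y = np.concatenate(X), np.concatenate(Y)
    best = np.argmin(Y)  # the incumbent is the best observation so far
    return X[best][np.newaxis, :], Y[best][np.newaxis, :]

inc, inc_val = random_search(lambda x: np.sin(3 * x) + x ** 2,
                             np.array([-1.0]), np.array([1.0]))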
Example #15
    def test(self):
        X_lower = np.array([0])
        X_upper = np.array([1])
        X = init_random_uniform(X_lower, X_upper, 10)
        Y = np.sin(X)

        model = RandomForest(types=np.zeros([X_lower.shape[0]]))
        model.train(X, Y)

        x_test = init_random_uniform(X_lower, X_upper, 3)

        # Shape matching predict
        m, v = model.predict(x_test)

        assert len(m.shape) == 2
        assert m.shape[0] == x_test.shape[0]
        assert m.shape[1] == 1
        assert len(v.shape) == 2
        assert v.shape[0] == x_test.shape[0]
        assert v.shape[1] == 1

        # Shape matching function sampling
        x_ = np.linspace(X_lower, X_upper, 10)
        x_ = x_[:, np.newaxis]
        #funcs = model.sample_functions(x_, n_funcs=2)
        #assert len(funcs.shape) == 2
        #assert funcs.shape[0] == 2
        #assert funcs.shape[1] == x_.shape[0]

        # Check compatibility with all acquisition functions
        acq_func = EI(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = PI(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = LCB(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        # Check compatibility with all incumbent estimation methods
        rec = BestObservation(model, X_lower, X_upper)
        inc, inc_val = rec.estimate_incumbent(None)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1
Example #16
    def __init__(self,
                 acquisition_func,
                 model,
                 maximize_func,
                 task,
                 save_dir=None,
                 initial_design=None,
                 initial_points=3,
                 incumbent_estimation=None,
                 num_save=1,
                 train_intervall=1,
                 n_restarts=1):
        """
        Implementation of the standard Bayesian optimization loop that uses
        an acquisition function and a model to optimize a given task.
        This module keeps track of additional information such as runtime,
        optimization overhead, evaluated points and saves the output
        in a csv file.

        Parameters
        ----------
        acquisition_func: BaseAcquisitionFunctionObject
            The acquisition function which will be maximized.
        model: ModelObject
            Model (i.e. GaussianProcess, RandomForest) that models our current
            belief of the objective function.
        maximize_func: MaximizerObject
            Optimization strategy used to maximize the acquisition function.
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space.
        save_dir: String
            Output path
        initial_design: function
            Function that returns some points which will be evaluated before
            the Bayesian optimization loop is started. This allows to
            initialize the model.
        initial_points: int
            Defines the number of initial points that are evaluated before the
            actual Bayesian optimization.
        incumbent_estimation: IncumbentEstimationObject
            Object to estimate the incumbent based on the current model. The
            incumbent is the current best guess of the global optimum and is
            estimated in each iteration.
        num_save: int
            Defines after how many iterations the output is saved.
        train_intervall: int
            Specifies after how many iterations the model is retrained.
        n_restarts: int
            How often the incumbent estimation is repeated.
        """

        super(BayesianOptimization,
              self).__init__(acquisition_func, model, maximize_func, task,
                             save_dir)
        self.start_time = time.time()

        if initial_design is None:
            self.initial_design = init_random_uniform
        else:
            self.initial_design = initial_design

        self.X = None
        self.Y = None
        self.time_func_eval = None
        self.time_overhead = None
        self.train_intervall = train_intervall

        self.num_save = num_save
        self.time_start = None

        self.model_untrained = True
        if incumbent_estimation is None:
            self.estimator = BestObservation(self.model, self.task.X_lower,
                                             self.task.X_upper)
        else:
            self.estimator = incumbent_estimation
        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.n_restarts = n_restarts
        self.init_points = initial_points
        self.runtime = []
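
The maximize_func argument only needs to expose a maximize() method that returns a (1, D) candidate (see choose_next in Example 18). A toy stand-in that scores random candidates (the class name is made up; a real maximizer would use restarts or gradients):

import numpy as np

class RandomGridMaximizer(object):

    def __init__(self, acquisition_func, X_lower, X_upper, n_points=1000):
        self.acquisition_func = acquisition_func
        self.X_lower = X_lower
        self.X_upper = X_upper
        self.n_points = n_points

    def maximize(self):
        # Uniform random candidates; adequate only in low dimensions
        candidates = np.random.uniform(
            self.X_lower, self.X_upper,
            (self.n_points, self.X_lower.shape[0]))
        values = np.array([self.acquisition_func(x[np.newaxis, :])
                           for x in candidates]).flatten()
        return candidates[np.argmax(values)][np.newaxis, :]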
Example #17
class RandomSearch(BaseSolver):
    def __init__(self, task=None, save_dir=None, num_save=1, rng=None):
        """
        Random Search [1] simply evaluates random points. Since we do not
        have any priors, we sample points uniformly at random.

        [1] J. Bergstra and Y. Bengio.
            Random search for hyper-parameter optimization.
            JMLR, 2012.

        Parameters
        ----------
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space.
        num_save: int
            Defines after how many iterations the output is saved.
        save_dir: String
            Output path
        rng: numpy.random.RandomState
            Random number generator used to sample new points.

        """

        if rng is None:
            self.rng = np.random.RandomState(np.random.randint(0, 10000))
        else:
            self.rng = rng

        self.task = task
        self.save_dir = save_dir

        self.X = None
        self.Y = None

        self.estimator = BestObservation(self, self.task.X_lower,
                                         self.task.X_upper)
        self.time_func_eval = []
        self.time_overhead = []

        self.num_save = num_save

        self.model_untrained = True

        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.runtime = []
        if self.save_dir is not None:
            self.create_save_dir()

    def run(self, num_iterations=10):
        """
        The main optimization loop

        Parameters
        ----------
        num_iterations: int
            The number of iterations

        Returns
        -------
        np.ndarray(1,D)
            Incumbent
        np.ndarray(1,1)
            (Estimated) function value of the incumbent
        """
        self.time_start = time.time()

        for it in range(num_iterations):
            logger.info("Start iteration %d ... ", it)

            start_time = time.time()
            # Choose next point to evaluate

            new_x = self.choose_next()

            time_overhead = time.time() - start_time
            self.time_overhead.append(time_overhead)

            logger.info("Optimization overhead was %f seconds" %
                        (self.time_overhead[-1]))

            logger.info("Evaluate candidate %s" % (str(new_x)))
            start_time = time.time()
            new_y = self.task.evaluate(new_x)
            time_func_eval = time.time() - start_time
            self.time_func_eval.append(time_func_eval)

            logger.info("Configuration achieved a performance of %f " %
                        (new_y[0, 0]))

            logger.info("Evaluation of this configuration took %f seconds" %
                        (self.time_func_eval[-1]))

            self.runtime.append(time.time() - self.time_start)

            # Update the data
            if self.X is None and self.Y is None:
                self.X = new_x
                self.Y = new_y
            else:
                self.X = np.append(self.X, new_x, axis=0)
                self.Y = np.append(self.Y, new_y, axis=0)

            # The incumbent is just the best observation we have seen so far
            start_time_inc = time.time()

            self.incumbent, self.incumbent_value = \
                    self.estimator.estimate_incumbent(None)

            self.incumbents.append(self.incumbent)
            self.incumbent_values.append(self.incumbent_value)

            logger.info(
                "New incumbent %s found in %f seconds with "
                "estimated performance %f", str(self.incumbent),
                time.time() - start_time_inc, self.incumbent_value)

            if self.save_dir is not None and (it) % self.num_save == 0:
                self.save_iteration(it)

        logger.info("Return %s as incumbent with predicted performance %f" %
                    (str(self.incumbent), self.incumbent_value))

        return self.incumbent, self.incumbent_value

    def choose_next(self):
        """
        Sample a new point uniformly at random.

        Returns
        -------
        np.ndarray(1,D)
            Suggested point to evaluate
        """
        x = self.rng.uniform(self.task.X_lower, self.task.X_upper)
        if np.isscalar(x):
            return np.array([[x]])
        else:
            return x[np.newaxis, :]
Example #18
class BayesianOptimization(BaseSolver):

    def __init__(self,
                 acquisition_func,
                 model,
                 maximize_func,
                 task,
                 save_dir=None,
                 initial_design=None,
                 initial_points=3,
                 incumbent_estimation=None,
                 num_save=1,
                 train_intervall=1,
                 n_restarts=1):
        """
        Implementation of the standard Bayesian optimization loop that uses
        an acquisition function and a model to optimize a given task.
        This module keeps track of additional information such as runtime,
        optimization overhead, evaluated points and saves the output
        in a csv file.

        Parameters
        ----------
        acquisition_func: AcquisitionFunctionObject
            The acquisition function which will be maximized.
        model: ModelObject
            Model (i.e. GaussianProcess, RandomForest) that models our current
            belief of the objective function.
        maximize_func: MaximizerObject
            Optimization strategy used to maximize the acquisition function.
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space.
        save_dir: String
            Output path
        initial_design: function
            Function that returns some points which will be evaluated before
            the Bayesian optimization loop is started. This allows to
            initialize the model.
        initial_points: int
            Defines the number of initial points that are evaluated before the
            actual Bayesian optimization.
        incumbent_estimation: IncumbentEstimationObject
            Object to estimate the incumbent based on the current model. The
            incumbent is the current best guess of the global optimum and is
            estimated in each iteration.
        num_save: int
            Defines after how many iterations the output is saved.
        train_intervall: int
            Specifies after how many iterations the model is retrained.
        n_restarts: int
            How often the incumbent estimation is repeated.
        """

        super(BayesianOptimization, self).__init__(acquisition_func,
                                                    model,
                                                    maximize_func,
                                                    task,
                                                    save_dir)
        self.start_time = time.time()

        if initial_design is None:
            self.initial_design = init_random_uniform
        else:
            self.initial_design = initial_design

        self.X = None
        self.Y = None
        self.time_func_eval = None
        self.time_overhead = None
        self.train_intervall = train_intervall

        self.num_save = num_save

        self.model_untrained = True
        if incumbent_estimation is None:
            self.estimator = BestObservation(self.model,
                                             self.task.X_lower,
                                             self.task.X_upper)
        else:
            self.estimator = incumbent_estimation
        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.n_restarts = n_restarts
        self.init_points = initial_points
        self.runtime = []

    def run(self, num_iterations=10, X=None, Y=None):
        """
        The main Bayesian optimization loop

        Parameters
        ----------
        num_iterations: int
            The number of iterations
        X: np.ndarray(N,D)
            Initial points that are already evaluated
        Y: np.ndarray(N,1)
            Function values of the already evaluated points

        Returns
        -------
        np.ndarray(1,D)
            Incumbent
        np.ndarray(1,1)
            (Estimated) function value of the incumbent
        """
        # Save the time where we start the Bayesian optimization procedure
        self.time_start = time.time()

        if X is None and Y is None:
            self.time_func_eval = np.zeros([self.init_points])
            self.time_overhead = np.zeros([self.init_points])
            self.X = np.zeros([self.init_points, self.task.n_dims])
            self.Y = np.zeros([self.init_points, 1])

            init = self.initial_design(self.task.X_lower,
                                       self.task.X_upper,
                                       N=self.init_points)

            for i, x in enumerate(init):
                x = x[np.newaxis, :]

                logger.info("Evaluate: %s" % x)

                start_time = time.time()
                y = self.task.evaluate(x)

                self.X[i] = x[0, :]
                self.Y[i] = y[0, :]
                self.time_func_eval[i] = time.time() - start_time
                self.time_overhead[i] = 0.0

                logger.info("Configuration achieved a performance "
                    "of %f in %f seconds" %
                    (self.Y[i], self.time_func_eval[i]))

                # Use best point seen so far as incumbent
                best_idx = np.argmin(self.Y)
                self.incumbent = np.array([self.X[best_idx]])
                self.incumbent_value = np.array([self.Y[best_idx]])

                self.incumbents.append(self.incumbent)
                self.incumbent_values.append(self.incumbent_value)
                self.runtime.append(time.time() - self.start_time)

                if self.save_dir is not None and (i) % self.num_save == 0:
                    self.save_iteration(i, hyperparameters=None,
                                        acquisition_value=0)

        else:
            self.X = X
            self.Y = Y
            self.time_func_eval = np.zeros([self.X.shape[0]])
            self.time_overhead = np.zeros([self.X.shape[0]])

        for it in range(self.init_points, num_iterations):
            logger.info("Start iteration %d ... ", it)

            start_time = time.time()
            # Choose next point to evaluate
            if it % self.train_intervall == 0:
                do_optimize = True
            else:
                do_optimize = False

            new_x = self.choose_next(self.X, self.Y, do_optimize)

            # Estimate current incumbent
            start_time_inc = time.time()
            startpoints = init_random_uniform(self.task.X_lower,
                                              self.task.X_upper,
                                              self.n_restarts)
            self.incumbent, self.incumbent_value = \
                    self.estimator.estimate_incumbent(startpoints)

            self.incumbents.append(self.incumbent)
            self.incumbent_values.append(self.incumbent_value)

            logger.info("New incumbent %s found in %f seconds with "
                        "estimated performance %f",
                        str(self.incumbent), time.time() - start_time_inc,
                        self.incumbent_value)

            time_overhead = time.time() - start_time
            self.time_overhead = np.append(self.time_overhead,
                                           np.array([time_overhead]))

            logger.info("Optimization overhead was %f seconds" %
                            (self.time_overhead[-1]))

            logger.info("Evaluate candidate %s" % (str(new_x)))
            start_time = time.time()
            new_y = self.task.evaluate(new_x)
            time_func_eval = time.time() - start_time
            self.time_func_eval = np.append(self.time_func_eval,
                                            np.array([time_func_eval]))

            logger.info("Configuration achieved a performance of %f " %
                        (new_y[0, 0]))

            logger.info("Evaluation of this configuration took %f seconds" %
                        (self.time_func_eval[-1]))

            # Update the data
            self.X = np.append(self.X, new_x, axis=0)
            self.Y = np.append(self.Y, new_y, axis=0)

            self.runtime.append(time.time() - self.start_time)

            if self.save_dir is not None and (it) % self.num_save == 0:
                hypers = self.model.hypers
                self.save_iteration(
                    it,
                    hyperparameters=hypers,
                    acquisition_value=self.acquisition_func(new_x))

        # TODO: Retrain model and then return the incumbent
        logger.info("Return %s as incumbent with predicted performance %f" %
                    (str(self.incumbent), self.incumbent_value))

        return self.incumbent, self.incumbent_value

    def choose_next(self, X=None, Y=None, do_optimize=True):
        """
        Suggests a new point to evaluate.

        Parameters
        ----------
        X: np.ndarray(N,D)
            Points that have already been evaluated
        Y: np.ndarray(N,1)
            Function values of the already evaluated points
        do_optimize: bool
            If true the hyperparameters of the model are
            optimized before the acquisition function is
            maximized.
        Returns
        -------
        np.ndarray(1,D)
            Suggested point
        """

        if X is None and Y is None:
            x = self.initial_design(self.task.X_lower,
                                    self.task.X_upper,
                                    N=1)

        elif X.shape[0] == 1:
            # We need at least 2 data points to train a GP
            x = self.initial_design(self.task.X_lower,
                                    self.task.X_upper,
                                    N=1)
        else:
            try:
                logger.info("Train model...")
                t = time.time()
                self.model.train(X, Y, do_optimize=do_optimize)
                logger.info("Time to train the model: %f", (time.time() - t))
            except Exception:
                logger.error("Model could not be trained with "
                             "data X: %s, Y: %s", X, Y)
                raise
            self.model_untrained = False
            self.acquisition_func.update(self.model)

            logger.info("Maximize acquisition function...")
            t = time.time()
            x = self.maximize_func.maximize()

            logger.info("Time to maximize the acquisition function: %f",
                        (time.time() - t))

        return x
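
The essential structure of run() and choose_next() fits on one screen: fit a surrogate, maximize EI, evaluate, repeat. A self-contained 1-D sketch with a fixed-hyperparameter GP and the EI closed form from Example 11 (illustrative only, not RoBO's implementation):

import numpy as np
from scipy.stats import norm

def kern(a, b, ls=0.2):
    # Squared-exponential kernel with a fixed length scale
    return np.exp(-0.5 * ((a - b.T) / ls) ** 2)

def gp_predict(X, Y, x_test, noise=1e-4):
    K_inv = np.linalg.inv(kern(X, X) + noise * np.eye(X.shape[0]))
    k_s = kern(x_test, X)
    m = k_s.dot(K_inv).dot(Y)
    v = 1.0 - np.sum(k_s.dot(K_inv) * k_s, axis=1, keepdims=True)
    return m, np.clip(v, 1e-12, None)

def objective(x):
    return np.sin(3 * x) + x ** 2

rng = np.random.RandomState(0)
X = rng.uniform(-1, 1, (3, 1))     # initial design
Y = objective(X)

for _ in range(10):
    grid = np.linspace(-1, 1, 500)[:, np.newaxis]
    m, v = gp_predict(X, Y, grid)
    s = np.sqrt(v)
    eta = Y.min()                  # incumbent: best observation so far
    z = (eta - m) / s
    ei = s * (z * norm.cdf(z) + norm.pdf(z))
    new_x = grid[np.argmax(ei)][np.newaxis, :]   # maximize acquisition
    X = np.append(X, new_x, axis=0)
    Y = np.append(Y, objective(new_x), axis=0)

print("Incumbent:", X[np.argmin(Y)], "value:", Y.min())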
Example #19
class PI(BaseAcquisitionFunction):
    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Probability of Improvement is defined as
        :math:`PI(X) := \mathbb{P}\left( f(\mathbf{X^+}) - f_{t+1}(\mathbf{X}) > \xi\right)`, where
        :math:`f(X^+)` is the best function value found so far.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
                 - getCurrentBestX().
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0
        """
        super(PI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """

        super(PI, self).update(model)
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the PI value and its derivatives.

        Parameters
        ----------
        X: np.ndarray(1, D)
            The input point where the acquisition function
            should be evaluated. Only a single test point
            (one row with D dimensions) is supported.

        derivative: Boolean
            If set to true, the derivative of the acquisition
            function at X is returned as well

        Returns
        -------
        np.ndarray(1,1)
            Probability of Improvement of X
        np.ndarray(1,D)
            Derivative of Probability of Improvement at X
            (only if derivative=True)
        """
        if X.shape[0] > 1:
            raise ValueError("PI is only for single test points")
        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            if derivative:
                f = np.array([[0]])
                df = np.zeros((1, X.shape[1]))
                return f, df
            else:
                return np.array([[0]])

        m, v = self.model.predict(X)
        _, eta = self.rec.estimate_incumbent(None)

        s = np.sqrt(v)
        z = (eta - m - self.par) / s
        f = norm.cdf(z)
        if derivative:
            dmdx, ds2dx = self.model.predictive_gradients(X)
            dmdx = dmdx[0]
            ds2dx = ds2dx[0][:, None]
            dsdx = ds2dx / (2 * s)
            # dPI/dx = -phi(z) / s * (dmdx + z * dsdx)
            df = (-norm.pdf(z) / s * (dmdx + dsdx * z)).T
            return f, df
        else:
            return f
Example #20
    def __init__(self,
                 acquisition_func,
                 model,
                 maximize_func,
                 task,
                 save_dir=None,
                 initial_design=None,
                 initial_points=3,
                 incumbent_estimation=None,
                 num_save=1,
                 train_intervall=1,
                 n_restarts=1):
        """
        Implementation of the standard Bayesian optimization loop that uses
        an acquisition function and a model to optimize a given task.
        This module keeps track of additional information such as runtime,
        optimization overhead, evaluated points and saves the output
        in a csv file.

        Parameters
        ----------
        acquisition_func: AcquisitionFunctionObject
            The acquisition function which will be maximized.
        model: ModelObject
            Model (i.e. GaussianProcess, RandomForest) that models our current
            belief of the objective function.
        maximize_func: MaximizerObject
            Optimization strategy used to maximize the acquisition function.
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space.
        save_dir: String
            Output path
        initial_design: function
            Function that returns some points which will be evaluated before
            the Bayesian optimization loop is started. This allows to
            initialize the model.
        initial_points: int
            Defines the number of initial points that are evaluated before the
            actual Bayesian optimization.
        incumbent_estimation: IncumbentEstimationObject
            Object to estimate the incumbent based on the current model. The
            incumbent is the current best guess of the global optimum and is
            estimated in each iteration.
        num_save: int
            Defines after how many iterations the output is saved.
        train_intervall: int
            Specifies after how many iterations the model is retrained.
        n_restarts: int
            How often the incumbent estimation is repeated.
        """

        super(BayesianOptimization, self).__init__(acquisition_func,
                                                    model,
                                                    maximize_func,
                                                    task,
                                                    save_dir)
        self.start_time = time.time()

        if initial_design is None:
            self.initial_design = init_random_uniform
        else:
            self.initial_design = initial_design

        self.X = None
        self.Y = None
        self.time_func_eval = None
        self.time_overhead = None
        self.train_intervall = train_intervall

        self.num_save = num_save

        self.model_untrained = True
        if incumbent_estimation is None:
            self.estimator = BestObservation(self.model,
                                             self.task.X_lower,
                                             self.task.X_upper)
        else:
            self.estimator = incumbent_estimation
        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.n_restarts = n_restarts
        self.init_points = initial_points
        self.runtime = []
Example #21
class LogEI(BaseAcquisitionFunction):

    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Computes for a given x the logarithm of the expected improvement as
        acquisition value.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0
        """

        super(LogEI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """

        super(LogEI, self).update(model)
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the Log EI value and its derivatives.

        Parameters
        ----------
        X: np.ndarray(N, D)
            The input points where the acquisition function
            should be evaluated, with N as the number of points
            to evaluate at and D the number of dimensions of one X.

        derivative: Boolean
            If set to true, the derivative of the acquisition
            function at X is returned as well.
            Not implemented yet!

        Returns
        -------
        np.ndarray(N,1)
            Log Expected Improvement of X
        np.ndarray(N,D)
            Derivative of Log Expected Improvement at X
            (only if derivative=True)
        """
        if derivative:
            logger.error("LogEI does not support derivative "
                         "calculation yet")
            return

        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            return np.array([[- np.finfo(float).max]])
        m, v = self.model.predict(X)

        _, eta = self.rec.estimate_incumbent(None)

        f_min = eta - self.par

        s = np.sqrt(v)

        z = (f_min - m) / s

        log_ei = np.zeros((m.size, 1))
        for i in range(0, m.size):
            mu, sigma = m[i], s[i]

            # Degenerate case 1: first term vanishes
            if np.abs(f_min - mu) == 0:
                if sigma > 0:
                    log_ei[i] = np.log(sigma) + norm.logpdf(z[i])
                else:
                    log_ei[i] = -np.inf
            # Degenerate case 2: second term vanishes and first term
            # has a special form.
            elif sigma == 0:
                if mu < f_min:
                    log_ei[i] = np.log(f_min - mu)
                else:
                    log_ei[i] = -np.inf
            # Normal case
            else:
                b = np.log(sigma) + norm.logpdf(z[i])
                # log(y+z) is tricky, we distinguish two cases:
                if np.all(f_min > mu):
                    # When y>0, z>0, we define a=ln(y), b=ln(z).
                    # Then y+z = exp[ max(a,b) + ln(1 + exp(-|b-a|)) ],
                    # and thus log(y+z) = max(a,b) + ln(1 + exp(-|b-a|))
                    a = np.log(f_min - mu) + norm.logcdf(z[i])

                    log_ei[i] = max(a, b) + np.log(1 + np.exp(-abs(b - a)))
                else:
                    # When y<0, z>0, we define a=ln(-y), b=ln(z),
                    # and it has to be true that b >= a in
                    # order to satisfy y+z>=0.
                    # Then y+z = exp[ b + ln(exp(b-a) -1) ],
                    # and thus log(y+z) = a + ln(exp(b-a) -1)
                    a = np.log(mu - f_min) + norm.logcdf(z[i])
                    if a >= b:
                        # a>b can only happen due to numerical inaccuracies
                        # or approximation errors
                        log_ei[i] = -np.inf
                    else:
                        log_ei[i] = b + np.log(1 - np.exp(a - b))

        return log_ei
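A quick numerical check (not part of the original example) of the identity used in the comments above: for y, z > 0 with a = ln(y) and b = ln(z), log(y + z) = max(a, b) + ln(1 + exp(-|b - a|)). Evaluating it this way never exponentiates large magnitudes, so it stays finite where the naive form underflows:

import numpy as np

# a = ln(y), b = ln(z) for two tiny positive quantities y and z
a, b = -900.0, -901.0

# Naive evaluation: exp(-900) underflows to 0.0, so the log is -inf
naive = np.log(np.exp(a) + np.exp(b))

# Stable evaluation via the identity from the comments above
stable = max(a, b) + np.log1p(np.exp(-abs(b - a)))

print(naive, stable)   # -inf vs. approximately -899.687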
Example #23
    def test(self):
        X_lower = np.array([0])
        X_upper = np.array([1])
        X = init_random_uniform(X_lower, X_upper, 10)
        Y = np.sin(X)

        kernel = george.kernels.Matern52Kernel(np.ones([1]), ndim=1)

        prior = TophatPrior(-2, 2)
        model = GaussianProcess(kernel, prior=prior)
        model.train(X, Y)

        x_test = init_random_uniform(X_lower, X_upper, 3)

        # Shape matching predict
        m, v = model.predict(x_test)

        assert len(m.shape) == 2
        assert m.shape[0] == x_test.shape[0]
        assert m.shape[1] == 1
        assert len(v.shape) == 2
        assert v.shape[0] == x_test.shape[0]
        assert v.shape[1] == x_test.shape[0]

        #TODO: check gradients

        # Shape matching function sampling
        x_ = np.linspace(X_lower[0], X_upper[0], 10)
        x_ = x_[:, np.newaxis]
        funcs = model.sample_functions(x_, n_funcs=2)
        assert len(funcs.shape) == 2
        assert funcs.shape[0] == 2
        assert funcs.shape[1] == x_.shape[0]

        # Shape matching predict variance
        x_test1 = np.array([np.random.rand(1)])
        x_test2 = np.random.rand(10)[:, np.newaxis]
        var = model.predict_variance(x_test1, x_test2)
        assert len(var.shape) == 2
        assert var.shape[0] == x_test2.shape[0]
        assert var.shape[1] == 1

        # Check compatibility with all acquisition functions
        acq_func = EI(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = PI(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = LCB(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = InformationGain(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        # Check compatibility with all incumbent estimation methods
        rec = BestObservation(model, X_lower, X_upper)
        inc, inc_val = rec.estimate_incumbent(None)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1
Example #24
    def test(self):
        X_lower = np.array([0])
        X_upper = np.array([1])
        X = init_random_uniform(X_lower, X_upper, 10)
        Y = np.sin(X)

        model = RandomForest(types=np.zeros([X_lower.shape[0]]))
        model.train(X, Y)

        x_test = init_random_uniform(X_lower, X_upper, 3)

        # Shape matching predict
        m, v = model.predict(x_test)

        assert len(m.shape) == 2
        assert m.shape[0] == x_test.shape[0]
        assert m.shape[1] == 1
        assert len(v.shape) == 2
        assert v.shape[0] == x_test.shape[0]
        assert v.shape[1] == 1

        # Shape matching function sampling
        x_ = np.linspace(X_lower[0], X_upper[0], 10)
        x_ = x_[:, np.newaxis]
        #funcs = model.sample_functions(x_, n_funcs=2)
        #assert len(funcs.shape) == 2
        #assert funcs.shape[0] == 2
        #assert funcs.shape[1] == x_.shape[0]

        # Check compatibility with all acquisition functions
        acq_func = EI(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = PI(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = LCB(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        # Check compatibility with all incumbent estimation methods
        rec = BestObservation(model, X_lower, X_upper)
        inc, inc_val = rec.estimate_incumbent(None)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1
Example #25
class BayesianOptimization(BaseSolver):
    def __init__(self,
                 acquisition_func,
                 model,
                 maximize_func,
                 task,
                 save_dir=None,
                 initial_design=None,
                 initial_points=3,
                 incumbent_estimation=None,
                 num_save=1,
                 train_intervall=1,
                 n_restarts=1):
        """
        Implementation of the standard Bayesian optimization loop that uses
        an acquisition function and a model to optimize a given task.
        This module keeps track of additional information such as runtime,
        optimization overhead, evaluated points and saves the output
        in a csv file.

        Parameters
        ----------
        acquisition_func: BaseAcquisitionFunctionObject
            The acquisition function which will be maximized.
        model: ModelObject
            Model (e.g. GaussianProcess, RandomForest) that models our
            current belief of the objective function.
        maximize_func: MaximizerObject
            Optimization strategy that is used to maximize the acquisition
            function.
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space.
        save_dir: String
            Output path
        initial_design: function
            Function that returns some points which will be evaluated before
            the Bayesian optimization loop is started. This allows the
            model to be initialized.
        initial_points: int
            Defines the number of initial points that are evaluated before the
            actual Bayesian optimization.
        incumbent_estimation: IncumbentEstimationObject
            Object to estimate the incumbent based on the current model. The
            incumbent is the current best guess of the global optimum and is
            estimated in each iteration.
        num_save: int
            Defines after how many iterations the output is saved.
        train_intervall: int
            Specifies after how many iterations the model is retrained.
        n_restarts: int
            How often the incumbent estimation is repeated.
        """

        super(BayesianOptimization,
              self).__init__(acquisition_func, model, maximize_func, task,
                             save_dir)
        self.start_time = time.time()

        if initial_design is None:
            self.initial_design = init_random_uniform
        else:
            self.initial_design = initial_design

        self.X = None
        self.Y = None
        self.time_func_eval = None
        self.time_overhead = None
        self.train_intervall = train_intervall

        self.num_save = num_save
        self.time_start = None

        self.model_untrained = True
        if incumbent_estimation is None:
            self.estimator = BestObservation(self.model, self.task.X_lower,
                                             self.task.X_upper)
        else:
            self.estimator = incumbent_estimation
        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.n_restarts = n_restarts
        self.init_points = initial_points
        self.runtime = []

    def run(self, num_iterations=10, X=None, Y=None):
        """
        The main Bayesian optimization loop

        Parameters
        ----------
        num_iterations: int
            The number of iterations
        X: np.ndarray(N,D)
            Initial points that are already evaluated
        Y: np.ndarray(N,1)
            Function values of the already evaluated points

        Returns
        -------
        np.ndarray(1,D)
            Incumbent
        np.ndarray(1,1)
            (Estimated) function value of the incumbent
        """
        # Save the time where we start the Bayesian optimization procedure
        self.time_start = time.time()

        if X is None and Y is None:
            self.time_func_eval = np.zeros([self.init_points])
            self.time_overhead = np.zeros([self.init_points])
            self.X = np.zeros([self.init_points, self.task.n_dims])
            self.Y = np.zeros([self.init_points, 1])

            init = self.initial_design(self.task.X_lower,
                                       self.task.X_upper,
                                       N=self.init_points)

            for i, x in enumerate(init):
                x = x[np.newaxis, :]

                logger.info("Evaluate: %s" % x)

                start_time = time.time()
                y = self.task.evaluate(x)

                self.X[i] = x[0, :]
                self.Y[i] = y[0, :]
                self.time_func_eval[i] = time.time() - start_time
                self.time_overhead[i] = 0.0

                logger.info("Configuration achieved a performance "
                            "of %f in %f seconds" %
                            (self.Y[i], self.time_func_eval[i]))

                # Use best point seen so far as incumbent
                best_idx = np.argmin(self.Y)
                self.incumbent = np.array([self.X[best_idx]])
                self.incumbent_value = np.array([self.Y[best_idx]])

                self.incumbents.append(self.incumbent)
                self.incumbent_values.append(self.incumbent_value)
                self.runtime.append(time.time() - self.start_time)

                if self.save_dir is not None and i % self.num_save == 0:
                    self.save_iteration(i,
                                        hyperparameters=None,
                                        acquisition_value=0)
                    self.save_json(i)

        else:
            self.X = X
            self.Y = Y
            self.time_func_eval = np.zeros([self.X.shape[0]])
            self.time_overhead = np.zeros([self.X.shape[0]])
            self.init_points = X.shape[0]

            print(X.shape, Y.shape)

            for i in range(Y.shape[0]):
                print "Score:", Y[i][0], X[i]

#             best = np.argmin(Y)
#             incumbent = X[best]
#             incumbent_value = Y[best]
#             self.incumbents.append(incumbent[np.newaxis, :])
#             self.incumbent_values.append(incumbent_value[np.newaxis, :])
#             self.runtime.append(time.time() - self.start_time)

        it = self.init_points
        while it < num_iterations:
            self.acquisition_func.update_time(it)
            logger.info("Start iteration %d ... ", it)

            start_time = time.time()
            # Choose next point to evaluate
            if it % self.train_intervall == 0:
                do_optimize = True
            else:
                do_optimize = False

            try:
                new_x = self.choose_next(self.X, self.Y, do_optimize)

                # Estimate current incumbent
                start_time_inc = time.time()
                startpoints = init_random_uniform(self.task.X_lower,
                                                  self.task.X_upper,
                                                  self.n_restarts)
                self.incumbent, self.incumbent_value = \
                        self.estimator.estimate_incumbent(startpoints)

                self.incumbents.append(self.incumbent)
                self.incumbent_values.append(self.incumbent_value)

                logger.info(
                    "New incumbent %s found in %f seconds with "
                    "estimated performance %f", str(self.incumbent),
                    time.time() - start_time_inc, self.incumbent_value)

                time_overhead = time.time() - start_time
                self.time_overhead = np.append(self.time_overhead,
                                               np.array([time_overhead]))

                logger.info("Optimization overhead was %f seconds" %
                            (self.time_overhead[-1]))

                logger.info("Evaluate candidate %s" % (str(new_x)))
                start_time = time.time()
                new_y = self.task.evaluate(new_x)
                time_func_eval = time.time() - start_time
                self.time_func_eval = np.append(self.time_func_eval,
                                                np.array([time_func_eval]))

                logger.info("Configuration achieved a performance of %f " %
                            (new_y[0, 0]))

                logger.info(
                    "Evaluation of this configuration took %f seconds" %
                    (self.time_func_eval[-1]))

                # Update the data
                self.X = np.append(self.X, new_x, axis=0)
                self.Y = np.append(self.Y, new_y, axis=0)

                self.runtime.append(time.time() - self.start_time)

                if self.save_dir is not None and it % self.num_save == 0:
                    hypers = self.model.hypers
                    self.save_iteration(
                        it,
                        hyperparameters=hypers,
                        acquisition_value=self.acquisition_func(new_x))
                    self.save_json(it)

                it += 1
            except KeyboardInterrupt:
                raise
            except:
                logger.error("Experiment failed, retrying")

        # TODO: Retrain model and then return the incumbent
        logger.info("Return %s as incumbent with predicted performance %f" %
                    (str(self.incumbent), self.incumbent_value))

        return self.incumbent, self.incumbent_value

    def choose_next(self, X=None, Y=None, do_optimize=True):
        """
        Suggests a new point to evaluate.

        Parameters
        ----------
        X: np.ndarray(N,D)
            Points that are already evaluated
        Y: np.ndarray(N,1)
            Function values of the already evaluated points
        do_optimize: bool
            If true the hyperparameters of the model are
            optimized before the acquisition function is
            maximized.
        Returns
        -------
        np.ndarray(1,D)
            Suggested point
        """

        if X is None and Y is None:
            x = self.initial_design(self.task.X_lower, self.task.X_upper, N=1)

        elif X.shape[0] == 1:
            # We need at least 2 data points to train a GP
            x = self.initial_design(self.task.X_lower, self.task.X_upper, N=1)
        else:
            try:
                logger.info("Train model...")
                t = time.time()
                self.model.train(X, Y, do_optimize=do_optimize)
                logger.info("Time to train the model: %f", (time.time() - t))
            except:
                logger.error("Model could not be trained with data "
                             "X: %s, Y: %s", X, Y)
                raise
            self.model_untrained = False
            self.acquisition_func.update(self.model)

            logger.info("Maximize acquisition function...")
            t = time.time()
            x = self.maximize_func.maximize()

            logger.info("Time to maximize the acquisition function: %f", \
                        (time.time() - t))

        return x

    def get_json_data(self, it):
        '''
        Overrides method in BaseSolver.
        '''
        jsonData = {
            "optimization_overhead":
            None if self.time_overhead is None else self.time_overhead[it],
            "runtime":
            None if self.time_start is None else time.time() - self.time_start,
            "incumbent":
            None if self.incumbent is None else self.incumbent.tolist(),
            "incumbent_fval":
            None
            if self.incumbent_value is None else self.incumbent_value.tolist(),
            "time_func_eval":
            self.time_func_eval[it],
            "iteration":
            it
        }
        return jsonData
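For orientation, here is a minimal usage sketch (not from the original source) of how this solver gets wired together. The SinTask class is a hypothetical stand-in; any object works as long as it exposes the attributes and methods the loop above actually touches (X_lower, X_upper, n_dims, evaluate(x)):

import numpy as np

class SinTask(object):
    """Hypothetical toy task exposing the interface used by run()."""
    X_lower = np.array([0.0])
    X_upper = np.array([2.0 * np.pi])
    n_dims = 1

    def evaluate(self, x):
        # x has shape (1, D); returns shape (1, 1)
        return np.sin(x)

task = SinTask()
# model, acquisition_func and maximize_func would be built as in the other
# examples in this listing, e.g. a GaussianProcess, EI over the same bounds,
# and some maximizer object. Then:
# solver = BayesianOptimization(acquisition_func, model, maximize_func, task)
# incumbent, incumbent_value = solver.run(num_iterations=20)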
Example #26
class LogEI(AcquisitionFunction):
    def __init__(self, model, X_lower, X_upper, par=0.01, **kwargs):
        r"""
        Computes for a given x the logarithm of the expected improvement as
        acquisition value.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.01
        """

        super(LogEI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """

        super(LogEI, self).update(model)
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the Log EI value and its derivatives.

        Parameters
        ----------
        X: np.ndarray(N, D)
            The input points where the acquisition function should be
            evaluated. N is the number of points to evaluate at and D is
            the number of dimensions of one X.

        derivative: Boolean
            If set to true, the derivative of the acquisition
            function at X is returned as well.
            Not implemented yet!

        Returns
        -------
        np.ndarray(1,1)
            Log Expected Improvement of X
        np.ndarray(1,D)
            Derivative of Log Expected Improvement at X
            (only if derivative=True)
        """
        if derivative:
            logger.error("LogEI does not support derivative "
                         "calculation yet")
            return

        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            return np.array([[-np.finfo(np.float64).max]])
        m, v = self.model.predict(X)

        _, eta = self.rec.estimate_incumbent(None)

        f_min = eta - self.par

        s = np.sqrt(v)

        z = (f_min - m) / s

        log_ei = np.zeros((m.size, 1))
        for i in range(0, m.size):
            mu, sigma = m[i], s[i]

            # Degenerate case 1: first term vanishes
            if np.all(np.abs(f_min - mu) == 0):
                if sigma > 0:
                    log_ei[i] = np.log(sigma) + norm.logpdf(z[i])
                else:
                    log_ei[i] = -np.inf
            # Degenerate case 2: second term vanishes and first term
            # has a special form.
            elif sigma == 0:
                if np.all(mu < f_min):
                    log_ei[i] = np.log(f_min - mu)
                else:
                    log_ei[i] = -np.inf
            # Normal case
            else:
                b = np.log(sigma) + norm.logpdf(z[i])
                # log(y+z) is tricky, we distinguish two cases:
                if np.all(f_min > mu):
                    # When y>0, z>0, we define a=ln(y), b=ln(z).
                    # Then y+z = exp[ max(a,b) + ln(1 + exp(-|b-a|)) ],
                    # and thus log(y+z) = max(a,b) + ln(1 + exp(-|b-a|))
                    a = np.log(f_min - mu) + norm.logcdf(z[i])

                    log_ei[i] = max(a, b) + np.log(1 + np.exp(-abs(b - a)))
                else:
                    # When y<0, z>0, we define a=ln(-y), b=ln(z),
                    # and it has to be true that b >= a in
                    # order to satisfy y+z>=0.
                    # Then y+z = exp[ b + ln(exp(b-a) -1) ],
                    # and thus log(y+z) = a + ln(exp(b-a) -1)
                    a = np.log(mu - f_min) + norm.logcdf(z[i])
                    if a >= b:
                        # a>b can only happen due to numerical inaccuracies
                        # or approximation errors
                        log_ei[i] = -np.inf
                    else:
                        log_ei[i] = b + np.log(1 - np.exp(a - b))

        return log_ei
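To see why compute() works in log space instead of just taking np.log of the plain EI, consider a sketch (assuming scipy.stats.norm, which the snippet's norm calls appear to come from; the posterior numbers are made up) at a point far above the incumbent. Plain EI underflows to exactly 0.0 in float64, so its log is -inf, while the logpdf/logcdf route above stays finite:

import numpy as np
from scipy.stats import norm

# Hypothetical posterior at a single point, far above the incumbent
f_min, mu, sigma = 0.0, 40.0, 1.0
z = (f_min - mu) / sigma            # z = -40

# Plain EI: both pdf(z) and cdf(z) underflow, so the result is exactly 0.0
naive_ei = sigma * (z * norm.cdf(z) + norm.pdf(z))
print(naive_ei)                     # 0.0, so log(naive_ei) would be -inf

# Log-space route, as in the f_min < mu branch of compute() above
a = np.log(mu - f_min) + norm.logcdf(z)
b = np.log(sigma) + norm.logpdf(z)
log_ei = b + np.log(1 - np.exp(a - b))
print(log_ei)                       # finite, roughly -808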
Example #27
    def test(self):
        X_lower = np.array([0])
        X_upper = np.array([1])
        X = init_random_uniform(X_lower, X_upper, 10)
        Y = np.sin(X)

        kernel = george.kernels.Matern52Kernel(np.ones([1]),
                                               ndim=1)

        prior = TophatPrior(-2, 2)
        model = GaussianProcess(kernel, prior=prior)
        model.train(X, Y)

        x_test = init_random_uniform(X_lower, X_upper, 3)

        # Shape matching predict
        m, v = model.predict(x_test)

        assert len(m.shape) == 2
        assert m.shape[0] == x_test.shape[0]
        assert m.shape[1] == 1
        assert len(v.shape) == 2
        assert v.shape[0] == x_test.shape[0]
        assert v.shape[1] == x_test.shape[0]

        #TODO: check gradients

        # Shape matching function sampling
        x_ = np.linspace(X_lower[0], X_upper[0], 10)
        x_ = x_[:, np.newaxis]
        funcs = model.sample_functions(x_, n_funcs=2)
        assert len(funcs.shape) == 2
        assert funcs.shape[0] == 2
        assert funcs.shape[1] == x_.shape[0]

        # Shape matching predict variance
        x_test1 = np.array([np.random.rand(1)])
        x_test2 = np.random.rand(10)[:, np.newaxis]
        var = model.predict_variance(x_test1, x_test2)
        assert len(var.shape) == 2
        assert var.shape[0] == x_test2.shape[0]
        assert var.shape[1] == 1

        # Check compatibility with all acquisition functions
        acq_func = EI(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = PI(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = LCB(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        acq_func = InformationGain(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

        # Check compatibility with all incumbent estimation methods
        rec = BestObservation(model, X_lower, X_upper)
        inc, inc_val = rec.estimate_incumbent(None)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1

        rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        assert len(inc.shape) == 2
        assert inc.shape[0] == 1
        assert inc.shape[1] == X_upper.shape[0]
        assert len(inc_val.shape) == 2
        assert inc_val.shape[0] == 1
        assert inc_val.shape[1] == 1
Example #28
class EI(BaseAcquisitionFunction):
    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Computes for a given x the expected improvement as
        acquisition value.
        :math:`EI(X) := \mathbb{E}\left[ \max\left\{0, f(\mathbf{X^+}) -
            f_{t+1}(\mathbf{X}) - \xi\right\} \right]`, with
        :math:`f(X^+)` as the incumbent.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
                 - getCurrentBestX().
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)

        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. Default is 0.0.
        """

        super(EI, self).__init__(model, X_lower, X_upper)
        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.
        Parameters
        ----------
        model : Model object
            Models the objective function.
        """

        super(EI, self).update(model)
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the EI value and its derivatives.

        Parameters
        ----------
        X: np.ndarray(1, D)
            The input point where the acquisition function should be
            evaluated. EI accepts only a single point per call.

        derivative: Boolean
            If set to true, the derivative of the acquisition
            function at X is returned as well.

        Returns
        -------
        np.ndarray(1,1)
            Expected Improvement of X
        np.ndarray(1,D)
            Derivative of Expected Improvement at X (only if derivative=True)
        """

        if len(X.shape) == 1:
            X = X[np.newaxis, :]

        if X.shape[0] > 1:
            raise ValueError("EI is only for single test points")

        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            if derivative:
                f = 0
                df = np.zeros((1, X.shape[1]))
                return np.array([[f]]), df
            else:
                return np.array([[0]])

        m, v = self.model.predict(X)

        # Use the best seen observation as incumbent
        _, eta = self.rec.estimate_incumbent(None)

        s = np.sqrt(v)

        if (s == 0).any():
            f = np.array([[0]])
            df = np.zeros((1, X.shape[1]))

        else:
            z = (eta - m - self.par) / s
            #            f = (eta - m - self.par) * norm.cdf(z) + s * norm.pdf(z)
            f = s * (z * norm.cdf(z) + norm.pdf(z))

            if derivative:
                dmdx, ds2dx = self.model.predictive_gradients(X)
                dmdx = dmdx[0]
                ds2dx = ds2dx[0][:, None]
                dsdx = ds2dx / (2 * s)
                df = (-dmdx * norm.cdf(z) + (dsdx * norm.pdf(z))).T
            if (f < 0).any():
                logger.error("Expected Improvement is smaller than 0!")
                raise ValueError("Expected Improvement is smaller than 0")

        if derivative:
            return f, df
        else:
            return f
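As a side note, the commented-out closed form inside compute() and the factored form actually used are algebraically identical, since z = (eta - m - par) / s implies (eta - m - par) * cdf(z) + s * pdf(z) = s * (z * cdf(z) + pdf(z)). A small sketch (assuming scipy.stats.norm; the posterior numbers are made up) confirms this:

import numpy as np
from scipy.stats import norm

eta, par = 0.3, 0.0
m, s = np.array([[0.1]]), np.array([[0.5]])   # hypothetical GP posterior

z = (eta - m - par) / s
f1 = (eta - m - par) * norm.cdf(z) + s * norm.pdf(z)
f2 = s * (z * norm.cdf(z) + norm.pdf(z))

assert np.allclose(f1, f2)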