Example #1
    def predict_variance(self, x1, X2):
        r"""
        Predicts the variance between a test point x1 and a set of points X2 by
            :math:`\sigma(x_1, X_2) = k_{x_1, X_2} - k_{x_1, X} (K_{X,X} + \sigma^2 \mathds{I})^{-1} k_{X, X_2}`

        Parameters
        ----------
        x1: np.ndarray (1, D)
            First test point
        X2: np.ndarray (N, D)
            Set of test points
        Returns
        ----------
        np.array(N, 1)
            predictive variance between x1 and X2

        """

        if not self.is_trained:
            raise Exception('Model has to be trained first!')

        if self.normalize_input:
            x1_norm, _, _ = normalization.zero_one_normalization(x1, self.lower, self.upper)
            X2_norm, _, _ = normalization.zero_one_normalization(X2, self.lower, self.upper)
        else:
            x1_norm = x1
            X2_norm = X2

        x_ = np.concatenate((x1_norm, X2_norm))
        _, var = self.predict(x_, full_cov=True)

        var = var[-1, :-1, np.newaxis]

        return var
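
A minimal usage sketch for predict_variance, assuming RoBO's GaussianProcess (robo.models.gaussian_process) together with a george kernel; the constructor arguments shown here (lower, upper, rng) are assumptions and may differ between versions:

# Sketch under assumptions: module path and constructor arguments are taken
# from the RoBO code base and may differ between versions.
import numpy as np
import george
from robo.models.gaussian_process import GaussianProcess

rng = np.random.RandomState(0)
X = rng.rand(20, 2)                           # 20 training points in [0, 1]^2
y = np.sin(3 * X[:, 0]) + 0.1 * rng.randn(20)

kernel = george.kernels.Matern52Kernel(1.0, ndim=2)
model = GaussianProcess(kernel, lower=np.zeros(2), upper=np.ones(2), rng=rng)
model.train(X, y)

x1 = rng.rand(1, 2)                           # single test point
X2 = rng.rand(5, 2)                           # set of test points
cov = model.predict_variance(x1, X2)          # expected shape: (5, 1)
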
    def sample_functions(self, X_test, n_funcs=1):
        """
        Samples F function values from the current posterior at the N
        specified test points.
        Parameters
        ----------
        X_test: np.ndarray (N, D)
            Input test points
        n_funcs: int
            Number of function values that are drawn at each test point.
        Returns
        ----------
        function_samples: np.array(F, N)
            The F function values drawn at the N test points.
        """

        if self.normalize_input:
            X_test_norm, _, _ = normalization.zero_one_normalization(
                X_test, self.lower, self.upper)
        else:
            X_test_norm = X_test

        if not self.is_trained:
            raise Exception('Model has to be trained first!')

        funcs = self.gp.sample_conditional(self.y, X_test_norm, n_funcs)

        if self.normalize_output:
            funcs = normalization.zero_mean_unit_var_unnormalization(
                funcs, self.y_mean, self.y_std)

        if len(funcs.shape) == 1:
            return funcs[None, :]
        else:
            return funcs
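
A short sketch of drawing posterior samples with sample_functions; as above, the RoBO module path and keyword arguments are assumptions:

# Sketch under assumptions about the GaussianProcess constructor.
import numpy as np
import george
from robo.models.gaussian_process import GaussianProcess

rng = np.random.RandomState(0)
X, y = rng.rand(20, 2), rng.rand(20)
model = GaussianProcess(george.kernels.Matern52Kernel(1.0, ndim=2),
                        lower=np.zeros(2), upper=np.ones(2), rng=rng)
model.train(X, y)

X_test = rng.rand(50, 2)
samples = model.sample_functions(X_test, n_funcs=3)   # expected shape: (3, 50)
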
Example #3
    def sample_functions(self, X_test, n_funcs=1):
        """
        Samples F function values from the current posterior at the N
        specified test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            Input test points
        n_funcs: int
            Number of function values that are drawn at each test point.

        Returns
        ----------
        function_samples: np.array(F, N)
            The F function values drawn at the N test points.
        """

        if self.normalize_input:
            X_test_norm, _, _ = normalization.zero_one_normalization(X_test, self.lower, self.upper)
        else:
            X_test_norm = X_test

        if not self.is_trained:
            raise Exception('Model has to be trained first!')

        funcs = self.gp.sample_conditional(self.y, X_test_norm, n_funcs)

        if self.normalize_output:
            funcs = normalization.zero_mean_unit_var_unnormalization(funcs, self.y_mean, self.y_std)

        if len(funcs.shape) == 1:
            return funcs[None, :]
        else:
            return funcs
    def train(self, X, y, do_optimize=True):
        """
        Computes the Cholesky decomposition of the covariance of X and
        estimates the GP hyperparameters by optimizing the marginal
        log-likelihood. The prior mean of the GP is set to the empirical
        mean of y.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to true, the hyperparameters are optimized; otherwise
            the default hyperparameters of the kernel are used.
        """

        if self.normalize_input:
            # Normalize input to be in [0, 1]
            self.X, self.lower, self.upper = normalization.zero_one_normalization(
                X, self.lower, self.upper)
        else:
            self.X = X

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(
                y)
            if self.y_std == 0:
                raise ValueError(
                    "Cannot normalize output. All targets have the same value")
        else:
            self.y = y

        # Use the empirical mean of the data as mean for the GP
        self.mean = np.mean(self.y, axis=0)

        self.gp = george.GP(self.kernel, mean=self.mean)

        if do_optimize:
            self.hypers = self.optimize()
            self.gp.kernel[:] = self.hypers[:-1]
            self.noise = np.exp(self.hypers[-1])  # sigma^2
        else:
            self.hypers = self.gp.kernel[:]
            self.hypers = np.append(self.hypers, np.log(self.noise))

        logger.debug("GP Hyperparameters: " + str(self.hypers))

        try:
            self.gp.compute(self.X, yerr=np.sqrt(self.noise))
        except np.linalg.LinAlgError:
            self.noise *= 10
            self.gp.compute(self.X, yerr=np.sqrt(self.noise))

        self.is_trained = True
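
The resulting self.hypers vector concatenates george's kernel parameter vector (stored on a log scale) with the log of the noise term. A sketch of the do_optimize=False path, which keeps the preset kernel parameters and noise; the noise keyword is an assumption about the constructor:

# Sketch under assumptions about the GaussianProcess constructor.
import numpy as np
import george
from robo.models.gaussian_process import GaussianProcess

rng = np.random.RandomState(0)
X, y = rng.rand(30, 3), rng.rand(30)

kernel = george.kernels.Matern52Kernel(1.0, ndim=3)
model = GaussianProcess(kernel, noise=1e-3,
                        lower=np.zeros(3), upper=np.ones(3), rng=rng)
model.train(X, y, do_optimize=False)   # skip marginal log-likelihood optimization
print(model.hypers)                    # kernel parameters (log scale) + log noise
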
Example #5
    def predict(self, X_test, **kwargs):
        r"""
        Returns the predictive mean and variance of the objective function
        at the given test points, averaged over all hyperparameter samples.
        The mean is computed by:
        :math:`\mu(x) = \frac{1}{M}\sum_{m=1}^{M}\mu_m(x)`
        And the variance by:
        :math:`\sigma^2(x) = \frac{1}{M}\sum_{m=1}^{M}\left(\sigma^2_m(x) + \mu_m(x)^2\right) - \mu(x)^2`

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            Input test points

        Returns
        ----------
        np.array(N,)
            predictive mean
        np.array(N,)
            predictive variance

        """
        if not self.is_trained:
            raise Exception('Model has to be trained first!')

        if self.normalize_input:
            X_test_norm, _, _ = normalization.zero_one_normalization(
                X_test, self.lower, self.upper)
        else:
            X_test_norm = X_test

        mu = np.zeros([len(self.models), X_test_norm.shape[0]])
        var = np.zeros([len(self.models), X_test_norm.shape[0]])
        for i, model in enumerate(self.models):
            mu[i], var[i] = model.predict(X_test_norm)

        # See the Algorithm Runtime Prediction paper by Hutter et al.
        # for the derivation of the total variance
        m = mu.mean(axis=0)
        #v = np.mean(mu ** 2 + var) - m ** 2
        v = var.mean(axis=0)

        if self.normalize_output:
            m = normalization.zero_mean_unit_var_unnormalization(
                m, self.y_mean, self.y_std)

        # Clip negative variances and set them to the smallest
        # positive float value
        if v.shape[0] == 1:
            v = np.clip(v, np.finfo(v.dtype).eps, np.inf)
        else:
            v = np.clip(v, np.finfo(v.dtype).eps, np.inf)
            v[np.where((v < np.finfo(v.dtype).eps)
                       & (v > -np.finfo(v.dtype).eps))] = 0

        return m, v
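
The commented-out line is a per-point version of the law-of-total-variance estimator referenced above; a small numpy sketch contrasting it with the plain average of the per-sample variances that the method returns:

import numpy as np

mu = np.array([[1.0, 2.0], [1.5, 2.5]])    # per-sample means, shape (M, N)
var = np.array([[0.1, 0.2], [0.3, 0.4]])   # per-sample variances, shape (M, N)

m = mu.mean(axis=0)                                  # averaged predictive mean
v_avg = var.mean(axis=0)                             # what the method returns
v_total = np.mean(mu ** 2 + var, axis=0) - m ** 2    # law-of-total-variance estimate
# v_total additionally accounts for the spread of the per-sample means mu_m(x)
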
    def test_zero_one_normalization(self):

        X = np.random.randn(100, 3)
        X_norm, lo, up = normalization.zero_one_normalization(X)

        assert X_norm.shape == X.shape
        assert np.min(X_norm) >= 0
        assert np.max(X_norm) <= 1
        assert lo.shape[0] == X.shape[1]
        assert up.shape[0] == X.shape[1]
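
For reference, a minimal sketch of what a zero-one normalization of this kind computes; the helper below is hypothetical and only illustrates the behaviour the test checks (the actual robo normalization module may differ in details):

import numpy as np

def zero_one_normalize(X, lower=None, upper=None):
    # Hypothetical stand-in: scale each column of X to [0, 1], inferring the
    # bounds from the data when they are not given explicitly.
    if lower is None:
        lower = np.min(X, axis=0)
    if upper is None:
        upper = np.max(X, axis=0)
    return (X - lower) / (upper - lower), lower, upper

X = np.random.randn(100, 3)
X_norm, lo, up = zero_one_normalize(X)
assert X_norm.shape == X.shape
assert X_norm.min() >= 0 and X_norm.max() <= 1
assert lo.shape[0] == X.shape[1] and up.shape[0] == X.shape[1]
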
Example #7
    def train(self, X, y, do_optimize=True):
        """
        Computes the Cholesky decomposition of the covariance of X and
        estimates the GP hyperparameters by optimizing the marginal
        log-likelihood. The prior mean of the GP is set to the empirical
        mean of y.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to true, the hyperparameters are optimized; otherwise
            the default hyperparameters of the kernel are used.
        """

        if self.normalize_input:
            # Normalize input to be in [0, 1]
            self.X, self.lower, self.upper = normalization.zero_one_normalization(X, self.lower, self.upper)
        else:
            self.X = X

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
            if self.y_std == 0:
                raise ValueError("Cannot normalize output. All targets have the same value")
        else:
            self.y = y

        # Use the empirical mean of the data as mean for the GP
        self.mean = np.mean(self.y, axis=0)

        self.gp = george.GP(self.kernel, mean=self.mean)

        if do_optimize:
            self.hypers = self.optimize()
            self.gp.kernel[:] = self.hypers[:-1]
            self.noise = np.exp(self.hypers[-1])  # sigma^2
        else:
            self.hypers = self.gp.kernel[:]
            self.hypers = np.append(self.hypers, np.log(self.noise))

        logger.debug("GP Hyperparameters: " + str(self.hypers))

        try:
            self.gp.compute(self.X, yerr=np.sqrt(self.noise))
        except np.linalg.LinAlgError:
            self.noise *= 10
            self.gp.compute(self.X, yerr=np.sqrt(self.noise))

        self.is_trained = True
Example #8
    def predict(self, X_test, full_cov=False, **kwargs):
        r"""
        Returns the predictive mean and variance of the objective function at
        the given test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            Input test points
        full_cov: bool
            If set to true, the whole covariance matrix between the test points is returned

        Returns
        ----------
        np.array(N,)
            predictive mean
        np.array(N,) or np.array(N, N) if full_cov == True
            predictive variance

        """

        if not self.is_trained:
            raise Exception('Model has to be trained first!')

        if self.normalize_input:
            X_test_norm, _, _ = normalization.zero_one_normalization(
                X_test, self.lower, self.upper)
        else:
            X_test_norm = X_test

        mu, var = self.gp.predict(self.y, X_test_norm)

        if self.normalize_output:
            mu = normalization.zero_mean_unit_var_unnormalization(
                mu, self.y_mean, self.y_std)
            var *= self.y_std**2
        if not full_cov:
            var = np.diag(var)

        # Clip negative variances and set them to the smallest
        # positive float value
        if var.shape[0] == 1:
            var = np.clip(var, np.finfo(var.dtype).eps, np.inf)
        else:
            var = np.clip(var, np.finfo(var.dtype).eps, np.inf)
            var[np.where((var < np.finfo(var.dtype).eps)
                         & (var > -np.finfo(var.dtype).eps))] = 0

        return mu, var
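
A short sketch of the full_cov flag on a hypothetical trained model (same assumed RoBO setup as in the earlier sketches):

# Sketch under the same assumptions as above.
import numpy as np
import george
from robo.models.gaussian_process import GaussianProcess

rng = np.random.RandomState(0)
X, y = rng.rand(20, 2), rng.rand(20)
model = GaussianProcess(george.kernels.Matern52Kernel(1.0, ndim=2),
                        lower=np.zeros(2), upper=np.ones(2), rng=rng)
model.train(X, y)

X_test = rng.rand(10, 2)
mu, var = model.predict(X_test)                  # marginal variances, expected shape (10,)
mu, cov = model.predict(X_test, full_cov=True)   # full covariance, expected shape (10, 10)
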
Example #9
    def predict(self, X_test, full_cov=False, **kwargs):
        r"""
        Returns the predictive mean and variance of the objective function at
        the given test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            Input test points
        full_cov: bool
            If set to true, the whole covariance matrix between the test points is returned

        Returns
        ----------
        np.array(N,)
            predictive mean
        np.array(N,) or np.array(N, N) if full_cov == True
            predictive variance

        """

        if not self.is_trained:
            raise Exception('Model has to be trained first!')

        if self.normalize_input:
            X_test_norm, _, _ = normalization.zero_one_normalization(X_test, self.lower, self.upper)
        else:
            X_test_norm = X_test

        mu, var = self.gp.predict(self.y, X_test_norm)

        if self.normalize_output:
            mu = normalization.zero_mean_unit_var_unnormalization(mu, self.y_mean, self.y_std)
            var *= self.y_std ** 2
        if not full_cov:
            var = np.diag(var)

        # Clip negative variances and set them to the smallest
        # positive float value
        if var.shape[0] == 1:
            var = np.clip(var, np.finfo(var.dtype).eps, np.inf)
        else:
            var = np.clip(var, np.finfo(var.dtype).eps, np.inf)
            var[np.where((var < np.finfo(var.dtype).eps) & (var > -np.finfo(var.dtype).eps))] = 0

        return mu, var
Example #10
def transform(X, lower, upper):
    X_norm, _, _ = normalization.zero_one_normalization(
        X[:, :-1], lower, upper)
    X_norm = np.concatenate((X_norm, np.rint(X[:, None, -1])), axis=1)
    return X_norm
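
transform assumes that the last column of X holds a discrete task index that is rounded but not rescaled, while the remaining columns are zero-one normalized; a small self-contained numpy sketch of that layout (the bounds are made up for illustration):

import numpy as np

# Two continuous hyperparameters plus an integer task index in the last column.
X = np.array([[0.2, 5.0, 0.0],
              [0.7, 9.0, 1.0]])
lower = np.array([0.0, 1.0])    # bounds for the continuous columns only
upper = np.array([1.0, 10.0])

X_cont = (X[:, :-1] - lower) / (upper - lower)            # zero-one normalization
X_norm = np.concatenate((X_cont, np.rint(X[:, None, -1])), axis=1)
# The (rounded) task index stays untouched in the last column of X_norm.
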
    def train(self, X, y, do_optimize=True, **kwargs):
        """
        Performs MCMC sampling to draw hyperparameter configurations from the
        likelihood and trains a GP on X and y for each sample.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to true, we perform MCMC sampling; otherwise we just use the
            hyperparameters specified in the kernel.
        """

        if self.normalize_input:
            # Normalize input to be in [0, 1]
            self.X, self.lower, self.upper = normalization.zero_one_normalization(X, self.lower, self.upper)

        else:
            self.X = X

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
            if self.y_std == 0:
                raise ValueError("Cannot normalize output. All targets have the same value")
        else:
            self.y = y

        # Use the mean of the data as mean for the GP
        self.mean = np.mean(self.y, axis=0)
        self.gp = george.GP(self.kernel, mean=self.mean)

        if do_optimize:
            # We have one walker for each hyperparameter configuration
            sampler = emcee.EnsembleSampler(self.n_hypers,
                                            len(self.kernel.pars) + 1,
                                            self.loglikelihood)

            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                if self.prior is None:
                    self.p0 = np.random.rand(self.n_hypers, len(self.kernel.pars) + 1)
                else:
                    self.p0 = self.prior.sample_from_prior(self.n_hypers)
                # Run MCMC sampling
                self.p0, _, _ = sampler.run_mcmc(self.p0,
                                                 self.burnin_steps,
                                                 rstate0=self.rng)

                self.burned = True

            # Start sampling
            pos, _, _ = sampler.run_mcmc(self.p0,
                                         self.chain_length,
                                         rstate0=self.rng)

            # Save the current position; it will be the starting point in
            # the next iteration
            self.p0 = pos

            # Take the last samples from each walker
            self.hypers = sampler.chain[:, -1]

        else:
            self.hypers = self.gp.kernel[:].tolist()
            self.hypers.append(self.noise)
            self.hypers = [self.hypers]

        self.models = []
        for sample in self.hypers:

            # Instantiate a GP for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            kernel.pars = np.exp(sample[:-1])
            noise = np.exp(sample[-1])
            model = GaussianProcess(kernel,
                                    normalize_output=self.normalize_output,
                                    normalize_input=self.normalize_input,
                                    noise=noise,
                                    lower=self.lower,
                                    upper=self.upper,
                                    rng=self.rng)
            model.train(X, y, do_optimize=False)
            self.models.append(model)

        self.is_trained = True
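
A minimal usage sketch for the MCMC variant, assuming RoBO's GaussianProcessMCMC (robo.models.gaussian_process_mcmc); the keyword arguments n_hypers, chain_length and burnin_steps are assumptions about the constructor:

# Sketch under assumptions about the module path and constructor signature.
import numpy as np
import george
from robo.models.gaussian_process_mcmc import GaussianProcessMCMC

rng = np.random.RandomState(0)
X, y = rng.rand(20, 2), rng.rand(20)

kernel = george.kernels.Matern52Kernel(1.0, ndim=2)
model = GaussianProcessMCMC(kernel, n_hypers=8, chain_length=100,
                            burnin_steps=100,
                            lower=np.zeros(2), upper=np.ones(2), rng=rng)
model.train(X, y)                          # draws 8 hyperparameter samples
mu, var = model.predict(rng.rand(5, 2))    # averaged over the 8 resulting GPs
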
Example #12
    def predict(self, X_test, **kwargs):
        X_test_norm, _, _ = normalization.zero_one_normalization(
            X_test[:, :-1], self.lower, self.upper)
        X_test_norm = np.concatenate((X_test_norm, X_test[:, None, -1]),
                                     axis=1)
        return super(MTBOGP, self).predict(X_test_norm, **kwargs)
Example #13
    def train(self, X, y, do_optimize=True, **kwargs):

        X_norm, _, _ = normalization.zero_one_normalization(
            X[:, :-1], self.lower, self.upper)
        X_norm = np.concatenate((X_norm, X[:, None, -1]), axis=1)
        return super(MTBOGP, self).train(X_norm, y, do_optimize, **kwargs)
    def train(self, X, y, do_optimize=True, **kwargs):
        """
        Performs MCMC sampling to draw hyperparameter configurations from the
        likelihood and trains a GP on X and y for each sample.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to true, we perform MCMC sampling; otherwise we just use the
            hyperparameters specified in the kernel.
        """

        if self.normalize_input:
            # Normalize input to be in [0, 1]
            self.X, self.lower, self.upper = normalization.zero_one_normalization(X, self.lower, self.upper)

        else:
            self.X = X

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
            if self.y_std == 0:
                raise ValueError("Cannot normalize output. All targets have the same value")
        else:
            self.y = y

        # Use the mean of the data as mean for the GP
        self.mean = np.mean(self.y, axis=0)
        self.gp = george.GP(self.kernel, mean=self.mean)

        if do_optimize:
            # We have one walker for each hyperparameter configuration
            sampler = emcee.EnsembleSampler(self.n_hypers,
                                            len(self.kernel.pars) + 1,
                                            self.loglikelihood)
            sampler.random_state = self.rng.get_state()
            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                if self.prior is None:
                    self.p0 = self.rng.rand(self.n_hypers, len(self.kernel.pars) + 1)
                else:
                    self.p0 = self.prior.sample_from_prior(self.n_hypers)
                # Run MCMC sampling
                self.p0, _, _ = sampler.run_mcmc(self.p0,
                                                 self.burnin_steps,
                                                 rstate0=self.rng)

                self.burned = True

            # Start sampling
            pos, _, _ = sampler.run_mcmc(self.p0,
                                         self.chain_length,
                                         rstate0=self.rng)

            # Save the current position; it will be the starting point in
            # the next iteration
            self.p0 = pos

            # Take the last samples from each walker
            self.hypers = sampler.chain[:, -1]

        else:
            self.hypers = self.gp.kernel[:].tolist()
            self.hypers.append(self.noise)
            self.hypers = [self.hypers]

        self.models = []
        for sample in self.hypers:

            # Instantiate a GP for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            kernel.pars = np.exp(sample[:-1])
            noise = np.exp(sample[-1])
            model = GaussianProcess(kernel,
                                    normalize_output=self.normalize_output,
                                    normalize_input=self.normalize_input,
                                    noise=noise,
                                    lower=self.lower,
                                    upper=self.upper,
                                    rng=self.rng)
            model.train(X, y, do_optimize=False)
            self.models.append(model)

        self.is_trained = True
Example #15
    def normalize(self, X):
        X_norm, _, _ = normalization.zero_one_normalization(X[:, :-1], self.lower, self.upper)
        s_ = self.basis_function(X[:, -1])[:, None]
        X_norm = np.concatenate((X_norm, s_), axis=1)
        return X_norm
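
Here the last column of X is the (relative) dataset size, which is passed through self.basis_function instead of being zero-one normalized; a sketch with a hypothetical Fabolas-style quadratic basis function:

import numpy as np

def quadratic_basis(s):
    # Hypothetical basis function on the relative dataset size s in (0, 1].
    return (1 - s) ** 2

lower, upper = np.zeros(2), np.ones(2)
X = np.array([[0.3, 0.4, 0.125],      # two hyperparameters + relative dataset size
              [0.8, 0.1, 1.0]])

X_cont = (X[:, :-1] - lower) / (upper - lower)
s_ = quadratic_basis(X[:, -1])[:, None]
X_norm = np.concatenate((X_cont, s_), axis=1)
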
Example #16
    def train(self, X, y, do_optimize=True, **kwargs):
        X_norm, _, _ = normalization.zero_one_normalization(X[:, :-1], self.lower, self.upper)
        s_ = self.basis_func(X[:, -1])[:, None]
        self.X = np.concatenate((X_norm, s_), axis=1)

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
        else:
            self.y = y

        # Use the mean of the data as mean for the GP
        mean = np.mean(self.y, axis=0)
        self.gp = george.GP(self.kernel, mean=mean)

        if do_optimize:
            # We have one walker for each hyperparameter configuration
            sampler = emcee.EnsembleSampler(self.n_hypers,
                                            len(self.kernel.pars) + 1,
                                            self.loglikelihood)

            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                if self.prior is None:
                    self.p0 = np.random.rand(self.n_hypers, len(self.kernel.pars) + 1)
                else:
                    self.p0 = self.prior.sample_from_prior(self.n_hypers)
                # Run MCMC sampling
                self.p0, _, _ = sampler.run_mcmc(self.p0,
                                                 self.burnin_steps,
                                                 rstate0=self.rng)

                self.burned = True

            # Start sampling
            pos, _, _ = sampler.run_mcmc(self.p0,
                                         self.chain_length,
                                         rstate0=self.rng)

            # Save the current position; it will be the starting point in
            # the next iteration
            self.p0 = pos

            # Take the last samples from each walker
            self.hypers = sampler.chain[:, -1]

        else:
            if self.hypers is None:
                self.hypers = self.gp.kernel[:].tolist()
                self.hypers.append(self.noise)
                self.hypers = [self.hypers]

        self.models = []
        for sample in self.hypers:

            # Instantiate a GP for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            kernel.pars = np.exp(sample[:-1])
            noise = np.exp(sample[-1])
            model = FabolasGP(kernel,
                              basis_function=self.basis_func,
                              normalize_output=self.normalize_output,
                              noise=noise,
                              lower=self.lower,
                              upper=self.upper,
                              rng=self.rng)
            model.train(X, y, do_optimize=False)
            self.models.append(model)

        self.is_trained = True
Example #18
    def train(self, X, y, do_optimize=True, **kwargs):
        X_norm, _, _ = normalization.zero_one_normalization(X[:, :-1], self.lower, self.upper)
        s_ = self.basis_func(X[:, -1])[:, None]
        self.X = np.concatenate((X_norm, s_), axis=1)

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
        else:
            self.y = y

        # Use the mean of the data as mean for the GP
        mean = np.mean(self.y, axis=0)
        self.gp = george.GP(self.kernel, mean=mean)

        if do_optimize:
            # We have one walker for each hyperparameter configuration
            sampler = emcee.EnsembleSampler(self.n_hypers,
                                            len(self.kernel) + 1,
                                            self.loglikelihood)

            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                if self.prior is None:
                    self.p0 = np.random.rand(self.n_hypers, len(self.kernel.pars) + 1)
                else:
                    self.p0 = self.prior.sample_from_prior(self.n_hypers)
                # Run MCMC sampling
                self.p0, _, _ = sampler.run_mcmc(self.p0,
                                                 self.burnin_steps,
                                                 rstate0=self.rng)

                self.burned = True

            # Start sampling
            pos, _, _ = sampler.run_mcmc(self.p0,
                                         self.chain_length,
                                         rstate0=self.rng)

            # Save the current position; it will be the starting point in
            # the next iteration
            self.p0 = pos

            # Take the last samples from each walker
            self.hypers = sampler.chain[:, -1]

        else:
            if self.hypers is None:
                self.hypers = self.gp.kernel[:].tolist()
                self.hypers.append(self.noise)
                self.hypers = [self.hypers]

        self.models = []
        for sample in self.hypers:

            # Instantiate a GP for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            #kernel.set_parameter_vector(sample[:-1])
            kernel.pars = np.exp(sample[:-1])
            noise = np.exp(sample[-1])
            model = FabolasGP(kernel,
                              basis_function=self.basis_func,
                              normalize_output=self.normalize_output,
                              noise=noise,
                              lower=self.lower,
                              upper=self.upper,
                              rng=self.rng)
            model.train(X, y, do_optimize=False)
            self.models.append(model)

        self.is_trained = True