    def set_new_gp(self, noise_var=None):
        self.gp = GPRegression(
            input_dim=self.domain.d,
            kernel=self.kernel,
            noise_var=noise_var if noise_var else 2.,  # self.config.noise_var,
            calculate_gradients=self.config.calculate_gradients
        )
    def __init__(self, domain, always_calculate=False):
        super(BoringGP, self).__init__(domain)

        # passive projection matrix still needs to be created first!
        # print("WARNING: CONFIG MODE IS: ", config.DEV)
        self.burn_in_samples = 101  # 101 # 102
        self.recalculate_projection_every = 101
        self.active_projection_matrix = None
        self.passive_projection_matrix = None
        self.Q = None

        # some other parameters that are cached
        self.t = 0

        # Setting the datasaver (infrastructure which allows us to save the data to be projected again and again)
        placeholder_kernel = RBF(input_dim=self.domain.d)
        self.datasaver_gp = GPRegression(input_dim=self.domain.d,
                                         kernel=placeholder_kernel,
                                         noise_var=0.01,
                                         calculate_gradients=False)

        # Create a new kernel and create a new GP
        self.create_gp_and_kernels(self.domain.d, 0,
                                   first=True)  # self.domain.d - 2

        # Some post-processing
        self.kernel = self.kernel.copy()
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._bias = self.config.bias
        self.always_calculate = always_calculate
    def set_new_gp(self, noise_var=None):
        self.gp = GPRegression(
            input_dim=self.domain.d,
            kernel=self.kernel,
            noise_var=noise_var if noise_var else 2.,  # TODO: replace with config value!
            calculate_gradients=True  # TODO: replace with config value!
        )
Example #4
    def create_new_gp(self, noise_var=None):
        # Take over data from the old GP, if existent
        print("Creating a new gp!")
        self.gp = GPRegression(
            self.domain.d,
            self.kernel,
            noise_var=0.1,  # noise_var if noise_var is not None else self.config.noise_var,
            calculate_gradients=self.config.calculate_gradients)
    def create_new_gp(self, active_d, noise_var):
        # Take over data from the old GP, if existent
        print("Creating a new gp!")
        self.gp = GPRegression(
            active_d,
            self.kernel,
            noise_var=noise_var,  # noise_var if noise_var is not None else self.config.noise_var,
            calculate_gradients=False  # self.config.calculate_gradients
        )
Example #6
def run_optimization(X, Y, Y_noise, input_d):

    # TODO: Y_noise is not used!
    assert X.shape[1] == input_d, (X.shape, input_d)

    # Run the optimizer
    optimizer = TripathyOptimizer()
    W_hat, sn, l, s, d = optimizer.find_active_subspace(X, Y)

    # Spawn new kernel
    kernel = TripathyMaternKernel(
        real_dim=input_d,
        active_dim=d,
        W=W_hat,
        variance=s,
        lengthscale=l
    )

    # Spawn new GPRegression object
    gpreg = GPRegression(
        input_dim=input_d,
        kernel=kernel,
        noise_var=sn,
        calculate_gradients=True
    )

    # Return the optimized gpreg object
    return gpreg
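
A minimal usage sketch for run_optimization follows; it is hedged: the toy data is made up, input_d=5 is illustrative, and predict is assumed to behave like the usual GPy interface exposed by the returned GPRegression object.

import numpy as np

# Hypothetical toy data: 30 points in 5 dimensions whose target only depends on
# a 2-dimensional subspace of the inputs.
X = np.random.uniform(-1., 1., size=(30, 5))
Y = np.sum(X[:, :2] ** 2, axis=1, keepdims=True)

gpreg = run_optimization(X, Y, Y_noise=None, input_d=5)
mean, var = gpreg.predict(np.zeros((1, 5)))  # GPy-style prediction with the fitted model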
    def create_gp(self):

        self.gp = GPRegression(input_dim=self.domain.d,
                               kernel=self.kernel,
                               noise_var=0.01,
                               calculate_gradients=False)

        # Let the GP take over datapoints from the datasaver!
        X = self.datasaver_gp.X
        Y = self.datasaver_gp.Y
        # Apply the Q transform if it was spawned already!
        if self.Q is not None:
            X = np.dot(X, self.Q)
        if self.Q is not None:
            assert X.shape[1] >= 2, ("Somehow, X was not projected with Q!",
                                     X.shape, 2)  # TODO: change this back to ==!
        self.gp.set_XY(X, Y)
        self._update_cache()
Example #8
    def __init__(self, domain, calculate_always=False):
        super(BoringGP, self).__init__(domain)

        print("Starting tripathy model!")
        self.gp = None

        self.active_d = None
        self.W_hat = None
        self.variance = None
        self.lengthscale = None
        self.noise_var = None

        self.create_new_gp_and_kernel(
            active_d=self.domain.d if self.active_d is None else self.active_d,
            passive_d=0,
            W=np.eye(self.domain.d) if self.active_d is None else self.W_hat,
            variance=1.0 if self.active_d is None else self.variance,
            lengthscale=1.5 if self.active_d is None else self.lengthscale,
            noise_var=None if self.active_d is None else self.noise_var,
        )

        # Create the datasaver GP
        placeholder_kernel = RBF(input_dim=self.domain.d)
        self.datasaver_gp = GPRegression(input_dim=self.domain.d,
                                         kernel=placeholder_kernel,
                                         noise_var=0.1,
                                         calculate_gradients=False)

        # number of data points
        self.t = 0
        self.i = 0
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias
        self.calculate_always = calculate_always

        self.optimizer = TripathyOptimizer()
class TripathyGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.

    """

    def set_new_kernel(self, d, W=None, variance=None, lengthscale=None):
        self.kernel = TripathyMaternKernel(
            real_dim=self.domain.d,
            active_dim=d,
            W=W,
            variance=variance,
            lengthscale=lengthscale
        )

    def set_new_gp(self, noise_var=None):
        self.gp = GPRegression(
            input_dim=self.domain.d,
            kernel=self.kernel,
            noise_var=noise_var if noise_var else 2., # self.config.noise_var,
            calculate_gradients=self.config.calculate_gradients
        )

    def set_new_gp_and_kernel(self, d, W, variance, lengthscale, noise_var):
        self.set_new_kernel(d, W, variance, lengthscale)
        self.set_new_gp(noise_var)

    def __init__(self, domain, calculate_always=False):
        super(TripathyGP, self).__init__(domain)

        self.optimizer = TripathyOptimizer()

        # TODO: d is chosen to be an arbitrary value rn!
        self.set_new_gp_and_kernel(2, None, None, None, None)

        # number of data points
        self.t = 0
        self.kernel = self.kernel.copy()
        self._woodbury_chol = np.asfortranarray(self.gp.posterior._woodbury_chol)  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0,1))
        self._beta = 2
        self._bias = self.config.bias
        self.calculate_always = calculate_always

    @property
    def beta(self):
        return self._beta

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return GPRegression(self.domain.d, self.kernel, noise_var=self.config.noise_var, calculate_gradients=self.config.calculate_gradients)

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.
        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        self.i = 1 if not ("i" in dir(self)) else self.i + 1
        # print("Add data ", self.i)
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)

        self.set_data(x, y, append=True)

    # TODO: check if this is called anyhow!
    def optimize(self):
        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()

        self._update_beta()

    def _optimize_bias(self):
        self._bias = minimize(self._bias_loss, self._bias, method='L-BFGS-B')['x'].copy()
        self._set_bias(self._bias)
        logger.info(f"Updated bias to {self._bias}")

    def _bias_loss(self, c):
        # calculate mean and norm for new bias via a new woodbury_vector
        new_woodbury_vector,_= dpotrs(self._woodbury_chol, self._Y - c, lower=1)
        K = self.gp.kern.K(self.gp.X)
        mean = np.dot(K, new_woodbury_vector)
        norm = new_woodbury_vector.T.dot(mean)
        # loss is least_squares_error + norm
        return np.asscalar(np.sum(np.square(mean + c - self._Y)) + norm)

    def _set_bias(self, c):
        self._bias = c
        self.gp.set_Y(self._Y - c)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1/self.delta) + (logdet - logdet_priornoise)) + self._norm()

    def _optimize_var(self):
        # fix all parameters
        for p in self.gp.parameters:
            p.fix()

        if self.gp.kern.name == 'sum':
            for part in self.gp.kern.parts:
                part.variance.unfix()
        else:
            self.gp.kern.variance.unfix()
        self.gp.optimize()
        if self.gp.kern.name == 'sum':
            values = []
            for part in self.gp.kern.parts:
                values.append(np.asscalar(part.variance.values))
        else:
            values = np.asscalar(self.gp.kern.variance.values)

        logger.info(f"Updated prior variance to {values}")
        # unfix all parameters
        for p in self.gp.parameters:
            p.unfix()

    def _get_logdet(self):
        return 2.*np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)

    def mean_var(self, x):
        """Recompute the confidence intervals form the GP.
        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)

        if self.config.calculate_gradients or True:
            mean,var = self.gp.predict_noiseless(x)
        else:
            mean,var = self._raw_predict(x)

        return mean + self._bias, var

    def mean_var_grad(self, x):
        return self.gp.predictive_gradients(x)

    def var(self, x):
        return self.mean_var(x)[1]

    def mean(self, x):
        return self.mean_var(x)[0]

    def set_data(self, X, Y, append=True):
        if append:
            X = np.concatenate((self.gp.X, X))
            Y = np.concatenate((self.gp.Y, Y))

        # Do our optimization now
        if self.i % 100 == 99 or self.calculate_always:
            import time
            start_time = time.time()
            print("Adding data: ", self.i)

            # TODO: UNCOMMENT THE FOLLOWING LINE AGAIN!
            # This is just to check if tripathy conforms with the other version
            # W_hat, sn, l, s, d = self.optimizer.find_active_subspace(X, Y)
            d = 2
            W_hat = np.asarray([
                [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
                [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
            ]).T
            s = 1.
            l = 1.5
            sn = 2. # 0.01 #self.config.noise_var

            print("--- %s seconds ---" % (time.time() - start_time))

            # Overwrite GP and kernel values
            self.set_new_gp_and_kernel(d=d, W=W_hat, variance=s, lengthscale=l, noise_var=sn)

        # TODO: Should the following not come before the optimization?

        self.gp.set_XY(X, Y)
        self.t = X.shape[0]
        self._update_cache()

    def sample(self, X=None):
        class GPSampler:
            def __init__(self, X, Y, kernel, var):
                self.X = X
                self.Y = Y
                self.N = var * np.ones(shape=Y.shape)
                self.kernel = kernel
                self.m = GPy.models.GPHeteroscedasticRegression(self.X, self.Y, self.kernel)
                self.m['.*het_Gauss.variance'] = self.N

            def __call__(self, X):
                X = np.atleast_2d(X)
                sample = np.empty(shape=(X.shape[0], 1))

                # iteratively generate sample values for all x in x_test
                for i, x in enumerate(X):
                    sample[i] = self.m.posterior_samples_f(x.reshape((1, -1)), size=1)

                    # add observation as without noise
                    self.X = np.vstack((self.X, x))
                    self.Y = np.vstack((self.Y, sample[i]))
                    self.N = np.vstack((self.N, 0))

                    # recalculate model
                    self.m = GPy.models.GPHeteroscedasticRegression(self.X, self.Y, self.kernel)
                    self.m['.*het_Gauss.variance'] = self.N  # Set the noise parameters to the error in Y

                return sample

        return GPSampler(self.gp.X.copy(), self.gp.Y.copy(), self.kernel, self.gp.likelihood.variance)

    def _raw_predict(self, Xnew):

        Kx = self.kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape)==1:
            mu = mu.reshape(-1,1)

        Kxx = self.kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:,None]
        return mu, var

    def _raw_predict_covar(self, Xnew, Xcond):
        Kx = self.kernel.K(self._X, np.vstack((Xnew,Xcond)))
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]

        n = Xnew.shape[0]
        tmp1 = tmp[:,:n]
        tmp2 = tmp[:,n:]

        Kxx = self.kernel.K(Xnew, Xcond)
        var = Kxx - (tmp1.T).dot(tmp2)

        Kxx_new = self.kernel.Kdiag(Xnew)
        var_Xnew = (Kxx_new - np.square(tmp1).sum(0))[:,None]
        return var_Xnew, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['gp'] # remove the gp from state dict to allow pickling. calculations are done via the cache woodbury/cholesky
        return self_dict
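
For reference, here is a standalone NumPy/SciPy sketch of the cached posterior computation that _raw_predict performs, with a hand-rolled RBF kernel standing in for self.kernel; all names below are illustrative and nothing here is part of the project itself.

import numpy as np
from scipy.linalg import solve_triangular

def rbf(A, B, lengthscale=1.0, variance=1.0):
    # squared-exponential kernel matrix between the rows of A and B
    d2 = np.sum(A**2, 1)[:, None] + np.sum(B**2, 1)[None, :] - 2. * A @ B.T
    return variance * np.exp(-0.5 * d2 / lengthscale**2)

X = np.random.randn(20, 2)
Y = np.sin(X[:, :1]) + 0.1 * np.random.randn(20, 1)
noise_var = 0.01

K = rbf(X, X) + noise_var * np.eye(20)
chol = np.linalg.cholesky(K)        # plays the role of _woodbury_chol (lower triangular)
alpha = np.linalg.solve(K, Y)       # plays the role of _woodbury_vector

Xnew = np.random.randn(5, 2)
Kx = rbf(X, Xnew)
mu = Kx.T @ alpha                                   # posterior mean, as in _raw_predict
tmp = solve_triangular(chol, Kx, lower=True)        # same role as lapack.dtrtrs(...)
var = (np.diag(rbf(Xnew, Xnew)) - np.square(tmp).sum(0))[:, None]  # posterior variance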
    def _get_gp(self):
        return GPRegression(self.domain.d, self.kernel, noise_var=self.config.noise_var, calculate_gradients=self.config.calculate_gradients)
Example #11
class BoringGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.

    """
    def create_new_kernel(self,
                          active_d,
                          passive_d=0,
                          W=None,
                          variance=None,
                          lengthscale=None):
        print("Creating a new kernel!")
        self.kernel = Matern32(input_dim=active_d,
                               variance=variance,
                               lengthscale=lengthscale,
                               ARD=True,
                               active_dims=np.arange(active_d),
                               name="active_subspace_kernel")

        for i in range(passive_d):
            cur_kernel = Matern32(input_dim=1,
                                  variance=variance,
                                  lengthscale=1.,
                                  ARD=True,
                                  active_dims=[active_d + i],
                                  name="passive_subspace_kernel_" + str(i))
            self.kernel += cur_kernel

        print("New resulting kernel is: ", self.kernel)

    def create_new_gp(self, noise_var=None):
        # Take over data from the old GP, if existent
        print("Creating a new gp!")
        self.gp = GPRegression(
            self.domain.d,
            self.kernel,
            noise_var=0.1,  # noise_var if noise_var is not None else self.config.noise_var,
            calculate_gradients=self.config.calculate_gradients)

    def create_new_gp_and_kernel(self, active_d, passive_d, W, variance,
                                 lengthscale, noise_var):
        self.create_new_kernel(active_d=active_d,
                               passive_d=passive_d,
                               W=W,
                               variance=variance,
                               lengthscale=lengthscale)
        self.create_new_gp(noise_var=noise_var)
        print("Got kernel: ")
        print(self.kernel)

    def __init__(self, domain, calculate_always=False):
        super(BoringGP, self).__init__(domain)

        print("Starting tripathy model!")
        self.gp = None

        self.active_d = None
        self.W_hat = None
        self.variance = None
        self.lengthscale = None
        self.noise_var = None

        self.create_new_gp_and_kernel(
            active_d=self.domain.d if self.active_d is None else self.active_d,
            passive_d=0,
            W=np.eye(self.domain.d) if self.active_d is None else self.W_hat,
            variance=1.0 if self.active_d is None else self.variance,
            lengthscale=1.5 if self.active_d is None else self.lengthscale,
            noise_var=None if self.active_d is None else self.noise_var,
        )

        # Create the datasaver GP
        placeholder_kernel = RBF(input_dim=self.domain.d)
        self.datasaver_gp = GPRegression(input_dim=self.domain.d,
                                         kernel=placeholder_kernel,
                                         noise_var=0.1,
                                         calculate_gradients=False)

        # number of data points
        self.t = 0
        self.i = 0
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias
        self.calculate_always = calculate_always

        self.optimizer = TripathyOptimizer()

    # Obligatory values
    @property
    def beta(self):
        return self._beta

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return self.gp

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.
        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)

        assert x.shape[1] == self.domain.d, "Input dimension is not the one of the domain!"

        self.i += 1

        self.set_data(x, y, append=True)

    def optimize(self):
        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()  # TODO: should it be gp, or datasaver_gp?

        self._update_beta()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1 / self.delta) +
                             (logdet - logdet_priornoise)) + self._norm()

    def _get_logdet(self):
        return 2. * np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)

    def mean_var(self, x):
        """Recompute the confidence intervals form the GP.
        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)

        # Need to project x with the projection matrix W_hat, if it has been computed
        if self.W_hat is not None:
            x = np.dot(x, self.W_hat)

        if self.config.calculate_gradients and False:  # or True:
            mean, var = self.gp.predict_noiseless(x)
        else:
            mean, var = self._raw_predict(x)

        return mean + self._bias, var

    def var(self, x):
        return self.mean_var(x)[1]

    def mean(self, x):
        return self.mean_var(x)[0]

    # TODO: Implement the thing finder in here!
    def set_data(self, X, Y, append=True):
        if append:
            X = np.concatenate((self.datasaver_gp.X, X), axis=0)
            Y = np.concatenate((self.datasaver_gp.Y, Y), axis=0)
        self._set_datasaver_data(X, Y)

        if self.i % 500 == 100 or self.calculate_always:

            print("Adding datapoint: ", self.i)

            # print("Datasaver X is: ")
            # print(self.datasaver_gp.X)
            #
            # print("Datasaver Y is: ")
            # print(self.datasaver_gp.Y)
            #
            # print("That's it")
            # exit(0)

            self.A, self.noise_var, self.lengthscale, self.variance, self.active_d = self.optimizer.find_active_subspace(
                X, Y, load=False)

            gc.collect()

            passive_dimensions = max(self.domain.d - self.active_d, 0)
            passive_dimensions = min(passive_dimensions, 1)

            # Generate the subspace projection
            # Generate A^{bot} if there's more dimensions
            if passive_dimensions > 0:
                self.AT = generate_orthogonal_matrix_to_A(A=self.A,
                                                          n=passive_dimensions)
                self.W_hat = np.concatenate((self.A, self.AT), axis=1)
            else:
                self.AT = None
                self.W_hat = self.A

            assert not np.isnan(self.W_hat).any(), (
                "The projection matrix contains nan's!", self.W_hat)
            assert self.W_hat.shape == (self.domain.d,
                                        self.active_d + passive_dimensions), (
                                            "Created wrong projection shape: ",
                                            self.W_hat.shape, self.active_d,
                                            passive_dimensions)

            print("Found parameters are: ")
            print("W: ", self.W_hat)
            print("noise_var: ", self.noise_var)
            print("lengthscale: ", self.lengthscale)
            print("variance: ", self.variance)

            # For the sake of creating a kernel with new dimensions!
            self.create_new_gp_and_kernel(active_d=self.active_d,
                                          passive_d=passive_dimensions,
                                          W=self.W_hat,
                                          variance=self.variance,
                                          lengthscale=self.lengthscale,
                                          noise_var=self.noise_var)

        if self.W_hat is None:
            self._set_data(X, Y)
        else:
            Z = np.dot(X, self.W_hat)
            self._set_data(Z, Y)

        # self.gp.optimize()

    def _set_datasaver_data(self, X, Y):
        self.datasaver_gp.set_XY(X, Y)

    def _set_data(self, X, Y):
        self.gp.set_XY(X, Y)
        self.t = X.shape[0]
        self._update_cache()

    def _raw_predict(self, Xnew):

        Kx = self.kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        Kxx = self.kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol,
                            Kx,
                            lower=1,
                            trans=0,
                            unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:, None]
        return mu, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(
            self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['gp']  # remove the gp from state dict to allow pickling. calculations are done via the cache woodbury/cholesky
        return self_dict
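
The projection bookkeeping in BoringGP.set_data can be illustrated in isolation. The sketch below uses a QR factorization as a stand-in for generate_orthogonal_matrix_to_A (whose exact behaviour is not shown in this listing); all dimensions are toy values.

import numpy as np

d, active_d = 5, 2
A = np.linalg.qr(np.random.randn(d, active_d))[0]   # active subspace basis, shape (d, active_d)

# one extra direction orthogonal to the columns of A (role of generate_orthogonal_matrix_to_A)
q, _ = np.linalg.qr(np.hstack([A, np.random.randn(d, 1)]))
AT = q[:, active_d:active_d + 1]
W_hat = np.concatenate((A, AT), axis=1)             # shape (d, active_d + 1)

X = np.random.uniform(-1., 1., size=(10, d))
Z = np.dot(X, W_hat)                                # low-dimensional inputs handed to the GP
assert Z.shape == (10, active_d + 1)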
    def __init__(self, domain, calculate_always=False):
        super(TripathyGP, self).__init__(domain)

        print("Starting tripathy model!")
        self.gp = None

        # Just for completeness
        # self.active_d = None
        # self.W_hat = None
        # self.variance = None
        # self.lengthscale = None
        # self.noise_var = None

        # DEFAULT SETTINGS
        self.W_hat = np.eye(self.domain.d)
        # print(self.config.kernels[0][1])
        self.noise_var = 0.005
        self.lengthscale = 2.5
        self.variance = 1.0
        self.active_d = self.domain.d

        self.W_hat = np.asarray([
            [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
            [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
        ])
        self.noise_var = 0.005
        self.lengthscale = 2.5
        self.variance = 1.0
        self.active_d = 2

        self.create_new_gp_and_kernel(
            active_d=self.active_d,
            variance=self.variance,
            lengthscale=self.lengthscale,
            noise_var=self.noise_var
        )

        # Create the datasaver GP
        placeholder_kernel = RBF(
            input_dim=self.domain.d
        )
        self.datasaver_gp = GPRegression(
            input_dim=self.domain.d,
            kernel=placeholder_kernel,
            noise_var=self.noise_var,
            calculate_gradients=False
        )

        # JOHANNES: I took the following operations over from the febo GP

        # number of data points
        self.t = 0
        self.i = 0
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol)  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias
        self.calculate_always = calculate_always

        self.optimizer = TripathyOptimizer()
class TripathyGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.

    """

    # JOHANNES: The following three functions are helpers that re-spawn
    # the kernel and the GP, since we will have to do that again later on

    def create_new_kernel(self, active_d, variance, lengthscale):
        print("Creating a new kernel!")
        self.kernel = Matern32(
            input_dim=active_d,
            variance=variance,
            lengthscale=lengthscale,
            ARD=True,
            active_dims=np.arange(active_d),
            name="active_subspace_kernel"
        )
        print("Kernel is: ", self.kernel)

    def create_new_gp(self, active_d, noise_var):
        # Take over data from the old GP, if existent
        print("Creating a new gp!")
        self.gp = GPRegression(
            active_d,
            self.kernel,
            noise_var=noise_var,  # noise_var if noise_var is not None else self.config.noise_var,
            calculate_gradients=False  # self.config.calculate_gradients
        )

    def create_new_gp_and_kernel(self, active_d, variance, lengthscale, noise_var):
        self.create_new_kernel(
            active_d=active_d,
            variance=variance,
            lengthscale=lengthscale
        )
        self.create_new_gp(
            active_d=active_d,
            noise_var=noise_var
        )
        print("Got kernel: ")
        print(self.kernel)

    def __init__(self, domain, calculate_always=False):
        super(TripathyGP, self).__init__(domain)

        print("Starting tripathy model!")
        self.gp = None

        # Just for completeness
        # self.active_d = None
        # self.W_hat = None
        # self.variance = None
        # self.lengthscale = None
        # self.noise_var = None

        # DEFAULT SETTINGS
        self.W_hat = np.eye(self.domain.d)
        # print(self.config.kernels[0][1])
        self.noise_var = 0.005
        self.lengthscale = 2.5
        self.variance = 1.0
        self.active_d = self.domain.d

        self.W_hat = np.asarray([
            [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
            [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
        ])
        self.noise_var = 0.005
        self.lengthscale = 2.5
        self.variance = 1.0
        self.active_d = 2

        self.create_new_gp_and_kernel(
            active_d=self.active_d,
            variance=self.variance,
            lengthscale=self.lengthscale,
            noise_var=self.noise_var
        )

        # Create the datasaver GP
        placeholder_kernel = RBF(
            input_dim=self.domain.d
        )
        self.datasaver_gp = GPRegression(
            input_dim=self.domain.d,
            kernel=placeholder_kernel,
            noise_var=self.noise_var,
            calculate_gradients=False
        )

        # JOHANNES: I took the following operations over from the febo GP

        # number of data points
        self.t = 0
        self.i = 0
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol)  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias
        self.calculate_always = calculate_always

        self.optimizer = TripathyOptimizer()
        # self.set_data(self._X, self._Y)

    # Obligatory values
    @property
    def beta(self):
        return self._beta

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return self.gp

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.
        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)

        assert x.shape[1] == self.domain.d, "Input dimension is not the one of the domain!"

        self.i += 1

        self.set_data(x, y, append=True)

    def optimize(self):
        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()  # TODO: should it be gp, or datasaver_gp?

        self._update_beta()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1 / self.delta) + (logdet - logdet_priornoise)) + self._norm()

    def _get_logdet(self):
        return 2. * np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)

    def mean_var(self, x):
        """Recompute the confidence intervals form the GP.
        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)

        x = np.dot(x, self.W_hat.T)
        assert x.shape[1] == self.active_d, (
            "The projected dimension does not equal to the active dimension: ", (self.active_d, x.shape))

        if self.config.calculate_gradients and False:  # or True:
            mean, var = self.gp.predict_noiseless(x)
        else:
            mean, var = self._raw_predict(x)

        return mean + self._bias, var

    def var(self, x):
        return self.mean_var(x)[1]

    def mean(self, x):
        return self.mean_var(x)[0]

    # TODO: Implement the thing finder in here!
    def set_data(self, X, Y, append=True):
        if append:
            X = np.concatenate((self.datasaver_gp.X, X), axis=0)
            Y = np.concatenate((self.datasaver_gp.Y, Y), axis=0)
        self._set_datasaver_data(X, Y)

        if self.i % 500 == 1 or self.calculate_always:
            print("Adding datapoint: ", self.i)

            ####################
            # PRETRAINED VALUES
            ####################

            # CAMELBACK
            # self.W_hat = np.asarray(
            #     [[-0.33867927, -0.46107057],
            #      [0.45801778, 0.2080514],
            #      [0.26060095, 0.65276822],
            #      [0.56757381, -0.28423894],
            #      [0.53428755, -0.48706305]
            #      ]).T
            # self.noise_var = 0.005
            # self.lengthscale = np.asarray([1.5, 0.5])
            # self.variance = 44.0
            # self.active_d = 2

            #############
            # REAL VALUES
            #############
            if self.domain.d == 2:
                self.W_hat = np.asarray([
                    [-0.46375963, -0.88596106],
                    [-0.88596106, 0.46375963]
                ])
                self.noise_var = 0.005
                self.lengthscale = 2.5
                self.variance = 1.0
                self.active_d = 2

            elif self.domain.d == 3:
                # CAMELBACK-5D
                self.W_hat = np.asarray([
                    [-0.46554187, -0.36224966, 0.80749362],
                    [0.69737806, -0.711918, 0.08268378]
                ])
                self.noise_var = 0.005
                self.lengthscale = 2.5
                self.variance = 1.0
                self.active_d = 2

            elif self.domain.d == 4:
                # CAMELBACK-4D
                self.W_hat = np.asarray([
                    [-0.50445148, -0.40016722, -0.48737089, -0.58980041],
                    [-0.20042413, -0.65288502, -0.12700055, 0.71933454]
                ])
                self.noise_var = 0.005
                self.lengthscale = 2.5
                self.variance = 1.0
                self.active_d = 2

            elif self.domain.d == 5:
                # CAMELBACK-3D
                self.W_hat = np.asarray([
                    [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
                    [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
                ])
                self.noise_var = 0.005
                self.lengthscale = 2.5
                self.variance = 1.0
                self.active_d = 2

            else:
                print("Something went terribly wrong!")
                exit(0)

            # self.W_hat, self.noise_var, self.lengthscale, self.variance, self.active_d = self.optimizer.find_active_subspace(
            #     X, Y, load=False)

            gc.collect()

            print("USING CAMELBACK FUNCTION IN HIGHER D ::: ", self.domain.d)

            print("Found parameters are: ")
            print("W: ", self.W_hat)
            print("noise_var: ", self.noise_var)
            print("lengthscale: ", self.lengthscale)
            print("variance: ", self.variance)

            # For the sake of creating a kernel with new dimensions!
            self.create_new_gp_and_kernel(
                active_d=self.active_d,
                variance=self.variance,
                lengthscale=self.lengthscale,
                noise_var=self.noise_var
            )

        if self.i % 500 == 299:

            print("TRIPATHY :: Likelihood of the current GP is: ", self.gp.log_likelihood())

        assert X.shape[1] == self.W_hat.shape[1], (X.shape, self.W_hat.shape)
        # print(X.shape, self.W_hat.shape)
        Z = np.dot(X, self.W_hat.T)
        assert Z.shape[1] == self.active_d, (
            "Projected Z does not conform to active dimension", (Z.shape, self.active_d))
        self._set_data(Z, Y)

    def _set_datasaver_data(self, X, Y):
        self.datasaver_gp.set_XY(X, Y)

    def _set_data(self, X, Y):
        self.gp.set_XY(X, Y)
        self.t = X.shape[0]
        self._update_cache()

    def _raw_predict(self, Xnew):

        assert Xnew.shape[1] == self.active_d, ("Somehow, the input was not projected")

        Kx = self.kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        Kxx = self.kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:, None]
        return mu, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['gp']  # remove the gp from state dict to allow pickling. calculations are done via the cache woodbury/cholesky
        return self_dict
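
As a standalone illustration of _update_beta, _get_logdet and _get_logdet_prior_noise, the following sketch recomputes the confidence scaling with toy numbers; delta, the covariance matrix and the RKHS-norm placeholder are all made up rather than taken from the febo config.

import numpy as np

delta = 0.05
t = 20
noise_var = 0.005

A = np.random.randn(t, 5)
K = A @ A.T + noise_var * np.eye(t)            # toy posterior covariance K + sigma^2 I
chol = np.linalg.cholesky(K)                   # role of _woodbury_chol

logdet = 2. * np.sum(np.log(np.diag(chol)))    # _get_logdet
logdet_priornoise = t * np.log(noise_var)      # _get_logdet_prior_noise
norm = 1.0                                     # placeholder for the RKHS norm from _norm()
beta = np.sqrt(2 * np.log(1 / delta) + (logdet - logdet_priornoise)) + norm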
class ClassicalActiveSubspaceGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.

    """
    def set_new_kernel(self, d, W=None, variance=None, lengthscale=None):
        self.kernel = TripathyMaternKernel(real_dim=self.domain.d,
                                           active_dim=d,
                                           W=W,
                                           variance=variance,
                                           lengthscale=lengthscale)

    def set_new_gp(self, noise_var=None):
        self.gp = GPRegression(
            input_dim=self.domain.d,
            kernel=self.kernel,
            noise_var=noise_var if noise_var else 2.,  # TODO: replace with config value!
            calculate_gradients=True  # TODO: replace with config value!
        )

    def set_new_gp_and_kernel(self, d, W, variance, lengthscale, noise_var):
        self.set_new_kernel(d, W, variance, lengthscale)
        self.set_new_gp(noise_var)

    #         # from .t_kernel import TripathyMaternKernel
    #         TripathyMaternKernel.__module__ = "tripathy.src.t_kernel"

    def __init__(self, domain):
        super(ClassicalActiveSubspaceGP, self).__init__(domain)

        self.optimizer = TripathyOptimizer()

        # TODO: d is chosen to be an arbitrary value rn!
        # self.set_new_kernel(2, None, None)
        # self.set_new_gp(None)
        self.set_new_gp_and_kernel(2, None, None, None, None)

        # calling of the kernel
        # self.gp = self._get_gp() # TODO: does this actually create a new gp?
        # number of data points
        self.t = 0
        self.kernel = self.kernel.copy()
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias

    @property
    def beta(self):
        return self._beta

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return GPRegression(
            self.domain.d,
            self.kernel,
            noise_var=self.config.noise_var,
            calculate_gradients=self.config.calculate_gradients)

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.
        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        self.i = 1 if not ("i" in dir(self)) else self.i + 1
        print("Add data ", self.i)
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)

        self.set_data(x, y, append=True)

    # TODO: check if this is called anyhow!
    def optimize(self):
        # if self.config.optimize_bias:
        #     self._optimize_bias()
        # if self.config.optimize_var:
        #     self._optimize_var()

        # self.optimizer.find_active_subspace(self.X, self.Y)

        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()

        self._update_beta()

    def _optimize_bias(self):
        self._bias = minimize(self._bias_loss, self._bias,
                              method='L-BFGS-B')['x'].copy()
        self._set_bias(self._bias)
        logger.info(f"Updated bias to {self._bias}")

    def _bias_loss(self, c):
        # calculate mean and norm for new bias via a new woodbury_vector
        new_woodbury_vector, _ = dpotrs(self._woodbury_chol,
                                        self._Y - c,
                                        lower=1)
        K = self.gp.kern.K(self.gp.X)
        mean = np.dot(K, new_woodbury_vector)
        norm = new_woodbury_vector.T.dot(mean)
        # loss is least_squares_error + norm
        return np.asscalar(np.sum(np.square(mean + c - self._Y)) + norm)

    def _set_bias(self, c):
        self._bias = c
        self.gp.set_Y(self._Y - c)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1 / self.delta) +
                             (logdet - logdet_priornoise)) + self._norm()

    def _optimize_var(self):
        # fix all parameters
        for p in self.gp.parameters:
            p.fix()

        if self.gp.kern.name == 'sum':
            for part in self.gp.kern.parts:
                part.variance.unfix()
        else:
            self.gp.kern.variance.unfix()
        self.gp.optimize()
        if self.gp.kern.name == 'sum':
            values = []
            for part in self.gp.kern.parts:
                values.append(np.asscalar(part.variance.values))
        else:
            values = np.asscalar(self.gp.kern.variance.values)

        logger.info(f"Updated prior variance to {values}")
        # unfix all parameters
        for p in self.gp.parameters:
            p.unfix()

    def _get_logdet(self):
        return 2. * np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)

    def mean_var(self, x):
        """Recompute the confidence intervals form the GP.
        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)

        if self.config.calculate_gradients:
            mean, var = self.gp.predict_noiseless(x)
        else:
            mean, var = self._raw_predict(x)

        return mean + self._bias, var

    def mean_var_grad(self, x):
        return self.gp.predictive_gradients(x)

    def var(self, x):
        return self.mean_var(x)[1]

    # TODO: is this a bug?
    def predictive_var(self, X, X_cond, S_X, var_Xcond=None):
        X = np.atleast_2d(X)
        X_cond = np.atleast_2d(X_cond)
        var_X, KXX = self._raw_predict_covar(X, X_cond)

        if var_Xcond is None:
            var_Xcond = self.var(X_cond)

        return var_Xcond - KXX * KXX / (S_X * S_X + var_X)

    def mean(self, x):
        return self.mean_var(x)[0]

    def set_data(self, X, Y, append=True):
        if append:
            X = np.concatenate((self.gp.X, X))
            Y = np.concatenate((self.gp.Y, Y))

        # Do our optimization now
        if self.i % 3 == 0:
            import time
            start_time = time.time()

            W_hat, sn, l, s, d = self.optimizer.find_active_subspace(X, Y)

            print("--- %s seconds ---" % (time.time() - start_time))

            # Overwrite GP and kernel values
            # TODO: W_hat not used ----
            self.set_new_gp_and_kernel(d=d,
                                       W=W_hat,
                                       variance=s,
                                       lengthscale=l,
                                       noise_var=sn)

        self.gp.set_XY(X, Y)
        self.t = X.shape[0]
        self._update_cache()

    # TODO: merge all the following code with the current function!
    #         print("Looking for optimal subspace!")
    #         W_hat, sn, l, s, d = self.optimizer.find_active_subspace(X=X, Y=Y)
    #
    #         print("Found optimal subspace")
    #
    #         # Set the newly found hyperparameters everywhere
    #         # Not found by pycharm bcs the kernel is an abstract object as of now
    #         # self.kernel.update_params(W=W_hat, s=s, l=l)
    #         # self.gp.kern.update_params(W=W_hat, s=s, l=l)
    #
    #         # Create a new GP (bcs this is spaghetti code!)
    #         self.set_new_kernel_and_gp(
    #             d=d,
    #             variance=s,
    #             lengthscale=l,
    #             noise_var=sn
    #         )

    def sample(self, X=None):
        class GPSampler:
            def __init__(self, X, Y, kernel, var):
                self.X = X
                self.Y = Y
                self.N = var * np.ones(shape=Y.shape)
                self.kernel = kernel
                self.m = GPy.models.GPHeteroscedasticRegression(
                    self.X, self.Y, self.kernel)
                self.m['.*het_Gauss.variance'] = self.N

            def __call__(self, X):
                X = np.atleast_2d(X)
                sample = np.empty(shape=(X.shape[0], 1))

                # iteratively generate sample values for all x in x_test
                for i, x in enumerate(X):
                    sample[i] = self.m.posterior_samples_f(x.reshape((1, -1)),
                                                           size=1)

                    # add observation as without noise
                    self.X = np.vstack((self.X, x))
                    self.Y = np.vstack((self.Y, sample[i]))
                    self.N = np.vstack((self.N, 0))

                    # recalculate model
                    self.m = GPy.models.GPHeteroscedasticRegression(
                        self.X, self.Y, self.kernel)
                    self.m[
                        '.*het_Gauss.variance'] = self.N  # Set the noise parameters to the error in Y

                return sample

        return GPSampler(self.gp.X.copy(), self.gp.Y.copy(), self.kernel,
                         self.gp.likelihood.variance)

    def _raw_predict(self, Xnew):

        Kx = self.kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        Kxx = self.kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol,
                            Kx,
                            lower=1,
                            trans=0,
                            unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:, None]
        return mu, var

    def _raw_predict_covar(self, Xnew, Xcond):
        Kx = self.kernel.K(self._X, np.vstack((Xnew, Xcond)))
        tmp = lapack.dtrtrs(self._woodbury_chol,
                            Kx,
                            lower=1,
                            trans=0,
                            unitdiag=0)[0]

        n = Xnew.shape[0]
        tmp1 = tmp[:, :n]
        tmp2 = tmp[:, n:]

        Kxx = self.kernel.K(Xnew, Xcond)
        var = Kxx - (tmp1.T).dot(tmp2)

        Kxx_new = self.kernel.Kdiag(Xnew)
        var_Xnew = (Kxx_new - np.square(tmp1).sum(0))[:, None]
        return var_Xnew, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(
            self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['gp']  # remove the gp from state dict to allow pickling. calculations are done via the cache woodbury/cholesky
        return self_dict
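
The sequential conditioning idea inside GPSampler (draw a value, then append it as a noise-free observation before drawing the next one) can be sketched without GPy as follows; the RBF kernel and all numbers are illustrative stand-ins, not the project's own sampler.

import numpy as np

def rbf(A, B, ls=1.0, var=1.0):
    d2 = np.sum(A**2, 1)[:, None] + np.sum(B**2, 1)[None, :] - 2. * A @ B.T
    return var * np.exp(-0.5 * d2 / ls**2)

rng = np.random.default_rng(0)
X = rng.uniform(-1., 1., size=(15, 2))
Y = np.sin(X[:, :1])
N = np.full((15, 1), 0.01)                     # per-point noise, role of self.N

X_test = rng.uniform(-1., 1., size=(4, 2))
sample = np.empty((4, 1))
for i, x in enumerate(X_test):
    K = rbf(X, X) + np.diagflat(N)
    Kx = rbf(X, x[None, :])
    mu = (Kx.T @ np.linalg.solve(K, Y)).item()
    var = (rbf(x[None, :], x[None, :]) - Kx.T @ np.linalg.solve(K, Kx)).item()
    sample[i] = rng.normal(mu, np.sqrt(max(var, 0.0)))
    # condition on the drawn value with zero observation noise before the next draw
    X = np.vstack((X, x))
    Y = np.vstack((Y, sample[i]))
    N = np.vstack((N, [[0.0]]))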
    def __init__(self, domain, calculate_always=False):
        super(TripathyGP, self).__init__(domain)

        print("Starting tripathy model!")
        self.gp = None

        # Just for completeness
        # self.active_d = None
        # self.W_hat = None
        # self.variance = None
        # self.lengthscale = None
        # self.noise_var = None

        # DEFAULT SETTINGS
        self.W_hat = np.eye(self.domain.d)
        # print(self.config.kernels[0][1])
        # self.lengthscale = 2.5  # self.config.kernels[0][1]['lengthscale']  # TODO: how to get it from config!!!
        # self.variance = 1.  # self.config.kernels[0][1]['variance']
        # self.noise_var = 0.005
        self.active_d = self.domain.d

        # PARABOLA
        # self.W_hat = np.asarray([[0.49969147, 0.1939272]]) # np.random.rand(self.d, 1).T
        # self.noise_var = 0.005
        # self.lengthscale = 6
        # self.variance = 2.5
        # self.active_d = 1

        # SINUSOIDAL
        # self.W_hat = np.asarray([
        #     [-0.41108301, 0.22853536, -0.51593653, -0.07373475, 0.71214818],
        #     [ 0.00412458, -0.95147725, -0.28612815, -0.06316891, 0.093885]
        # ])
        # self.noise_var = 0.005
        # self.lengthscale = 1.3
        # self.variance = 0.15
        # self.active_d = 2

        # CAMELBACK
        # self.W_hat = np.asarray([
        #     [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
        #     [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
        # ])
        self.noise_var = 0.005
        self.lengthscale = 2.5
        self.variance = 1.0
        # self.active_d = 2

        self.create_new_gp_and_kernel(active_d=self.active_d,
                                      variance=self.variance,
                                      lengthscale=self.lengthscale,
                                      noise_var=self.noise_var)

        # JOHANNES: So that we can later use other matrices for the projection, I store the
        # data somewhere. I use the GP data structure to store this data, simply because
        # that is easier

        # Create the datasaver GP
        placeholder_kernel = RBF(input_dim=self.domain.d)
        self.datasaver_gp = GPRegression(input_dim=self.domain.d,
                                         kernel=placeholder_kernel,
                                         noise_var=self.noise_var,
                                         calculate_gradients=False)

        # JOHANNES: I took the following operations over from the febo GP

        # number of data points
        self.t = 0
        self.i = 0
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias
        self.calculate_always = calculate_always

        self.optimizer = TripathyOptimizer()
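
A hedged, standalone sketch of the additive active/passive kernel assembled by create_kernels in the BoringGP class below, built with GPy directly and toy dimensions (febo's GPRegression wrapper is not needed just to inspect the kernel):

import numpy as np
from GPy.kern import Matern32, RBF

active_dimensions, passive_dimensions = 2, 1
kernel = Matern32(input_dim=active_dimensions,
                  variance=1.,
                  lengthscale=1.5,
                  ARD=True,
                  active_dims=np.arange(active_dimensions),
                  name="active_subspace_kernel")
for i in range(passive_dimensions):
    kernel += RBF(input_dim=1,
                  variance=2.,
                  lengthscale=0.5,
                  active_dims=[active_dimensions + i],
                  name="passive_subspace_kernel_dim_" + str(i))
print(kernel)   # the summed kernel, as shown by create_kernels' "Got kernel:" output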
class BoringGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.

    """
    def create_kernels(self,
                       active_dimensions,
                       passive_dimensions,
                       first=False,
                       k_variance=None,
                       k_lengthscales=None):

        # Use the tripathy kernel here instead, because it includes W in its parameters

        active_kernel = Matern32(
            input_dim=active_dimensions,
            variance=1. if k_variance is None else k_variance,
            lengthscale=1.5
            if k_lengthscales is None else k_lengthscales,  # 0.5,
            ARD=True,
            active_dims=np.arange(active_dimensions),
            name="active_subspace_kernel")

        self.kernel = active_kernel

        if first:  # TODO: need to change this back!

            # Now adding the additional kernels:
            for i in range(passive_dimensions):
                cur_kernel = RBF(
                    input_dim=1,
                    variance=2.,
                    lengthscale=0.5,  # 0.5,
                    ARD=False,
                    active_dims=[active_dimensions + i],
                    name="passive_subspace_kernel_dim_" + str(i))

                self.kernel += cur_kernel
        print("Got kernel: ")
        print(self.kernel)

    def create_gp(self):

        self.gp = GPRegression(input_dim=self.domain.d,
                               kernel=self.kernel,
                               noise_var=0.01,
                               calculate_gradients=False)

        # Let the GP take over datapoints from the datasaver!
        X = self.datasaver_gp.X
        Y = self.datasaver_gp.Y
        # Apply the Q transform if it was spawned already!
        if self.Q is not None:
            X = np.dot(X, self.Q)
        if self.Q is not None:
            assert X.shape[1] >= 2, ("Somehow, X was not projected with Q!",
                                     X.shape, 2)  # TODO: change this back to ==!
        self.gp.set_XY(X, Y)
        self._update_cache()

    def create_gp_and_kernels(self,
                              active_dimensions,
                              passive_dimensions,
                              first=False,
                              k_variance=None,
                              k_lengthscales=None):
        self.create_kernels(active_dimensions,
                            passive_dimensions,
                            first=first,
                            k_variance=k_variance,
                            k_lengthscales=k_lengthscales)
        self.create_gp()

    # From here on, it's the usual functions
    def __init__(self, domain, always_calculate=False):
        super(BoringGP, self).__init__(domain)

        # passive projection matrix still needs to be created first!
        # print("WARNING: CONFIG MODE IS: ", config.DEV)
        self.burn_in_samples = 101  # 101 # 102
        self.recalculate_projection_every = 101
        self.active_projection_matrix = None
        self.passive_projection_matrix = None
        self.Q = None

        # some other parameters that are cached
        self.t = 0

        # Set up the datasaver GP (keeps the raw data so it can be re-projected whenever Q changes)
        placeholder_kernel = RBF(input_dim=self.domain.d)
        self.datasaver_gp = GPRegression(input_dim=self.domain.d,
                                         kernel=placeholder_kernel,
                                         noise_var=0.01,
                                         calculate_gradients=False)

        # Create a new kernel and create a new GP
        self.create_gp_and_kernels(self.domain.d, 0,
                                   first=True)  # self.domain.d - 2

        # Some post-processing
        self.kernel = self.kernel.copy()
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # copy into a Fortran-ordered array so it can be passed directly to LAPACK dtrtrs without another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._bias = self.config.bias
        self.always_calculate = always_calculate

    @property
    def beta(self):
        return np.sqrt(np.log(self.datasaver_gp.X.shape[0]))
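    # beta above is a simple sqrt(log t) heuristic based on the number of stored
    # observations; the information-gain-based coefficient self._beta is computed
    # separately in _update_beta below.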

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return self.gp  # GPRegression(self.domain.d, self.kernel, noise_var=self.config.noise_var, calculate_gradients=self.config.calculate_gradients)

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.
        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        self.i = getattr(self, "i", 0) + 1  # count add_data calls
        # print("Add data ", self.i)
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)

        self.set_data(x, y, append=True)

    # TODO: check whether this is ever called!
    def optimize(self):
        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()

        self._update_beta()

    def _optimize_bias(self):
        self._bias = minimize(self._bias_loss, self._bias,
                              method='L-BFGS-B')['x'].copy()
        self._set_bias(self._bias)
        logger.info(f"Updated bias to {self._bias}")

    def _bias_loss(self, c):
        # calculate mean and norm for new bias via a new woodbury_vector
        new_woodbury_vector, _ = dpotrs(self._woodbury_chol,
                                        self._Y - c,
                                        lower=1)
        K = self.gp.kern.K(self.gp.X)
        mean = np.dot(K, new_woodbury_vector)
        norm = new_woodbury_vector.T.dot(mean)
        # loss is least_squares_error + norm
        return np.asscalar(np.sum(np.square(mean + c - self._Y)) + norm)

    def _set_bias(self, c):
        self._bias = c
        self.gp.set_Y(self._Y - c)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1 / self.delta) +
                             (logdet - logdet_priornoise)) + self._norm()
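        # In effect this is a GP-UCB style confidence coefficient:
        #   beta_t = sqrt(2 * log(1 / delta) + logdet - logdet_priornoise) + norm
        # with the two log-determinants supplied by the helpers below and the norm
        # term computed in _norm().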

    def _optimize_var(self):
        # fix all parameters
        for p in self.gp.parameters:
            p.fix()

        if self.gp.kern.name == 'sum':
            for part in self.gp.kern.parts:
                part.variance.unfix()
        else:
            self.gp.kern.variance.unfix()
        self.gp.optimize()
        if self.gp.kern.name == 'sum':
            values = []
            for part in self.gp.kern.parts:
                values.append(np.asscalar(part.variance.values))
        else:
            values = np.asscalar(self.gp.kern.variance.values)

        logger.info(f"Updated prior variance to {values}")
        # unfix all parameters
        for p in self.gp.parameters:
            p.unfix()

    def _get_logdet(self):
        return 2. * np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)
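    # _get_logdet() returns 2 * sum(log(diag(L))), the log-determinant of the matrix
    # whose lower-triangular Cholesky factor L is the cached posterior Woodbury
    # Cholesky; _get_logdet_prior_noise() is the log-determinant of the t x t
    # diagonal prior noise matrix. Their difference enters _update_beta above.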

    def mean_var(self, x):
        """Recompute the confidence intervals form the GP.
        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)

        assert not np.isnan(x).all(), ("X is nan at some point!", x)

        if self.config.calculate_gradients or True:  # NOTE: 'or True' forces this branch; the _raw_predict path below is currently unreachable
            # In the other case, projection is done in a subfunction
            if self.Q is not None:
                x = np.dot(x, self.Q)
            mean, var = self.gp.predict_noiseless(x)
        else:
            mean, var = self._raw_predict(x)

        return mean + self._bias, var  # add the bias back (see _set_bias, which stores Y - bias in the GP)

    def mean_var_grad(self, x):
        # TODO: should this be here as well?
        # TODO: check, that this is not actually used for new AND saved gp's!
        if self.Q is not None:
            x = np.dot(x, self.Q)
        return self.gp.predictive_gradients(x)

    def var(self, x):
        return self.mean_var(x)[1]

    def mean(self, x):
        return self.mean_var(x)[0]
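    # set_data() below is the core of the model: (1) it appends the raw data to the
    # datasaver GP; (2) once `burn_in_samples` points have arrived (or if
    # always_calculate is set) it learns an active subspace with TripathyOptimizer,
    # optionally adds up to two passive directions orthogonal to it, forms
    # Q = [active | passive], and rebuilds the kernel and GP; (3) it feeds the
    # (possibly Q-projected) data into the working GP and refreshes the prediction cache.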

    def set_data(self, X, Y, append=True):

        # First of all, save everything in the saver GP
        if append:
            X = np.concatenate((self.datasaver_gp.X, X), axis=0)
            Y = np.concatenate((self.datasaver_gp.Y, Y),
                               axis=0)  # Should be axis=0
        self.datasaver_gp.set_XY(X, Y)

        # Now, save everything in the other GP but with a projected X value
        #
        # TODO: This is pretty wrong!
        X = self.datasaver_gp.X
        Y = self.datasaver_gp.Y

        # Do our optimization now
        if self.burn_in_samples == self.i or self.always_calculate:  # (self.i >= self.burn_in_samples and self.i % self.recalculate_projection_every == 1) or
            import time
            start_time = time.time()
            # print("Adding data: ", self.i)

            optimizer = TripathyOptimizer()

            # NOTE: the hard-coded test matrix further below is commented out; here the active subspace is learned from the data.
            self.active_projection_matrix, sn, l, s, d = optimizer.find_active_subspace(
                X, Y)
            #
            # print("BORING sampled the following active matrix: ")
            # print(self.active_projection_matrix)
            #
            passive_dimensions = max(self.domain.d - d, 0)
            passive_dimensions = min(passive_dimensions, 2)
            # passive_dimensions = 1 # TODO: take out this part!
            # # passive_dimensions = 0
            #
            # Generate A^{bot} if there's more dimensions
            if passive_dimensions > 0:
                self.passive_projection_matrix = generate_orthogonal_matrix_to_A(
                    self.active_projection_matrix, passive_dimensions)
            else:
                self.passive_projection_matrix = None
            #
            # print("BORING sampled the following passive matrix: ")
            # print(self.passive_projection_matrix)

            # d = 2
            # self.active_projection_matrix = np.asarray([
            #     [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
            #     [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
            # ]).T
            # s = 1.
            # l = 1.5
            # passive_dimensions = 0

            # Create Q by concatenating the active and passive projections
            if passive_dimensions > 0:
                self.Q = np.concatenate((self.active_projection_matrix,
                                         self.passive_projection_matrix),
                                        axis=1)
            else:
                self.Q = self.active_projection_matrix

            assert not np.isnan(
                self.Q).all(), ("The projection matrix contains nan's!",
                                self.Q)

            # print("BORING sampled the following matrix: ")
            # print(self.Q)

            assert d == self.active_projection_matrix.shape[1]

            self.create_gp_and_kernels(active_dimensions=d,
                                       passive_dimensions=passive_dimensions,
                                       first=True,
                                       k_variance=s,
                                       k_lengthscales=l)

            # print("Projection matrix is: ", self.Q.shape)
            # print("Dimensions found are: ", d)
            # print("Active projection matrix is ", self.active_projection_matrix.shape)
            # print("How many datapoints do we have in the kernel?", self.gp.X.shape)
            # print("How many datapoints do we have in the kernel?", self.datasaver_gp.X.shape)

            print("--- %s seconds ---" % (time.time() - start_time))

        # if self.i > self.burn_in_samples:
        #     assert self.Q is not None, "After the burning in, self.Q is still None!"

        # Add the points to the newly shaped GP!
        if (self.i < self.burn_in_samples
                or self.Q is None) and (not self.always_calculate):
            # print("Still using the old method!")
            self.gp.set_XY(X, Y)
        else:
            # print("We use the dot product thingy from now on!")
            Z = np.dot(X, self.Q)
            # print("Old shape: ", X.shape)
            # print("New shape: ", Z.shape)
            self.gp.set_XY(Z, Y)

        # print("Added data: ", self.i)
        # print("Datasave has shape: ", self.datasaver_gp.X.shape)
        # print("Another shape: ", self.gp.X.shape)

        self.t = X.shape[0]
        self._update_cache()

    def _raw_predict(self, Xnew):
        m, n = Xnew.shape

        # Need to project Xnew here?
        # if self.Q is not None:
        #     Xnew = np.dot(Xnew, self.Q)
        #     assert Xnew.shape[1] == self.Q.reshape(self.Q.shape[0], -1).shape[1], ("Shapes are wrong: ", Xnew.shape, self.Q.shape)
        # else:
        #     assert Xnew.shape[1] == self.domain.d, ("Shapes are wrong when we have no Q!", Xnew.shape, self.domain.d)

        if not hasattr(self.kernel, 'parts'):  # TODO: take this out?
            mu, var = self._raw_predict_given_kernel(Xnew, self.kernel)
            # print("Using the cool values! ")
        else:
            mu = np.zeros((Xnew.shape[0], 1))
            var = np.zeros((Xnew.shape[0], 1))
            for kernel in self.kernel.parts:
                cur_mu, cur_var = self._raw_predict_given_kernel(Xnew, kernel)
                assert not np.isnan(cur_mu).all(), (
                    "nan encountered for mean!", cur_mu)
                assert not np.isnan(cur_var).all(), (
                    "nan encountered for var!", cur_var)
                mu += cur_mu
                var += cur_var

        assert not np.isnan(mu).all(), ("nan encountered for mean!", mu)
        assert not np.isnan(var).all(), ("nan encountered for var!", var)

        assert mu.shape == (m, 1), ("Shape of mean is different! ", mu.shape,
                                    (m, 1))
        assert var.shape == (m, 1), ("Shape of variance is different! ",
                                     var.shape, (m, 1))

        return mu, var
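    # _raw_predict_given_kernel below is the standard GP posterior predictive using
    # the cached factors:
    #   mu(x)  = K(X, x)^T @ woodbury_vector
    #   var(x) = k(x, x) - || L^{-1} K(X, x) ||^2
    # where L is the cached lower-triangular Woodbury Cholesky factor and the
    # triangular solve is performed by LAPACK dtrtrs.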

    def _raw_predict_given_kernel(self, Xnew, kernel):
        Kx = kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        Kxx = kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol,
                            Kx,
                            lower=1,
                            trans=0,
                            unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:, None]
        return mu, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(
            self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        # Remove the GP from the state dict to allow pickling; calculations are
        # done via the cached Woodbury vector / Cholesky factors.
        del self_dict['gp']
        return self_dict
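    # Minimal usage sketch (an illustration only; assumes a `domain` object exposing
    # `d` and a config providing `bias`, `delta`, etc., as used above):
    #   model = BoringGP(domain)
    #   model.add_data(x, y)           # x: shape (1, domain.d), y: shape (1, 1)
    #   mu, var = model.mean_var(x)    # bias-corrected posterior mean and variance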