Example #1
import numpy as np
from GPy.kern import Kern, RBF
from GPy.core.parameterization import Param


class ChangepointRBF(Kern):
    def __init__(self, input_dim,
                 variance1=1., variance2=1., lengthscale1=1., lengthscale2=1., xc=1,
                 active_dims=None):
        super(ChangepointRBF, self).__init__(input_dim, active_dims, 'chngpt')
        assert input_dim == 1, "For this kernel we assume input_dim = 1"
        self.variance1 = Param('variance1', variance1)
        self.variance2 = Param('variance2', variance2)
        self.lengthscale1 = Param('lengthscale1', lengthscale1)
        self.lengthscale2 = Param('lengthscale2', lengthscale2)
        self.rbf = RBF(input_dim=input_dim, lengthscale=1., variance=1.)
        self.xc = Param('xc', xc)
        # register the parameters with GPy (older GPy versions call this add_parameters)
        self.link_parameters(self.variance1, self.variance2, self.lengthscale1, self.lengthscale2, self.xc)

    def parameters_changed(self):
        pass

    def K(self, X, X2=None):
        """Covariance matrix: k(x, x') = a(x) a(x') k_rbf(u(x), u(x'))"""
        u1 = self.u(X)
        a1 = self.a(X)
        if X2 is None:
            u2 = u1
            a2 = a1
        else:
            u2 = self.u(X2)
            a2 = self.a(X2)
        # outer product a(x) a(x')^T scales the stationary RBF part elementwise
        return a1.dot(a2.T) * self.rbf.K(X=u1, X2=u2)

    def Kdiag(self, X):
        """Diagonal of the covariance matrix: a(x)^2 k_rbf(u(x), u(x))"""
        u = self.u(X)
        a = self.a(X)
        return a.flatten() ** 2 * self.rbf.Kdiag(u)

    def u(self, X: np.ndarray):
        """u operation in the paper"""
        u = np.empty(X.shape)
        for i in range(X.shape[0]):
            if X[i] < self.xc: u[i] = X[i] / self.variance1
            else: u[i] = self.xc/self.variance1 + (X[i] - self.xc)/self.variance2
        return u

    def a(self, X: np.ndarray):
        """a operation in the paper"""
        a = np.empty(X.shape)
        for i in range(X.shape[0]):
            if X[i] < self.xc: a[i] = self.lengthscale1
            else: a[i] = self.lengthscale2
        return a
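A minimal usage sketch of the kernel above, assuming NumPy and GPy are installed and the class is in scope; the input grid, hyper-parameter values and change point below are made up for illustration. Plugging the kernel into a full GPy model would additionally require gradient methods (e.g. update_gradients_full), which this snippet does not implement.

import numpy as np

# illustrative 1-D inputs with an assumed change point at x = 4
X = np.linspace(0, 10, 50)[:, None]
kern = ChangepointRBF(input_dim=1, variance1=1., variance2=2.,
                      lengthscale1=0.5, lengthscale2=1.5, xc=4.)
K = kern.K(X)           # full (50, 50) covariance matrix
kdiag = kern.Kdiag(X)   # its diagonal, shape (50,)
print(K.shape, np.allclose(np.diag(K), kdiag))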
Example #2
import numpy as np
import matplotlib.pyplot as plt
from numpy import newaxis as na
from numpy.linalg import det, inv, solve
from scipy.linalg import cho_factor, cho_solve
from scipy.optimize import minimize
from scipy.stats import multivariate_normal
from GPy.kern import RBF
# `BayesianQuadratureTransform` (the base class) and `maha` (pairwise Mahalanobis-type
# distances) are assumed to come from the surrounding project and are not shown here.


class GPQuad(BayesianQuadratureTransform):
    def __init__(self, dim, unit_sp=None, hypers=None):
        super(GPQuad, self).__init__(dim, unit_sp, hypers)
        # GPy RBF kernel with the given hyper-parameters
        self.kern = RBF(self.d,
                        variance=self.hypers['sig_var'],
                        lengthscale=self.hypers['lengthscale'],
                        ARD=True)

    def weights_rbf(self, unit_sp, hypers):
        # BQ weights for RBF kernel with given hypers, computations adapted from the GP-ADF code [Deisenroth] with
        # the following assumptions:
        #   (A1) the uncertain input is zero-mean with unit covariance
        #   (A2) one set of hyper-parameters is used for all output dimensions (one GP models all outputs)
        d, n = unit_sp.shape
        # GP kernel hyper-parameters
        alpha, el, jitter = hypers['sig_var'], hypers['lengthscale'], hypers['noise_var']
        assert len(el) == d
        # pre-allocation for convenience
        eye_d, eye_n = np.eye(d), np.eye(n)
        iLam1 = np.atleast_2d(np.diag(el**-1))  # sqrt(Lambda^-1)
        iLam2 = np.atleast_2d(np.diag(el**-2))

        inp = unit_sp.T.dot(iLam1)  # sigmas / el[:, na], i.e. (x - m)^T * sqrt(Lambda^-1), shape (numSP, xdim)
        K = np.exp(2 * np.log(alpha) - 0.5 * maha(inp, inp))
        iK = cho_solve(cho_factor(K + jitter * eye_n), eye_n)
        B = iLam2 + eye_d  # (D, D)
        c = alpha**2 / np.sqrt(det(B))
        t = inp.dot(inv(B))  # inn*(P + Lambda)^-1
        l = np.exp(-0.5 * np.sum(inp * t, 1))  # (N, 1)
        zet = 2 * np.log(alpha) - 0.5 * np.sum(inp * inp, 1)
        inp = inp.dot(iLam1)
        R = 2 * iLam2 + eye_d
        t = 1 / np.sqrt(det(R))
        L = np.exp((zet[:, na] + zet[:, na].T) +
                   maha(inp, -inp, V=0.5 * inv(R)))
        q = c * l  # evaluations of the kernel mean map (from the viewpoint of RKHS methods)
        # mean weights
        wm_f = q.dot(iK)
        iKQ = iK.dot(t * L)
        # covariance weights
        wc_f = iKQ.dot(iK)
        # cross-covariance "weights"
        wc_fx = np.diag(q).dot(iK)
        # used for self.D.dot(x - mean).dot(wc_fx).dot(fx)
        self.D = inv(eye_d + np.diag(el**2))  # S*(S + Lam)^-1; for S = I this is (I + Lam)^-1
        # model variance; to be added to the covariance
        # this diagonal form assumes independent GP outputs (cov(f^a, f^b) = 0 for all a, b: a neq b)
        self.model_var = np.diag((alpha**2 - np.trace(iKQ)) * np.ones(d))  # np.ones(d) so np.diag yields a (d, d) diagonal matrix
        return wm_f, wc_f, wc_fx

    def plot_gp_model(self, f, unit_sp, args, test_range=(-5, 5, 50), plot_dims=(0, 0)):
        # plot out_dim vs. in_dim
        in_dim, out_dim = plot_dims
        # test input must have the same dimension as specified in kernel
        test = np.linspace(*test_range)
        test_pts = np.zeros((self.d, len(test)))
        test_pts[in_dim, :] = test
        # function value observations at training points (unit sigma-points)
        y = np.apply_along_axis(f, 0, unit_sp, args)
        fx = np.apply_along_axis(f, 0, test_pts, args)  # function values at test points
        K = self.kern.K(unit_sp.T)  # covariances between sigma-points
        k = self.kern.K(test_pts.T, unit_sp.T)  # covariances between test inputs and sigma-points
        kxx = self.kern.Kdiag(test_pts.T)  # prior predictive variance
        k_iK = cho_solve(cho_factor(K), k.T).T
        gp_mean = k_iK.dot(y[out_dim, :])  # GP mean
        gp_var = np.diag(np.diag(kxx) - k_iK.dot(k.T))  # GP predictive variance
        # plot the GP mean, predictive variance and the true function
        plt.figure()
        plt.plot(test, fx[out_dim, :], color='r', ls='--', lw=2, label='true')
        plt.plot(test, gp_mean, color='b', ls='-', lw=2, label='GP mean')
        plt.fill_between(test, gp_mean + 2 * np.sqrt(gp_var), gp_mean - 2 * np.sqrt(gp_var),
                         color='b', alpha=0.25, label='GP variance')
        plt.plot(unit_sp[in_dim, :], y[out_dim, :],
                 color='k', ls='', marker='o', ms=8, label='data')
        plt.legend()
        plt.show()

    def _weights(self, sigma_points, hypers):
        return self.weights_rbf(sigma_points, hypers)

    def _fcn_eval(self, fcn, x, fcn_pars):
        return np.apply_along_axis(fcn, 0, x, fcn_pars)

    def _mean(self, weights, fcn_evals):
        return fcn_evals.dot(weights)

    def _covariance(self, weights, fcn_evals, mean_out):
        return fcn_evals.dot(weights).dot(fcn_evals.T) - np.outer(mean_out, mean_out) + self.model_var

    def _cross_covariance(self, weights, fcn_evals, x, mean_out, mean_in):
        return fcn_evals.dot(weights.T).dot((x - mean_in).T).dot(self.D)

    def _int_var_rbf(self, X, hyp, jitter=1e-8):
        """
        Posterior integral variance of the Gaussian Process quadrature.
        X - vector (1, 2*xdim**2+xdim)
        hyp - kernel hyperparameters [s2, el_1, ... el_d]
        """
        # reshape X to SP matrix
        X = np.reshape(X, (self.n, self.d))
        # set kernel hyper-parameters
        s2, el = hyp[0], hyp[1:]
        self.kern.param_array[0] = s2  # variance
        self.kern.param_array[1:] = el  # lengthscale
        K = self.kern.K(X)
        L = np.diag(el**2)
        # posterior variance of the integral
        ks = s2 * np.sqrt(det(L + np.eye(self.d))) * multivariate_normal(
            mean=np.zeros(self.d), cov=L).pdf(X)
        postvar = -ks.dot(solve(K + jitter * np.eye(self.n), ks.T))
        return postvar

    def _int_var_rbf_hyp(self, hyp, X, jitter=1e-8):
        """
        Posterior integral variance as a function of hyper-parameters
        :param hyp: RBF kernel hyper-parameters [s2, el_1, ..., el_d]
        :param X: sigma-points
        :param jitter: numerical jitter (for stabilizing computations)
        :return: posterior integral variance
        """
        # reshape X to SP matrix
        X = np.reshape(X, (self.n, self.d))
        # set kernel hyper-parameters
        s2, el = 1, hyp  # sig_var hyper always set to 1
        self.kern.param_array[0] = s2  # variance
        self.kern.param_array[1:] = el  # lengthscale
        K = self.kern.K(X)
        L = np.diag(el**2)
        # posterior variance of the integral
        ks = s2 * np.sqrt(det(L + np.eye(self.d))) * multivariate_normal(
            mean=np.zeros(self.d), cov=L).pdf(X)
        postvar = s2 * np.sqrt(det(2 * inv(L) + np.eye(self.d)))**-1 - ks.dot(
            solve(K + jitter * np.eye(self.n), ks.T))
        return postvar

    def _min_var_sigmas(self):
        # solver options
        op = {'disp': True}
        # bounds based on input unit Gaussian (-2*std, +2std)
        bnds = tuple((-2, 2) for i in range(self.n * self.d))
        hyp = np.hstack((self.hypers['sig_var'], self.hypers['lengthscale']))
        # unconstrained
        #        res = minimize(self._gpq_postvar, self.X0, method='Nelder-Mead', options=op)
        #        res = minimize(self._gpq_postvar, self.X0, method='SLSQP', bounds=bnds, options=op)
        res = minimize(self._int_var_rbf, self.unit_sp.ravel(), args=(hyp,),
                       method='L-BFGS-B', bounds=bnds, options=op)  # x0 flattened; extra args passed as a tuple
        return res.x

    def _min_var_hypers(self):
        """
        Finds kernel hyper-parameters minimizing the posterior integral variance.
        :return: optimized kernel hyper-parameters
        """
        # solver options
        op = {'disp': True}
        hyp = self.hypers['lengthscale']  # alternatively: np.hstack((self.hypers['sig_var'], self.hypers['lengthscale']))
        # keep the lengthscales positive and within a reasonable range
        bnds = tuple((1e-3, 1000) for i in range(len(hyp)))
        # unconstrained
        #        res = minimize(self._gpq_postvar, self.X0, method='Nelder-Mead', options=op)
        #        res = minimize(self._gpq_postvar, self.X0, method='SLSQP', bounds=bnds, options=op)
        res = minimize(self._int_var_rbf_hyp, hyp, args=(self.unit_sp,),
                       method='L-BFGS-B', bounds=bnds, options=op)  # extra args passed as a tuple
        return res.x

    def _min_logmarglik_hypers(self):
        # finds hypers by maximizing the marginal likelihood (empirical bayes)
        # the multiple output dimensions should be reflected in the log marglik
        pass

    def _min_intvar_logmarglik_hypers(self):
        # finds hypers by minimizing the sum of log-marginal likelihood and the integral variance objectives
        pass
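For orientation, a small usage sketch of the interface above. It assumes the surrounding project is importable (BayesianQuadratureTransform and the maha helper are not shown here), that the base class stores its arguments as self.d, self.n, self.unit_sp and self.hypers, and that the sigma-points and hyper-parameter values below are purely illustrative.

import numpy as np

d = 2                                     # input dimension
unit_sp = np.hstack((np.zeros((d, 1)),    # unit sigma-points, shape (d, n)
                     np.sqrt(d) * np.eye(d),
                     -np.sqrt(d) * np.eye(d)))
hypers = {'sig_var': 1.0,                 # kernel hyper-parameters expected by weights_rbf
          'lengthscale': 3.0 * np.ones(d),
          'noise_var': 1e-8}              # jitter used when inverting the kernel matrix

tf = GPQuad(d, unit_sp=unit_sp, hypers=hypers)
wm, wc, wcc = tf.weights_rbf(unit_sp, hypers)   # BQ mean, covariance and cross-covariance weights
print(wm.shape, wc.shape, wcc.shape)            # (n,), (n, n), (n, n)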