def backsub_both_sides(L, X, transpose='left'):
    """Return L^-T * X * L^-1, assuming X is symmetric and L is a lower Cholesky factor."""
    if transpose == 'left':
        tmp, _ = lapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
        return lapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
    else:
        tmp, _ = lapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=0)
        return lapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=0)[0].T
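# Minimal numerical check of backsub_both_sides above, assuming `lapack` is
# scipy.linalg.lapack: compare the triangular-solve route against the
# explicit dense inverse L^-T X L^-1.
import numpy as np
from scipy.linalg import lapack

rng = np.random.default_rng(0)
A = rng.standard_normal((4, 4))
K = A @ A.T + 4 * np.eye(4)      # symmetric positive definite
L = np.linalg.cholesky(K)        # lower Cholesky factor
X = A + A.T                      # symmetric test matrix

result = backsub_both_sides(L, X, transpose='left')
Linv = np.linalg.inv(L)
assert np.allclose(result, Linv.T @ X @ Linv)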
def _qK_graminv(self):
    r"""
    Kernel mean multiplied with the inverse kernel Gram matrix, all evaluated at training locations.

    .. math::
        \int k(x, X) \mathrm{d}x \, [K(X, X) + \sigma^2 I]^{-1}

    :return: weights of shape (1, n_train_points)
    """
    lower_chol = self.model.base_gp.gram_chol()
    qK = self.model.base_gp.kern.qK(self.model.base_gp.X)
    # Two triangular solves with the Cholesky factor: first L z = qK^T,
    # then L^T w = z, giving K^{-1} qK^T.
    graminv_qK_trans = lapack.dtrtrs(lower_chol.T,
                                     lapack.dtrtrs(lower_chol, qK.T, lower=1)[0],
                                     lower=0)[0]
    return np.transpose(graminv_qK_trans)
def _graminv_Kx(self, x):
    r"""
    Inverse kernel Gram matrix multiplied with the kernel function k(x, x')
    evaluated at the existing training datapoints and locations x.

    .. math::
        [K(X, X) + \sigma^2 I]^{-1} K(X, x)

    :param x: (n_points x input_dim) locations where to evaluate
    :return: (n_train_points, n_points)
    """
    lower_chol = self.model.base_gp.gram_chol()
    KXx = self.model.base_gp.kern.K(self.model.base_gp.X, x)
    return lapack.dtrtrs(lower_chol.T,
                         lapack.dtrtrs(lower_chol, KXx, lower=1)[0],
                         lower=0)[0]
def LAPACK_solve_ls_with_QR(A, b):
    # Ref: https://stackoverflow.com/questions/21970510/solving-a-linear-system-with-lapacks-dgeqrf
    # The corresponding procedure in LAPACK is https://www.netlib.org/lapack/lug/node40.html
    qr, tau, work, info = dgeqrf(A)                             # A = Q R (Householder form)
    cq, work, info = dormqr('L', 'T', qr, tau, b, qr.shape[0])  # c = Q^T b
    x_qr, info = dtrtrs(qr, cq)                                 # solve upper-triangular R x = c
    return x_qr[0:A.shape[1]]
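# Quick sanity check of the QR least-squares routine above against numpy's
# reference solver, assuming dgeqrf/dormqr/dtrtrs are imported from
# scipy.linalg.lapack as in the function body.
import numpy as np
from scipy.linalg.lapack import dgeqrf, dormqr, dtrtrs

rng = np.random.default_rng(1)
A = rng.standard_normal((6, 3))          # overdetermined system
b = rng.standard_normal((6, 1))

x_qr = LAPACK_solve_ls_with_QR(A, b)
x_ls = np.linalg.lstsq(A, b, rcond=None)[0]
assert np.allclose(x_qr, x_ls)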
def _compute_integral_mean_and_variance(self):
    integral_mean, kernel_mean_X = self._compute_integral_mean_and_kernel_mean()
    integral_var = self.base_gp.kern.qKq() - np.square(
        lapack.dtrtrs(self.base_gp.gram_chol(), kernel_mean_X.T, lower=1)[0]
    ).sum(axis=0, keepdims=True).T
    # np.float was removed from numpy; the builtin float does the same job here.
    return float(integral_mean), float(integral_var)
def residual_variable_projection(matrix: np.ndarray, data: np.ndarray) \
        -> typing.Tuple[np.ndarray, np.ndarray]:
    """Calculates the conditionally linear parameters and residual with the
    variable projection method.

    Parameters
    ----------
    matrix : np.ndarray
        The model matrix.
    data : np.ndarray
        The data to analyze.
    """
    # TODO: Reference Kaufman paper

    # Kaufman Q2 step 3: QR-decompose the model matrix
    qr, tau, _, _ = lapack.dgeqrf(matrix)

    # Kaufman Q2 step 4: apply Q^T to the data, then solve R * clp = Q^T data
    temp, _, _ = lapack.dormqr("L", "T", qr, tau, data, max(1, matrix.shape[1]), overwrite_c=0)
    clp, _ = lapack.dtrtrs(qr, temp)

    # Zero the first n entries so only the residual part maps back through Q
    for i in range(matrix.shape[1]):
        temp[i] = 0

    # Kaufman Q2 step 5
    residual, _, _ = lapack.dormqr("L", "N", qr, tau, temp, max(1, matrix.shape[1]), overwrite_c=0)
    return clp[:matrix.shape[1]], residual
def _solve_triangular(L, b, lower=True):
    '''
    Solve the triangular system of equations `Lx = b` using `dtrtrs`.

    Parameters
    ----------
    L : (n, n) float array
    b : (n, *) float array

    Returns
    -------
    (n, *) float array
    '''
    if any(i == 0 for i in b.shape):
        return np.zeros(b.shape)
    x, info = dtrtrs(L, b, lower=lower)
    if info < 0:
        raise ValueError('The %s-th argument had an illegal value' % -info)
    elif info > 0:
        raise np.linalg.LinAlgError(
            'The %s-th diagonal element of A is zero, indicating that the matrix is '
            'singular and the solutions X have not been computed.' % info)
    return x
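# Usage sketch for _solve_triangular: solve a small lower-triangular system
# and exercise the singular-matrix error path.
import numpy as np
from scipy.linalg.lapack import dtrtrs

L = np.array([[2.0, 0.0],
              [1.0, 3.0]])
b = np.array([[4.0], [7.0]])
x = _solve_triangular(L, b, lower=True)
assert np.allclose(L @ x, b)

# A zero on the diagonal is reported by dtrtrs via info > 0.
try:
    _solve_triangular(np.array([[2.0, 0.0], [1.0, 0.0]]), b, lower=True)
except np.linalg.LinAlgError:
    pass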
def dtrtrs(A, B, lower=0, trans=0, unitdiag=0):
    """
    Wrapper for the lapack dtrtrs function.

    :param A: Matrix A
    :param B: Matrix B
    :param lower: is matrix lower (true) or upper (false)
    :returns: solution X and the lapack info flag
    """
    return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)
def solve_triangular(A, B, trans=False):
    """
    Solve the system Ax = B where A is lower triangular.
    If `trans` is True then solve the system A'x = B instead.
    """
    X, info = lapack.dtrtrs(A, B, lower=1, trans=int(trans))
    if info != 0:
        raise LinAlgError('Matrix is singular')
    return X
def integrate(self) -> Tuple[float, float]:
    """
    Computes an estimator of the integral as well as its variance.

    :returns: estimator of integral and its variance
    """
    integral_mean, kernel_mean_X = self._compute_integral_mean_and_kernel_mean()
    integral_var = self.base_gp.kern.qKq() - np.square(
        lapack.dtrtrs(self.base_gp.gram_chol(), kernel_mean_X.T, lower=1)[0]
    ).sum(axis=0, keepdims=True)[0][0]
    return integral_mean, integral_var
def integrate(self) -> Tuple[float, float]:
    """
    Computes an estimator of the integral as well as its variance.

    :returns: estimator of integral and its variance
    """
    kernel_mean_X = self.base_gp.kern.qK(self.X)
    integral_mean = np.dot(kernel_mean_X, self.base_gp.graminv_residual())[0, 0]
    integral_var = self.base_gp.kern.qKq() - np.square(
        lapack.dtrtrs(self.base_gp.gram_chol(), kernel_mean_X.T, lower=1)[0]
    ).sum(axis=0, keepdims=True)[0][0]
    return integral_mean, integral_var
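# The variance computation above relies on the identity
# qK K^{-1} qK^T = ||L^{-1} qK^T||^2, with L the lower Cholesky factor of the
# Gram matrix. A standalone numpy sketch with synthetic stand-ins for the
# Gram matrix and the kernel mean qK:
import numpy as np
from scipy.linalg import lapack

rng = np.random.default_rng(2)
A = rng.standard_normal((5, 5))
K = A @ A.T + 5 * np.eye(5)          # stand-in for K(X, X) + sigma^2 I
qK = rng.standard_normal((1, 5))     # stand-in for the kernel mean at X
L = np.linalg.cholesky(K)

quad_chol = np.square(lapack.dtrtrs(L, qK.T, lower=1)[0]).sum()
quad_direct = (qK @ np.linalg.solve(K, qK.T))[0, 0]
assert np.allclose(quad_chol, quad_direct)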
def _raw_predict(self, Xnew):
    Kx = self.kernel.K(self._X, Xnew)
    mu = np.dot(Kx.T, self._woodbury_vector)
    if len(mu.shape) == 1:
        mu = mu.reshape(-1, 1)
    Kxx = self.kernel.Kdiag(Xnew)
    tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
    var = (Kxx - np.square(tmp).sum(0))[:, None]
    return mu, var
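# The variance line above uses diag(Kxx - Kx^T K^{-1} Kx)
# = Kdiag - sum(square(L^{-1} Kx), axis=0). A numpy-only sketch with a
# hypothetical RBF kernel standing in for self.kernel:
import numpy as np
from scipy.linalg import lapack

def rbf(a, b):
    d2 = ((a[:, None, :] - b[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2)

rng = np.random.default_rng(3)
X, Xnew = rng.standard_normal((6, 1)), rng.standard_normal((3, 1))
L = np.linalg.cholesky(rbf(X, X) + 1e-6 * np.eye(6))   # jitter for stability
Kx = rbf(X, Xnew)

tmp = lapack.dtrtrs(L, Kx, lower=1)[0]                 # L^{-1} Kx
var = np.diag(rbf(Xnew, Xnew)) - np.square(tmp).sum(0)
var_direct = np.diag(rbf(Xnew, Xnew) - Kx.T @ np.linalg.solve(L @ L.T, Kx))
assert np.allclose(var, var_direct)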
def _solve_triangular(L, b, lower=True):
    '''
    Solves `Lx = b` for a triangular `L` using `dtrtrs`.
    '''
    if any(i == 0 for i in b.shape):
        return np.zeros(b.shape, dtype=float)
    x, info = dtrtrs(L, b, lower=lower)
    if info < 0:
        raise ValueError('The %s-th argument had an illegal value' % -info)
    elif info > 0:
        raise np.linalg.LinAlgError('Singular matrix')
    return x
def get_prediction_gradients(self, X: np.ndarray) -> Tuple:
    """
    Computes and returns model gradients of mean and variance at given points.

    :param X: points to compute gradients at, shape (num_points, dim)
    :returns: Tuple of gradients of mean and variance, shapes of both (num_points, dim)
    """
    # gradient of mean
    d_mean_dx = (self.base_gp.kern.dK_dx1(X, self.X) @ self.base_gp.graminv_residual())[:, :, 0].T

    # gradient of variance
    dKdiag_dx = self.base_gp.kern.dKdiag_dx(X)
    dKxX_dx1 = self.base_gp.kern.dK_dx1(X, self.X)
    lower_chol = self.base_gp.gram_chol()
    KXx = self.base_gp.kern.K(self.base_gp.X, X)
    graminv_KXx = lapack.dtrtrs(lower_chol.T,
                                lapack.dtrtrs(lower_chol, KXx, lower=1)[0],
                                lower=0)[0]
    d_var_dx = dKdiag_dx - 2. * (dKxX_dx1 * np.transpose(graminv_KXx)).sum(axis=2, keepdims=False)
    return d_mean_dx, d_var_dx.T
def _raw_predict_covar(self, Xnew, Xcond):
    Kx = self.kernel.K(self._X, np.vstack((Xnew, Xcond)))
    tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
    n = Xnew.shape[0]
    tmp1 = tmp[:, :n]
    tmp2 = tmp[:, n:]
    Kxx = self.kernel.K(Xnew, Xcond)
    var = Kxx - (tmp1.T).dot(tmp2)
    Kxx_new = self.kernel.Kdiag(Xnew)
    var_Xnew = (Kxx_new - np.square(tmp1).sum(0))[:, None]
    return var_Xnew, var
def _raw_predict(self, Xnew):
    assert Xnew.shape[1] == self.active_d, "Somehow, the input was not projected"
    Kx = self.kernel.K(self._X, Xnew)
    mu = np.dot(Kx.T, self._woodbury_vector)
    if len(mu.shape) == 1:
        mu = mu.reshape(-1, 1)
    Kxx = self.kernel.Kdiag(Xnew)
    tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
    var = (Kxx - np.square(tmp).sum(0))[:, None]
    return mu, var
def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
    """
    Wrapper for the lapack dtrtrs function.

    DTRTRS solves a triangular system of the form

        A * X = B  or  A**T * X = B,

    where A is a triangular matrix of order N, and B is an N-by-NRHS
    matrix. A check is made to verify that A is nonsingular.

    :param A: Matrix A (triangular)
    :param B: Matrix B
    :param lower: is matrix lower (true) or upper (false)
    :returns: Solution to A * X = B or A**T * X = B
    """
    A = np.asfortranarray(A)
    # Note: B does not seem to need to be F-ordered!
    return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)
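# Demonstration of the lower/trans flags of the wrapper above: with a
# lower-triangular L, trans=0 solves L x = b and trans=1 solves L^T x = b.
import numpy as np
from scipy.linalg import lapack

L = np.array([[2.0, 0.0, 0.0],
              [4.0, 6.0, 0.0],
              [7.0, 8.0, 10.0]])
b = np.array([[1.0], [2.0], [3.0]])

x_lower = lapack.dtrtrs(L, b, lower=1)[0]            # L x = b
x_trans = lapack.dtrtrs(L, b, lower=1, trans=1)[0]   # L^T x = b
assert np.allclose(L @ x_lower, b)
assert np.allclose(L.T @ x_trans, b)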
def solve_linear(self, z: np.ndarray) -> np.ndarray:
    lower_chol = self.gpy_model.posterior.woodbury_chol
    # Two triangular solves with the Cholesky factor give (L L^T)^{-1} z.
    return lapack.dtrtrs(lower_chol.T,
                         lapack.dtrtrs(lower_chol, z, lower=1)[0],
                         lower=0)[0]
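# The pair of triangular solves in solve_linear computes (L L^T)^{-1} z; a
# standalone check against scipy's cho_solve, with a synthetic SPD matrix in
# place of the woodbury_chol factor:
import numpy as np
from scipy.linalg import lapack, cho_solve

rng = np.random.default_rng(4)
A = rng.standard_normal((4, 4))
K = A @ A.T + 4 * np.eye(4)
L = np.linalg.cholesky(K)
z = rng.standard_normal((4, 1))

w = lapack.dtrtrs(L.T, lapack.dtrtrs(L, z, lower=1)[0], lower=0)[0]
assert np.allclose(w, cho_solve((L, True), z))
assert np.allclose(K @ w, z)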
def _lbf(cov_term, stats, ldet_prior):
    ltri = np.linalg.cholesky(cov_term)
    ldet_term = np.log(np.prod(np.diagonal(ltri))) * 2
    temp = np.asfortranarray(stats[:, np.newaxis])
    # With overwrite_b=1 and a Fortran-ordered right-hand side, the solve
    # happens in place, so `temp` now holds ltri^{-1} stats.
    lp.dtrtrs(ltri, temp, lower=1, overwrite_b=1)
    return (-ldet_term - ldet_prior * stats.size + np.sum(temp ** 2)) / 2
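# The log-determinant term above uses log det K = 2 * sum(log(diag(L))) for
# the Cholesky factor L. Summing logs, rather than taking the log of the
# product as _lbf does, stays finite for larger matrices:
import numpy as np

rng = np.random.default_rng(5)
A = rng.standard_normal((50, 50))
K = A @ A.T + 50 * np.eye(50)
L = np.linalg.cholesky(K)

ldet = 2 * np.sum(np.log(np.diagonal(L)))
assert np.allclose(ldet, np.linalg.slogdet(K)[1])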
def test_draw_samples(self, dtype, X, X_isSamples, X_cond, X_cond_isSamples,
                      Y_cond, Y_cond_isSamples, rbf_lengthscale,
                      rbf_lengthscale_isSamples, rbf_variance,
                      rbf_variance_isSamples, rv_shape, num_samples):
    from scipy.linalg.lapack import dtrtrs
    X_mx = prepare_mxnet_array(X, X_isSamples, dtype)
    X_cond_mx = prepare_mxnet_array(X_cond, X_cond_isSamples, dtype)
    Y_cond_mx = prepare_mxnet_array(Y_cond, Y_cond_isSamples, dtype)
    rbf_lengthscale_mx = prepare_mxnet_array(rbf_lengthscale,
                                             rbf_lengthscale_isSamples, dtype)
    rbf_variance_mx = prepare_mxnet_array(rbf_variance,
                                          rbf_variance_isSamples, dtype)
    rand = np.random.randn(num_samples, *rv_shape)
    rand_gen = MockMXNetRandomGenerator(
        mx.nd.array(rand.flatten(), dtype=dtype))
    rbf = RBF(2, True, 1., 1., 'rbf', None, dtype)
    X_var = Variable(shape=(5, 2))
    X_cond_var = Variable(shape=(8, 2))
    Y_cond_var = Variable(shape=(8, 1))
    gp = ConditionalGaussianProcess.define_variable(
        X=X_var, X_cond=X_cond_var, Y_cond=Y_cond_var, kernel=rbf,
        shape=rv_shape, dtype=dtype, rand_gen=rand_gen).factor
    variables = {
        gp.X.uuid: X_mx,
        gp.X_cond.uuid: X_cond_mx,
        gp.Y_cond.uuid: Y_cond_mx,
        gp.rbf_lengthscale.uuid: rbf_lengthscale_mx,
        gp.rbf_variance.uuid: rbf_variance_mx
    }
    samples_rt = gp.draw_samples(F=mx.nd, variables=variables,
                                 num_samples=num_samples).asnumpy()

    samples_np = []
    for i in range(num_samples):
        X_i = X[i] if X_isSamples else X
        X_cond_i = X_cond[i] if X_cond_isSamples else X_cond
        Y_cond_i = Y_cond[i] if Y_cond_isSamples else Y_cond
        lengthscale_i = rbf_lengthscale[i] if rbf_lengthscale_isSamples else rbf_lengthscale
        variance_i = rbf_variance[i] if rbf_variance_isSamples else rbf_variance
        rand_i = rand[i]
        rbf_np = GPy.kern.RBF(input_dim=2, ARD=True)
        rbf_np.lengthscale = lengthscale_i
        rbf_np.variance = variance_i
        K_np = rbf_np.K(X_i)
        Kc_np = rbf_np.K(X_cond_i, X_i)
        Kcc_np = rbf_np.K(X_cond_i)
        L = np.linalg.cholesky(Kcc_np)
        LInvY = dtrtrs(L, Y_cond_i, lower=1, trans=0)[0]
        LinvKxt = dtrtrs(L, Kc_np, lower=1, trans=0)[0]
        mu = LinvKxt.T.dot(LInvY)
        cov = K_np - LinvKxt.T.dot(LinvKxt)
        L_cov_np = np.linalg.cholesky(cov)
        sample_np = mu + L_cov_np.dot(rand_i)
        samples_np.append(sample_np)
    samples_np = np.array(samples_np)

    assert np.issubdtype(samples_rt.dtype, dtype)
    assert get_num_samples(mx.nd, samples_rt) == num_samples
    assert np.allclose(samples_np, samples_rt)
def test_draw_samples_w_mean(self, dtype, X, X_isSamples, X_cond,
                             X_cond_isSamples, Y_cond, Y_cond_isSamples,
                             rbf_lengthscale, rbf_lengthscale_isSamples,
                             rbf_variance, rbf_variance_isSamples, rv_shape,
                             num_samples):
    net = nn.HybridSequential(prefix='nn_')
    with net.name_scope():
        net.add(nn.Dense(rv_shape[-1], flatten=False, activation="tanh",
                         in_units=X.shape[-1], dtype=dtype))
    net.initialize(mx.init.Xavier(magnitude=3))

    from scipy.linalg.lapack import dtrtrs
    X_mx = prepare_mxnet_array(X, X_isSamples, dtype)
    X_cond_mx = prepare_mxnet_array(X_cond, X_cond_isSamples, dtype)
    Y_cond_mx = prepare_mxnet_array(Y_cond, Y_cond_isSamples, dtype)
    rbf_lengthscale_mx = prepare_mxnet_array(rbf_lengthscale,
                                             rbf_lengthscale_isSamples, dtype)
    rbf_variance_mx = prepare_mxnet_array(rbf_variance,
                                          rbf_variance_isSamples, dtype)
    mean_mx = net(X_mx)
    mean_np = mean_mx.asnumpy()
    mean_cond_mx = net(X_cond_mx)
    mean_cond_np = mean_cond_mx.asnumpy()
    rand = np.random.randn(num_samples, *rv_shape)
    rand_gen = MockMXNetRandomGenerator(
        mx.nd.array(rand.flatten(), dtype=dtype))
    rbf = RBF(2, True, 1., 1., 'rbf', None, dtype)
    X_var = Variable(shape=(5, 2))
    X_cond_var = Variable(shape=(8, 2))
    Y_cond_var = Variable(shape=(8, 1))
    mean_func = MXFusionGluonFunction(net, num_outputs=1, broadcastable=True)
    mean_var = mean_func(X_var)
    mean_cond_var = mean_func(X_cond_var)
    gp = ConditionalGaussianProcess.define_variable(
        X=X_var, X_cond=X_cond_var, Y_cond=Y_cond_var, mean=mean_var,
        mean_cond=mean_cond_var, kernel=rbf, shape=rv_shape, dtype=dtype,
        rand_gen=rand_gen).factor
    variables = {
        gp.X.uuid: X_mx,
        gp.X_cond.uuid: X_cond_mx,
        gp.Y_cond.uuid: Y_cond_mx,
        gp.rbf_lengthscale.uuid: rbf_lengthscale_mx,
        gp.rbf_variance.uuid: rbf_variance_mx,
        gp.mean.uuid: mean_mx,
        gp.mean_cond.uuid: mean_cond_mx
    }
    samples_rt = gp.draw_samples(F=mx.nd, variables=variables,
                                 num_samples=num_samples).asnumpy()

    samples_np = []
    for i in range(num_samples):
        X_i = X[i] if X_isSamples else X
        X_cond_i = X_cond[i] if X_cond_isSamples else X_cond
        Y_cond_i = Y_cond[i] if Y_cond_isSamples else Y_cond
        Y_cond_i = Y_cond_i - mean_cond_np[i] if X_cond_isSamples else Y_cond_i - mean_cond_np[0]
        lengthscale_i = rbf_lengthscale[i] if rbf_lengthscale_isSamples else rbf_lengthscale
        variance_i = rbf_variance[i] if rbf_variance_isSamples else rbf_variance
        rand_i = rand[i]
        rbf_np = GPy.kern.RBF(input_dim=2, ARD=True)
        rbf_np.lengthscale = lengthscale_i
        rbf_np.variance = variance_i
        K_np = rbf_np.K(X_i)
        Kc_np = rbf_np.K(X_cond_i, X_i)
        Kcc_np = rbf_np.K(X_cond_i)
        L = np.linalg.cholesky(Kcc_np)
        LInvY = dtrtrs(L, Y_cond_i, lower=1, trans=0)[0]
        LinvKxt = dtrtrs(L, Kc_np, lower=1, trans=0)[0]
        mu = LinvKxt.T.dot(LInvY)
        cov = K_np - LinvKxt.T.dot(LinvKxt)
        L_cov_np = np.linalg.cholesky(cov)
        sample_np = mu + L_cov_np.dot(rand_i)
        samples_np.append(sample_np)
    samples_np = np.array(samples_np) + mean_np

    assert np.issubdtype(samples_rt.dtype, dtype)
    assert get_num_samples(mx.nd, samples_rt) == num_samples
    assert np.allclose(samples_np, samples_rt)
def test_log_pdf(self, dtype, X, X_isSamples, X_cond, X_cond_isSamples,
                 Y_cond, Y_cond_isSamples, rbf_lengthscale,
                 rbf_lengthscale_isSamples, rbf_variance,
                 rbf_variance_isSamples, rv, rv_isSamples, num_samples):
    from scipy.linalg.lapack import dtrtrs
    X_mx = prepare_mxnet_array(X, X_isSamples, dtype)
    X_cond_mx = prepare_mxnet_array(X_cond, X_cond_isSamples, dtype)
    Y_cond_mx = prepare_mxnet_array(Y_cond, Y_cond_isSamples, dtype)
    rbf_lengthscale_mx = prepare_mxnet_array(rbf_lengthscale,
                                             rbf_lengthscale_isSamples, dtype)
    rbf_variance_mx = prepare_mxnet_array(rbf_variance,
                                          rbf_variance_isSamples, dtype)
    rv_mx = prepare_mxnet_array(rv, rv_isSamples, dtype)
    rv_shape = rv.shape[1:] if rv_isSamples else rv.shape
    rbf = RBF(2, True, 1., 1., 'rbf', None, dtype)
    X_var = Variable(shape=(5, 2))
    X_cond_var = Variable(shape=(8, 2))
    Y_cond_var = Variable(shape=(8, 1))
    gp = ConditionalGaussianProcess.define_variable(
        X=X_var, X_cond=X_cond_var, Y_cond=Y_cond_var, kernel=rbf,
        shape=rv_shape, dtype=dtype).factor
    variables = {
        gp.X.uuid: X_mx,
        gp.X_cond.uuid: X_cond_mx,
        gp.Y_cond.uuid: Y_cond_mx,
        gp.rbf_lengthscale.uuid: rbf_lengthscale_mx,
        gp.rbf_variance.uuid: rbf_variance_mx,
        gp.random_variable.uuid: rv_mx
    }
    log_pdf_rt = gp.log_pdf(F=mx.nd, variables=variables).asnumpy()

    log_pdf_np = []
    for i in range(num_samples):
        X_i = X[i] if X_isSamples else X
        X_cond_i = X_cond[i] if X_cond_isSamples else X_cond
        Y_cond_i = Y_cond[i] if Y_cond_isSamples else Y_cond
        lengthscale_i = rbf_lengthscale[i] if rbf_lengthscale_isSamples else rbf_lengthscale
        variance_i = rbf_variance[i] if rbf_variance_isSamples else rbf_variance
        rv_i = rv[i] if rv_isSamples else rv
        rbf_np = GPy.kern.RBF(input_dim=2, ARD=True)
        rbf_np.lengthscale = lengthscale_i
        rbf_np.variance = variance_i
        K_np = rbf_np.K(X_i)
        Kc_np = rbf_np.K(X_cond_i, X_i)
        Kcc_np = rbf_np.K(X_cond_i)
        L = np.linalg.cholesky(Kcc_np)
        LInvY = dtrtrs(L, Y_cond_i, lower=1, trans=0)[0]
        LinvKxt = dtrtrs(L, Kc_np, lower=1, trans=0)[0]
        mu = LinvKxt.T.dot(LInvY)
        cov = K_np - LinvKxt.T.dot(LinvKxt)
        log_pdf_np.append(
            multivariate_normal.logpdf(rv_i[:, 0], mean=mu[:, 0], cov=cov))
    log_pdf_np = np.array(log_pdf_np)
    isSamples_any = any([X_isSamples, rbf_lengthscale_isSamples,
                         rbf_variance_isSamples, rv_isSamples])

    assert np.issubdtype(log_pdf_rt.dtype, dtype)
    assert array_has_samples(mx.nd, log_pdf_rt) == isSamples_any
    if isSamples_any:
        assert get_num_samples(mx.nd, log_pdf_rt) == num_samples
    assert np.allclose(log_pdf_np, log_pdf_rt)
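# Numpy-only sketch of the conditioning math the tests above reproduce:
# mu = K(X, Xc) Kcc^{-1} y and cov = K - K(X, Xc) Kcc^{-1} K(Xc, X), each
# obtained from two dtrtrs solves against the Cholesky factor of Kcc. The
# rbf helper below is a hypothetical stand-in for GPy.kern.RBF.
import numpy as np
from scipy.linalg.lapack import dtrtrs

def rbf(a, b, variance=1.0, lengthscale=1.0):
    d2 = ((a[:, None, :] - b[None, :, :]) ** 2).sum(-1)
    return variance * np.exp(-0.5 * d2 / lengthscale ** 2)

rng = np.random.default_rng(6)
X_cond = rng.standard_normal((8, 2))
Y_cond = rng.standard_normal((8, 1))
X = rng.standard_normal((5, 2))

L = np.linalg.cholesky(rbf(X_cond, X_cond) + 1e-8 * np.eye(8))
LInvY = dtrtrs(L, Y_cond, lower=1)[0]             # L^{-1} y
LinvKxt = dtrtrs(L, rbf(X_cond, X), lower=1)[0]   # L^{-1} K(Xc, X)

mu = LinvKxt.T @ LInvY
cov = rbf(X, X) - LinvKxt.T @ LinvKxt
sample = mu + np.linalg.cholesky(cov + 1e-8 * np.eye(5)) @ rng.standard_normal((5, 1))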