def test_blocks_to_banded(T=5, D=3):
    """ Test blocks_to_banded correctness """
    # Build deterministic block tridiagonal pieces whose entries encode their
    # own position: entry = 100 * (block code) + row*10 + col.  Only used by
    # the commented-out visual check below; the real test uses random input.
    Ad = np.zeros((T, D, D))
    Aod = np.zeros((T-1, D, D))
    M = np.arange(1, D+1)[:, None] * 10 + np.arange(1, D+1)
    for t in range(T):
        Ad[t, :, :] = 100 * ((t+1)*10 + (t+1)) + M
    for t in range(T-1):
        Aod[t, :, :] = 100 * ((t+2)*10 + (t+1)) + M
    # print("Lower")
    # L = blocks_to_bands(Ad, Aod, lower=True)
    # print(L)
    # print("Upper")
    # U = blocks_to_bands(Ad, Aod, lower=False)
    # print(U)

    # Check inverse with random symmetric matrices
    Ad = npr.randn(T, D, D)
    Ad = (Ad + np.swapaxes(Ad, -1, -2)) / 2
    Aod = npr.randn(T-1, D, D)

    # Round trip through the lower-banded representation; only the lower
    # triangles of the diagonal blocks are expected to survive.
    Ad2, Aod2 = bands_to_blocks(blocks_to_bands(Ad, Aod, lower=True), lower=True)
    assert np.allclose(np.tril(Ad), np.tril(Ad2))
    assert np.allclose(Aod, Aod2)

    # Same round trip through the upper-banded representation.
    Ad3, Aod3 = bands_to_blocks(blocks_to_bands(Ad, Aod, lower=False), lower=False)
    assert np.allclose(np.triu(Ad), np.triu(Ad3))
    assert np.allclose(Aod, Aod3)
def lower_half(mat):
    """Fold a (possibly batched) matrix onto its lower triangle.

    Keeps the lower half plus the transposed strict upper half, all at
    half weight.  Necessary since numpy only uses the lower half of a
    covariance matrix, so the represented covariance is unchanged.

    Raises ArithmeticError for anything that is not 2-D or 3-D.
    """
    ndim = len(mat.shape)
    if ndim == 2:
        mirrored_upper = np.triu(mat, 1).T
        return (np.tril(mat) + mirrored_upper) * 0.5
    if ndim == 3:
        mirrored_upper = np.swapaxes(np.triu(mat, 1), 1, 2)
        return (np.tril(mat) + mirrored_upper) * 0.5
    raise ArithmeticError
def make_symm(X):
    '''
    Ensures that a matrix is symmetric by setting the over-diagonal
    coefficients as the transposed under-diagonal coefficients.  In our
    case, it keeps matrices robustly symmetric to rounding errors.

    X (2d-array): A matrix
    ----------------------------------------------------------------------
    returns (2d-array): The "symmetrized" matrix
    '''
    strict_lower = np.tril(X, k=-1)
    lower_with_diag = np.tril(X)
    # Mirror the lower triangle onto the upper one; the input's upper
    # triangle is discarded entirely.
    return strict_lower + lower_with_diag.T
def lower_half(mat):
    # Collapse a matrix (or a batch of matrices) onto its lower triangle
    # with the diagonal at half weight.  Necessary since numpy only uses
    # the lower half of a covariance matrix, so this leaves the
    # represented covariance unchanged.
    rank = len(mat.shape)
    if rank == 2:
        folded = np.tril(mat) + np.triu(mat, 1).T
    elif rank == 3:
        folded = np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1, 2)
    else:
        raise ArithmeticError
    return 0.5 * folded
def logZ(params):
    """Log normalizer of the LDS for the given (A, Cholesky-of-Q) params.

    `params` packs the dynamics matrix and an unconstrained square matrix
    whose lower triangle parameterizes the Cholesky factor of Q.  The
    remaining model pieces (Y, C, R, mu0, Q0) are taken from the
    enclosing scope.
    """
    A, chol_raw = params
    # Mask (rather than np.tril directly) so only the lower-triangular
    # entries of the raw parameter participate.
    mask = np.tril(np.ones_like(chol_raw))
    L_Q = chol_raw * mask
    Q = np.dot(L_Q, L_Q.T)
    return lds_logZ(Y, A, C, Q, R, mu0, Q0)
def invPsd(A, AChol=None, returnChol=False):
    """Invert a positive-definite matrix via its Cholesky factor.

    A: positive-definite matrix.  AChol: optional precomputed lower
    Cholesky factor of A.  If returnChol is True, also return the factor.

    Adapted from:
    https://github.com/mattjj/pybasicbayes/blob/9c00244b2d6fd767549de6ab5d0436cec4e06818/pybasicbayes/util/general.py
    """
    if AChol is None:
        chol = np.linalg.cholesky(A)
    else:
        chol = AChol
    # dpotri fills only the lower triangle of the inverse...
    inverse = lapack.dpotri(chol, lower=True)[0]
    # ...so mirror the strict lower triangle to get the full symmetric matrix.
    inverse += np.tril(inverse, k=-1).T
    if returnChol:
        return inverse, chol
    return inverse
def _is_upper_triangular(A): # This function could possibly be of wider interest. if isspmatrix(A): lower_part = scipy.sparse.tril(A, -1) # Check structural upper triangularity, # then coincidental upper triangularity if needed. return lower_part.nnz == 0 or lower_part.count_nonzero() == 0 else: return not np.tril(A, -1).any()
def unpack_params(params):
    """Unpacks parameter vector into the proportions, means and covariances of
    each mixture component.  The covariance matrices are parametrized by their
    Cholesky decompositions."""
    log_proportions = parser.get(params, "log proportions")
    # Normalize in log space so the mixture proportions sum to one.
    normalized_log_proportions = log_proportions - logsumexp(log_proportions)
    means = parser.get(params, "means")
    # Strict lower triangles (k=-1): the diagonal is stored separately in
    # log space and re-inserted below.
    lower_tris = np.tril(parser.get(params, "lower triangles"), k=-1)
    # exp keeps the Cholesky diagonal entries positive.
    diag_chols = np.exp(parser.get(params, "log diagonals"))
    # NOTE(review): np.make_diagonal is autograd's numpy extension, not plain
    # numpy — `np` here is presumably autograd.numpy; confirm at import site.
    chols = lower_tris + np.make_diagonal(diag_chols, axis1=-1, axis2=-2)
    return normalized_log_proportions, means, chols
def KL_two_gaussians(params):
    """KL divergence KL(N(mu, Sigma) || N(0, I)).

    `params` packs mu in its first column and an unconstrained
    parameterization of Sigma in the next d columns; the diagonal of that
    block is exponentiated (to stay positive) and the upper triangle is
    mirrored onto the lower one.
    """
    d = np.shape(params)[0] - 1
    mu = params[0:d, 0]
    raw = params[0:d, 1:d + 1]
    raw_diag = np.diag(raw)
    # Replace the diagonal by its elementwise exponential.
    pos_diag = raw - np.diag(raw_diag) + np.diag(np.exp(raw_diag))
    # Symmetrize: keep the upper triangle (incl. diagonal) and mirror it down.
    Sigma = pos_diag - np.tril(pos_diag) + np.transpose(np.triu(pos_diag))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    Sigma_inv = np.linalg.inv(Sigma)
    logdet_ratio = np.log(np.linalg.det(Sigma) / np.linalg.det(sigmaPrior))
    trace_term = np.trace(np.dot(Sigma_inv, sigmaPrior))
    diff = mu - muPrior
    quad_term = np.dot(np.transpose(diff), np.dot(Sigma_inv, diff))
    return 1 / 2 * (logdet_ratio - d + trace_term + quad_term)
def unpack_params(params):
    """Unpacks parameter vector into the proportions, means and covariances of
    each mixture component.  The covariance matrices are parametrized by their
    Cholesky decompositions."""
    log_proportions = parser.get(params, 'log proportions')
    # Normalize in log space so the mixture proportions sum to one.
    normalized_log_proportions = log_proportions - logsumexp(log_proportions)
    means = parser.get(params, 'means')
    # Strict lower triangles (k=-1): diagonals live separately in log space.
    lower_tris = np.tril(parser.get(params, 'lower triangles'), k=-1)
    # exp keeps the Cholesky diagonal entries positive.
    diag_chols = np.exp( parser.get(params, 'log diagonals'))
    # NOTE(review): np.make_diagonal is autograd's numpy extension, not plain
    # numpy — `np` here is presumably autograd.numpy; confirm at import site.
    chols = lower_tris + np.make_diagonal(diag_chols, axis1=-1, axis2=-2)
    return normalized_log_proportions, means, chols
def plot_covariances(Ca, Cb, ax, only_sds=False, corr_coefs=False):
    """Scatter-plot comparison of two covariance matrices.

    Plots corresponding entries of `Ca` (x-axis) against `Cb` (y-axis),
    plus an identity line for reference.

    Args:
        Ca, Cb: square covariance matrices of the same shape.
        ax: matplotlib-style axes (needs .scatter and .plot).
        only_sds: if True, compare only standard deviations (sqrt of the
            diagonals); otherwise compare the nonzero strictly-lower-
            triangular entries.
        corr_coefs: if True, normalize each matrix to correlation
            coefficients before comparing.
    """
    if corr_coefs:
        sda = np.sqrt(np.diag(Ca))
        # BUG FIX: previously computed from np.diag(Ca), so Cb was
        # normalized by Ca's standard deviations instead of its own.
        sdb = np.sqrt(np.diag(Cb))
        Ca = Ca / np.dot(sda[:, None], sda[None, :])
        Cb = Cb / np.dot(sdb[:, None], sdb[None, :])

    if only_sds:
        avals = np.sqrt(np.diag(Ca))
        bvals = np.sqrt(np.diag(Cb))
    else:
        # Strictly-lower-triangular entries; note this silently drops
        # entries that are exactly zero.
        ac = np.tril(Ca, -1)
        avals = ac[ac != 0].flatten()
        bc = np.tril(Cb, -1)
        bvals = bc[bc != 0].flatten()

    lim = [
        np.min([avals.min(), bvals.min()]),
        np.max([avals.max(), bvals.max()])
    ]
    ax.scatter(avals, bvals)
    ax.plot(lim, lim, c='grey', alpha=.25)
def unpack_params(params):
    """Unpack a parameter vector into mixture proportions, means and the
    Cholesky factors of the component covariances (strict lower triangle
    plus an exponentiated, hence positive, diagonal)."""
    log_proportions = parser.get(params, 'log proportions')
    normalized_log_proportions = log_proportions - logsumexp(log_proportions)
    means = parser.get(params, 'means')
    lower_tris = np.tril(parser.get(params, 'lower triangles'), k=-1)
    diag_chols = np.exp(parser.get(params, 'log diagonals'))
    # Assemble one (1, D, D) factor per component, then stack them.
    chols = np.concatenate(
        [np.expand_dims(tri + np.diag(diag), 0)
         for tri, diag in zip(lower_tris, diag_chols)],
        axis=0)
    return normalized_log_proportions, means, chols
def KL_two_gaussians(params):
    """KL(N(mu, Sigma) || N(0, I)) with mu and an unconstrained Sigma
    parameterization packed column-wise into `params`."""
    d = np.shape(params)[0] - 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d + 1]
    diag_vals = np.diag(toSigma)
    # Swap the stored diagonal for its exponential so variances stay positive.
    intSigma = toSigma - np.diag(diag_vals) + np.diag(np.exp(diag_vals))
    # Mirror the upper triangle (incl. diagonal) onto the lower one.
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    prec = np.linalg.inv(Sigma)
    delta = mu - muPrior
    return 1 / 2 * (
        np.log(np.linalg.det(Sigma) / np.linalg.det(sigmaPrior))
        - d
        + np.trace(np.dot(prec, sigmaPrior))
        + np.dot(np.transpose(delta), np.dot(prec, delta)))
def unpack_params(params):
    """Unpacks parameter vector into the proportions, means and covariances of
    each mixture component.  The covariance matrices are parametrized by their
    Cholesky decompositions."""
    log_proportions = parser.get(params, 'log proportions')
    # Normalize in log space so the mixture proportions sum to one.
    normalized_log_proportions = log_proportions - logsumexp(
        log_proportions)
    means = parser.get(params, 'means')
    # Strict lower triangles (k=-1); diagonals are stored separately in
    # log space and re-added below.
    lower_tris = np.tril(parser.get(params, 'lower triangles'), k=-1)
    # exp keeps the Cholesky diagonal entries positive.
    diag_chols = np.exp(parser.get(params, 'log diagonals'))
    chols = []
    # Assemble one Cholesky factor per mixture component.
    for lower_tri, diag in zip(lower_tris, diag_chols):
        chols.append(np.expand_dims(lower_tri + np.diag(diag), 0))
    chols = np.concatenate(chols, axis=0)
    return normalized_log_proportions, means, chols
def get_laplaces_init_params(log_p, z_len, num_epochs, ε=1e-4):
    """ A function to generate Laplaces approximation

    Args:
        log_p ([function]): [An autograd differentiable function]
        z_len ([int]): # of latent dimensions
        num_epochs ([int]): # of iterations

    Returns:
        [tuple]: [MAP estimate and the cholesky factor of \
            inverse of negative Hessian at MAP estimate]
    """
    # Random starting point for the optimizer.
    z_0 = npr.rand(z_len)
    # using minimize to maximize
    val_and_grad = autograd.value_and_grad(lambda z: -log_p(z))
    rez = scipy.optimize.minimize(val_and_grad,
                                  z_0,
                                  method='BFGS',
                                  jac=True,
                                  options={
                                      'maxiter': num_epochs,
                                      'disp': True
                                  })
    # MAP estimate (argmax of log_p).
    mu = rez.x
    # Hessian of log_p at the MAP via finite differences of the gradient.
    H = Hessian_finite_differences(z=mu,
                                   grad_f=autograd.grad(lambda z: log_p(z)),
                                   ε=ε)
    try:
        neg_H_inv = np.linalg.inv(-H)  # -H_inv = inv(-H)
        L = np.linalg.cholesky(neg_H_inv)
        # modification to adjust for pos_tril function
        L = inv_pos_tril(L)
    except Exception as e:
        # Fallback when -H is not positive definite (or inversion fails):
        # a unit-diagonal lower triangle with small random off-diagonal noise.
        print('Using noisy unit covariance...')
        L = np.tril(0.1 * npr.randn(z_len, z_len), -1) + np.eye(z_len)
        # modification to adjust for pos_tril function
        L = inv_pos_tril(L)
    return mu, L
def expectation(params,y,X,eps,N,u):
    # Monte-Carlo estimate of the expected log-likelihood under the
    # variational distribution.  `params` packs mu (first column) and an
    # unconstrained parameterization of a scale matrix Sigma; `eps` holds
    # the standard-normal draws used for the reparameterization.
    # (Python 2 code: note the `print` statements below.)
    #for each sample of theta, calculate likelihood
    #likelihood has participants
    #for each participant, we have N particles
    #with L samples, n participants, N particles per participant and sample, we have
    #L*n*N particles
    #get the first column to be mu
    d = np.shape(X)[-1]+1
    mu = params[0:d,0]
    toSigma = params[0:d,1:d+1]
    # Exponentiate the stored diagonal so it is positive, then mirror the
    # upper triangle onto the lower one to symmetrize.
    intSigma = toSigma-np.diag(np.diag(toSigma))+np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma-np.tril(intSigma)+np.transpose(np.triu(intSigma))
    print mu
    print Sigma
    n = X.shape[0]
    E = 0
    #iterate over number of samples of theta
    for j in range(np.shape(eps)[0]):
        # Reparameterization: beta = mu + Sigma @ eps_j.
        beta = mu+np.dot(Sigma,eps[j,:])
        #this log likelihood will iterate over both the participants and the particles
        E+=log_likelihood(beta,y,X,u[j*(n*N):(j+1)*(n*N)])
    # NOTE(review): divides by len(beta) (= d, the dimension of beta) rather
    # than by the number of theta samples — confirm this is intended.
    return E/len(beta)
def expectation(params, y, X, eps, N, u):
    # Monte-Carlo estimate of the expected log-likelihood under the
    # variational distribution; `eps` holds the standard-normal draws used
    # for the reparameterization.  (Python 2 code: note the `print`
    # statements below.)
    #for each sample of theta, calculate likelihood
    #likelihood has participants
    #for each participant, we have N particles
    #with L samples, n participants, N particles per participant and sample, we have
    #L*n*N particles
    #get the first column to be mu
    d = np.shape(X)[-1] + 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d + 1]
    # Exponentiate the stored diagonal so it is positive, then mirror the
    # upper triangle onto the lower one to symmetrize.
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(
        np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    print mu
    print Sigma
    n = X.shape[0]
    E = 0
    #iterate over number of samples of theta
    for j in range(np.shape(eps)[0]):
        # Reparameterization: beta = mu + Sigma @ eps_j.
        beta = mu + np.dot(Sigma, eps[j, :])
        #this log likelihood will iterate over both the participants and the particles
        E += log_likelihood(beta, y, X, u[j * (n * N):(j + 1) * (n * N)])
    # NOTE(review): divides by len(beta) (= d, the dimension of beta) rather
    # than by the number of theta samples — confirm this is intended.
    return E / len(beta)
def param_func(param, matrix):
    """Project `param` onto the triangle matching the factor's
    orientation and return the product tri @ tri.T."""
    if matrix.factor.lower:
        tri = anp.tril(param)
    else:
        tri = anp.triu(param)
    return tri @ tri.T
def fun(x):
    # Keep only the band at/below the 2nd superdiagonal, then reduce to a
    # scalar for gradient checking.
    banded = np.tril(x, k=2)
    return to_scalar(banded)
def fun(x):
    # Zero out everything above the 2nd superdiagonal.
    band_offset = 2
    return np.tril(x, k=band_offset)
def symmetrize(A):
    """Symmetric matrix built by averaging A's lower triangle with its
    transpose; the input's upper triangle is ignored."""
    lower = np.tril(A)
    return 0.5 * (lower + lower.T)
def lower_half(mat):
    # Fold the strict upper triangle onto the lower one and halve, leaving
    # the diagonal at half weight.  Necessary since numpy only uses the
    # lower half of a covariance matrix.
    mirrored_upper = np.triu(mat, 1).T
    return (np.tril(mat) + mirrored_upper) * 0.5
def symmetrize(A):
    # Symmetric part assembled from A's lower triangle; T is the (possibly
    # batched) transpose helper from this module.
    lower_tri = np.tril(A)
    return (lower_tri + T(lower_tri)) / 2.
def solve_triangular_grad(g): v = solve_triangular(a, g, trans=_flip(a, trans), lower=lower) return -transpose(tri(anp.matmul(anp.reshape(v, ans.shape), T(ans)))) return solve_triangular_grad solve_triangular.defgrad(make_grad_solve_triangular) solve_triangular.defgrad(lambda ans, a, b, trans=0, lower=False, **kwargs: lambda g: solve_triangular(a, g, trans=_flip(a, trans), lower=lower), argnum=1) ### cholesky solve_trans = lambda L, X: solve_triangular(L, X, lower=True, trans='T') solve_conj = lambda L, X: solve_trans(L, T(solve_trans(L, T(X)))) phi = lambda X: anp.tril(X) / (1. + anp.eye(X.shape[-1])) cholesky = primitive(np.linalg.cholesky) cholesky.defgrad(lambda L, A: lambda g: symm(solve_conj(L, phi(anp.matmul(T(L), g))))) ### operations on cholesky factors solve_tri = partial(solve_triangular, lower=True) solve_posdef_from_cholesky = lambda L, x: solve_tri(L, solve_tri(L, x), trans='T') @primitive def inv_posdef_from_cholesky(L, lower=True): flat_L = np.reshape(L, (-1,) + L.shape[-2:]) return np.reshape(cyla.inv_posdef_from_cholesky(C(flat_L), lower), L.shape)
def _symmetrize(a): # assumes 0-diags bott = np.tril(a) + np.tril(a).T top = np.triu(a) + np.triu(a).T # return (bott+top)/2. + infs return np.fmax(bott, top)
def symmetrize(A):
    """Return the symmetric part of A built from its lower triangle
    (upper-triangular input entries are discarded); T is the transpose
    helper from this module."""
    low = np.tril(A)
    return (low + T(low))/2.
def fun(x):
    # Scalar-valued wrapper around tril (band at/below the 2nd
    # superdiagonal) for gradient checking.
    return to_scalar(np.tril(x, k = 2))
# Derivative of `fun`, reduced to a scalar the same way.
d_fun = lambda x : to_scalar(grad(fun)(x))
def symmetrize(A):
    # Average A's lower triangle with its transpose to get a symmetric
    # matrix; the input's upper triangle does not contribute.
    low = np.tril(A)
    return (low + low.T) * 0.5
def num_sqrt_cov(num_weights):
    """Number of free entries in a lower-triangular (Cholesky) square root
    of a `num_weights` x `num_weights` covariance matrix: n*(n+1)/2.

    The previous implementation relied on np.tril broadcasting a 1-D
    array of ones into an n x n matrix and summing it — O(n^2) memory for
    a closed-form count.  This returns the same value directly.
    """
    return num_weights * (num_weights + 1) // 2
compute_entropy_grad = grad(compute_entropy) # settings #step_size= 1e-5/N #itt_max = 30000 num_samples = 1 num_samples_swa = 1 num_params = K means1 = np.ones((num_params,)) means2 = np.zeros((num_params,)) means = means_all[n*num_params:(n+1)*num_params] betas = betas_all[num_params*(num_params+1)*n//2:num_params*(num_params+1)*(n+1)//2] tmpL = np.tril(np.ones((K, K))) L = choleskies._flat_to_triang_pure(betas)[0,:] Sigma = L @ L.T #means = means_list[n] #sigmas = sigmas_list[n] params = [means.copy(), betas.copy()] means_vb_clr, betas_vb_clr = means.copy(), betas.copy() means_vb_swa, betas_vb_swa = means.copy(), betas.copy() means_vb_rms, betas_vb_rms = means.copy(), betas.copy() L_vb_clr = L.copy() L_vb_swa = L.copy() L_vb_rms = L.copy() params_constant_lr = [means_vb_clr, betas_vb_clr]
def copyltu(x):
    """Copy the lower triangle of x onto its upper triangle ("copy lower
    to upper"), producing a symmetric matrix; the diagonal appears once."""
    lower = anp.tril(x)
    strict_lower = anp.tril(x, -1)
    return lower + anp.transpose(strict_lower)
anp.dsplit.defjvp(lambda g, ans, gvs, vs, ary, idxs: anp.dsplit(g, idxs)) anp.ravel.defjvp( lambda g, ans, gvs, vs, x, order=None: anp.ravel(g, order=order)) anp.expand_dims.defjvp( lambda g, ans, gvs, vs, x, axis: anp.expand_dims(g, axis)) anp.squeeze.defjvp(lambda g, ans, gvs, vs, x, axis=None: anp.squeeze(g, axis)) anp.diag.defjvp(lambda g, ans, gvs, vs, x, k=0: anp.diag(g, k)) anp.flipud.defjvp(lambda g, ans, gvs, vs, x, : anp.flipud(g)) anp.fliplr.defjvp(lambda g, ans, gvs, vs, x, : anp.fliplr(g)) anp.rot90.defjvp(lambda g, ans, gvs, vs, x, k=1: anp.rot90(g, k)) anp.trace.defjvp(lambda g, ans, gvs, vs, x, offset=0: anp.trace(g, offset)) anp.full.defjvp(lambda g, ans, gvs, vs, shape, fill_value, dtype=None: anp. full(shape, g, dtype), argnum=1) anp.triu.defjvp(lambda g, ans, gvs, vs, x, k=0: anp.triu(g, k=k)) anp.tril.defjvp(lambda g, ans, gvs, vs, x, k=0: anp.tril(g, k=k)) anp.clip.defjvp(lambda g, ans, gvs, vs, x, a_min, a_max: g * anp.logical_and( ans != a_min, ans != a_max)) anp.swapaxes.defjvp( lambda g, ans, gvs, vs, x, axis1, axis2: anp.swapaxes(g, axis1, axis2)) anp.rollaxis.defjvp( lambda g, ans, gvs, vs, a, axis, start=0: anp.rollaxis(g, axis, start)) anp.real_if_close.defjvp(lambda g, ans, gvs, vs, x: npg.match_complex(vs, g)) anp.real.defjvp(lambda g, ans, gvs, vs, x: anp.real(g)) anp.imag.defjvp(lambda g, ans, gvs, vs, x: npg.match_complex(vs, -1j * g)) anp.conj.defjvp(lambda g, ans, gvs, vs, x: anp.conj(g)) anp.angle.defjvp(lambda g, ans, gvs, vs, x: npg.match_complex( vs, g * anp.conj(x * 1j) / anp.abs(x)**2)) anp.where.defjvp(lambda g, ans, gvs, vs, c, x=None, y=None: anp.where( c, g, anp.zeros(anp.shape(g))),
def inv_pos_tril(x):
    """Inverse of the pos_tril reparameterization: keep the strict lower
    triangle and map the diagonal through inv_pos_diag."""
    assert x.ndim == 2
    strict_lower = np.tril(x, -1)
    new_diag = np.diag(inv_pos_diag(np.diag(x)))
    return strict_lower + new_diag
def fun(x):
    # Keep the band at/below the 2nd superdiagonal.
    offset = 2
    return np.tril(x, k=offset)

# Random test matrix used by the surrounding checks.
mat = npr.randn(5, 5)
def check_symmetric_matrix_grads(fun, *args):
    # Gradient-check `fun` restricted to symmetric first arguments: the
    # first argument is symmetrized (from its lower triangle) before the
    # call so perturbations stay on the symmetric manifold.
    def symmetrized_fun(*inner_args):
        first = symm(np.tril(inner_args[0]))
        return fun(first, *inner_args[1:])
    return check_grads(symmetrized_fun, *args)
def full_chol(flat):return np.tril(flat) def num_sqrt_cov(num_weights):