def test_rts_backward_step():
    npr.seed(0)
    n = 3

    Jns = rand_psd(n)
    hns = npr.randn(n)
    mun = npr.randn(n)

    Jnp = rand_psd(n)
    hnp = npr.randn(n)

    Jf = rand_psd(n) + 10*np.eye(n)
    hf = npr.randn(n)

    bigJ = rand_psd(2*n)
    J11, J12, J22 = -1./2*bigJ[:n,:n], -bigJ[:n,n:], -1./2*bigJ[n:,n:]

    next_smooth = -1./2*Jns, hns, mun
    next_pred = -1./2*Jnp, hnp
    filtered = -1./2*Jf, hf
    pair_param = J11, J12, J22, 0.

    Js1, hs1, (mu1, ExxT1, ExxnT1) = natural_rts_backward_step(
        next_smooth, next_pred, filtered, pair_param)
    Js2, hs2, (mu2, ExxT2, ExnxT2) = rts_backward_step(
        next_smooth, next_pred, filtered, pair_param)

    assert np.allclose(Js1, Js2)
    assert np.allclose(hs1, hs2)
    assert np.allclose(mu1, mu2)
    assert np.allclose(ExxT1, ExxT2)
    assert np.allclose(ExxnT1, ExnxT2)
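# Several of these tests rely on a rand_psd helper that is not shown here; a
# minimal sketch of the usual construction (an assumption, not the project's
# exact definition) draws a random square matrix and forms A A^T, which is
# symmetric positive semi-definite:
def _rand_psd_sketch(n):
    A = npr.randn(n, n)
    return np.dot(A, A.T)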
def add_data(self, S, F=None):
    """
    Add a data set to the list of observations.
    First, filter the data with the impulse response basis,
    then instantiate a set of parents for this data set.

    :param S: a TxK matrix of event counts for each time bin and each process.
    """
    assert isinstance(S, np.ndarray) and S.ndim == 2 and S.shape[1] == self.K \
           and np.amin(S) >= 0 and np.issubdtype(S.dtype, np.integer), \
           "Data must be a TxK array of event counts"

    T = S.shape[0]

    if F is None:
        # Filter the data into a TxKxB array
        Ftens = self.basis.convolve_with_basis(S)

        # Flatten this into a T x (KxB) matrix
        # [F00, F01, F02, F10, F11, ... F(K-1)0, F(K-1)(B-1)]
        F = Ftens.reshape((T, self.K * self.B))
        assert np.allclose(F[:,0], Ftens[:,0,0])
        if self.B > 1:
            assert np.allclose(F[:,1], Ftens[:,0,1])
        if self.K > 1:
            assert np.allclose(F[:,self.B], Ftens[:,1,0])

        # Prepend a column of ones
        F = np.hstack((np.ones((T,1)), F))

    for k, node in enumerate(self.nodes):
        node.add_data(F, S[:,k])
def test_make_diagonal():
    def fun(D):
        return to_scalar(np.make_diagonal(D, axis1=-1, axis2=-2))

    D = np.random.randn(4)
    A = np.make_diagonal(D, axis1=-1, axis2=-2)
    assert np.allclose(np.diag(A), D)
    check_grads(fun, D)

    D = np.random.randn(3, 4)
    A = np.make_diagonal(D, axis1=-1, axis2=-2)
    assert all([np.allclose(np.diag(A[i]), D[i]) for i in range(3)])
    check_grads(fun, D)
def test_checkpoint_correctness():
    bar = lambda x, y: 2*x + y + 5
    checkpointed_bar = checkpoint(bar)
    foo = lambda x: bar(x, x/3.) + bar(x, x**2)
    foo2 = lambda x: checkpointed_bar(x, x/3.) + checkpointed_bar(x, x**2)
    assert np.allclose(foo(3.), foo2(3.))
    assert np.allclose(grad(foo)(3.), grad(foo2)(3.))

    baz = lambda *args: sum(args)
    checkpointed_baz = checkpoint(baz)
    foobaz = lambda x: baz(x, x/3.)
    foobaz2 = lambda x: checkpointed_baz(x, x/3.)
    assert np.allclose(foobaz(3.), foobaz2(3.))
    assert np.allclose(grad(foobaz)(3.), grad(foobaz2)(3.))
def test_getter():
    def fun(input_tuple):
        A = np.sum(input_tuple[0])
        B = np.sum(input_tuple[1])
        C = np.sum(input_tuple[1])
        return A + B + C

    d_fun = grad(fun)
    input_tuple = (npr.randn(5, 6), npr.randn(4, 3), npr.randn(2, 4))
    result = d_fun(input_tuple)
    assert np.allclose(result[0], np.ones((5, 6)))
    assert np.allclose(result[1], 2 * np.ones((4, 3)))
    assert np.allclose(result[2], np.zeros((2, 4)))
def test_logit():
    """ simple test to ensure logistic(logit(x)) = x """
    p = rand_logistic_norm(10, 4)
    x = logit(p)
    pt = logistic(x)
    assert np.allclose(p, pt), "Test logit fails!"
    print("test_logit passes!")
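# For reference, a minimal sketch of the pair being tested (assuming the usual
# definitions; the project's own logit/logistic may differ in details):
# logit(p) = log(p / (1 - p)) and logistic(x) = 1 / (1 + exp(-x)), which are
# inverses of each other on (0, 1).
def _logit_sketch(p):
    return np.log(p) - np.log1p(-p)

def _logistic_sketch(x):
    return 1. / (1. + np.exp(-x))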
def test_getter():
    def fun(input_dict):
        A = np.sum(input_dict['item_1'])
        B = np.sum(input_dict['item_2'])
        C = np.sum(input_dict['item_2'])
        return A + B + C

    d_fun = grad(fun)
    input_dict = {'item_1' : npr.randn(5, 6),
                  'item_2' : npr.randn(4, 3),
                  'item_X' : npr.randn(2, 4)}
    result = d_fun(input_dict)
    assert np.allclose(result['item_1'], np.ones((5, 6)))
    assert np.allclose(result['item_2'], 2 * np.ones((4, 3)))
    assert np.allclose(result['item_X'], np.zeros((2, 4)))
def _set_startprob(self, startprob):
    if startprob is None:
        startprob = np.tile(1.0 / self.n_components, self.n_components)
    else:
        startprob = np.asarray(startprob, dtype=np.float64)

    if not np.all(startprob <= 1.0):
        normalize(startprob)

    if len(startprob) != self.n_components:
        if len(startprob) == self.n_unique:
            startprob_split = np.copy(startprob) / (1.0 + self.n_tied)
            startprob = np.zeros(self.n_components)
            for u in range(self.n_unique):
                for t in range(self.n_chain):
                    startprob[u*(self.n_chain)+t] = startprob_split[u].copy()
        else:
            raise ValueError("cannot match shape of startprob")

    if not np.allclose(np.sum(startprob), 1.0):
        raise ValueError('startprob must sum to 1.0')

    self._log_startprob = np.log(np.asarray(startprob).copy())
def test_fast_conv_grad():
    skip = 1
    block_size = (11, 11)
    depth = 1
    img = np.random.randn(51, 51, depth)
    filt = np.dstack([cv.gauss_filt_2D(shape=block_size, sigma=2)
                      for k in range(depth)])
    filt = cv.gauss_filt_2D(shape=block_size, sigma=2)

    def loss_fun(filt):
        out = fc.convolve(filt, img)
        return np.sum(np.sin(out) + out**2)
    loss_fun(filt)
    loss_grad = grad(loss_fun)

    def loss_fun_slow(filt):
        out = auto_convolve(img.squeeze(), filt, mode='valid')
        return np.sum(np.sin(out) + out**2)
    loss_fun_slow(filt)
    loss_grad_slow = grad(loss_fun_slow)

    # compare gradient timing
    loss_grad_slow(filt)
    loss_grad(filt)

    ## check numerical gradients
    num_grad = np.zeros(filt.shape)
    for i in range(filt.shape[0]):
        for j in range(filt.shape[1]):
            de = np.zeros(filt.shape)
            de[i, j] = 1e-4
            num_grad[i,j] = (loss_fun(filt + de) - loss_fun(filt - de)) / (2*de[i,j])

    assert np.allclose(loss_grad(filt), num_grad), "convolution gradient failed!"
def test_rts_1():
    npr.seed(0)
    n = 3

    # inputs
    Jns = rand_psd(n) + 10*np.eye(n)
    hns = npr.randn(n)
    Jnp = rand_psd(n)
    hnp = npr.randn(n)
    Jf = rand_psd(n)
    hf = npr.randn(n)

    # constants
    bigJ = rand_psd(2*n)
    J11, J12, J22 = bigJ[:n,:n], bigJ[:n,n:], bigJ[n:,n:]
    L = np.linalg.cholesky(Jns - Jnp + J22)

    # outgrads
    g_Js = npr.randn(n,n)
    g_hs = npr.randn(n)

    def step1(L, hns, hnp, Jf, hf):
        temp = solve_triangular(L, J12.T)
        Js = Jf + J11 - np.dot(temp.T, temp)
        hs = hf - np.dot(temp.T, solve_triangular(L, hns - hnp))
        return Js, hs

    # ans
    Js, hs = step1(L, hns, hnp, Jf, hf)

    def fun(args):
        Js, hs = step1(*args)
        return np.sum(g_Js * Js) + np.sum(g_hs * hs)

    g_L1, g_hns1, g_hnp1, g_Jf1, g_hf1 = grad(fun)((L, hns, hnp, Jf, hf))
    g_L2, g_hns2, g_hnp2, g_Jf2, g_hf2 = rts_1_grad(
        g_Js, g_hs, Js, hs, L, hns, hnp, Jf, hf, J11, J12)

    assert np.allclose(g_hns1, g_hns2)
    assert np.allclose(g_hnp1, g_hnp2)
    assert np.allclose(g_Jf1, g_Jf2)
    assert np.allclose(g_hf1, g_hf2)
    assert np.allclose(g_L1, g_L2)
def test_rts_3():
    npr.seed(0)
    n = 3

    # inputs
    L = np.linalg.cholesky(rand_psd(n))
    Sigma = rand_psd(n)
    mu = npr.randn(n)
    mun = npr.randn(n)

    # constants
    J12 = rand_psd(2*n)[:n,n:]

    # outgrads
    g_ExnxT = npr.randn(n,n)
    g_ExxT = npr.randn(n,n)
    g_Ex = npr.randn(n)

    def step3(L, Sigma, mu, mun):
        temp2 = np.dot(-J12.T, Sigma)
        Sigma_21 = solve_posdef_from_cholesky(L, temp2)
        ExnxT = Sigma_21 + np.outer(mun, mu)
        ExxT = Sigma + np.outer(mu, mu)
        return mu, ExxT, ExnxT

    # ans
    Ex, ExxT, ExnxT = step3(L, Sigma, mu, mun)

    # compare grads
    def fun(args):
        Ex, ExxT, ExnxT = step3(*args)
        return np.sum(ExnxT * g_ExnxT) + np.sum(ExxT * g_ExxT) + np.sum(Ex * g_Ex)

    g_L1, g_Sigma1, g_mu1, g_mun1 = grad(fun)((L, Sigma, mu, mun))
    g_L2, g_Sigma2, g_mu2, g_mun2 = rts_3_grad(
        g_Ex, g_ExxT, g_ExnxT, Ex, ExxT, ExnxT, L, Sigma, mu, mun, J12)

    assert np.allclose(g_L1, g_L2)
    assert np.allclose(g_Sigma1, g_Sigma2)
    assert np.allclose(g_mu1, g_mu2)
    assert np.allclose(g_mun1, g_mun2)
def test_natural_predict():
    npr.seed(0)
    n = 3

    J = rand_psd(n)
    h = npr.randn(n)
    bigJ = rand_psd(2*n)
    J11, J12, J22 = bigJ[:n,:n], bigJ[:n,n:], bigJ[n:,n:]
    logZ = npr.randn()

    J, J11, J12, J22 = -1./2*J, -1./2*J11, -J12, -1./2*J22

    (J_pred_1, h_pred_1), lognorm1 = _natural_predict(J, h, J11, J12, J22, logZ)
    (J_pred_2, h_pred_2), lognorm2 = __natural_predict(J, h, J11, J12, J22, logZ)

    assert np.allclose(J_pred_1, J_pred_2)
    assert np.allclose(h_pred_1, h_pred_2)
    assert np.isclose(lognorm1, lognorm2)
def test_getter():
    def fun(input_list):
        A = np.sum(input_list[0])
        B = np.sum(input_list[1])
        C = np.sum(input_list[1])
        return A + B + C

    d_fun = grad(fun)
    input_list = [npr.randn(5, 6), npr.randn(4, 3), npr.randn(2, 4)]
    result = d_fun(input_list)
    print(result)
    assert np.allclose(result[0], np.ones((5, 6)))
    assert np.allclose(result[1], 2 * np.ones((4, 3)))
    assert np.allclose(result[2], np.zeros((2, 4)))
def check_vjp(fun, arg):
    vs_in = vspace(arg)
    vs_out = vspace(fun(arg))
    autograd_jac = linear_fun_to_matrix(
        flatten_fun(make_vjp(fun)(arg)[0], vs_out), vs_out).T
    numerical_jac = linear_fun_to_matrix(
        numerical_deriv(flatten_fun(fun, vs_in), vspace_flatten(arg)), vs_in)
    assert np.allclose(autograd_jac, numerical_jac)
def test_nograd():
    # we want this to raise non-differentiability error
    fun = lambda x: np.allclose(x, (x*3.0)/3.0)
    try:
        grad(fun)(np.array([1., 2., 3.]))
    except TypeError:
        pass
    else:
        raise Exception('Expected non-differentiability exception')
def check_equivalent(A, B, rtol=RTOL, atol=ATOL):
    A_vspace = vspace(A)
    B_vspace = vspace(B)
    A_flat = vspace_flatten(A)
    B_flat = vspace_flatten(B)
    assert A_vspace == B_vspace, \
        "VSpace mismatch:\nanalytic: {}\nnumeric: {}".format(A_vspace, B_vspace)
    assert np.allclose(vspace_flatten(A), vspace_flatten(B), rtol=rtol, atol=atol), \
        "Diffs are:\n{}.\nanalytic is:\n{}.\nnumeric is:\n{}.".format(
            A_flat - B_flat, A_flat, B_flat)
def test_dpotrs_grad():
    npr.seed(0)
    n = 3
    s = 5

    J = rand_psd(n)
    h = npr.randn(n, s)
    L = np.linalg.cholesky(J)

    def dpotrs(args):
        L, h = args
        return solve_triangular(L, solve_triangular(L, h), 'T')

    ans = dpotrs((L, h))
    dotter = npr.randn(*ans.shape)
    assert np.allclose(ans, np.linalg.solve(J, h))

    g_L_1, g_h_1 = grad(lambda x: np.sum(dotter * dpotrs(x)))((L, h))
    g_L_2, g_h_2 = dpotrs_grad(dotter, ans, L, h)

    assert np.allclose(g_L_1, g_L_2)
    assert np.allclose(g_h_1, g_h_2)
def test_jacobian_against_wrapper():
    A = npr.randn(3,3,3)
    fun = lambda x: np.einsum(
        'ijk,jkl->il', A, np.sin(x[...,None] * np.tanh(x[None,...])))

    B = npr.randn(3,3)
    jac1 = jacobian(fun)(B)
    jac2 = old_jacobian(fun)(B)

    assert np.allclose(jac1, jac2)
def testRemapPointsToSegments01(self):
    points = np.array([[1, 2], [3, 4], [5, 6]])
    indices = np.array([[0, 1], [2, 0]])
    res = contourloss.remapPointsToSegements(points, indices)
    ans = np.array([
        [[1, 2], [3, 4]],
        [[5, 6], [1, 2]]
    ])
    self.assertEqual(res.shape, ans.shape)
    self.assertTrue(np.allclose(res, ans))
def test_natural_lognorm_grad():
    npr.seed(0)
    n = 3

    J = rand_psd(n)
    h = npr.randn(n)

    def natural_lognorm(args):
        J, h = args
        L = np.linalg.cholesky(J)
        v = solve_triangular(L, h)
        return 1./2*np.dot(v, v) - np.sum(np.log(np.diag(L)))

    g_J_1, g_h_1 = grad(lambda x: np.pi*natural_lognorm(x))((J, h))

    L = np.linalg.cholesky(J)
    v = solve_triangular(L, h)
    g_J_2, g_h_2 = natural_lognorm_grad(np.pi, L, v)

    assert np.allclose(g_J_1, g_J_2)
    assert np.allclose(g_h_1, g_h_2)
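# For reference, a minimal cross-check (a sketch, assuming the same rand_psd
# helper): the Cholesky-based log-normalizer above should agree with the direct
# formula 0.5 * h^T J^{-1} h - 0.5 * log det(J), since with J = L L^T and
# v = L^{-1} h we have 0.5 * v^T v = 0.5 * h^T J^{-1} h and
# sum(log diag(L)) = 0.5 * log det(J).
def _natural_lognorm_direct(J, h):
    return 0.5 * np.dot(h, np.linalg.solve(J, h)) - 0.5 * np.linalg.slogdet(J)[1]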
def check_equivalent(A, B, rtol=1e-4, atol=1e-6):
    assert eq_class(type(A)) == eq_class(type(B)),\
        "Types are: {0} and {1}".format(eq_class(type(A)), eq_class(type(B)))
    if isinstance(A, (tuple, list)):
        for a, b in zip(A, B):
            check_equivalent(a, b)
    elif isinstance(A, dict):
        assert len(A) == len(B)
        for k in A:
            check_equivalent(A[k], B[k])
    else:
        if isinstance(A, np.ndarray):
            assert A.shape == B.shape, "Shapes are {0} and {1}".format(A.shape, B.shape)
        assert np.allclose(A, B, rtol=rtol, atol=atol), "Diffs are: {0}".format(A - B)
def main():
    np.random.seed(1)
    Xtrain, ytrain, params_true, true_fun, ttl = make_data_linreg_1d(21, 'linear')
    model = LinregModel(1, True)
    params_init = model.init_params()
    print(model)

    # Check that OLS and BFGS give same result
    params_ols, loss_ols = model.ols_fit(Xtrain, ytrain)
    obj_fun = lambda params: model.objective(params, Xtrain, ytrain)
    grad_fun = lambda params: model.gradient(params, Xtrain, ytrain)
    params_bfgs, loss_bfgs = bfgs(obj_fun, grad_fun, params_init)
    assert np.allclose(params_bfgs, params_ols)
    assert np.allclose(loss_bfgs, loss_ols)

    # Check that analytic gradient and automatic gradient give same result
    # when evaluated on training data
    grad_fun = autograd.grad(obj_fun)
    grad_auto = grad_fun(params_init)
    grad_finite_diff = autograd.util.nd(lambda p: obj_fun(p), params_init)[0]
    grad_analytic = model.gradient(params_init, Xtrain, ytrain)
    assert np.allclose(grad_auto, grad_finite_diff)
    assert np.allclose(grad_auto, grad_analytic)

    params_autograd, loss_autograd = bfgs(obj_fun, grad_fun, params_init)
    assert np.allclose(params_bfgs, params_autograd)
    assert np.allclose(loss_bfgs, loss_autograd)
    print("All assertions passed")
def test_natural_sample_grad():
    npr.seed(0)
    n = 3
    s = 5

    J = rand_psd(n)
    h = npr.randn(n, s)
    eps = npr.randn(n, s)
    dotter = npr.randn(*eps.shape)

    def natural_sample(J, h, eps):
        L = np.linalg.cholesky(J)
        mu = solve_posdef_from_cholesky(L, h)
        noise = solve_triangular(L, eps, 'T')
        return mu + noise

    g_J_1, g_h_1 = grad(
        lambda args: np.sum(dotter * natural_sample(args[0], args[1], eps)))((J, h))
    g_J_2, g_h_2 = natural_sample_grad(dotter, natural_sample(J, h, eps), J, h, eps)

    assert np.allclose(g_J_1, g_J_2)
    assert np.allclose(g_h_1, g_h_2)
def check_equivalent(A, B, rtol=1e-4, atol=1e-6):
    assert type(A) is type(B),\
        "Types are: {0} and {1}".format(type(A), type(B))
    if isinstance(A, (tuple, list)):
        for a, b in zip(A, B):
            check_equivalent(a, b)
    elif isinstance(A, dict):
        assert len(A) == len(B)
        for k in A:
            check_equivalent(A[k], B[k])
    else:
        if isinstance(A, np.ndarray):
            assert A.shape == B.shape, "Shapes are {0} and {1}".format(A.shape, B.shape)
        assert np.allclose(A, B, rtol=rtol, atol=atol), \
            "Diffs are:\n{0}.\nA is:\n{A}.\nB is:\n{B}.".format(A - B, A=A, B=B)
def compare_samplers(lds, num_samples, seed):
    init_params, pair_params, node_params = lds

    npr.seed(seed)
    messages1, _ = natural_filter_forward_general(
        init_params, pair_params, node_params)
    samples1 = natural_sample_backward_general(messages1, pair_params, num_samples)

    npr.seed(seed)
    dense_messages2, _ = _natural_filter_forward_general(
        init_params, pair_params, node_params)
    samples2 = _natural_sample_backward(dense_messages2, pair_params, num_samples)

    assert np.allclose(samples1, samples2)
def test_jacobian_against_stacked_grads():
    scalar_funs = [
        lambda x: np.sum(x ** 3),
        lambda x: np.prod(np.sin(x) + np.sin(x)),
        lambda x: grad(lambda y: np.exp(y) * np.tanh(x[0]))(x[1]),
    ]

    vector_fun = lambda x: np.array([f(x) for f in scalar_funs])

    x = npr.randn(5)
    jac = jacobian(vector_fun)(x)
    grads = [grad(f)(x) for f in scalar_funs]

    assert np.allclose(jac, np.vstack(grads))
def test_info_to_mean_grad():
    npr.seed(0)
    n = 3

    g_mu = npr.randn(n)
    g_Sigma = npr.randn(3, 3)
    J = rand_psd(n)
    h = npr.randn(n)

    def info_to_mean(args):
        J, h = args
        Sigma = np.linalg.inv(J)
        mu = np.dot(Sigma, h)
        return mu, Sigma

    def fun1(args):
        mu, Sigma = info_to_mean(args)
        return np.sum(g_mu * mu) + np.sum(g_Sigma * Sigma)

    g_J_1, g_h_1 = grad(fun1)((J, h))
    g_J_2, g_h_2 = info_to_mean_grad(g_mu, g_Sigma, J, h)

    assert np.allclose(g_h_1, g_h_2)
    assert np.allclose(g_J_1, g_J_2)
def test_fast_conv():
    """ compares my fast_conv to scipy convolve """
    skip = 1
    block_size = (11, 11)
    depth = 5
    img = np.random.randn(51, 51, depth)
    filt = np.dstack([cv.gauss_filt_2D(shape=block_size, sigma=2)
                      for k in range(depth)])

    # im2col the image and filter
    out = fc.convolve(filt, img)

    # check against scipy convolve (channel by channel, summed over depth)
    outc = np.dstack([auto_convolve(img[:,:,k], filt[:,:,k], mode='valid')
                      for k in range(depth)])
    outc = np.sum(outc, axis=2)
    assert np.allclose(out, outc), "fast_conv (cythonized) failed!"
def test_im2col_convolve():
    """ compares my im2col based dot product convolve with scipy convolve """
    skip = 1
    block_size = (11, 11)
    img = np.random.randn(227, 227, 3)
    filt = np.dstack([cv.gauss_filt_2D(shape=block_size, sigma=2)
                      for k in range(3)])

    # im2col the image and filter
    img_cols = cv.im2col(img, block_size=block_size, skip=skip)
    out = cv.convolve_im2col(img_cols, filt, block_size, skip, img.shape)

    # check against scipy convolve
    outc = np.dstack([sconvolve(img[:,:,k], filt[:,:,k], mode='valid')
                      for k in range(3)])
    outc = np.sum(outc, axis=2)
    assert np.allclose(out, outc), "im2col skip 1 failed!"
def check_equivalent(A, B, rtol=RTOL, atol=ATOL):
    assert base_class(type(A)) is base_class(type(B)),\
        "Types are: {0} and {1}".format(type(A), type(B))
    if isinstance(A, (tuple, list)):
        for a, b in zip(A, B):
            check_equivalent(a, b)
    elif isinstance(A, dict):
        assert len(A) == len(B)
        for k in A:
            check_equivalent(A[k], B[k])
    else:
        if isinstance(A, np.ndarray):
            assert A.shape == B.shape, "Shapes are analytic: {0} and numeric: {1}".format(
                A.shape, B.shape)
            assert A.dtype == B.dtype, "Types are analytic: {0} and numeric: {1}".format(
                A.dtype, B.dtype)
        assert np.allclose(A, B, rtol=rtol, atol=atol), \
            "Diffs are:\n{0}.\nanalytic is:\n{A}.\nnumeric is:\n{B}.".format(A - B, A=A, B=B)
def fit_linear_regression(Xs, ys, weights=None,
                          mu0=0, sigmasq0=1, nu0=1, Psi0=1,
                          fit_intercept=True):
    """
    Fit a linear regression y_i ~ N(Wx_i + b, diag(S)) for W, b, S.

    :param Xs: array or list of arrays
    :param ys: array or list of arrays
    :param fit_intercept: if False drop b
    """
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    D = Xs[0].shape[1]
    P = ys[0].shape[1]
    assert all([X.shape[1] == D for X in Xs])
    assert all([y.shape[1] == P for y in ys])
    assert all([X.shape[0] == y.shape[0] for X, y in zip(Xs, ys)])

    mu0 = mu0 * np.zeros((P, D))
    sigmasq0 = sigmasq0 * np.eye(D)

    # Check the weights.  Default to all ones.
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # Add weak prior on intercept
    if fit_intercept:
        mu0 = np.column_stack((mu0, np.zeros(P)))
        sigmasq0 = block_diag(sigmasq0, np.eye(1))

    # Compute the posterior
    J = np.linalg.inv(sigmasq0)
    h = np.dot(J, mu0.T)
    for X, y, weight in zip(Xs, ys, weights):
        X = np.column_stack((X, np.ones(X.shape[0]))) if fit_intercept else X
        J += np.dot(X.T * weight, X)
        h += np.dot(X.T * weight, y)

    # Solve for the MAP estimate
    W = np.linalg.solve(J, h).T
    if fit_intercept:
        W, b = W[:, :-1], W[:, -1]
    else:
        b = 0

    # Compute the residual and the posterior variance
    nu = nu0
    Psi = Psi0 * np.eye(P)
    for X, y, weight in zip(Xs, ys, weights):
        yhat = np.dot(X, W.T) + b
        resid = y - yhat
        nu += np.sum(weight)
        tmp1 = np.einsum('t,ti,tj->ij', weight, resid, resid)
        tmp2 = np.sum(weight[:, None, None] * resid[:, :, None] * resid[:, None, :], axis=0)
        assert np.allclose(tmp1, tmp2)
        Psi += tmp1

    # Get MAP estimate of posterior covariance
    Sigma = Psi / (nu + P + 1)
    if fit_intercept:
        return W, b, Sigma
    else:
        return W, Sigma
def test_csr_binary_dot_right():
    dotright_cython = csr_binary_dot_right(feats, rows, cols)
    dotright_numpy = np.dot(feats, binmat)
    assert np.allclose(dotright_cython, dotright_numpy)
def run( self ):
    initial_dist, transition_dists, emission_dist = self.generateDists()
    graphs = self.graphs
    msg = self.msg
    msg.updateParams( initial_dist, transition_dists, emission_dist, graphs )
    msg.draw()

    U, V = msg.filter()

    assert 0

    # for node, elt in enumerate( U ):
    #     node = msg.partialGraphIndexToFullGraphIndex( node )
    #     print( 'node', node, 'elt.shape', elt.shape, 'elt.fbs_axis', elt.fbs_axis )
    # for node, edge, elt in zip( *V ):
    #     node = msg.partialGraphIndexToFullGraphIndex( node )
    #     print( 'node', node, 'edge', edge, 'elt.shape', elt.shape, 'elt.fbs_axis', elt.fbs_axis )
    # assert 0

    ####################################################

    print( '\nJoint' )
    for n, probs in msg.nodeJoint( U, V, msg.nodes ):
        reduced = msg.integrate( probs, axes=range( probs.ndim ) )
        print( 'P( x_%d, Y )'%( n ), ':', probs, '->', reduced )

    ####################################################

    print( '\nJoint parents should marginalize out to joint probs' )
    for n, probs in msg.jointParents( U, V, msg.nodes ):
        parents, parent_order = msg.getParents( n, get_order=True )
        joints = msg.nodeJoint( U, V, parents )
        for i, ( ( p, j ), o ) in enumerate( zip( joints, parent_order ) ):
            # Marginalize out the other parents from probs
            int_axes = np.setdiff1d( parent_order, o )
            reduced = msg.integrate( probs, axes=int_axes )
            print( 'sum_{ parents except %d }P( x_p1..pN, Y ) for node %d - P( x_%d, Y ) : ->'%( p, n, p ), ( j - reduced ).sum() )
            assert np.allclose( reduced, j ), 'reduced: %s, j: %s'%( reduced, j )

    ####################################################

    print( '\nJoint parent child should marginalize out to joint probs' )
    for n, probs in msg.jointParentChild( U, V, msg.nodes ):
        parents, parent_order = msg.getParents( n, get_order=True )
        n_parents = parents.shape[ 0 ]
        joints = msg.nodeJoint( U, V, parents )
        for i, ( ( p, j ), o ) in enumerate( zip( joints, parent_order ) ):
            # Marginalize out the other parents from probs
            int_axes = np.setdiff1d( np.hstack( ( n_parents, parent_order ) ), o )
            reduced = msg.integrate( probs, axes=int_axes )
            print( 'sum_{ parents except %d }P( x_%d, x_p1..pN, Y ) for node %d - P( x_%d, Y ) : ->'%( p, p, n, p ), ( j - reduced ).sum() )
            assert np.allclose( reduced, j ), 'reduced: %s, j: %s'%( reduced, j )

        ( _, joint ), = msg.nodeJoint( U, V, [ n ] )
        # Marginalize out all of the parents
        reduced = msg.integrate( probs, axes=parent_order )
        print( 'sum_{ parents }P( x_%d, x_p1..pN, Y ) - P( x_%d, Y ) : ->'%( n, n ), ( joint - reduced ).sum() )
        assert np.allclose( reduced, joint ), 'reduced: %s, j: %s'%( reduced, j )

    ####################################################

    print( '\nSmoothed' )
    for n, probs in msg.nodeSmoothed( U, V, msg.nodes ):
        # If we reduce over the last axis, we should have everything sum to 1
        reduced = msg.integrate( probs, axes=[ -1 ] )
        print( 'P( x_%d | Y )'%( n ), ':', probs, '->', probs.shape, reduced )
        assert np.allclose( reduced, 0.0 ), 'Failed test!'

    ####################################################

    print( '\nChild given parents' )
    for n, probs in msg.conditionalParentChild( U, V, msg.nodes ):
        # If we reduce over the last axis, we should have everything sum to 1
        reduced = msg.integrate( probs, axes=[ -1 ] )
        print( 'P( x_%d | x_p1..pN, Y )'%( n ), '->', probs.shape, reduced )
        assert np.allclose( reduced, 0.0 ), 'Failed test!'
def testSwitchingKalmanFilter():
    T = 100
    D_latent = 20
    D_obs = 7
    D = 4
    K = 5

    mp = SwitchingKalmanFilter()

    As, sigmas = list(zip(*[
        MatrixNormalInverseWishart.generate(D_in=D_latent, D_out=D_latent)
        for _ in range(K)]))
    C, R = MatrixNormalInverseWishart.generate(D_in=D_latent, D_out=D_obs)
    mu0, sigma0 = NormalInverseWishart.generate(D=D_latent)
    z = Categorical.generate(D=K, size=T)
    u = np.random.random((T, D_latent))
    ys = np.array(
        [Regression.sample(params=(C, R), size=T)[1] for _ in range(D)])

    start = time.time()
    mp.updateParams(z, As, sigmas, C, R, mu0, sigma0, u, ys)
    end = time.time()
    print('Preprocess: ', end - start)

    start = time.time()
    alphas = mp.forwardFilter()
    betas = mp.backwardFilter()
    end = time.time()
    print('Both filters: ', end - start)

    Ja, ha, log_Za = alphas[-1]
    Jb, hb, log_Zb = betas[-1]
    marginal = Normal.log_partition(nat_params=(-0.5 * Ja, ha)) - log_Za

    for a, b in zip(alphas, betas):
        Ja, ha, log_Za = a
        Jb, hb, log_Zb = b

        # comp = Normal.log_partition( nat_params=( -0.5*( Ja + Jb ), ( ha + hb ) ) ) - ( log_Za + log_Zb )
        comp = mp.log_marginalFromAlphaBeta(a, b)

        assert np.isclose(comp, marginal), comp - marginal

    for t in range(T - 1):
        joint = mp.childParentJoint(t, alphas, betas)

        _JsParent, _hsParent, _logZsParent = Normal.marginalizeX1(*joint)
        _JsChild, _hsChild, _logZsChild = Normal.marginalizeX2(*joint)

        JsParent, hsParent, logZsParent = np.add(alphas[t], betas[t])
        JsChild, hsChild, logZsChild = np.add(alphas[t + 1], betas[t + 1])

        assert np.allclose(_JsParent, JsParent)
        assert np.allclose(_hsParent, hsParent)
        assert np.allclose(_logZsParent, logZsParent)
        assert np.allclose(_JsChild, JsChild)
        assert np.allclose(_hsChild, hsChild)
        assert np.allclose(_logZsChild, logZsChild)

    print('Passed the switching kalman filter marginal test!\n\n')
def testMixtureOfGaussians(self):
    def log_joint(x, pi, z, mu, sigma_sq, alpha, sigma_sq_mu):
        log_p_pi = log_probs.dirichlet_gen_log_prob(pi, alpha)
        log_p_mu = log_probs.norm_gen_log_prob(mu, 0, np.sqrt(sigma_sq_mu))

        z_one_hot = one_hot(z, len(pi))
        log_p_z = np.einsum('ij,j->', z_one_hot, np.log(pi))

        mu_z = np.einsum('ij,jk->ik', z_one_hot, mu)
        log_p_x = log_probs.norm_gen_log_prob(x, mu_z, np.sqrt(sigma_sq))

        return log_p_pi + log_p_z + log_p_mu + log_p_x

    n_clusters = 5
    n_dimensions = 2
    n_observations = 200

    alpha = 3.3 * np.ones(n_clusters)
    sigma_sq_mu = 1.5 ** 2
    sigma_sq = 0.5 ** 2

    np.random.seed(10001)

    pi = np.random.gamma(alpha)
    pi /= pi.sum()
    mu = np.random.normal(0, np.sqrt(sigma_sq_mu), [n_clusters, n_dimensions])
    z = np.random.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = np.random.normal(mu[z, :], sigma_sq)

    pi_est = np.ones(n_clusters) / n_clusters
    z_est = np.random.choice(np.arange(n_clusters), size=n_observations, p=pi_est)
    mu_est = np.random.normal(0., 0.01, [n_clusters, n_dimensions])

    all_args = [x, pi_est, z_est, mu_est, sigma_sq, alpha, sigma_sq_mu]
    pi_posterior_args = all_args[:1] + all_args[2:]
    z_posterior_args = all_args[:2] + all_args[3:]
    mu_posterior_args = all_args[:3] + all_args[4:]

    pi_posterior = complete_conditional(log_joint, 1, SupportTypes.SIMPLEX, *all_args)
    z_posterior = complete_conditional(log_joint, 2, SupportTypes.INTEGER, *all_args)
    mu_posterior = complete_conditional(log_joint, 3, SupportTypes.REAL, *all_args)

    self.assertTrue(np.allclose(
        pi_posterior(*pi_posterior_args).alpha,
        alpha + np.histogram(z_est, np.arange(n_clusters + 1))[0]))

    correct_z_logits = -0.5 / sigma_sq * np.square(
        x[:, :, None] - mu_est.T[None, :, :]).sum(1)
    correct_z_logits += np.log(pi_est)
    correct_z_posterior = np.exp(
        correct_z_logits - misc.logsumexp(correct_z_logits, 1, keepdims=True))
    self.assertTrue(np.allclose(correct_z_posterior, z_posterior(*z_posterior_args).p))

    correct_mu_posterior_mean = np.zeros_like(mu_est)
    correct_mu_posterior_var = np.zeros_like(mu_est)
    for k in range(n_clusters):
        n_k = (z_est == k).sum()
        correct_mu_posterior_var[k] = 1. / (1. / sigma_sq_mu + n_k / sigma_sq)
        correct_mu_posterior_mean[k] = (
            x[z_est == k].sum(0) / sigma_sq * correct_mu_posterior_var[k])
    mu_posterior_val = mu_posterior(*mu_posterior_args)
    self.assertTrue(np.allclose(correct_mu_posterior_mean, mu_posterior_val.args[0]))
    self.assertTrue(np.allclose(correct_mu_posterior_var, mu_posterior_val.args[1] ** 2))
    # - d indexes D dimension (obs dimension)
    inv_term = np.linalg.solve(r_term, np.swapaxes(aC, 1, 2))
    back_term = np.einsum('idr,id->ir', aC, p)  # (Ct A^{-1} p)
    Sigvs = inv_v * p - np.einsum('ird,ir->id', inv_term, back_term)
    return Sigvs


if __name__ == "__main__":
    # Test woodbury
    C = np.random.randn(10, 3)
    v = np.random.randn(10) * 2
    Sigma = np.dot(C, C.T) + np.diag(np.exp(v))
    Sinv = np.linalg.inv(Sigma)
    Sinv_wood = woodbury_invert(C, v)
    assert np.allclose(Sinv, Sinv_wood, atol=1e-6), "woodbury!"

    _, lndet = np.linalg.slogdet(Sigma)
    lndet_wood = woodbury_lndet(C, v)
    assert np.allclose(lndet, lndet_wood), "woodbury det!"

    # test woodbury solve
    p = np.random.randn(10)
    a_wood = woodbury_solve(C, v, p)
    a = np.dot(Sinv, p)
    assert np.allclose(a, a_wood), "woodbury solve!"

    p = np.random.randn(10, 23)
    aw = woodbury_solve(C, v, p)
    aa = np.dot(Sinv, p)
    assert np.allclose(aw, aa), "woodbury solve vectorized!"
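# For reference, a minimal sketch of the identity these tests exercise (an
# assumption about the intent, not the project's actual woodbury_invert): with
# A = diag(exp(v)), the Woodbury identity gives
# (A + C C^T)^{-1} = A^{-1} - A^{-1} C (I + C^T A^{-1} C)^{-1} C^T A^{-1}.
def _woodbury_invert_sketch(C, v):
    Ainv = np.diag(np.exp(-v))                     # inverse of the diagonal part
    K = np.eye(C.shape[1]) + C.T.dot(Ainv).dot(C)  # small (rank x rank) system
    return Ainv - Ainv.dot(C).dot(np.linalg.solve(K, C.T.dot(Ainv)))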
def fit_linear_regression(Xs, ys, weights=None,
                          fit_intercept=True,
                          prior_mean=0, prior_variance=1,
                          nu0=1, Psi0=1):
    """
    Fit a linear regression y_i ~ N(Wx_i + b, diag(S)) for W, b, S.

    :param Xs: array or list of arrays
    :param ys: array or list of arrays
    :param fit_intercept: if False drop b
    """
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    p, d = Xs[0].shape[1], ys[0].shape[1]
    assert all([X.shape[1] == p for X in Xs])
    assert all([y.shape[1] == d for y in ys])
    assert all([X.shape[0] == y.shape[0] for X, y in zip(Xs, ys)])

    prior_mean = prior_mean * np.zeros((d, p))
    prior_variance = prior_variance * np.eye(p)

    # Check the weights.  Default to all ones.
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # Add weak prior on intercept
    if fit_intercept:
        prior_mean = np.column_stack((prior_mean, np.zeros(d)))
        prior_variance = block_diag(prior_variance, np.eye(1))

    # Compute the posterior
    J = np.linalg.inv(prior_variance)
    h = np.dot(J, prior_mean.T)
    for X, y, weight in zip(Xs, ys, weights):
        X = np.column_stack((X, np.ones(X.shape[0]))) if fit_intercept else X
        J += np.dot(X.T * weight, X)
        h += np.dot(X.T * weight, y)

    # Solve for the MAP estimate
    W = np.linalg.solve(J, h).T
    if fit_intercept:
        W, b = W[:, :-1], W[:, -1]
    else:
        b = 0

    # Compute the residual and the posterior variance
    nu = nu0
    Psi = Psi0 * np.eye(d)
    for X, y, weight in zip(Xs, ys, weights):
        yhat = np.dot(X, W.T) + b
        resid = y - yhat
        nu += np.sum(weight)
        tmp1 = np.einsum('t,ti,tj->ij', weight, resid, resid)
        tmp2 = np.sum(weight[:, None, None] * resid[:, :, None] * resid[:, None, :], axis=0)
        assert np.allclose(tmp1, tmp2)
        Psi += tmp1

    # Get MAP estimate of posterior covariance
    Sigma = Psi / (nu + d + 1)
    if fit_intercept:
        return W, b, Sigma
    else:
        return W, Sigma
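# A minimal usage sketch for the regression fitter above (the synthetic data
# and tolerance are assumptions, not part of the original code): with plenty of
# data, little noise, and a weak prior, the MAP estimate should land close to
# the true W and b.
if __name__ == "__main__":
    np.random.seed(0)
    X = np.random.randn(500, 3)
    W_true = np.random.randn(2, 3)
    b_true = np.random.randn(2)
    y = np.dot(X, W_true.T) + b_true + 0.01 * np.random.randn(500, 2)
    W_hat, b_hat, Sigma_hat = fit_linear_regression(X, y)
    assert np.allclose(W_hat, W_true, atol=0.05)
    assert np.allclose(b_hat, b_true, atol=0.05)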
def test_vmc_single_point(self):
    vmc = VMC(wf=self.wf, sampler=self.sampler, optimizer=None)
    opt_param = [0.5]
    _, e, s = vmc.single_point(opt_param)
    assert np.allclose([e, s], [0.5, 0], atol=1E-3)
def test_vmc_opt(self):
    vmc = VMC(wf=self.wf, sampler=self.sampler, optimizer=self.optimizer)
    init_param = [1.25]
    vmc.optimize(init_param)
    vf = vmc.history['variance'][-1]
    assert np.allclose([vf], [0.0], atol=1E-3)
def check_expectedstats(natparam):
    E_stats1 = expectedstats(natparam)
    E_stats2 = grad(logZ)(natparam)
    assert np.allclose(E_stats1, E_stats2)
__gtilde_subsample = 1
__gtilde_pickle_fn = 'VBHP/gtilde.pkl'
__gtilde_csv_fn = 'VBHP/gtilde.csv'

_gtilde_table = wh.load(__gtilde_pickle_fn)
isub = list(range(0, _gtilde_table.shape[1]-1, __gtilde_subsample)) + [_gtilde_table.shape[1]-1]
_gtilde_table = _gtilde_table[:,isub]
_gtilde_neglogz, _gtilde_value, _grad_gtilde_value = _gtilde_table
assert not np.isinf(min(_gtilde_neglogz))

_gtilde_neglogz_0, _gtilde_value_0, _grad_gtilde_value_0 = -np.inf, 0.0, 2
_gtilde_neglogz_range = (min(_gtilde_neglogz), max(_gtilde_neglogz))

imin = np.argmin(_gtilde_neglogz)
assert imin == 0
assert np.allclose(_gtilde_value_0, _gtilde_value[imin])
assert np.allclose(_grad_gtilde_value_0, _grad_gtilde_value[imin])

_gtilde_interp = scipy.interpolate.interp1d(
    _gtilde_neglogz, _gtilde_value,
    fill_value=(_gtilde_value_0, np.nan), bounds_error=False, kind=__interp1d_kind)
_grad_gtilde_interp = scipy.interpolate.interp1d(
    _gtilde_neglogz, _grad_gtilde_value,
    fill_value=(_grad_gtilde_value_0, np.nan), bounds_error=False, kind=__interp1d_kind)


def gtilde(z):
    """get the value of gtilde at -z by interpolation"""
    assert isinstance(z, np.ndarray)
    assert np.all(z <= 0.0)
    lognegz = np.log(-z)
    assert np.all(lognegz <= _gtilde_neglogz_range[1]), \
        (min(lognegz), max(lognegz), _gtilde_neglogz_range)
    rval = _gtilde_interp(lognegz)
    rval[z == 0] = _gtilde_value_0
    rval[lognegz < _gtilde_neglogz[0]] = 0.0
def check_params(natparam):
    natparam2 = pack_dense(*unpack_dense(natparam))
    assert np.allclose(natparam, natparam2)
def test_grad_identity():
    fun = lambda x: x
    df = grad(fun)
    ddf = grad(df)
    assert np.allclose(df(2.0), 1.0)
    assert np.allclose(ddf(2.0), 0.0)
def test_grad_const():
    fun = lambda x: 1.0
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("ignore")
        df = grad(fun)
        assert np.allclose(df(2.0), 0.0)
def test_rand(self):
    x = self.man.rand()
    p = np.ones(2) / np.sqrt(2)
    # The manifold only consists of two isolated points (cf. `setUp()`).
    self.assertTrue(np.allclose(x, p) or np.allclose(x, -p))
def allclose(m1, m2):
    if isinstance(m1, np.ndarray):
        return np.allclose(m1, m2)
    elif np.isscalar(m1):
        return np.isclose(m1, m2)
    return len(m1) == len(m2) and all(map(allclose, m1, m2))
def assert_allclose(self, x, y, tol=1e-12, msg=''):
    self.assertTrue(
        np.allclose(x, y, tol),
        msg='{}\nx !~ y where\nx = {}\ny = {}\ntol = {}'.format(msg, x, y, tol))
def run_hessian_approximation_test(self):
    problem = pymanopt.Problem(self.manifold, self.cost)
    _, error, _, (slope, *_) = diagnostics.check_directional_derivative(
        problem, use_quadratic_model=True
    )
    assert np.allclose(np.linalg.norm(error), 0) or (2.95 <= slope <= 3.05)
def fun(x):
    assert np.allclose(x, (x * 3.0) / 3.0)
    return np.sum(x)
def test_SLDSStructuredMeanField_entropy():
    """Test correctness of the entropy calculation for the
    SLDSStructuredMeanFieldVariationalPosterior class.
    """
    def entropy_mv_gaussian(J, h):
        mu = np.linalg.solve(J, h)
        sigma = np.linalg.inv(J)
        mv_normal = scipy.stats.multivariate_normal(mu, sigma)
        return mv_normal.entropy()

    def make_lds_parameters(T, D, N, U):
        m0 = np.zeros(D)
        S0 = np.eye(D)
        As = 0.99 * np.eye(D)
        Bs = np.zeros((D, U))
        Qs = 0.1 * np.eye(D)
        Cs = npr.randn(N, D)
        Ds = np.zeros((N, U))
        Rs = 0.1 * np.eye(N)
        us = np.zeros((T, U))
        ys = np.sin(2 * np.pi * np.arange(T) / 50)[:, None] * npr.randn(1, N) \
             + 0.1 * npr.randn(T, N)
        return m0, S0, As, Bs, Qs, Cs, Ds, Rs, us, ys

    def cumsum(v, strict=False):
        if not strict:
            return np.cumsum(v, axis=0)
        else:
            out = np.zeros_like(v)
            out[1:] = np.cumsum(v[:-1], axis=0)
            return out

    def bmat(blocks):
        rowsizes = [row[0].shape[0] for row in blocks]
        colsizes = [col[0].shape[1] for col in zip(*blocks)]
        rowstarts = cumsum(rowsizes, strict=True)
        colstarts = cumsum(colsizes, strict=True)
        nrows, ncols = sum(rowsizes), sum(colsizes)
        out = np.zeros((nrows, ncols))
        for i, (rstart, rsz) in enumerate(zip(rowstarts, rowsizes)):
            for j, (cstart, csz) in enumerate(zip(colstarts, colsizes)):
                out[rstart:rstart+rsz, cstart:cstart+csz] = blocks[i][j]
        return out

    def lds_to_dense_infoparams(params):
        m0, S0, As, Bs, Qs, Cs, Ds, Rs, us, ys = params
        mu_init = m0
        sigma_init = S0
        A, B, sigma_states = As, Bs, Qs
        C, D, sigma_obs = Cs, Ds, Rs
        data = ys
        inputs = us

        # Copied from PYLDS tests/test_dense.py
        T, n = data.shape[0], D.shape[0]
        # mu_init, sigma_init = model.mu_init, model.sigma_init
        # A, B, sigma_states = model.A, model.B, model.sigma_states
        # C, D, sigma_obs = model.C, model.D, model.sigma_obs
        ss_inv = np.linalg.inv(sigma_states)

        h = np.zeros((T, n))
        h[0] += np.linalg.solve(sigma_init, mu_init)
        # Dynamics
        h[1:] += inputs[:-1].dot(B.T).dot(ss_inv)
        h[:-1] += -inputs[:-1].dot(B.T).dot(np.linalg.solve(sigma_states, A))
        # Emissions
        h += C.T.dot(np.linalg.solve(sigma_obs, data.T)).T
        h += -inputs.dot(D.T).dot(np.linalg.solve(sigma_obs, C))

        J = np.kron(np.eye(T), C.T.dot(np.linalg.solve(sigma_obs, C)))
        J[:n, :n] += np.linalg.inv(sigma_init)
        pairblock = bmat([[A.T.dot(ss_inv).dot(A), -A.T.dot(ss_inv)],
                          [-ss_inv.dot(A), ss_inv]])
        for t in range(0, n*(T-1), n):
            J[t:t+2*n, t:t+2*n] += pairblock

        return J.reshape(T*n, T*n), h.reshape(T*n)

    T, D, N, U = 100, 10, 10, 0
    params = make_lds_parameters(T, D, N, U)
    J_full, h_full = lds_to_dense_infoparams(params)
    ref_entropy = entropy_mv_gaussian(J_full, h_full)

    # Calculate entropy using kalman filter and posterior's entropy fn
    info_args = ssm.messages.convert_mean_to_info_args(*params)
    J_ini, h_ini, _, J_dyn_11, \
        J_dyn_21, J_dyn_22, h_dyn_1, \
        h_dyn_2, _, J_obs, h_obs, _ = info_args
    # J_obs[1:] += J_dyn_22
    # J_dyn_22[:] = 0
    log_Z, smoothed_mus, smoothed_Sigmas, ExxnT = ssm.messages.\
        kalman_info_smoother(*info_args)

    # Model is just a dummy model to simplify
    # instantiating the posterior object.
    model = ssm.SLDS(N, 1, D, emissions="gaussian", dynamics="gaussian")
    datas = params[-1]
    post = ssm.variational.SLDSStructuredMeanFieldVariationalPosterior(model, datas)

    # Assign posterior to have info params that are the same as the ones used
    # in the reference entropy calculation.
    continuous_state_params = [dict(J_ini=J_ini,
                                    J_dyn_11=J_dyn_11,
                                    J_dyn_21=J_dyn_21,
                                    J_dyn_22=J_dyn_22,
                                    J_obs=J_obs,
                                    h_ini=h_ini,
                                    h_dyn_1=h_dyn_1,
                                    h_dyn_2=h_dyn_2,
                                    h_obs=h_obs)]
    post.continuous_state_params = continuous_state_params

    ssm_entropy = post._continuous_entropy()
    print("reference entropy: {}".format(ref_entropy))
    print("ssm_entropy: {}".format(ssm_entropy))
    assert np.allclose(ref_entropy, ssm_entropy)
def testStableKalmanFilter():
    # np.random.seed( 3 )
    with np.errstate(all='raise'), scipy.special.errstate(all='raise'):
        T = 1000
        D_latent = 7
        D_obs = 3
        D = 4

        mp = StableKalmanFilter()
        mpTrue = KalmanFilter()

        A, sigma = MatrixNormalInverseWishart.generate(D_in=D_latent, D_out=D_latent)
        C, R = MatrixNormalInverseWishart.generate(D_in=D_latent, D_out=D_obs)
        mu0, sigma0 = NormalInverseWishart.generate(D=D_latent)

        u = np.random.random((T, D_latent))
        ys = np.array(
            [Regression.sample(params=(C, R), size=T)[1] for _ in range(D)])

        mpTrue.updateParams(A=A, sigma=sigma, C=C, R=R, mu0=mu0, sigma0=sigma0, u=u, ys=ys)

        start = time.time()
        mp.updateParams(A=A, sigma=sigma, C=C, R=R, mu0=mu0, sigma0=sigma0, u=u, ys=ys)
        end = time.time()
        print('Preprocess: ', end - start)

        start = time.time()
        alphas = mp.forwardFilter()
        betas = mp.backwardFilter()
        end = time.time()
        print('Both filters: ', end - start)

        alphasTrue, betasTrue = (mpTrue.forwardFilter(), mpTrue.backwardFilter())

        # for i, ( a, b ) in enumerate( zip( alphas, betas ) ):
        #     Ja, ha, log_Za = a
        #     if( i == 1 ):
        #         print( 'Ja', Ja )

        Ja, ha, log_Za = alphas[-1]
        Jb, hb, log_Zb = betas[-1]

        for i, (a, b, _a, _b) in enumerate(zip(alphas, betas, alphasTrue, betasTrue)):
            Ja, ha, log_Za = a
            Jb, hb, log_Zb = b
            _Ja, _ha, _log_Za = _a
            _Jb, _hb, _log_Zb = _b

            assert np.allclose(
                Ja, _Ja, rtol=1e-5, atol=1e-6), '%s\n%s' % ((Ja - _Ja), np.max((Ja - _Ja)))
            assert np.allclose(
                Jb, _Jb, rtol=1e-5, atol=1e-6), '%s\n%s' % ((Jb - _Jb), np.max((Jb - _Jb)))
            assert np.allclose(
                ha, _ha, rtol=1e-5, atol=1e-6), '%s\n%s' % ((ha - _ha), np.max((ha - _ha)))
            assert np.allclose(
                hb, _hb, rtol=1e-5, atol=1e-6), '%s\n%s' % ((hb - _hb), np.max((hb - _hb)))
            assert np.allclose(
                log_Za, _log_Za, rtol=1e-5, atol=1e-6), '%s\n%s' % (
                    (log_Za - _log_Za), np.max((log_Za - _log_Za)))
            assert np.allclose(
                log_Zb, _log_Zb, rtol=1e-5, atol=1e-6), '%s\n%s' % (
                    (log_Zb - _log_Zb), np.max((log_Zb - _log_Zb)))

        print('Passed the stable kalman filter marginal test!\n\n')
def check_ggnvp(f, x, v):
    gnvp = make_ggnvp(f)(x)
    gnvp2 = make_dense_ggnvp(f)(x)
    return np.allclose(gnvp(v), gnvp2(v))
def testKalmanFilter():
    T = 1000
    D_latent = 7
    D_obs = 3
    D = 4

    mp = KalmanFilter()

    A, sigma = MatrixNormalInverseWishart.generate(D_in=D_latent, D_out=D_latent)
    C, R = MatrixNormalInverseWishart.generate(D_in=D_latent, D_out=D_obs)
    mu0, sigma0 = NormalInverseWishart.generate(D=D_latent)

    u = np.random.random((T, D_latent))

    # nBad = int( np.random.random() * T )
    # badMask = np.random.choice( T, nBad )
    # u[ badMask ] = np.nan

    ys = np.array(
        [Regression.sample(params=(C, R), size=T)[1] for _ in range(D)])

    start = time.time()
    mp.updateParams(A=A, sigma=sigma, C=C, R=R, mu0=mu0, sigma0=sigma0, u=u, ys=ys)
    end = time.time()
    print('Preprocess: ', end - start)

    start = time.time()
    alphas = mp.forwardFilter()
    betas = mp.backwardFilter()
    end = time.time()
    print('Both filters: ', end - start)

    Ja, ha, log_Za = alphas[-1]
    Jb, hb, log_Zb = betas[-1]
    marginal = Normal.log_partition(nat_params=(-0.5 * Ja, ha)) - log_Za

    for a, b in zip(alphas, betas):
        Ja, ha, log_Za = a
        Jb, hb, log_Zb = b

        # _marginal = Normal.log_partition( nat_params=( -0.5*( Ja + Jb ), ( ha + hb ) ) ) - ( log_Za + log_Zb )
        _marginal = mp.log_marginalFromAlphaBeta(a, b)

        assert np.isclose(_marginal, marginal), _marginal - marginal

    for t in range(T - 1):
        joint = mp.childParentJoint(t, alphas, betas)

        _JsParent, _hsParent, _logZsParent = Normal.marginalizeX1(*joint)
        _JsChild, _hsChild, _logZsChild = Normal.marginalizeX2(*joint)

        JsParent, hsParent, logZsParent = np.add(alphas[t], betas[t])
        JsChild, hsChild, logZsChild = np.add(alphas[t + 1], betas[t + 1])

        assert np.allclose(_JsParent, JsParent)
        assert np.allclose(_hsParent, hsParent)
        assert np.allclose(_logZsParent, logZsParent)
        assert np.allclose(_JsChild, JsChild)
        assert np.allclose(_hsChild, hsChild)
        assert np.allclose(_logZsChild, logZsChild)

    print('Passed the kalman filter marginal test!\n\n')
def test_forward_pass(T=1000, K=3):
    log_pi0, log_Ps, ll = make_parameters(T, K)
    a1 = forward_pass_np(log_pi0, log_Ps, ll)
    a2 = np.zeros((T, K))
    forward_pass(-np.log(K) * np.ones(K), log_Ps, ll, a2)
    assert np.allclose(a1, a2)
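# A minimal sketch of the log-space HMM forward recursion that forward_pass_np
# is assumed to implement (not the library's actual code; log_Ps is taken to be
# a (T-1, K, K) array of log transition matrices):
#   alpha[0] = log_pi0 + ll[0]
#   alpha[t] = logsumexp(alpha[t-1][:, None] + log_Ps[t-1], axis=0) + ll[t]
def _forward_pass_sketch(log_pi0, log_Ps, ll):
    from scipy.special import logsumexp
    T, K = ll.shape
    alpha = np.zeros((T, K))
    alpha[0] = log_pi0 + ll[0]
    for t in range(1, T):
        alpha[t] = logsumexp(alpha[t-1][:, None] + log_Ps[t-1], axis=0) + ll[t]
    return alpha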
def testSplitEinsumNode2(self):
    n_dimensions = 5
    x = np.random.randn(n_dimensions)
    y = np.random.randn(n_dimensions)
    matrix = np.random.randn(n_dimensions, n_dimensions)
    env = {'x': x, 'y': y, 'matrix': matrix}

    args = (x, y)
    f = lambda x, y: np.einsum('i,i->', x, y)
    node = make_expr(f, *args)
    val = f(*args)
    potential_node, stat_node = split_einsum_node2(node.expr_node, [0])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env), x))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))
    potential_node, stat_node = split_einsum_node2(node.expr_node, [1])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env), y))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))
    potential_node, stat_node = split_einsum_node2(node.expr_node, [0, 1])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env), x * y))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))

    args = (x, y)
    f = lambda x, y: np.einsum('i,i,i->', x, y, y)
    node = make_expr(f, *args)
    val = f(*args)
    potential_node, stat_node = split_einsum_node2(node.expr_node, [1, 2])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env), y * y))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))
    potential_node, stat_node = split_einsum_node2(node.expr_node, [0])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env), x))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))

    args = (x,)
    f = lambda x: np.einsum('i,i,i->', np.ones_like(x), x, x)
    node = make_expr(f, *args)
    val = f(*args)
    potential_node, stat_node = split_einsum_node2(node.expr_node, [1, 2])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env), x * x))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))

    args = (matrix, x, y)
    f = lambda matrix, x, y: np.einsum('ij,i,j->', matrix, x, y)
    node = make_expr(f, *args)
    val = f(*args)
    potential_node, stat_node = split_einsum_node2(node.expr_node, [1, 2])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env), np.outer(x, y)))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))
    potential_node, stat_node = split_einsum_node2(node.expr_node, [0])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env), matrix))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))

    args = (matrix, x, y)
    f = lambda matrix, x, y: np.einsum('i,j,ki,kj->', x, x, matrix, matrix)
    node = make_expr(f, *args)
    val = f(*args)
    potential_node, stat_node = split_einsum_node2(node.expr_node, [2, 3])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env),
                                matrix[:, None, :] * matrix[:, :, None]))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))
    potential_node, stat_node = split_einsum_node2(node.expr_node, [0, 1])
    self.assertTrue(np.allclose(eval_node(stat_node, node, env), np.outer(x, x)))
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))

    args = (matrix, x, y)
    f = lambda matrix, x, y: np.einsum(',kj,j,ka,a->', -0.5, matrix, x, matrix, y)
    node = make_expr(f, *args)
    val = f(*args)
    potential_node, stat_node = split_einsum_node2(node.expr_node, [2, 4], False)
    self.assertEqual(stat_node.args[0], 'j,a->ja')
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))
    potential_node, stat_node = split_einsum_node2(node.expr_node, [0, 1, 3], False)
    self.assertEqual(stat_node.args[0], ',kj,ka->kja')
    self.assertTrue(np.allclose(eval_node(potential_node, node, env), val))
def test_laplace_em_hessian(N=5, K=3, D=2, T=20):
    for transitions in ["standard", "recurrent", "recurrent_only"]:
        for emissions in ["gaussian_orthog", "gaussian"]:
            print("Checking analytical hessian for transitions={}, "
                  "and emissions={}".format(transitions, emissions))
            slds = ssm.SLDS(N, K, D,
                            transitions=transitions,
                            dynamics="gaussian",
                            emissions=emissions)
            z, x, y = slds.sample(T)
            new_slds = ssm.SLDS(N, K, D,
                                transitions="standard",
                                dynamics="gaussian",
                                emissions=emissions)
            inputs = [np.zeros((T, 0))]
            masks = [np.ones_like(y)]
            tags = [None]
            method = "laplace_em"
            datas = [y]
            num_samples = 1

            def neg_expected_log_joint_wrapper(x_vec, T, D):
                x = x_vec.reshape(T, D)
                return new_slds._laplace_neg_expected_log_joint(
                    datas[0], inputs[0], masks[0], tags[0], x, Ez, Ezzp1)

            variational_posterior = new_slds._make_variational_posterior(
                "structured_meanfield", datas, inputs, masks, tags, method)
            new_slds._fit_laplace_em_discrete_state_update(
                variational_posterior, datas, inputs, masks, tags, num_samples)
            Ez, Ezzp1, _ = variational_posterior.discrete_expectations[0]

            x = variational_posterior.mean_continuous_states[0]
            scale = x.size
            J_diag, J_lower_diag = new_slds._laplace_hessian_neg_expected_log_joint(
                datas[0], inputs[0], masks[0], tags[0], x, Ez, Ezzp1)

            dense_hessian = scipy.linalg.block_diag(*[x for x in J_diag])
            dense_hessian[D:, :-D] += scipy.linalg.block_diag(
                *[x for x in J_lower_diag])
            dense_hessian[:-D, D:] += scipy.linalg.block_diag(
                *[x.T for x in J_lower_diag])

            true_hess = hessian(neg_expected_log_joint_wrapper)(x.reshape(-1), T, D)
            assert np.allclose(true_hess, dense_hessian)
            print("Hessian passed.")

            # Also check that computation of H works.
            h_dense = dense_hessian @ x.reshape(-1)
            h_dense = h_dense.reshape(T, D)

            J_ini, J_dyn_11, J_dyn_21, J_dyn_22, J_obs = new_slds._laplace_neg_hessian_params(
                datas[0], inputs[0], masks[0], tags[0], x, Ez, Ezzp1)
            h_ini, h_dyn_1, h_dyn_2, h_obs = new_slds._laplace_neg_hessian_params_to_hs(
                x, J_ini, J_dyn_11, J_dyn_21, J_dyn_22, J_obs)

            h = h_obs.copy()
            h[0] += h_ini
            h[:-1] += h_dyn_1
            h[1:] += h_dyn_2

            assert np.allclose(h, h_dense)
def test_csr_binary_dot_left():
    dotleft_cython = csr_binary_dot_left(feats, rows, cols)
    dotleft_numpy = np.dot(binmat, feats)
    assert np.allclose(dotleft_cython, dotleft_numpy)
def test():
    torch.set_default_dtype(torch.float64)
    torch.manual_seed(42)

    # generate some test data
    T = 10
    qdim = 2
    q = torch.randn(5, T, qdim)
    qdot = torch.randn(5, T, qdim)
    qddot = torch.cat(
        (torch.zeros(5, 1, qdim), (qdot[:, 1:, :] - qdot[:, :-1, :]) / 0.1),
        dim=1)
    x_ = torch.cat([q, qdot], dim=-1)
    x = x_[:, :-1]
    nx = x_[:, 1:]

    sys = StructuredMechanicalModel(qdim=qdim, dt=0.1, hidden_sizes=[2])

    def _loss(qddot_true):
        qddot_pred = sys.compute_qddot(q, qdot, create_graph=True)
        return torch.nn.functional.mse_loss(qddot_true, qddot_pred)

    assert gradcheck(_loss, qddot.requires_grad_())

    print("Testing wrt theta")

    def _ploss(params):
        if params.ndim > 1:
            retval = np.zeros(len(params))
            for i in tqdm(range(len(params))):
                retval[i] = _ploss(params[i])
            return retval
        vtp(torch.tensor(params), sys.parameters())
        sys.zero_grad()
        qddot_pred = sys.compute_qddot(q, qdot, create_graph=True)
        return torch.nn.functional.mse_loss(qddot, qddot_pred).detach().numpy()

    params0 = ptv(sys.parameters()).detach()
    # gradient = grad(_ploss)(params0.numpy())
    gradient = nd.Gradient(_ploss)(params0.numpy())

    vtp(params0, sys.parameters())
    sys.zero_grad()
    qddot_pred = sys.compute_qddot(q, qdot, create_graph=True)
    loss = torch.nn.functional.mse_loss(qddot, qddot_pred)
    loss.backward()
    gradient_ = parameter_grads_to_vector(sys.parameters()).detach().numpy()
    print(gradient)
    print(gradient_)
    assert np.allclose(gradient_, gradient)

    print("Testing wrt theta")

    def _ploss(params):
        if params.ndim > 1:
            retval = np.zeros(len(params))
            for i in tqdm(range(len(params))):
                retval[i] = _ploss(params[i])
            return retval
        vtp(torch.tensor(params), sys.parameters())
        sys.zero_grad()
        nx_pred = sys.step(torch.ones(5, 9), x)
        return torch.nn.functional.mse_loss(nx_pred[..., qdim:],
                                            nx[..., qdim:]).detach().numpy()

    params0 = ptv(sys.parameters()).detach()
    # gradient = grad(_ploss)(params0.numpy())
    gradient = nd.Gradient(_ploss)(params0.numpy())

    vtp(params0, sys.parameters())
    sys.zero_grad()
    nx_pred = sys.step(torch.ones(5, 9), x)
    loss = torch.nn.functional.mse_loss(nx_pred[..., qdim:], nx[..., qdim:])
    loss.backward()
    gradient_ = parameter_grads_to_vector(sys.parameters()).detach().numpy()
    print(gradient)
    print(gradient_)
    assert np.allclose(gradient_, gradient)
def fit_multiclass_logistic_regression(X, y, bias=None, K=None, W0=None,
                                       mu0=0, sigmasq0=1,
                                       verbose=False, maxiter=1000):
    """
    Fit a multiclass logistic regression

        y_i ~ Cat(softmax(W x_i))

    where y is a one-hot vector in {0, 1}^K,
    x_i is a vector in R^D, and
    W is a matrix in R^{K x D}.

    The log likelihood is

        L(W) = sum_i sum_k y_ik * w_k^T x_i - logsumexp(W x_i)

    The prior is w_k ~ Norm(mu0, diag(sigmasq0)).
    """
    N, D = X.shape
    assert y.shape[0] == N

    # Make sure y is one hot
    if y.ndim == 1 or y.shape[1] == 1:
        assert y.dtype == int and y.min() >= 0
        K = y.max() + 1 if K is None else K
        y_oh = np.zeros((N, K), dtype=int)
        y_oh[np.arange(N), y] = 1

    else:
        K = y.shape[1]
        assert y.min() == 0 and y.max() == 1 and np.allclose(y.sum(1), 1)
        y_oh = y

    # Check that bias is correct shape
    if bias is not None:
        assert bias.shape == (K,) or bias.shape == (N, K)
    else:
        bias = np.zeros((K,))

    def loss(W_flat):
        W = np.reshape(W_flat, (K, D))
        scores = np.dot(X, W.T) + bias
        lp = np.sum(y_oh * scores) - np.sum(logsumexp(scores, axis=1))
        prior = np.sum(-0.5 * (W - mu0)**2 / sigmasq0)
        return -(lp + prior) / N

    W0 = W0 if W0 is not None else np.zeros((K, D))
    assert W0.shape == (K, D)

    itr = [0]
    def callback(W_flat):
        itr[0] += 1
        print("Iteration {} loss: {:.3f}".format(itr[0], loss(W_flat)))

    result = minimize(loss, np.ravel(W0), jac=grad(loss),
                      method="BFGS",
                      callback=callback if verbose else None,
                      options=dict(maxiter=maxiter, disp=verbose))

    W = np.reshape(result.x, (K, D))
    return W
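# A minimal usage sketch for the fitter above (the synthetic data, cluster
# separation, and accuracy threshold are assumptions, not part of the original
# code): on well-separated classes the fitted weights should classify the
# training data almost perfectly.
if __name__ == "__main__":
    np.random.seed(0)
    N, D, K = 300, 2, 3
    centers = np.array([[10., 0.], [0., 10.], [-10., -10.]])
    y_true = np.random.randint(K, size=N)
    X = centers[y_true] + np.random.randn(N, D)
    W_fit = fit_multiclass_logistic_regression(X, y_true, K=K)
    y_pred = np.argmax(np.dot(X, W_fit.T), axis=1)
    assert np.mean(y_pred == y_true) > 0.9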
def is_posdef(A):
    return np.allclose(A, A.T) and np.all(np.linalg.eigvalsh(A) > 0.)
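# A brief usage sketch (hypothetical inputs): A A^T plus a small ridge is
# symmetric positive definite, while a negated identity is not.
if __name__ == "__main__":
    A = np.random.randn(4, 4)
    assert is_posdef(np.dot(A, A.T) + 1e-6 * np.eye(4))
    assert not is_posdef(-np.eye(4))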