def full(self, X, Xs=None):
    X, Xc, Xs = self._common(X, Xs)
    if Xs is None:
        return at.dot(Xc, at.transpose(Xc))
    else:
        Xsc = at.sub(Xs, self.c)
        return at.dot(Xc, at.transpose(Xsc))
def est_both_assert_merge_2_reverse(self):
    # Test case "test_both_assert_merge_2" but in reverse order
    x1 = tt.matrix("x1")
    x2 = tt.matrix("x2")
    x3 = tt.matrix("x3")
    e = tt.dot(x1, tt.opt.assert_op(x2, (x2 > x3).all())) + tt.dot(
        tt.opt.assert_op(x1, (x1 > x3).all()), x2
    )
    g = FunctionGraph([x1, x2, x3], [e])
    MergeOptimizer().optimize(g)
    strg = aesara.printing.debugprint(g, file="str")
    strref = """Elemwise{add,no_inplace} [id A] '' 7
 |dot [id B] '' 6
 | |Assert{msg='Aesara Assert failed!'} [id C] '' 5
 | | |x1 [id D]
 | | |All [id E] '' 3
 | | |Elemwise{gt,no_inplace} [id F] '' 1
 | | |x1 [id D]
 | | |x3 [id G]
 | |Assert{msg='Aesara Assert failed!'} [id H] '' 4
 | |x2 [id I]
 | |All [id J] '' 2
 | |Elemwise{gt,no_inplace} [id K] '' 0
 | |x2 [id I]
 | |x3 [id G]
 |dot [id B] '' 6
"""
    print(strg)
    assert strg == strref, (strg, strref)
def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
    sigma2 = at.square(sigma)
    Kuu = self.cov_func(Xu)
    Kuf = self.cov_func(Xu, X)
    Luu = cholesky(stabilize(Kuu))
    A = solve_lower(Luu, Kuf)
    Qffd = at.sum(A * A, 0)
    if self.approx == "FITC":
        Kffd = self.cov_func(X, diag=True)
        Lamd = at.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
        trace = 0.0
    elif self.approx == "VFE":
        Lamd = at.ones_like(Qffd) * sigma2
        trace = (1.0 / (2.0 * sigma2)) * (
            at.sum(self.cov_func(X, diag=True)) - at.sum(at.sum(A * A, 0))
        )
    else:  # DTC
        Lamd = at.ones_like(Qffd) * sigma2
        trace = 0.0
    A_l = A / Lamd
    L_B = cholesky(at.eye(Xu.shape[0]) + at.dot(A_l, at.transpose(A)))
    r = y - self.mean_func(X)
    r_l = r / Lamd
    c = solve_lower(L_B, at.dot(A, r_l))
    constant = 0.5 * X.shape[0] * at.log(2.0 * np.pi)
    logdet = 0.5 * at.sum(at.log(Lamd)) + at.sum(at.log(at.diag(L_B)))
    quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c))
    return -1.0 * (constant + logdet + quadratic + trace)
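# The logdet term above relies on the matrix determinant lemma:
# |A^T A + diag(Lam)| = prod(Lam) * |I + A diag(1/Lam) A^T|, with A = Luu^{-1} Kuf,
# so 0.5*log|Qff + Lam| becomes 0.5*sum(log(Lam)) + sum(log(diag(chol(I + A_l A^T)))).
# A hedged NumPy sketch checking that identity on random matrices; the helper name
# _sparse_gp_logdet_check is purely illustrative and not part of the library:
def _sparse_gp_logdet_check():
    import numpy as np

    rng = np.random.default_rng(0)
    A = rng.normal(size=(3, 8))           # stands in for Luu^{-1} Kuf
    Lam = rng.uniform(0.5, 2.0, size=8)   # stands in for Lamd
    direct = np.linalg.slogdet(A.T @ A + np.diag(Lam))[1]
    L_B = np.linalg.cholesky(np.eye(3) + (A / Lam) @ A.T)
    lemma = np.sum(np.log(Lam)) + 2.0 * np.sum(np.log(np.diag(L_B)))
    assert np.allclose(direct, lemma)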
def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
    sigma2 = at.square(sigma)
    Kuu = cov_total(Xu)
    Kuf = cov_total(Xu, X)
    Luu = cholesky(stabilize(Kuu))
    A = solve_lower(Luu, Kuf)
    Qffd = at.sum(A * A, 0)
    if self.approx == "FITC":
        Kffd = cov_total(X, diag=True)
        Lamd = at.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
    else:  # VFE or DTC
        Lamd = at.ones_like(Qffd) * sigma2
    A_l = A / Lamd
    L_B = cholesky(at.eye(Xu.shape[0]) + at.dot(A_l, at.transpose(A)))
    r = y - mean_total(X)
    r_l = r / Lamd
    c = solve_lower(L_B, at.dot(A, r_l))
    Kus = self.cov_func(Xu, Xnew)
    As = solve_lower(Luu, Kus)
    mu = self.mean_func(Xnew) + at.dot(at.transpose(As), solve_upper(at.transpose(L_B), c))
    C = solve_lower(L_B, As)
    if diag:
        Kss = self.cov_func(Xnew, diag=True)
        var = Kss - at.sum(at.square(As), 0) + at.sum(at.square(C), 0)
        if pred_noise:
            var += sigma2
        return mu, var
    else:
        cov = self.cov_func(Xnew) - at.dot(at.transpose(As), As) + at.dot(
            at.transpose(C), C
        )
        if pred_noise:
            cov += sigma2 * at.identity_like(cov)
        return mu, cov if pred_noise else stabilize(cov)
def t_gemv1(self, m_shp):
    """test vector2 + dot(matrix, vector1)"""
    rng = np.random.default_rng(unittest_tools.fetch_seed())
    v1 = aesara.shared(np.array(rng.uniform(size=(m_shp[1],)), dtype="float32"))
    v2_orig = np.array(rng.uniform(size=(m_shp[0],)), dtype="float32")
    v2 = aesara.shared(v2_orig)
    m = aesara.shared(np.array(rng.uniform(size=m_shp), dtype="float32"))

    f = aesara.function([], v2 + at.dot(m, v1), mode=self.mode)

    # Assert they produce the same output
    assert np.allclose(f(), np.dot(m.get_value(), v1.get_value()) + v2_orig)
    topo = [n.op for n in f.maker.fgraph.toposort()]
    assert topo == [CGemv(inplace=False)], topo

    # test the inplace version
    g = aesara.function([], [], updates=[(v2, v2 + at.dot(m, v1))], mode=self.mode)

    # Assert they produce the same output
    g()
    assert np.allclose(
        v2.get_value(), np.dot(m.get_value(), v1.get_value()) + v2_orig
    )
    topo = [n.op for n in g.maker.fgraph.toposort()]
    assert topo == [CGemv(inplace=True)]

    # Do the same tests with a matrix with strides in both dimensions
    m.set_value(m.get_value(borrow=True)[::-1, ::-1], borrow=True)
    v2.set_value(v2_orig)
    assert np.allclose(f(), np.dot(m.get_value(), v1.get_value()) + v2_orig)
    g()
    assert np.allclose(
        v2.get_value(), np.dot(m.get_value(), v1.get_value()) + v2_orig
    )
def test_compute_flag(self):
    x = tt.matrix("x")
    y = tt.matrix("y")
    y.tag.test_value = np.random.rand(4, 5).astype(config.floatX)

    # should skip computation of test value
    aesara.config.compute_test_value = "off"
    z = tt.dot(x, y)
    assert not hasattr(z.tag, "test_value")

    # should fail when asked by user
    aesara.config.compute_test_value = "raise"
    with pytest.raises(ValueError):
        tt.dot(x, y)

    # test that a warning is raised if required
    aesara.config.compute_test_value = "warn"
    warnings.simplefilter("error", UserWarning)
    try:
        with pytest.raises(UserWarning):
            tt.dot(x, y)
    finally:
        # Restore the default behavior.
        # TODO There is a cleaner way to do this in Python 2.6, once
        # Aesara drops support of Python 2.4 and 2.5.
        warnings.simplefilter("default", UserWarning)
def square_dist(self, X, Xs=None):
    X2 = aet.sum(aet.square(X), 1)
    if Xs is None:
        sqd = -2.0 * aet.dot(X, aet.transpose(X)) + (
            aet.reshape(X2, (-1, 1)) + aet.reshape(X2, (1, -1))
        )
    else:
        Xs2 = aet.sum(aet.square(Xs), 1)
        sqd = -2.0 * aet.dot(X, aet.transpose(Xs)) + (
            aet.reshape(X2, (-1, 1)) + aet.reshape(Xs2, (1, -1))
        )
    return aet.clip(sqd, 0.0, np.inf)
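# Both square_dist variants in this file rely on the expansion
# ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 x_i . x_j.  A minimal NumPy sketch that
# checks the expansion against a brute-force loop; the helper name _square_dist_check
# is illustrative only and not part of the library:
def _square_dist_check():
    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(5, 3))
    X2 = np.sum(np.square(X), 1)
    sqd = -2.0 * X @ X.T + (X2.reshape(-1, 1) + X2.reshape(1, -1))
    brute = np.array([[np.sum((a - b) ** 2) for b in X] for a in X])
    assert np.allclose(np.clip(sqd, 0.0, np.inf), brute)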
def _build_conditional(self, Xnew, X, f, cov_total, mean_total):
    Kxx = cov_total(X)
    Kxs = self.cov_func(X, Xnew)
    L = cholesky(stabilize(Kxx))
    A = solve_lower(L, Kxs)
    v = solve_lower(L, f - mean_total(X))
    mu = self.mean_func(Xnew) + at.dot(at.transpose(A), v)
    Kss = self.cov_func(Xnew)
    cov = Kss - at.dot(at.transpose(A), A)
    return mu, cov
def test_input_aliasing_affecting_inplace_operations(self):
    # Note: to trigger this bug with aesara rev 4586:2bc6fc7f218b,
    # you need to make the inputs mutable (so that inplace
    # operations are used) and to break the elemwise composition
    # with some non-elemwise op (here dot)
    x = dvector()
    y = dvector()
    m1 = dmatrix()
    m2 = dmatrix()
    f = function(
        [
            In(x, mutable=True),
            In(y, mutable=True),
            In(m1, mutable=True),
            In(m2, mutable=True),
        ],
        aet.dot((x * 2), m1) + aet.dot((y * 3), m2),
    )

    # Test 1. If the same variable is given twice

    # Compute bogus values
    v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
    m = np.asarray(
        [
            [1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1],
        ],
        dtype="float64",
    )
    bogus_vals = f(v, v, m, m)

    # Since we used inplace operations, v and m may be corrupted,
    # so we need to recreate them
    v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
    m = np.asarray(
        [
            [1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1],
        ],
        dtype="float64",
    )
    m_copy = m.copy()
    v_copy = v.copy()
    vals = f(v, v_copy, m, m_copy)

    assert np.allclose(vals, bogus_vals)
def _build_conditional(self, Xnew, X, f):
    Kxx = self.cov_func(X)
    Kxs = self.cov_func(X, Xnew)
    Kss = self.cov_func(Xnew)
    L = cholesky(stabilize(Kxx))
    A = solve_lower(L, Kxs)
    cov = Kss - at.dot(at.transpose(A), A)
    v = solve_lower(L, f - self.mean_func(X))
    mu = self.mean_func(Xnew) + at.dot(at.transpose(A), v)
    beta = at.dot(v, v)
    nu2 = self.nu + X.shape[0]
    covT = (self.nu + beta - 2) / (nu2 - 2) * cov
    return nu2, mu, covT
def square_dist(self, X, Xs):
    X = at.mul(X, 1.0 / self.ls)
    X2 = at.sum(at.square(X), 1)
    if Xs is None:
        sqd = -2.0 * at.dot(X, at.transpose(X)) + (
            at.reshape(X2, (-1, 1)) + at.reshape(X2, (1, -1))
        )
    else:
        Xs = at.mul(Xs, 1.0 / self.ls)
        Xs2 = at.sum(at.square(Xs), 1)
        sqd = -2.0 * at.dot(X, at.transpose(Xs)) + (
            at.reshape(X2, (-1, 1)) + at.reshape(Xs2, (1, -1))
        )
    return at.clip(sqd, 0.0, np.inf)
def check_rop_lop(self, y, out_shape):
    """
    As check_mat_rop_lop, except the input is self.x, which is a vector.
    The output is still a vector.
    """
    # TEST ROP
    vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
    vv = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)

    yv = tensor.Rop(y, self.x, self.v)
    rop_f = function([self.x, self.v], yv, on_unused_input="ignore")
    J, _ = aesara.scan(
        lambda i, y, x: tensor.grad(y[i], x),
        sequences=tensor.arange(y.shape[0]),
        non_sequences=[y, self.x],
    )
    sy = tensor.dot(J, self.v)
    scan_f = function([self.x, self.v], sy, on_unused_input="ignore")

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), "ROP mismatch: {} {}".format(v1, v2)

    try:
        tensor.Rop(
            aesara.clone(y, replace={self.x: break_op(self.x)}), self.x, self.v
        )
    except ValueError:
        pytest.skip(
            "Rop does not handle non-differentiable inputs "
            "correctly. Bug exposed by fixing Add.grad method."
        )

    vx = np.asarray(self.rng.uniform(size=self.in_shape), aesara.config.floatX)
    vv = np.asarray(self.rng.uniform(size=out_shape), aesara.config.floatX)

    yv = tensor.Lop(y, self.x, self.v)
    lop_f = function([self.x, self.v], yv, on_unused_input="ignore")
    J, _ = aesara.scan(
        lambda i, y, x: tensor.grad(y[i], x),
        sequences=tensor.arange(y.shape[0]),
        non_sequences=[y, self.x],
    )
    sy = tensor.dot(self.v, J)
    scan_f = function([self.x, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert np.allclose(v1, v2), "LOP mismatch: {} {}".format(v1, v2)
def test_constant(self):
    x = tt.constant(np.random.rand(2, 3), dtype=config.floatX)
    y = aesara.shared(np.random.rand(3, 6).astype(config.floatX), "y")

    # should work
    z = tt.dot(x, y)
    assert hasattr(z.tag, "test_value")
    f = aesara.function([], z)
    assert _allclose(f(), z.tag.test_value)

    # this test should fail
    x = tt.constant(np.random.rand(2, 4), dtype=config.floatX)
    with pytest.raises(ValueError):
        tt.dot(x, y)
def test_shared(self):
    x = tt.matrix("x")
    x.tag.test_value = np.random.rand(3, 4).astype(config.floatX)
    y = aesara.shared(np.random.rand(4, 6).astype(config.floatX), "y")

    # should work
    z = tt.dot(x, y)
    assert hasattr(z.tag, "test_value")
    f = aesara.function([x], z)
    assert _allclose(f(x.tag.test_value), z.tag.test_value)

    # this test should fail
    y.set_value(np.random.rand(5, 6).astype(config.floatX))
    with pytest.raises(ValueError):
        tt.dot(x, y)
def dlogp(inputs, gradients):
    (g_logp,) = gradients
    cov, delta = inputs

    g_logp.tag.test_value = floatX(1.0)
    n, k = delta.shape

    chol_cov = cholesky(cov)
    diag = aet.nlinalg.diag(chol_cov)
    ok = aet.all(diag > 0)

    chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    inner = n * aet.eye(k) - aet.dot(delta_trans.T, delta_trans)
    g_cov = solve_upper(chol_cov.T, inner)
    g_cov = solve_upper(chol_cov.T, g_cov.T)

    tau_delta = solve_upper(chol_cov.T, delta_trans.T)
    g_delta = tau_delta.T

    g_cov = aet.switch(ok, g_cov, -np.nan)
    g_delta = aet.switch(ok, g_delta, -np.nan)

    return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
def __init__(self, input, n_in, n_out, name_prefix=""):
    """Initialize the parameters of the logistic regression

    :type input: aesara.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie
    """
    # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
    self.W = aesara.shared(
        value=np.zeros((n_in, n_out), dtype=aesara.config.floatX),
        name=name_prefix + "W",
    )

    # compute vector of class-membership probabilities in symbolic form
    self.p_y_given_x = tt.nnet.softmax(tt.dot(input, self.W))

    # compute prediction as class whose probability is maximal in
    # symbolic form
    self.y_pred = tt.argmax(self.p_y_given_x, axis=1)

    # parameters of the model
    self.params = [self.W]
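# A hedged NumPy sketch of the forward pass built symbolically above: linear scores
# via dot, softmax for class probabilities, then argmax for the prediction.  The helper
# name _logreg_forward_sketch and its W argument are illustrative, not part of the class:
def _logreg_forward_sketch(inputs, W):
    import numpy as np

    scores = inputs @ W                                    # tt.dot(input, self.W)
    exp = np.exp(scores - scores.max(axis=1, keepdims=True))
    p_y_given_x = exp / exp.sum(axis=1, keepdims=True)     # tt.nnet.softmax
    return np.argmax(p_y_given_x, axis=1)                  # tt.argmax(..., axis=1)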
def test_dot_not_output(self):
    # Test the case where the vector input to the dot is not already an
    # output of the inner function.
    v = tt.vector()
    m = tt.matrix()
    output = tt.dot(v, m)

    # Compile the function twice, once with the optimization and once without
    opt_mode = mode.including("scan")
    f_opt = aesara.function([v, m], tt.jacobian(output, v), mode=opt_mode)

    no_opt_mode = mode.excluding("scanOp_pushout_output")
    f_no_opt = aesara.function([v, m], tt.jacobian(output, v), mode=no_opt_mode)

    # Ensure that the optimization was performed correctly in f_opt
    # The inner function of scan should have only one output and it should
    # not be the result of a Dot
    scan_node = [
        node for node in f_opt.maker.fgraph.toposort() if isinstance(node.op, Scan)
    ][0]
    assert len(scan_node.op.outputs) == 1
    assert not isinstance(scan_node.op.outputs[0], tt.Dot)

    # Ensure that the function compiled with the optimization produces
    # the same results as the function compiled without
    v_value = np.random.random(4).astype(config.floatX)
    m_value = np.random.random((4, 5)).astype(config.floatX)

    output_opt = f_opt(v_value, m_value)
    output_no_opt = f_no_opt(v_value, m_value)

    utt.assert_allclose(output_opt, output_no_opt)
def __call__(self, X):
    XY = X.dot(X.T)
    x2 = at.sum(X**2, axis=1).dimshuffle(0, "x")
    X2e = at.repeat(x2, X.shape[0], axis=1)
    H = X2e + X2e.T - 2.0 * XY

    V = at.sort(H.flatten())
    length = V.shape[0]
    # median distance
    m = at.switch(
        at.eq((length % 2), 0),
        # if even vector
        at.mean(V[((length // 2) - 1):((length // 2) + 1)]),
        # if odd vector
        V[length // 2],
    )

    h = 0.5 * m / at.log(floatX(H.shape[0]) + floatX(1))

    # RBF
    Kxy = at.exp(-H / h / 2.0)

    # Derivative
    dxkxy = -at.dot(Kxy, X)
    sumkxy = at.sum(Kxy, axis=-1, keepdims=True)
    dxkxy = at.add(dxkxy, at.mul(X, sumkxy)) / h

    return Kxy, dxkxy
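# __call__ above builds an RBF kernel whose bandwidth h comes from the median of the
# pairwise squared distances (the "median heuristic").  A hedged NumPy sketch of the
# same bandwidth computation, useful for sanity-checking outside the graph; the helper
# name _median_bandwidth_sketch is illustrative only:
def _median_bandwidth_sketch(X):
    import numpy as np

    XY = X @ X.T
    x2 = np.sum(X**2, axis=1)[:, None]
    H = x2 + x2.T - 2.0 * XY   # pairwise squared distances
    m = np.median(H)           # median of all entries, matching the sorted-middle trick
    return 0.5 * m / np.log(X.shape[0] + 1.0)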
def spectral_radius_bound(X, log2_exponent):
    """
    Returns an upper bound on the largest eigenvalue of the square symmetric matrix X.

    log2_exponent must be a positive-valued integer. The larger it is, the
    slower and tighter the bound. Values up to 5 should usually suffice. The
    algorithm works by squaring X this many times.

    From V. Pan, 1990. "Estimating the Extremal Eigenvalues of a Symmetric
    Matrix", Computers Math Applic. Vol 20 n. 2 pp 17-22.

    Rq: an efficient algorithm, not used here, is defined in this paper.
    """
    if X.type.ndim != 2:
        raise TypeError("spectral_radius_bound requires a matrix argument", X)
    if not isinstance(log2_exponent, int):
        raise TypeError(
            "spectral_radius_bound requires an integer exponent", log2_exponent
        )
    if log2_exponent <= 0:
        raise ValueError(
            "spectral_radius_bound requires a strictly positive exponent",
            log2_exponent,
        )

    XX = X
    for i in range(log2_exponent):
        XX = tensor.dot(XX, XX)
    return tensor.pow(trace(XX), 2 ** (-log2_exponent))
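# The bound uses trace(X**(2**k)) ** (2**-k) >= max|eigenvalue of X| for symmetric X,
# since the trace of an even matrix power is a sum of non-negative eigenvalue powers.
# A hedged NumPy sketch checking that behaviour; the helper name _spectral_bound_sketch
# is illustrative only:
def _spectral_bound_sketch(log2_exponent=5):
    import numpy as np

    rng = np.random.default_rng(0)
    A = rng.normal(size=(6, 6))
    X = A + A.T  # symmetric test matrix
    XX = X
    for _ in range(log2_exponent):
        XX = XX @ XX  # XX == X ** (2 ** log2_exponent)
    bound = np.trace(XX) ** (2.0 ** -log2_exponent)
    assert bound >= np.max(np.abs(np.linalg.eigvalsh(X))) - 1e-8
    return bound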
def test_multiple_inplace(self):
    skip_if_blas_ldflags_empty()
    x = dmatrix("x")
    y = dvector("y")
    z = dvector("z")
    f = aesara.function(
        [x, y, z], [at.dot(y, x), at.dot(z, x)], mode=mode_blas_opt
    )
    vx = np.random.random((3, 3))
    vy = np.random.random((3))
    vz = np.random.random((3))
    out = f(vx, vy, vz)
    assert np.allclose(out[0], np.dot(vy, vx))
    assert np.allclose(out[1], np.dot(vz, vx))
    assert (
        len([n for n in f.maker.fgraph.apply_nodes if isinstance(n.op, AllocEmpty)])
        == 2
    )
def func(chol_vec, delta):
    chol = at.stack(
        [
            at.stack([at.exp(0.1 * chol_vec[0]), 0]),
            at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
        ]
    )
    cov = at.dot(chol, chol.T)
    return MvNormalLogp()(cov, delta)
def _build_conditional(self, Xnew):
    Xs, f = self.Xs, self.f
    X = cartesian(*Xs)
    delta = f - self.mean_func(X)
    covs = [stabilize(cov(Xi)) for cov, Xi in zip(self.cov_funcs, Xs)]
    chols = [cholesky(cov) for cov in covs]
    cholTs = [at.transpose(chol) for chol in chols]
    Kss = self.cov_func(Xnew)
    Kxs = self.cov_func(X, Xnew)
    Ksx = at.transpose(Kxs)
    alpha = kron_solve_lower(chols, delta)
    alpha = kron_solve_upper(cholTs, alpha)
    mu = at.dot(Ksx, alpha).ravel() + self.mean_func(Xnew)
    A = kron_solve_lower(chols, Kxs)
    cov = stabilize(Kss - at.dot(at.transpose(A), A))
    return mu, cov
def test_partial_input_aliasing_affecting_inplace_operations(self):
    # Note: to trigger this bug with aesara rev 4586:2bc6fc7f218b,
    # you need to make the inputs mutable (so that inplace
    # operations are used) and to break the elemwise composition
    # with some non-elemwise op (here dot)
    x = dvector()
    y = dvector()
    z = dvector()
    m1 = dmatrix()
    m2 = dmatrix()
    m3 = dmatrix()

    # Test 2. If the variables only partially overlap;
    # more exactly, we care about the case where we have a, b, c
    # and a shares memory with b, b shares memory with c, but
    # c does not share memory with a
    f = aesara.function(
        [
            In(x, mutable=True),
            In(y, mutable=True),
            In(z, mutable=True),
            In(m1, mutable=True),
            In(m2, mutable=True),
            In(m3, mutable=True),
        ],
        (aet.dot((x * 2), m1) + aet.dot((y * 3), m2) + aet.dot((z * 4), m3)),
    )

    # Compute bogus values
    v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
    m = np.asarray([[1, 0], [0, 1]], dtype="float64")
    bogus_vals = f(v[:2], v[1:3], v[2:4], m, m, m)

    # Since we used inplace operations, v and m may be corrupted,
    # so we need to recreate them
    v = np.asarray([1, 2, 3, 4, 5], dtype="float64")
    m = np.asarray([[1, 0], [0, 1]], dtype="float64")
    m_copy1 = m.copy()
    v_copy1 = v.copy()
    m_copy2 = m.copy()
    v_copy2 = v.copy()
    vals = f(v[:2], v_copy1[1:3], v_copy2[2:4], m, m_copy1, m_copy2)

    assert np.allclose(vals, bogus_vals)
def test_variable_only(self):
    x = tt.matrix("x")
    x.tag.test_value = np.random.rand(3, 4).astype(config.floatX)
    y = tt.matrix("y")
    y.tag.test_value = np.random.rand(4, 5).astype(config.floatX)

    # should work
    z = tt.dot(x, y)
    assert hasattr(z.tag, "test_value")
    f = aesara.function([x, y], z)
    assert _allclose(f(x.tag.test_value, y.tag.test_value), z.tag.test_value)

    # this test should fail
    y.tag.test_value = np.random.rand(6, 5).astype(config.floatX)
    with pytest.raises(ValueError):
        tt.dot(x, y)
def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
    # This test refers to a bug reported by Jeremiah Lowin on 18th Oct
    # 2013. The bug occurs when a dot operation has only one
    # differentiable path through it (i.e. there is no gradient wrt
    # one of the inputs).
    x = tensor.arange(20.0).reshape([1, 20])
    v = aesara.shared(np.ones([20]))
    d = tensor.dot(x, v).sum()
    tensor.Rop(tensor.grad(d, v), v, v)
def test_dependence():
    dependence = make_dependence_cmp()

    x = tensor.matrix("x")
    y = tensor.dot(x * 2, x + 1)
    nodes = io_toposort([x], [y])

    for a, b in zip(nodes[:-1], nodes[1:]):
        assert dependence(a, b) <= 0
def test_gemv_dot_strides():
    # Reported in https://github.com/Aesara/Aesara/issues/6142
    xv = rand(5)
    yv = rand(5, 1)
    x = gpuarray_shared_constructor(xv)
    y = gpuarray_shared_constructor(yv, broadcastable=(False, True))
    f = aesara.function([], tensor.dot(x, y[::-1]), mode=mode_with_gpu)
    out = f()
    utt.assert_allclose(out, np.dot(xv, yv[::-1]))
def test_kanren_basic():
    A_at = at.matrix("A")
    x_at = at.vector("x")

    y_at = at.dot(A_at, x_at)

    q = var()
    res = list(run(None, q, eq(y_at, etuple(_dot, q, x_at))))

    assert res == [A_at]
def est_both_assert_merge_1(self):
    # Merge two nodes, both have assert on the same node
    # with different conditions.
    x1 = tt.matrix("x1")
    x2 = tt.matrix("x2")
    x3 = tt.matrix("x3")
    e = tt.dot(tt.opt.assert_op(x1, (x1 > x3).all()), x2) + tt.dot(
        tt.opt.assert_op(x1, (x1 > x2).all()), x2
    )
    g = FunctionGraph([x1, x2, x3], [e])
    MergeOptimizer().optimize(g)
    strg = aesara.printing.debugprint(g, file="str")
    strref1 = """Elemwise{add,no_inplace} [id A] '' 6
 |dot [id B] '' 5
 | |Assert{msg='Aesara Assert failed!'} [id C] '' 4
 | | |x1 [id D]
 | | |All [id E] '' 3
 | | | |Elemwise{gt,no_inplace} [id F] '' 1
 | | | |x1 [id D]
 | | | |x3 [id G]
 | | |All [id H] '' 2
 | | |Elemwise{gt,no_inplace} [id I] '' 0
 | | |x1 [id D]
 | | |x2 [id J]
 | |x2 [id J]
 |dot [id B] '' 5
"""
    strref2 = """Elemwise{add,no_inplace} [id A] '' 6
 |dot [id B] '' 5
 | |Assert{msg='Aesara Assert failed!'} [id C] '' 4
 | | |x1 [id D]
 | | |All [id E] '' 3
 | | | |Elemwise{gt,no_inplace} [id F] '' 1
 | | | |x1 [id D]
 | | | |x2 [id G]
 | | |All [id H] '' 2
 | | |Elemwise{gt,no_inplace} [id I] '' 0
 | | |x1 [id D]
 | | |x3 [id J]
 | |x2 [id G]
 |dot [id B] '' 5
"""
    # print(strg)
    assert strg == strref1 or strg == strref2, (strg, strref1, strref2)
def logp(self, states):
    r"""Create a Theano graph that computes the log-likelihood for a discrete Markov chain.

    This is the log-likelihood for the joint distribution of states, :math:`S_t`, conditional
    on state samples, :math:`s_t`, given by the following:

    .. math::

        \int_{S_0} P(S_1 = s_1 \mid S_0) dP(S_0) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

    The first term (i.e. the integral) simply computes the marginal :math:`P(S_1 = s_1)`, so
    another way to express this result is as follows:

    .. math::

        P(S_1 = s_1) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

    """  # noqa: E501
    states_tt = at.as_tensor(states)

    if states.ndim > 1 or self.Gammas.ndim > 3 or self.gamma_0.ndim > 1:
        raise NotImplementedError("Broadcasting not supported.")

    Gammas_tt = at_broadcast_to(
        self.Gammas, (states.shape[0],) + tuple(self.Gammas.shape)[-2:]
    )
    gamma_0_tt = self.gamma_0

    Gamma_1_tt = Gammas_tt[0]
    P_S_1_tt = at.dot(gamma_0_tt, Gamma_1_tt)[states_tt[0]]

    # def S_logp_fn(S_tm1, S_t, Gamma):
    #     return at.log(Gamma[..., S_tm1, S_t])
    #
    # P_S_2T_tt, _ = aesara.scan(
    #     S_logp_fn,
    #     sequences=[
    #         {
    #             "input": states_tt,
    #             "taps": [-1, 0],
    #         },
    #         Gammas_tt,
    #     ],
    # )
    P_S_2T_tt = Gammas_tt[at.arange(1, states.shape[0]), states[:-1], states[1:]]

    log_P_S_1T_tt = at.concatenate(
        [at.shape_padright(at.log(P_S_1_tt)), at.log(P_S_2T_tt)]
    )

    res = log_P_S_1T_tt.sum()
    res.name = "states_logp"

    return res
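# A hedged NumPy sketch of the same Markov-chain log-likelihood for a small,
# time-homogeneous example (one transition matrix repeated over time): the marginal
# of the first state via gamma_0 @ Gamma, then the indexed transition probabilities.
# The helper name _markov_chain_logp_sketch and the example values are illustrative only:
def _markov_chain_logp_sketch():
    import numpy as np

    gamma_0 = np.array([0.6, 0.4])              # initial state distribution
    Gamma = np.array([[0.9, 0.1], [0.2, 0.8]])  # transition matrix
    states = np.array([0, 0, 1, 1])

    Gammas = np.broadcast_to(Gamma, (states.shape[0],) + Gamma.shape)
    p_s1 = (gamma_0 @ Gammas[0])[states[0]]
    p_trans = Gammas[np.arange(1, states.shape[0]), states[:-1], states[1:]]
    return np.log(p_s1) + np.sum(np.log(p_trans))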