def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
    """Assemble the symbolic log marginal likelihood of the sparse approximation.

    The branch taken depends on ``self.approx``: ``"FITC"`` and ``"VFE"`` are
    handled explicitly, and every other value is treated as DTC.

    Returns the negated sum of the constant, log-determinant, quadratic and
    trace terms built below.
    """
    noise_var = at.square(sigma)
    K_uu = self.cov_func(Xu)
    K_uf = self.cov_func(Xu, X)
    chol_uu = cholesky(stabilize(K_uu))
    A = solve_lower(chol_uu, K_uf)
    q_diag = at.sum(A * A, 0)

    # Only the VFE branch contributes a non-zero trace term.
    trace = 0.0
    if self.approx == "FITC":
        k_diag = self.cov_func(X, diag=True)
        # Clip at zero before adding the noise variance.
        lam_diag = at.clip(k_diag - q_diag, 0.0, np.inf) + noise_var
    else:
        # VFE and DTC share the same constant diagonal.
        lam_diag = at.ones_like(q_diag) * noise_var
        if self.approx == "VFE":
            trace = (1.0 / (2.0 * noise_var)) * (
                at.sum(self.cov_func(X, diag=True)) - at.sum(at.sum(A * A, 0))
            )

    A_scaled = A / lam_diag
    chol_B = cholesky(at.eye(Xu.shape[0]) + at.dot(A_scaled, at.transpose(A)))
    resid = y - self.mean_func(X)
    resid_scaled = resid / lam_diag
    c = solve_lower(chol_B, at.dot(A, resid_scaled))

    constant = 0.5 * X.shape[0] * at.log(2.0 * np.pi)
    logdet = 0.5 * at.sum(at.log(lam_diag)) + at.sum(at.log(at.diag(chol_B)))
    quadratic = 0.5 * (at.dot(resid, resid_scaled) - at.dot(c, c))
    total = constant + logdet + quadratic + trace
    return -1.0 * total
def test_gpujoin_gpualloc():
    """Count alloc/join ops in the CPU and GPU graphs and compare their outputs."""
    a = tt.fmatrix("a")
    a_val = np.asarray(np.random.rand(4, 5), dtype="float32")
    b = tt.fmatrix("b")
    b_val = np.asarray(np.random.rand(3, 5), dtype="float32")

    def joined():
        # A fresh join expression is built for every compiled function.
        return tt.join(0, tt.zeros_like(a), tt.ones_like(b))

    def count_nodes(compiled, predicate):
        # Number of nodes in the compiled graph for which ``predicate`` holds.
        return sum([predicate(node) for node in compiled.maker.fgraph.toposort()])

    def is_gpu_alloc(node):
        return isinstance(node.op, GpuAlloc)

    def is_gpu_join(node):
        return node.op == gpu_join

    f = aesara.function([a, b], joined() + 4, mode=mode_without_gpu)
    f_gpu = aesara.function([a, b], joined(), mode=mode_with_gpu)
    f_gpu2 = aesara.function([a, b], joined() + 4, mode=mode_with_gpu)

    # CPU graph: two allocs feeding one join.
    assert count_nodes(f, lambda node: node.op == tt.alloc) == 2
    assert count_nodes(f, lambda node: node.op == tt.join_) == 1

    # GPU graphs, without and with the trailing "+ 4".
    assert count_nodes(f_gpu, is_gpu_alloc) == 2
    assert count_nodes(f_gpu, is_gpu_join) == 1
    assert count_nodes(f_gpu2, is_gpu_alloc) == 2
    assert count_nodes(f_gpu2, is_gpu_join) == 1

    assert np.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
    """Build the symbolic predictive mean and (co)variance at ``Xnew``.

    ``cov_total`` and ``mean_total`` are used for the terms involving the
    training inputs; ``self.cov_func`` and ``self.mean_func`` are used for
    the terms involving ``Xnew``.

    Returns ``(mu, var)`` when ``diag`` is truthy, otherwise ``(mu, cov)``.
    ``sigma ** 2`` is added when ``pred_noise`` is truthy; without it the full
    covariance is passed through ``stabilize``.
    """
    noise_var = at.square(sigma)
    K_uu = cov_total(Xu)
    K_uf = cov_total(Xu, X)
    chol_uu = cholesky(stabilize(K_uu))
    A = solve_lower(chol_uu, K_uf)
    q_diag = at.sum(A * A, 0)

    if self.approx == "FITC":
        k_diag = cov_total(X, diag=True)
        lam_diag = at.clip(k_diag - q_diag, 0.0, np.inf) + noise_var
    else:  # VFE or DTC
        lam_diag = at.ones_like(q_diag) * noise_var

    A_scaled = A / lam_diag
    chol_B = cholesky(at.eye(Xu.shape[0]) + at.dot(A_scaled, at.transpose(A)))
    resid = y - mean_total(X)
    resid_scaled = resid / lam_diag
    c = solve_lower(chol_B, at.dot(A, resid_scaled))

    K_us = self.cov_func(Xu, Xnew)
    A_new = solve_lower(chol_uu, K_us)
    prior_mean = self.mean_func(Xnew)
    mu = prior_mean + at.dot(at.transpose(A_new), solve_upper(at.transpose(chol_B), c))
    C = solve_lower(chol_B, A_new)

    if diag:
        k_new_diag = self.cov_func(Xnew, diag=True)
        var = k_new_diag - at.sum(at.square(A_new), 0) + at.sum(at.square(C), 0)
        if pred_noise:
            var += noise_var
        return mu, var

    cov = self.cov_func(Xnew) - at.dot(at.transpose(A_new), A_new) + at.dot(at.transpose(C), C)
    if pred_noise:
        cov += noise_var * at.identity_like(cov)
        return mu, cov
    return mu, stabilize(cov)
def setup_method(self):
    """Build a plain scan and a checkpointed scan of the same product, plus gradients."""
    self.k = iscalar("k")
    self.A = vector("A")

    def multiply_step(prior_result, A):
        # One scan step: multiply the running result by the non-sequence.
        return prior_result * A

    plain_steps, _ = scan(
        fn=multiply_step,
        outputs_info=aet.ones_like(self.A),
        non_sequences=self.A,
        n_steps=self.k,
    )
    checkpointed_steps, _ = scan_checkpoints(
        fn=multiply_step,
        outputs_info=aet.ones_like(self.A),
        non_sequences=self.A,
        n_steps=self.k,
        save_every_N=100,
    )

    # Keep only the final step of each scan.
    self.result = plain_steps[-1]
    self.result_check = checkpointed_steps[-1]

    # Gradients of the summed final step with respect to A.
    self.grad_A = aesara.grad(self.result.sum(), self.A)
    self.grad_A_check = aesara.grad(self.result_check.sum(), self.A)
def compute_A_k(A, k):
    """Return the last step of a ``k``-step scan that keeps multiplying by ``A``.

    The scan starts from ``at.ones_like(A)``. The updates returned by
    ``aesara.scan`` are discarded.
    """

    def multiply_step(prior_result, A):
        return prior_result * A

    steps, _ = aesara.scan(
        fn=multiply_step,
        outputs_info=at.ones_like(A),
        non_sequences=A,
        n_steps=k,
    )
    return steps[-1]
def test_scan_debugprint1():
    # Builds a scan that repeatedly multiplies by A, prints the graph of its
    # last step with debugprint, and compares the text with a stored listing.
    k = iscalar("k")
    A = dvector("A")

    # Symbolic description of the result
    result, updates = aesara.scan(
        fn=lambda prior_result, A: prior_result * A,
        outputs_info=aet.ones_like(A),
        non_sequences=A,
        n_steps=k,
    )

    # Only the last scan step is printed.
    final_result = result[-1]
    # file="str" makes debugprint return the listing as a string.
    output_str = debugprint(final_result, file="str")
    lines = output_str.split("\n")

    # NOTE(review): in this copy the expected listing sits on a single physical
    # line, so splitting it on "\n" yields one entry -- confirm the literal's
    # line breaks against the upstream test.
    expected_output = """Subtensor{int64} [id A] '' |Subtensor{int64::} [id B] '' | |for{cpu,scan_fn} [id C] '' | | |k [id D] | | |IncSubtensor{Set;:int64:} [id E] '' | | | |AllocEmpty{dtype='float64'} [id F] '' | | | | |Elemwise{add,no_inplace} [id G] '' | | | | | |k [id D] | | | | | |Subtensor{int64} [id H] '' | | | | | |Shape [id I] '' | | | | | | |Rebroadcast{(0, False)} [id J] '' | | | | | | |InplaceDimShuffle{x,0} [id K] '' | | | | | | |Elemwise{second,no_inplace} [id L] '' | | | | | | |A [id M] | | | | | | |InplaceDimShuffle{x} [id N] '' | | | | | | |TensorConstant{1.0} [id O] | | | | | |ScalarConstant{0} [id P] | | | | |Subtensor{int64} [id Q] '' | | | | |Shape [id R] '' | | | | | |Rebroadcast{(0, False)} [id J] '' | | | | |ScalarConstant{1} [id S] | | | |Rebroadcast{(0, False)} [id J] '' | | | |ScalarFromTensor [id T] '' | | | |Subtensor{int64} [id H] '' | | |A [id M] | |ScalarConstant{1} [id U] |ScalarConstant{-1} [id V] Inner graphs: for{cpu,scan_fn} [id C] '' >Elemwise{mul,no_inplace} [id W] '' > |<TensorType(float64, vector)> [id X] -> [id E] > |A_copy [id Y] -> [id M]"""

    # Compare pairwise, ignoring leading/trailing whitespace on each line.
    # zip stops at the shorter sequence, so surplus lines on either side are
    # not checked.
    for truth, out in zip(expected_output.split("\n"), lines):
        assert truth.strip() == out.strip()
def compute_A_k(A, k):
    """Build the last step of a ``k``-step scan that repeatedly multiplies by ``A``.

    The running value is initialised with ``aet.ones_like(A)``. The updates
    returned by ``aesara.scan`` are not used.
    """

    def power_step(prior_result, A):
        return prior_result * A

    # Symbolic description of the result
    all_steps, _ = aesara.scan(
        fn=power_step,
        outputs_info=aet.ones_like(A),
        non_sequences=A,
        n_steps=k,
    )
    return all_steps[-1]
def jacobian_det(self, rv_var, rv_value):
    """Build the symbolic Jacobian term of this transform for ``rv_value``.

    When the last dimension of ``rv_var`` is broadcastable, the value is not
    transformed and ``at.ones_like(rv_value)`` is returned.

    NOTE(review): the use of ``at.log`` and ``logsumexp`` suggests the result
    is on the log scale -- confirm against the callers.
    """
    if rv_var.broadcastable[-1]:
        # If this variable is just a bunch of scalars/degenerate
        # Dirichlets, we can't transform it
        return at.ones_like(rv_value)

    transposed = rv_value.T
    size_plus_one = transposed.shape[0] + 1
    column_sum = at.sum(transposed, 0, keepdims=True)
    # Append a row of zeros below the shifted values before normalising.
    shifted = at.concatenate([transposed + column_sum, at.zeros(column_sum.shape)])
    log_norm = logsumexp(shifted, 0, keepdims=True)
    terms = at.log(size_plus_one) + (size_plus_one * column_sum) - (size_plus_one * log_norm)
    return at.sum(terms, 0).T
def test_scan_err2(self):
    # This test should not fail when building fx for the first time,
    # but when calling the scan's perform()
    n_steps = tt.iscalar("k")
    mat = tt.matrix("A")
    n_steps.tag.test_value = 3
    mat.tag.test_value = np.random.rand(5, 3).astype(config.floatX)

    def fx(prior_result, A):
        return tt.dot(prior_result, A)

    # The same scan is built twice: first expecting any ValueError, then
    # expecting one whose message matches the broadcast pattern.
    expectations = (
        pytest.raises(ValueError),
        pytest.raises(ValueError, match="^could not broadcast input"),
    )
    for expectation in expectations:
        with expectation:
            aesara.scan(
                fn=fx,
                outputs_info=tt.ones_like(mat.T),
                non_sequences=mat,
                n_steps=n_steps,
            )
def test_scan_err1(self):
    # This test should fail when building fx for the first time
    k = tt.iscalar("k")
    A = tt.matrix("A")
    # Test values attached to the symbolic inputs: 3 steps and a 5x3 matrix.
    k.tag.test_value = 3
    A.tag.test_value = np.random.rand(5, 3).astype(config.floatX)

    def fx(prior_result, A):
        # Step function: dot product of the running result with A.
        return tt.dot(prior_result, A)

    # Building the scan is expected to raise; the exception info is kept in
    # ``e`` so the traceback can be inspected below.
    with pytest.raises(ValueError) as e:
        aesara.scan(fn=fx, outputs_info=tt.ones_like(A), non_sequences=A, n_steps=k)

    # The first traceback entry must come from this test module.
    assert str(e.traceback[0].path).endswith("test_compute_test_value.py")
    # We should be in the "fx" function defined above
    # (the check depends on the exact frame position, so the call above must
    # stay a direct call from this test body).
    assert e.traceback[2].name == "fx"
def test_scan(self):
    # Test the compute_test_value mechanism Scan.
    n_steps = tt.iscalar("k")
    vec = tt.vector("A")
    n_steps.tag.test_value = 3
    vec.tag.test_value = np.random.rand(5).astype(config.floatX)

    def multiply_step(prior_result, A):
        return prior_result * A

    # Symbolic description of the result
    steps, _ = aesara.scan(
        fn=multiply_step,
        outputs_info=tt.ones_like(vec),
        non_sequences=vec,
        n_steps=n_steps,
    )

    # Scan returns every step (A**1 through A**k); only the last one, A**k,
    # is of interest here, so the earlier ones are dropped.
    last_step = steps[-1]
    assert hasattr(last_step.tag, "test_value")
# 1. First example aesara.config.warn.subtensor_merge_bug = False k = tt.iscalar("k") A = tt.vector("A") def inner_fct(prior_result, A): return prior_result * A # Symbolic description of the result result, updates = aesara.scan(fn=inner_fct, outputs_info=tt.ones_like(A), non_sequences=A, n_steps=k) # Scan has provided us with A ** 1 through A ** k. Keep only the last # value. Scan notices this and does not waste memory saving them. final_result = result[-1] power = aesara.function(inputs=[A, k], outputs=final_result, updates=updates) print(power(list(range(10)), 2)) # [ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.] # 2. Second example coefficients = tt.vector("coefficients")
def test_scan_debugprint5():
    # Builds a scan that repeatedly multiplies by A, takes the gradient of the
    # summed last step with respect to A, prints that gradient graph with
    # debugprint, and compares the text with a stored listing.
    k = iscalar("k")
    A = dvector("A")

    # Symbolic description of the result
    result, updates = aesara.scan(
        fn=lambda prior_result, A: prior_result * A,
        outputs_info=aet.ones_like(A),
        non_sequences=A,
        n_steps=k,
    )

    # Gradient of the summed last step with respect to A.
    final_result = aesara.grad(result[-1].sum(), A)

    # file="str" makes debugprint return the listing as a string.
    output_str = debugprint(final_result, file="str")
    lines = output_str.split("\n")

    # NOTE(review): in this copy the expected listing sits on only a few
    # physical lines, so splitting it on "\n" yields few entries -- confirm the
    # literal's line breaks against the upstream test.
    expected_output = """Subtensor{int64} [id A] '' |for{cpu,grad_of_scan_fn}.1 [id B] '' | |Elemwise{sub,no_inplace} [id C] '' | | |Subtensor{int64} [id D] '' | | | |Shape [id E] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |k [id G] | | | | |IncSubtensor{Set;:int64:} [id H] '' | | | | | |AllocEmpty{dtype='float64'} [id I] '' | | | | | | |Elemwise{add,no_inplace} [id J] '' | | | | | | | |k [id G] | | | | | | | |Subtensor{int64} [id K] '' | | | | | | | |Shape [id L] '' | | | | | | | | |Rebroadcast{(0, False)} [id M] '' | | | | | | | | |InplaceDimShuffle{x,0} [id N] '' | | | | | | | | |Elemwise{second,no_inplace} [id O] '' | | | | | | | | |A [id P] | | | | | | | | |InplaceDimShuffle{x} [id Q] '' | | | | | | | | |TensorConstant{1.0} [id R] | | | | | | | |ScalarConstant{0} [id S] | | | | | | |Subtensor{int64} [id T] '' | | | | | | |Shape [id U] '' | | | | | | | |Rebroadcast{(0, False)} [id M] '' | | | | | | |ScalarConstant{1} [id V] | | | | | |Rebroadcast{(0, False)} [id M] '' | | | | | |ScalarFromTensor [id W] '' | | | | | |Subtensor{int64} [id K] '' | | | | |A [id P] | | | |ScalarConstant{0} [id X] | | |TensorConstant{1} [id Y] | |Subtensor{:int64:} [id Z] '' | | |Subtensor{::int64} [id BA] '' | | | |Subtensor{:int64:} [id BB] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |ScalarConstant{-1} [id BC] | | | |ScalarConstant{-1} [id BD] | | |ScalarFromTensor [id BE] '' | | |Elemwise{sub,no_inplace} [id C] '' | |Subtensor{:int64:} [id BF] '' | | |Subtensor{:int64:} [id BG] '' | | | |Subtensor{::int64} [id BH] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |ScalarConstant{-1} [id BI] | | | |ScalarConstant{-1} [id 
BJ] | | |ScalarFromTensor [id BK] '' | | |Elemwise{sub,no_inplace} [id C] '' | |Subtensor{::int64} [id BL] '' | | |IncSubtensor{Inc;int64::} [id BM] '' | | | |Elemwise{second,no_inplace} [id BN] '' | | | | |for{cpu,scan_fn} [id F] '' | | | | |InplaceDimShuffle{x,x} [id BO] '' | | | | |TensorConstant{0.0} [id BP] | | | |IncSubtensor{Inc;int64} [id BQ] '' | | | | |Elemwise{second,no_inplace} [id BR] '' | | | | | |Subtensor{int64::} [id BS] '' | | | | | | |for{cpu,scan_fn} [id F] '' | | | | | | |ScalarConstant{1} [id BT] | | | | | |InplaceDimShuffle{x,x} [id BU] '' | | | | | |TensorConstant{0.0} [id BV] | | | | |Elemwise{second} [id BW] '' | | | | | |Subtensor{int64} [id BX] '' | | | | | | |Subtensor{int64::} [id BS] '' | | | | | | |ScalarConstant{-1} [id BY] | | | | | |InplaceDimShuffle{x} [id BZ] '' | | | | | |Elemwise{second,no_inplace} [id CA] '' | | | | | |Sum{acc_dtype=float64} [id CB] '' | | | | | | |Subtensor{int64} [id BX] '' | | | | | |TensorConstant{1.0} [id CC] | | | | |ScalarConstant{-1} [id BY] | | | |ScalarConstant{1} [id BT] | | |ScalarConstant{-1} [id CD] | |Alloc [id CE] '' | | |TensorConstant{0.0} [id CF] | | |Elemwise{add,no_inplace} [id CG] '' | | | |Elemwise{sub,no_inplace} [id C] '' | | | |TensorConstant{1} [id CH] | | |Subtensor{int64} [id CI] '' | | |Shape [id CJ] '' | | | |A [id P] | | |ScalarConstant{0} [id CK] | |A [id P] |ScalarConstant{-1} [id CL] Inner graphs: for{cpu,grad_of_scan_fn}.1 [id B] '' >Elemwise{add,no_inplace} [id CM] '' > |Elemwise{mul} [id CN] '' > | |<TensorType(float64, vector)> [id CO] -> [id BL] > | |A_copy [id CP] -> [id P] > |<TensorType(float64, vector)> [id CQ] -> [id BL] >Elemwise{add,no_inplace} [id CR] '' > |Elemwise{mul} [id CS] '' > | |<TensorType(float64, vector)> [id CO] -> [id BL] > | |<TensorType(float64, vector)> [id CT] -> [id Z] > |<TensorType(float64, vector)> [id CU] -> [id CE] for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CV] '' > |<TensorType(float64, vector)> [id CT] -> [id H] > |A_copy 
[id CP] -> [id P] for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CV] '' for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CV] '' for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CV] '' for{cpu,scan_fn} [id F] '' >Elemwise{mul,no_inplace} [id CV] ''"""

    # Compare pairwise, ignoring leading/trailing whitespace on each line.
    # zip stops at the shorter sequence, so surplus lines on either side are
    # not checked.
    for truth, out in zip(expected_output.split("\n"), lines):
        assert truth.strip() == out.strip()
import aesara
import aesara.tensor as tt

# Symbolic inputs: an integer scalar step count and a vector.
k = tt.iscalar("k")
A = tt.vector("A")


def inner_fct(prior_result, A):
    # One scan step: multiply the running result by A.
    return prior_result * A


# Symbolic description of the result
result, updates = aesara.scan(
    fn=inner_fct, outputs_info=tt.ones_like(A), non_sequences=A, n_steps=k
)

# Scan has provided us with A**1 through A**k. Keep only the last
# value. Scan notices this and does not waste memory saving them.
final_result = result[-1]

# Compile a callable taking (A, k) and returning the last scan step.
power = aesara.function(inputs=[A, k], outputs=final_result, updates=updates)

# Print the compiled function's result for 0..9 with k = 2.
print(power(list(range(10)), 2))
def get_drhodS(salt, temp, p):
    """Return the constant ``betaS * rho0`` expanded via ``aet.ones_like(temp)``.

    ``salt`` and ``p`` are accepted but not used in the computation.
    """
    rho0 = 1024.0
    betaS = 0.78e-3
    coefficient = betaS * rho0
    return coefficient * aet.ones_like(temp)
def test_debugprint_mitmot():
    # Builds a scan that repeatedly multiplies by A, takes the gradient of the
    # summed last step with respect to A, prints that gradient graph with
    # debugprint (print_op_info=True), and compares the text with a stored
    # listing.
    k = iscalar("k")
    A = dvector("A")

    # Symbolic description of the result
    result, updates = aesara.scan(
        fn=lambda prior_result, A: prior_result * A,
        outputs_info=at.ones_like(A),
        non_sequences=A,
        n_steps=k,
    )

    # Gradient of the summed last step with respect to A.
    final_result = aesara.grad(result[-1].sum(), A)

    # file="str" makes debugprint return the listing as a string.
    output_str = debugprint(final_result, file="str", print_op_info=True)
    lines = output_str.split("\n")

    # NOTE(review): in this copy the expected listing sits on only a few
    # physical lines, so splitting it on "\n" yields few entries -- confirm the
    # literal's line breaks against the upstream test.
    expected_output = """Subtensor{int64} [id A] |for{cpu,grad_of_scan_fn}.1 [id B] (outer_out_sit_sot-0) | |Elemwise{sub,no_inplace} [id C] (n_steps) | | |Subtensor{int64} [id D] | | | |Shape [id E] | | | | |for{cpu,scan_fn} [id F] (outer_out_sit_sot-0) | | | | |k [id G] (n_steps) | | | | |IncSubtensor{Set;:int64:} [id H] (outer_in_sit_sot-0) | | | | | |AllocEmpty{dtype='float64'} [id I] | | | | | | |Elemwise{add,no_inplace} [id J] | | | | | | | |k [id G] | | | | | | | |Subtensor{int64} [id K] | | | | | | | |Shape [id L] | | | | | | | | |Rebroadcast{(0, False)} [id M] | | | | | | | | |InplaceDimShuffle{x,0} [id N] | | | | | | | | |Elemwise{second,no_inplace} [id O] | | | | | | | | |A [id P] | | | | | | | | |InplaceDimShuffle{x} [id Q] | | | | | | | | |TensorConstant{1.0} [id R] | | | | | | | |ScalarConstant{0} [id S] | | | | | | |Subtensor{int64} [id T] | | | | | | |Shape [id U] | | | | | | | |Rebroadcast{(0, False)} [id M] | | | | | | |ScalarConstant{1} [id V] | | | | | |Rebroadcast{(0, False)} [id M] | | | | | |ScalarFromTensor [id W] | | | | | |Subtensor{int64} [id K] | | | | |A [id P] (outer_in_non_seqs-0) | | | |ScalarConstant{0} [id X] | | |TensorConstant{1} [id Y] | |Subtensor{:int64:} [id Z] (outer_in_seqs-0) | | |Subtensor{::int64} [id BA] | | | |Subtensor{:int64:} [id BB] | | | | |for{cpu,scan_fn} [id F] (outer_out_sit_sot-0) | | | | |ScalarConstant{-1} [id BC] | | | |ScalarConstant{-1} [id BD] | | |ScalarFromTensor [id BE] | | |Elemwise{sub,no_inplace} [id C] | |Subtensor{:int64:} [id BF] (outer_in_seqs-1) | | |Subtensor{:int64:} [id BG] | | | |Subtensor{::int64} [id BH] | | | | 
|for{cpu,scan_fn} [id F] (outer_out_sit_sot-0) | | | | |ScalarConstant{-1} [id BI] | | | |ScalarConstant{-1} [id BJ] | | |ScalarFromTensor [id BK] | | |Elemwise{sub,no_inplace} [id C] | |Subtensor{::int64} [id BL] (outer_in_mit_mot-0) | | |IncSubtensor{Inc;int64::} [id BM] | | | |Elemwise{second,no_inplace} [id BN] | | | | |for{cpu,scan_fn} [id F] (outer_out_sit_sot-0) | | | | |InplaceDimShuffle{x,x} [id BO] | | | | |TensorConstant{0.0} [id BP] | | | |IncSubtensor{Inc;int64} [id BQ] | | | | |Elemwise{second,no_inplace} [id BR] | | | | | |Subtensor{int64::} [id BS] | | | | | | |for{cpu,scan_fn} [id F] (outer_out_sit_sot-0) | | | | | | |ScalarConstant{1} [id BT] | | | | | |InplaceDimShuffle{x,x} [id BU] | | | | | |TensorConstant{0.0} [id BV] | | | | |Elemwise{second} [id BW] | | | | | |Subtensor{int64} [id BX] | | | | | | |Subtensor{int64::} [id BS] | | | | | | |ScalarConstant{-1} [id BY] | | | | | |InplaceDimShuffle{x} [id BZ] | | | | | |Elemwise{second,no_inplace} [id CA] | | | | | |Sum{acc_dtype=float64} [id CB] | | | | | | |Subtensor{int64} [id BX] | | | | | |TensorConstant{1.0} [id CC] | | | | |ScalarConstant{-1} [id BY] | | | |ScalarConstant{1} [id BT] | | |ScalarConstant{-1} [id CD] | |Alloc [id CE] (outer_in_sit_sot-0) | | |TensorConstant{0.0} [id CF] | | |Elemwise{add,no_inplace} [id CG] | | | |Elemwise{sub,no_inplace} [id C] | | | |TensorConstant{1} [id CH] | | |Subtensor{int64} [id CI] | | |Shape [id CJ] | | | |A [id P] | | |ScalarConstant{0} [id CK] | |A [id P] (outer_in_non_seqs-0) |ScalarConstant{-1} [id CL] Inner graphs: for{cpu,grad_of_scan_fn}.1 [id B] (outer_out_sit_sot-0) >Elemwise{add,no_inplace} [id CM] (inner_out_mit_mot-0-0) > |Elemwise{mul} [id CN] > | |*2-<TensorType(float64, (None,))> [id CO] -> [id BL] (inner_in_mit_mot-0-0) > | |*5-<TensorType(float64, (None,))> [id CP] -> [id P] (inner_in_non_seqs-0) > |*3-<TensorType(float64, (None,))> [id CQ] -> [id BL] (inner_in_mit_mot-0-1) >Elemwise{add,no_inplace} [id CR] (inner_out_sit_sot-0) > 
|Elemwise{mul} [id CS] > | |*2-<TensorType(float64, (None,))> [id CO] -> [id BL] (inner_in_mit_mot-0-0) > | |*0-<TensorType(float64, (None,))> [id CT] -> [id Z] (inner_in_seqs-0) > |*4-<TensorType(float64, (None,))> [id CU] -> [id CE] (inner_in_sit_sot-0) for{cpu,scan_fn} [id F] (outer_out_sit_sot-0) >Elemwise{mul,no_inplace} [id CV] (inner_out_sit_sot-0) > |*0-<TensorType(float64, (None,))> [id CT] -> [id H] (inner_in_sit_sot-0) > |*1-<TensorType(float64, (None,))> [id CW] -> [id P] (inner_in_non_seqs-0)"""

    # Compare pairwise, ignoring leading/trailing whitespace on each line.
    # zip stops at the shorter sequence, so surplus lines on either side are
    # not checked.
    for truth, out in zip(expected_output.split("\n"), lines):
        assert truth.strip() == out.strip()
# 1. First example aesara.config.warn__subtensor_merge_bug = False k = aet.iscalar("k") A = aet.vector("A") def inner_fct(prior_result, A): return prior_result * A # Symbolic description of the result result, updates = aesara.scan(fn=inner_fct, outputs_info=aet.ones_like(A), non_sequences=A, n_steps=k) # Scan has provided us with A ** 1 through A ** k. Keep only the last # value. Scan notices this and does not waste memory saving them. final_result = result[-1] power = aesara.function(inputs=[A, k], outputs=final_result, updates=updates) print(power(list(range(10)), 2)) # [ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.] # 2. Second example