def test_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x2 = ad.Variable(name="x2", shape=[3, 2])
        x3 = ad.Variable(name="x3", shape=[2, 3])
        matmul = ad.einsum('ik,kj->ij', x2, x3)
        y = ad.sum(matmul)

        grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

        executor = ad.Executor([y, grad_x2, grad_x3])
        x2_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        x3_val = T.tensor([[7, 8, 9], [10, 11, 12]])  # 2x3

        y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
            x2: x2_val,
            x3: x3_val
        })

        # for y = sum(x2 @ x3): dy/dx2 = ones @ x3^T and dy/dx3 = x2^T @ ones
        expected_grad_sum = T.ones_like(T.dot(x2_val, x3_val))
        expected_yval = T.sum(T.dot(x2_val, x3_val))
        expected_grad_x2_val = T.dot(expected_grad_sum, T.transpose(x3_val))
        expected_grad_x3_val = T.dot(T.transpose(x2_val), expected_grad_sum)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x2_val, expected_grad_x2_val)
        assert T.array_equal(grad_x3_val, expected_grad_x3_val)

def test_inner_product_hvp(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[3, 1])
        v = ad.Variable(name="v", shape=[3, 1])
        y = ad.sum(ad.einsum("ab,bc->ac", ad.transpose(x), x))

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])

        executor = ad.Executor([y, grad_x, Hv])
        x_val = T.tensor([[1.], [2.], [3.]])  # 3x1
        v_val = T.tensor([[1.], [2.], [3.]])  # 3x1

        y_val, grad_x_val, Hv_val = executor.run(feed_dict={x: x_val, v: v_val})

        # for y = x^T x: grad = 2x, and the Hessian is 2I, so Hv = 2v
        expected_yval = T.sum(T.dot(T.transpose(x_val), x_val))
        expected_grad_x_val = 2 * x_val
        expected_hv_val = 2 * v_val

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
        assert T.array_equal(Hv_val, expected_hv_val)

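# The HVP above can also be sanity-checked numerically. A minimal NumPy
# sketch, assuming nothing from the ad API (the helper name and the use of
# a central finite difference are illustrative): for f(x) = x^T x the
# gradient is 2x, so differencing the gradient along v should give 2v.
def _numpy_hvp_finite_difference_check():
    import numpy as np

    def grad_f(x):
        # gradient of f(x) = x^T x
        return 2 * x

    x = np.array([[1.], [2.], [3.]])
    v = np.array([[1.], [2.], [3.]])
    eps = 1e-6
    hvp_fd = (grad_f(x + eps * v) - grad_f(x - eps * v)) / (2 * eps)
    assert np.allclose(hvp_fd, 2 * v)
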
def test_jtjvps(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[2])
        y = ad.einsum('ab,b->a', A, x)

        jtjvp_x, = ad.jtjvps(y, [x], [v])

        executor = ad.Executor([y, jtjvp_x])
        x_val = T.tensor([1., 2.])
        A_val = T.tensor([[1., 2.], [3., 4.], [5., 6.]])
        v_val = T.tensor([3., 4.])

        y_val, jtjvp_x_val = executor.run(feed_dict={
            x: x_val,
            A: A_val,
            v: v_val
        })

        # the Jacobian of y = A x is A, so J^T J v = (A^T A) v
        expected_yval = T.einsum('ab,b->a', A_val, x_val)
        expected_jtjvp_x_val = T.einsum('ba,ac->bc', T.transpose(A_val), A_val)
        expected_jtjvp_x_val = T.einsum('ab,b->a', expected_jtjvp_x_val, v_val)

        assert isinstance(jtjvp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(jtjvp_x_val, expected_jtjvp_x_val)

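# The expected value above materializes J^T J explicitly. Since J = A here,
# the same product can be formed matrix-free as A^T (A v), which is what
# makes JtJ-vector products cheap inside Gauss-Newton solvers. A minimal
# NumPy sketch (the helper name is illustrative, not part of the ad API):
def _numpy_jtjvp_check():
    import numpy as np

    A = np.array([[1., 2.], [3., 4.], [5., 6.]])
    v = np.array([3., 4.])
    # matrix-free J^T (J v) equals the explicit (A^T A) v
    assert np.allclose(A.T @ (A @ v), (A.T @ A) @ v)
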
def test_hvp2(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[3, 1])
        H = ad.Variable(name="H", shape=[3, 3])
        v = ad.Variable(name="v", shape=[3, 1])
        y = ad.sum(
            ad.einsum("ab,bc->ac", ad.einsum("ab,bc->ac", ad.transpose(x), H),
                      x))

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])

        executor = ad.Executor([y, grad_x, Hv])
        x_val = T.tensor([[1.], [2.], [3.]])  # 3x1
        v_val = T.tensor([[1.], [2.], [3.]])  # 3x1
        H_val = T.tensor([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]])  # 3x3

        y_val, grad_x_val, Hv_val = executor.run(feed_dict={
            x: x_val,
            H: H_val,
            v: v_val
        })

        Hx = T.dot(H_val, x_val)
        expected_yval = T.sum(T.dot(T.transpose(x_val), Hx))
        expected_grad_x_val = 2 * Hx
        expected_hv_val = T.tensor([[4.], [8.], [12.]])

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
        assert T.array_equal(Hv_val, expected_hv_val)

def test_cpd_shared_exec(backendopt):
    dim = 3

    for datatype in backendopt:
        T.set_backend(datatype)

        input_val = init_rand_cp(dim, size, rank)
        A_list, input_tensor_val = input_val
        A_val, B_val, C_val = A_list

        outputs = cpd_als_shared_exec(dim, size, rank, 1, input_val)

        # expected values for one ALS sweep: each factor update is an MTTKRP
        # followed by the inverse of the Hadamard product of the other two
        # factors' Gram matrices. Later updates use the already-updated factors.
        A_val = T.einsum("abc,bk,ck->ak", input_tensor_val, B_val,
                         C_val) @ T.inv(
                             (T.transpose(B_val) @ B_val) *
                             (T.transpose(C_val) @ C_val))
        B_val = T.einsum("abc,ak,ck->bk", input_tensor_val, A_val,
                         C_val) @ T.inv(
                             (T.transpose(A_val) @ A_val) *
                             (T.transpose(C_val) @ C_val))
        C_val = T.einsum("abc,ak,bk->ck", input_tensor_val, A_val,
                         B_val) @ T.inv(
                             (T.transpose(A_val) @ A_val) *
                             (T.transpose(B_val) @ B_val))

        assert T.norm(outputs[0] - A_val) < 1e-8
        assert T.norm(outputs[1] - B_val) < 1e-8
        assert T.norm(outputs[2] - C_val) < 1e-8

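# Each ALS update above solves a linear least-squares problem for one factor
# with the others fixed: A = MTTKRP(X; B, C) @ inv((B^T B) * (C^T C)), where
# * is the elementwise (Hadamard) product. A minimal NumPy sketch, assuming
# an exactly rank-2 tensor so a single update recovers the factor (the
# helper name and sizes are illustrative):
def _numpy_cp_als_update_check():
    import numpy as np

    rng = np.random.default_rng(0)
    A = rng.standard_normal((4, 2))
    B = rng.standard_normal((5, 2))
    C = rng.standard_normal((6, 2))
    X = np.einsum("ak,bk,ck->abc", A, B, C)

    # MTTKRP contracts the tensor with every factor except the one updated
    mttkrp = np.einsum("abc,bk,ck->ak", X, B, C)
    A_new = mttkrp @ np.linalg.inv((B.T @ B) * (C.T @ C))
    assert np.allclose(A_new, A)
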
def step(self, hess_fn, grads, regularization):
    A = self.A

    # gamma matrices: Hadamard products of the other factors' Gram matrices,
    # used to build the block-diagonal preconditioner below
    self.gamma = []
    self.gamma.append(
        (T.transpose(A[1]) @ A[1]) * (T.transpose(A[2]) @ A[2]))
    self.gamma.append(
        (T.transpose(A[0]) @ A[0]) * (T.transpose(A[2]) @ A[2]))
    self.gamma.append(
        (T.transpose(A[0]) @ A[0]) * (T.transpose(A[1]) @ A[1]))

    P = self.compute_block_diag_preconditioner(regularization)
    delta, counter = self.fast_precond_conjugate_gradient(
        hess_fn, grads, P, regularization)
    self.total_iters += counter
    self.atol = self.num * group_vecnorm(delta)

    print(f"cg iterations: {counter}")
    print(f"total cg iterations: {self.total_iters}")
    print(f"total cg time: {self.total_cg_time}")

    self.A[0] += delta[0]
    self.A[1] += delta[1]
    self.A[2] += delta[2]
    return self.A, self.total_cg_time

def test_executor_debug_symmetry(backendopt):
    for datatype in backendopt:
        if datatype == "taco":
            # Taco addition (line 76) outputs a sparse matrix even though the
            # input is dense, which makes the format check fail.
            continue
        T.set_backend(datatype)

        A = ad.Variable(name="A", shape=[3, 3], symmetry=[[0, 1]])
        out = ad.einsum("ab,bc->ac", A, A)

        # symmetrize the random input so it satisfies the declared symmetry
        A_val = T.random((3, 3))
        A_val += T.transpose(A_val)

        executor = ad.Executor([out])
        executor.run(feed_dict={A: A_val}, debug=True)

def test_hessian_quadratic(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[3])
        H = ad.Variable(name="H", shape=[3, 3])
        y = ad.einsum("i,ij,j->", x, H, x)

        hessian = ad.hessian(y, [x])
        executor = ad.Executor([hessian[0][0]])

        x_val = T.random([3])
        H_val = T.random((3, 3))
        hessian_val, = executor.run(feed_dict={x: x_val, H: H_val})

        # for the quadratic form y = x^T H x, the Hessian w.r.t. x is H + H^T
        assert T.array_equal(hessian_val, H_val + T.transpose(H_val))

def test_HinverseG(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        N = 10
        T.seed(1224)

        # construct a symmetric positive-definite matrix so CG converges
        A = T.random([N, N])
        A = T.transpose(A) @ A
        A = A + T.identity(N)
        b = T.random([N])

        def hess_fn(x):
            return [T.einsum("ab,b->a", A, x[0])]

        error_tol = 1e-9
        x, = conjugate_gradient(hess_fn, [b], error_tol)

        assert T.norm(T.abs(T.einsum("ab,b->a", A, x) - b)) <= 1e-4

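# conjugate_gradient above solves A x = b using only matrix-vector products
# with the SPD matrix A. A minimal NumPy sketch of the underlying iteration
# (unpreconditioned, single right-hand side; the helper name is illustrative):
def _numpy_conjugate_gradient(matvec, b, tol=1e-9, max_iters=1000):
    import numpy as np

    x = np.zeros_like(b)
    r = b - matvec(x)  # residual
    p = r.copy()       # search direction
    rs = r @ r
    for _ in range(max_iters):
        Ap = matvec(p)
        alpha = rs / (p @ Ap)
        x = x + alpha * p
        r = r - alpha * Ap
        rs_new = r @ r
        if np.sqrt(rs_new) < tol:
            break
        p = r + (rs_new / rs) * p
        rs = rs_new
    return x
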
def test_transpose_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[3, 2])
        y = ad.sum(ad.einsum("ij->ji", x))

        grad_x, = ad.gradients(y, [x])

        executor = ad.Executor([y, grad_x])
        x_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2

        y_val, grad_x_val = executor.run(feed_dict={x: x_val})

        expected_yval = T.sum(T.transpose(x_val))
        expected_grad_x_val = T.ones_like(x_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)

def dmrg_local_update(intermediate, eigvec, max_mps_rank):
    """
    Perform a local update for DMRG.

    Parameters
    ----------
    intermediate: the input einsum node. Its inputs are two mps sites.
    eigvec: the eigenvector to get the low rank decomposition.
    max_mps_rank: maximum mps tensor rank.
    """
    # parse the einsum subscripts of the intermediate node
    inputs = intermediate.inputs
    assert len(inputs) == 2

    # Here input names are formatted as A{i}.
    index_input_0 = int(inputs[0].name[1:])
    index_input_1 = int(inputs[1].name[1:])

    in_subs, out_subs, _ = parse_einsum_input(
        (intermediate.einsum_subscripts, *intermediate.inputs))
    if index_input_0 > index_input_1:
        # the right site appears first
        right_subs, left_subs = in_subs.split(',')
    else:
        left_subs, right_subs = in_subs.split(',')

    map_subs_indices = dict(zip(out_subs, range(len(intermediate.shape))))

    # the single character shared by both sites is the bond to be re-split
    contract_char, = set(left_subs) - set(out_subs)

    left_uncontract_chars = list(set(left_subs) - set(contract_char))
    right_uncontract_chars = list(set(right_subs) - set(contract_char))

    left_indices = [map_subs_indices[char] for char in left_uncontract_chars]
    right_indices = [map_subs_indices[char] for char in right_uncontract_chars]

    left_uncontract_str = "".join(left_uncontract_chars)
    right_uncontract_str = "".join(right_uncontract_chars)

    #############################################################
    # svd decomposition to get the updated sites
    eigvec_shape = intermediate.shape

    # matricize the eigenvector: left indices form the rows, right the columns
    eigvec_mat = T.transpose(eigvec, left_indices + right_indices)
    eigvec_mat = T.reshape(
        eigvec_mat, (np.prod([eigvec_shape[i] for i in left_indices]), -1))

    U, s, VT = T.svd(eigvec_mat)
    # truncate to the allowed mps rank and absorb the singular values
    # into the right factor
    rank = min([max_mps_rank, eigvec_mat.shape[0], eigvec_mat.shape[1]])
    U, s, VT = U[:, :rank], s[:rank], VT[:rank, :]
    VT = T.diag(s) @ VT

    U = T.reshape(U, [eigvec_shape[i] for i in left_indices] + [rank])
    VT = T.reshape(VT, [rank] + [eigvec_shape[i] for i in right_indices])

    left = T.einsum(f"{left_uncontract_str}{contract_char}->{left_subs}", U)
    right = T.einsum(f"{contract_char}{right_uncontract_str}->{right_subs}",
                     VT)
    return left, right
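
# The SVD step above is the standard two-site DMRG truncation: matricize the
# two-site eigenvector, keep the top singular triplets, and absorb the
# singular values into the right site. A minimal NumPy sketch for a tensor
# with explicit (left bond, site 1, site 2, right bond) axes (the helper
# name and fixed axis layout are illustrative, not the layout used above):
def _numpy_two_site_split(theta, max_rank):
    import numpy as np

    dl, d1, d2, dr = theta.shape
    mat = theta.reshape(dl * d1, d2 * dr)
    U, s, VT = np.linalg.svd(mat, full_matrices=False)
    rank = min(max_rank, len(s))
    # left site is left-orthogonal; singular values go into the right site
    left = U[:, :rank].reshape(dl, d1, rank)
    right = (np.diag(s[:rank]) @ VT[:rank, :]).reshape(rank, d2, dr)
    return left, right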