def test_s2s_hvp(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[3])
        H = ad.Variable(name="H", shape=[3, 3])
        v = ad.Variable(name="v", shape=[3])
        y = ad.einsum("a,ab,b->", x, H, x)

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])

        x_val = T.tensor([1., 2., 3.])  # 3
        v_val = T.tensor([1., 2., 3.])  # 3
        H_val = T.tensor([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]])  # 3x3

        expected_yval = T.einsum("a,ab,b->", x_val, H_val, x_val)
        expected_grad_x_val = 2 * T.einsum("ab,b->a", H_val, x_val)
        expected_hv_val = T.tensor([4., 8., 12.])

        StS = SourceToSource()
        forward_str = StS.forward([y], backend=datatype)
        m = import_code(forward_str)
        y_val_s2s, = m.forward([x_val, H_val])

        grad_str = StS.gradients(y, [x], backend=datatype)
        m = import_code(grad_str)
        grad_x_val_s2s, = m.gradients([x_val, H_val])

        hvp_str = StS.hvp(y, [x], [v], backend=datatype)
        m = import_code(hvp_str)
        Hv_val_s2s, = m.hvp([x_val, H_val, v_val])

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val_s2s, expected_yval)
        assert T.array_equal(grad_x_val_s2s, expected_grad_x_val)
        assert T.array_equal(Hv_val_s2s, expected_hv_val)
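# Illustrative sketch (not part of the test API above; helper name and the use
# of plain NumPy are assumptions for illustration): a standalone check of the
# expected values in test_s2s_hvp. For y = x . (H x) with symmetric H, the
# gradient is (H + H^T) x = 2 H x and the Hessian-vector product is 2 H v.
def _numpy_quadratic_hvp_sketch():
    import numpy as np

    x_val = np.array([1., 2., 3.])
    v_val = np.array([1., 2., 3.])
    H_val = 2. * np.eye(3)

    y_val = np.einsum("a,ab,b->", x_val, H_val, x_val)       # x . (H x)
    grad_val = np.einsum("ab,b->a", H_val + H_val.T, x_val)  # (H + H^T) x
    hv_val = np.einsum("ab,b->a", H_val + H_val.T, v_val)    # (H + H^T) v

    assert np.isclose(y_val, 28.)
    assert np.allclose(grad_val, 2 * H_val @ x_val)
    assert np.allclose(hv_val, np.array([4., 8., 12.]))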
def __init__(self, dim, size, rank):
    cg = CharacterGetter()

    self.X = ad.Variable(name='X', shape=[size for _ in range(dim)])
    X_subscripts = "".join([cg.getchar() for _ in range(dim)])

    self.core = ad.Variable(name='core', shape=[rank for _ in range(dim)])
    core_subscripts = "".join([cg.getchar() for _ in range(dim)])

    self.A_list = []
    A_list_subscripts = []
    for i in range(dim):
        node = ad.Matrix(name=f'A{i}', shape=[size, rank], orthonormal='row')
        self.A_list.append(node)
        A_list_subscripts.append(f"{X_subscripts[i]}{core_subscripts[i]}")

    input_subs = ','.join([
        subscripts for subscripts in A_list_subscripts + [core_subscripts]
    ])
    self.einsum_subscripts = input_subs + '->' + X_subscripts

    self.output = ad.einsum(self.einsum_subscripts,
                            *(self.A_list + [self.core]))
    self.residual = self.output - self.X

    self.intermediates, self.losses = [], []
    for i in range(dim):
        intermediate, loss = self._build_graph_w_intermediate(i)
        self.intermediates.append(intermediate)
        self.losses.append(loss)
def test_get_transpose_indices_dup():
    a = ad.Variable(name='a', shape=[2, 2])
    h = ad.Variable(name='h', shape=[2, 2, 2])

    out1 = ad.einsum("ad,bc,ecd->abe", a, a, h)
    out2 = ad.einsum("ac,bd,ecd->eab", a, a, h)
    trans = get_transpose_indices(out1, out2)
    assert trans == [2, 0, 1] or trans == [2, 1, 0]
def test_einsum_multiuse_auto_copy(backendopt):
    """
    Test autolinearization and auto fuse.

        A    B   inputs
        |\   |
        | \  |
        |  \ |
        |   C
        |  /
        | /
        output

    Next: we would need to autoprune.
    """
    for datatype in backendopt:
        T.set_backend(datatype)
        a = ad.Variable(name="a1", shape=[3, 2])
        b = ad.Variable(name="b", shape=[2, 3])

        c = ad.einsum('ik,kj->ij', a, b)
        output = ad.einsum('ik,ij->kj', a, c)

        linearize(output)
        all_nodes = find_topo_sort([output])
        cloned_nodes = [
            tmp for tmp in all_nodes if isinstance(tmp, ad.CloneNode)
        ]

        out_new = fuse_einsums(output, [*cloned_nodes, b])
        # Test that every input is now fused.
        assert all([not isinstance(x, ad.EinsumNode) for x in out_new.inputs])

        assert tree_eq(output, out_new, [*cloned_nodes, b])
def test_get_common_ancestor(backendopt):
    A = ad.Variable(name="A", shape=[3, 2])
    X1 = ad.Variable(name="X1", shape=[3, 2, 2])
    X2 = ad.Variable(name="X2", shape=[3, 3, 2, 2])
    X3 = ad.Variable(name="X3", shape=[3, 2, 2])
    """
    The network and indices positions are as follows:

                          g - A
                              |
            c        d        e
            |        |        |
            X1 - a - X2 - b - X3
            |        |        |
            h        i        j
                              |
                          l - A
    """
    einsum_node = ad.einsum('lj,ge,bej,abdi,ach->cdhigl', A, A, X3, X2, X1)
    opt_einsum = generate_optimal_tree(einsum_node)
    sub_einsum = get_common_ancestor(opt_einsum, einsum_node.inputs, A)
    assert sorted(get_all_inputs(sub_einsum),
                  key=lambda node: node.name) == sorted(
                      [A, A, X3], key=lambda node: node.name)
def test_mul_jacobian_one_scalar(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[2, 2])

        # test both cases: left- and right-multiplication by a scalar
        for y in [x1 * x2, x2 * x1]:
            jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
            executor = ad.Executor([y, jacobian_x1, jacobian_x2])

            x1_val = T.tensor(2.)
            x2_val = T.tensor([[5., 6.], [7., 8.]])
            y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
                x1: x1_val,
                x2: x2_val
            })

            I = T.identity(2)
            expected_jacobian_x1_val = T.einsum("ai,bj,ij->ab", I, I, x2_val)
            expected_jacobian_x2_val = x1_val * T.einsum("ai,bj->abij", I, I)

            assert isinstance(y, ad.Node)
            assert T.array_equal(y_val, x1_val * x2_val)
            assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
            assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_add_jacobian_scalar(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        y = x1 + x2

        jacobian_x2, = ad.jacobians(y, [x2])
        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(1.)
        y_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })

        expected_jacobian_x2_val = T.tensor(1.)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_tree_distribution_two_layers(dist_op, backendopt):
    """
    [Distributive] ((A + B) * G) * C will produce AGC + BGC

    Note that (A + B) * G is contracted first.
    """
    for datatype in backendopt:
        if datatype == "taco":
            # '..,kk,..->..' is not supported in taco
            continue
        T.set_backend(datatype)
        a = ad.Variable(name="a", shape=[3, 2])
        b = ad.Variable(name="b", shape=[3, 2])
        g = ad.Variable(name="g", shape=[2, 2])
        c = ad.Variable(name="c", shape=[2, 3])

        interm = ad.einsum('ik, kk->ik', dist_op(a, b), g)
        output = ad.einsum('ik,kj->ij', interm, c)
        new_output = distribute_tree(output)
        assert isinstance(new_output, dist_op)

        assert tree_eq(output, new_output, [a, b, c, g])
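# Illustrative sketch (assuming dist_op is elementwise addition; the helper
# name is hypothetical): a plain-NumPy check of the algebraic identity that
# test_tree_distribution_two_layers relies on, namely that ((A + B) * G) * C,
# contracted as 'ik,kk->ik' then 'ik,kj->ij', equals AGC + BGC.
def _numpy_distributivity_sketch():
    import numpy as np

    a = np.random.rand(3, 2)
    b = np.random.rand(3, 2)
    g = np.random.rand(2, 2)
    c = np.random.rand(2, 3)

    lhs = np.einsum('ik,kj->ij', np.einsum('ik,kk->ik', a + b, g), c)
    rhs = (np.einsum('ik,kj->ij', np.einsum('ik,kk->ik', a, g), c) +
           np.einsum('ik,kj->ij', np.einsum('ik,kk->ik', b, g), c))
    assert np.allclose(lhs, rhs)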
def test_tree_distribution_ppE(dist_op, backendopt):
    """
    [Distributive] ((A + B) + C) * G will produce AG + BG + CG

    Note that (A + B) has parent (A + B) + C.
    """
    for datatype in backendopt:
        if datatype == "taco":
            # '..,kk,..->..' is not supported in taco
            continue
        T.set_backend(datatype)
        a = ad.Variable(name="a", shape=[3, 2])
        b = ad.Variable(name="b", shape=[3, 2])
        c = ad.Variable(name="c", shape=[3, 2])
        g = ad.Variable(name="g", shape=[2, 2])

        output = ad.einsum('ik,kk->ik', dist_op(dist_op(a, b), c), g)
        new_output = distribute_tree(output)
        assert isinstance(new_output, dist_op)

        assert tree_eq(output, new_output, [a, b, c, g])
def test_einsum():
    for datatype in backends:
        T.set_backend(datatype)
        x2 = ad.Variable(name="x2", shape=[3, 2])
        x3 = ad.Variable(name="x3", shape=[2, 3])
        matmul = ad.einsum('ik,kj->ij', x2, x3)
        y = ad.sum(matmul)

        grad_x2, grad_x3 = ad.gradients(y, [x2, x3])
        executor = ad.Executor([y, grad_x2, grad_x3])

        x2_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        x3_val = T.tensor([[7, 8, 9], [10, 11, 12]])  # 2x3

        y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
            x2: x2_val,
            x3: x3_val
        })

        expected_grad_sum = T.ones_like(T.dot(x2_val, x3_val))
        expected_yval = T.sum(T.dot(x2_val, x3_val))
        expected_grad_x2_val = T.dot(expected_grad_sum, T.transpose(x3_val))
        expected_grad_x3_val = T.dot(T.transpose(x2_val), expected_grad_sum)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x2_val, expected_grad_x2_val)
        assert T.array_equal(grad_x3_val, expected_grad_x3_val)
def test_vjps():
    for datatype in backends:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[3])
        y = ad.einsum('ab, b->a', A, x)

        transposed_vjp_x, = ad.transposed_vjps(y, [x], v)

        executor = ad.Executor([y, transposed_vjp_x])
        x_val = T.tensor([1., 2.])  # 2
        A_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v_val = T.tensor([1., 2., 3.])

        y_val, transposed_vjp_x_val = executor.run(feed_dict={
            x: x_val,
            A: A_val,
            v: v_val
        })

        expected_yval = T.einsum('ab, b->a', A_val, x_val)
        expected_transposed_vjp_x_val = T.einsum('b, ba->a', v_val, A_val)

        assert isinstance(transposed_vjp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(transposed_vjp_x_val,
                             expected_transposed_vjp_x_val)
def test_inner_product_hvp():
    for datatype in backends:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[3, 1])
        v = ad.Variable(name="v", shape=[3, 1])
        y = ad.sum(ad.einsum("ab,bc->ac", ad.transpose(x), x))

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])

        executor = ad.Executor([y, grad_x, Hv])
        x_val = T.tensor([[1.], [2.], [3]])  # 3x1
        v_val = T.tensor([[1.], [2.], [3]])  # 3x1

        y_val, grad_x_val, Hv_val = executor.run(feed_dict={
            x: x_val,
            v: v_val
        })

        expected_yval = T.sum(T.dot(T.transpose(x_val), x_val))
        expected_grad_x_val = 2 * x_val
        expected_hv_val = 2 * v_val

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
        assert T.array_equal(Hv_val, expected_hv_val)
def test_jtjvps():
    for datatype in backends:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[2])
        y = ad.einsum('ab, b->a', A, x)

        jtjvp_x, = ad.jtjvps(y, [x], [v])

        executor = ad.Executor([y, jtjvp_x])
        x_val = T.tensor([1., 2.])
        A_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v_val = T.tensor([3., 4.])

        y_val, jtjvp_x_val = executor.run(feed_dict={
            x: x_val,
            A: A_val,
            v: v_val
        })

        expected_yval = T.einsum('ab, b->a', A_val, x_val)
        expected_jtjvp_x_val = T.einsum('ba, ac->bc', T.transpose(A_val),
                                        A_val)
        expected_jtjvp_x_val = T.einsum('ab, b->a', expected_jtjvp_x_val,
                                        v_val)

        assert isinstance(jtjvp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(jtjvp_x_val, expected_jtjvp_x_val)
def test_s2s_jtjvp(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[2])
        y = ad.einsum("ab,b->a", A, x)

        jtjvp_x, = ad.jtjvps(y, [x], [v])

        x_val = T.tensor([1., 2.])
        A_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v_val = T.tensor([3, 4])

        expected_jtjvp_x_val = T.einsum("ba,bc,c->a", A_val, A_val, v_val)

        StS = SourceToSource()
        forward_str = StS.forward([jtjvp_x],
                                  function_name='jtjvp',
                                  backend=datatype)
        m = import_code(forward_str)
        jtjvp_x_val_s2s, = m.jtjvp([A_val, v_val])

        assert isinstance(jtjvp_x, ad.Node)
        assert T.array_equal(jtjvp_x_val_s2s, expected_jtjvp_x_val)
def test_three_mul_jacobian(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 * x2 * x3

        jacobian_x1, = ad.jacobians(y, [x1])
        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor([[1., 2.], [3., 4.]])
        x2_val = T.tensor([[5., 6.], [7., 8.]])
        x3_val = T.tensor([[9., 10.], [11., 12.]])
        y_val, jacobian_x1_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        I = T.identity(2)
        expected_jacobian_x1_val = T.einsum("ai,bj,ij,ij->abij", I, I, x2_val,
                                            x3_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
def test_prune_identity(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        a1 = ad.Variable(name="a1", shape=[3, 3])
        a2 = ad.Variable(name="a2", shape=[3, 3])
        i1 = ad.identity(3)
        i2 = ad.identity(3)
        i3 = ad.identity(3)

        out = ad.einsum("ab,cd,ac,be,ef->abdf", a1, a2, i1, i2, i3)
        prune_identity_nodes(out)
        """
        Explanation of the einsum above:
        The identity node i1 means that a and c refer to the same dimension,
        so we can remove i1 and rewrite the expression as
        ad.einsum("ab,ad,be,ef->abdf", a1, a2, i2, i3).
        We can also combine i2 and i3 because e is redundant.
        Therefore, the expression can be rewritten as
        ad.einsum("ab,ad,bf->abdf", a1, a2, i2).
        """
        out_expect = ad.einsum("ab,ad,bf->abdf", a1, a2, i2)
        assert len(out.inputs) == 3
        assert tree_eq(out, out_expect, [a1, a2])
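# Illustrative sketch (plain NumPy; helper name hypothetical): the identity
# tensors in test_prune_identity only rename or merge indices, so the pruned
# einsum is numerically equal to the original one.
def _numpy_prune_identity_sketch():
    import numpy as np

    a1 = np.random.rand(3, 3)
    a2 = np.random.rand(3, 3)
    I = np.eye(3)

    full = np.einsum("ab,cd,ac,be,ef->abdf", a1, a2, I, I, I)
    pruned = np.einsum("ab,ad,bf->abdf", a1, a2, I)
    assert np.allclose(full, pruned)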
def test_three_mul_jacobian_scalars(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        x3 = ad.Variable(name="x3", shape=[])
        y = x1 * x2 * x3

        jacobian_x1, = ad.jacobians(y, [x1])
        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(2.)
        x3_val = T.tensor(3.)
        y_val, jacobian_x1_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        expected_jacobian_x1_val = x2_val * x3_val

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
def cpd_graph(dim, size, rank):
    cg = CharacterGetter()

    input_tensor = ad.Variable(name='input_tensor',
                               shape=[size for _ in range(dim)])
    input_tensor_subs = "".join([cg.getchar() for _ in range(dim)])
    rank_char = cg.getchar()

    A_list = []
    A_list_subs = []
    for i in range(dim):
        node = ad.Variable(name=f'A{i}', shape=[size, rank])
        A_list.append(node)
        A_list_subs.append(f"{input_tensor_subs[i]}{rank_char}")

    input_subs = ','.join(A_list_subs)
    einsum_subscripts = input_subs + '->' + input_tensor_subs

    output = ad.einsum(einsum_subscripts, *A_list)
    residual = output - input_tensor
    residual_shape = list(range(len(residual.shape)))
    loss = ad.tensordot(residual,
                        residual,
                        axes=[residual_shape, residual_shape])
    return A_list, input_tensor, loss, residual
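# Illustrative sketch (plain NumPy, dim = 3 for concreteness; helper name and
# default sizes are hypothetical) of what cpd_graph encodes: the CP
# reconstruction einsum over the factor matrices and the squared Frobenius
# norm of the residual computed with tensordot.
def _numpy_cpd_loss_sketch(size=4, rank=3):
    import numpy as np

    input_tensor = np.random.rand(size, size, size)
    A_list = [np.random.rand(size, rank) for _ in range(3)]

    output = np.einsum('ad,bd,cd->abc', *A_list)  # CP reconstruction
    residual = output - input_tensor
    axes = list(range(residual.ndim))
    loss = np.tensordot(residual, residual, axes=[axes, axes])
    assert np.isclose(loss, np.sum(residual ** 2))  # ||residual||_F^2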
def test_jacobian_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[3, 3, 3])
        x2 = ad.Variable(name="x2", shape=[3, 3, 3])
        y = ad.einsum("ikl,jkl->ijk", x1, x2)

        jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
        executor = ad.Executor([y, jacobian_x1, jacobian_x2])

        x1_val = T.random((3, 3, 3))
        x2_val = T.random((3, 3, 3))
        y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
        })

        I = T.identity(3)
        expected_jacobian_x1_val = T.einsum("im,kn,jno->ijkmno", I, I, x2_val)
        expected_jacobian_x2_val = T.einsum("jm,kn,ino->ijkmno", I, I, x1_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.einsum("ikl,jkl->ijk", x1_val, x2_val))
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_cpd_jtjvp_optimized(benchmark):
    for datatype in BACKEND_TYPES:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        v_A = ad.Variable(name="v_A", shape=[size, rank])
        v_B = ad.Variable(name="v_B", shape=[size, rank])
        v_C = ad.Variable(name="v_C", shape=[size, rank])

        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list
        v_A_list, _ = init_rand_cp(dim, size, rank)
        v_A_val, v_B_val, v_C_val = v_A_list

        JtJvps = ad.jtjvps(output_node=residual,
                           node_list=[A, B, C],
                           vector_list=[v_A, v_B, v_C])

        JtJvps = [optimize(JtJvp) for JtJvp in JtJvps]
        dedup(*JtJvps)
        for node in JtJvps:
            assert isinstance(node, ad.AddNode)

        executor_JtJvps = ad.Executor(JtJvps)

        jtjvp_val = benchmark(executor_JtJvps.run,
                              feed_dict={
                                  A: A_val,
                                  B: B_val,
                                  C: C_val,
                                  input_tensor: input_tensor_val,
                                  v_A: v_A_val,
                                  v_B: v_B_val,
                                  v_C: v_C_val
                              })
def test_einsum_multiuse(backendopt):
    """
    Test manual fuse.

        A    B   inputs
        |\   |
        | \  |
        |  \ |
        |   C
        |  /
        | /
        output

    Note that here we assume A is split into 2 vars by some other operations.
    """
    for datatype in backendopt:
        T.set_backend(datatype)
        a = ad.Variable(name="a1", shape=[3, 2])
        a_copy = ad.Variable(name="a2", shape=[3, 2])
        b = ad.Variable(name="b", shape=[2, 3])

        c = ad.einsum('ik,kj->ij', a, b)
        output = ad.einsum('ik,ij->kj', a_copy, c)

        # New graph
        out_new = fuse_einsums(output, [a, a_copy, b])
        assert tree_eq(output, out_new, [a, a_copy, b])
def test_add_3():
    A = ad.Variable(name="A", shape=[3])
    B = ad.Variable(name="B", shape=[3])

    y = A + B + B
    assert AutodiffParser.parse(y.name, [A, B]).name == y.name
def test_einsum_fuse_graph(backendopt):
    """
    [Fuse einsum used twice]
    This case is rather subtle. We want to auto fuse

        A   B   C
        |    \ /
        |    es
        |   / |
        |  /  |
        es    |
          \   |
           es

    Here es is einsum.
    """
    for datatype in backendopt:
        T.set_backend(datatype)
        a = ad.Variable(name="a", shape=[3, 3])
        b = ad.Variable(name="b", shape=[3, 2])
        c = ad.Variable(name="c", shape=[2, 3])

        BC = ad.einsum('ik, kj->ij', b, c)  # 3x3
        ABC = ad.einsum('ik, kj->ij', a, BC)  # 3x3
        out = ad.einsum('jk, ki->ji', ABC, BC)  # 3x3

        linearize(out)
        tree, = find_sub_einsumtree(PseudoNode(out))
        out, ins = tree
        new_z = fuse_einsums(out.node, ins)

        assert tree_eq(out.node, new_z, [a, b, c])
def test_einsum():
    A = ad.Variable(name="A", shape=[3, 2])
    B = ad.Variable(name="B", shape=[2, 3])

    y = ad.einsum('ik,kj->ij', A, B)
    assert AutodiffParser.parse(y.name, [A, B]).name == y.name
def test_einsum_gen_corner_case(backendopt):
    """
    Note: the NumPy contraction path cannot find the optimal path for this
    expression; it outputs the same expression as the input.

        -------- E --------
        |     |     |     |
        a     b     c     d
        |     |     |     |
        A - e - B - f - C - g - D
        |     |     |     |
        h     i     j     k
        |     |     |     |
    """
    size = 5
    A = ad.Variable(name="A", shape=[size, size, size])
    B = ad.Variable(name="B", shape=[size, size, size, size])
    C = ad.Variable(name="C", shape=[size, size, size, size])
    D = ad.Variable(name="D", shape=[size, size, size])
    E = ad.Variable(name="E", shape=[size, size, size, size])

    output = ad.einsum('aeh,bfie,cgjf,dgk,abcd->hijk', A, B, C, D, E)
    new_output = generate_optimal_tree(output)
    for node in find_topo_sort([new_output]):
        if not isinstance(node, ad.VariableNode):
            assert (len(node.inputs) == 2)
def test_add_jacobian(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        y = x1 + x2

        jacobian_x2, = ad.jacobians(y, [x2])
        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        y_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })

        I = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_executor_dependent(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        A = ad.Variable(name="A", shape=[3, 3])
        B = ad.Variable(name="B", shape=[3, 3])
        AA = ad.einsum('ab,ab->', A, A)
        BB = ad.einsum('ab,ab->', B, B)
        AB = ad.einsum('ab,ab->', A, B)
        out_A = AA + AB
        out_B = AB + AA

        executor = ad.Executor({out_A, out_B})
        data = gen_dict([A, B])
        A_val, = executor.run(feed_dict=data,
                              reset_graph=False,
                              out_nodes=[out_A])

        data2 = gen_dict([A])
        data2.update({B: data[B]})
        B_val, = executor.run(feed_dict=data2, out_nodes=[out_B])

        # Check that A's value is not reused in the computation of B_val.
        assert A_val != B_val
def test_sub_jacobian_w_chain(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 - x2
        z = x3 - y

        jacobian_x2, = ad.jacobians(z, [x2])
        executor = ad.Executor([z, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        x3_val = T.tensor([[1, 1], [1, 1]])
        z_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        I = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)

        assert isinstance(z, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(z_val, x3_val - x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_get_transpose_indices():
    a = ad.Variable(name="a", shape=[2, 2, 2])
    b = ad.Variable(name="b", shape=[2, 2])
    c = ad.Variable(name="a", shape=[2, 2, 2, 2])

    # not transposable
    assert get_transpose_indices(a, b) == None
    assert get_transpose_indices(ad.einsum("abc,cd->abd", a, b), b) == None
    assert get_transpose_indices(ad.einsum('iii->', a),
                                 ad.einsum('ii->', b)) == None
    assert get_transpose_indices(ad.einsum('abc,bc->a', a, b),
                                 ad.einsum('abc,bc->ab', a, b)) == None
    assert get_transpose_indices(ad.einsum('adb,cb->adc', a, b),
                                 ad.einsum('dab,bc->dac', a, b)) == None
    assert get_transpose_indices(ad.einsum('abc,bc->ab', a, b),
                                 ad.einsum('abc,bc->ac', a, b)) == None
    # same expression
    assert get_transpose_indices(ad.einsum('iii->', a),
                                 ad.einsum('iii->', a)) == None
    assert get_transpose_indices(ad.einsum('adb,bc->adc', a, b),
                                 ad.einsum('dab,bc->dac', a, b)) == None
    # complicated contraction index
    assert get_transpose_indices(
        ad.einsum('ab,cd,ef,gh,gh,ij->ij', b, b, b, b, b, b),
        ad.einsum('ab,cd,cd,gh,gh,ij->ji', b, b, b, b, b, b)) == None
    # transposable
    assert get_transpose_indices(ad.einsum('acb,bd->adc', a, b),
                                 ad.einsum('dab,bc->dac', a, b)) == [0, 2, 1]
    assert get_transpose_indices(
        ad.einsum('acje,ie->iacj', c, b),
        ad.einsum('jace,ie->iacj', c, b)) == [0, 2, 3, 1]
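# Illustrative sketch (plain NumPy; helper name hypothetical, and the exact
# convention of the returned permutation is an assumption) of what
# "transposable" means for one of the pairs above: einsum('acb,bd->adc') is a
# pure transpose of einsum('dab,bc->dac') with axes (0, 2, 1).
def _numpy_transpose_indices_sketch():
    import numpy as np

    a = np.random.rand(2, 2, 2)
    b = np.random.rand(2, 2)

    out1 = np.einsum('acb,bd->adc', a, b)
    out2 = np.einsum('dab,bc->dac', a, b)
    assert np.allclose(out1, np.transpose(out2, (0, 2, 1)))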
def test_hvp2(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[3, 1])
        H = ad.Variable(name="H", shape=[3, 3])
        v = ad.Variable(name="v", shape=[3, 1])

        y = ad.sum(
            ad.einsum("ab,bc->ac", ad.einsum("ab,bc->ac", ad.transpose(x), H),
                      x))

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])

        executor = ad.Executor([y, grad_x, Hv])
        x_val = T.tensor([[1.], [2.], [3]])  # 3x1
        v_val = T.tensor([[1.], [2.], [3]])  # 3x1
        H_val = T.tensor([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]])  # 3x3

        y_val, grad_x_val, Hv_val = executor.run(feed_dict={
            x: x_val,
            H: H_val,
            v: v_val
        })

        Hx = T.dot(H_val, x_val)
        expected_yval = T.sum(T.dot(T.transpose(x_val), Hx))
        expected_grad_x_val = 2 * Hx
        expected_hv_val = T.tensor([[4.], [8.], [12.]])

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
        assert T.array_equal(Hv_val, expected_hv_val)