def test_vjps(backendopt):
    """Transposed vector-Jacobian product of y = A @ x equals v^T A."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[3])
        y = ad.einsum('ab, b->a', A, x)
        transposed_vjp_x, = ad.transposed_vjps(y, [x], v)
        executor = ad.Executor([y, transposed_vjp_x])
        x_val = T.tensor([1., 2.])  # shape (2,)
        A_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])  # shape (3, 2)
        v_val = T.tensor([1., 2., 3.])  # shape (3,)
        y_val, transposed_vjp_x_val = executor.run(feed_dict={
            x: x_val,
            A: A_val,
            v: v_val
        })
        expected_yval = T.einsum('ab, b->a', A_val, x_val)
        # v^T J with J = A: contract v against A's leading (output) axis.
        expected_transposed_vjp_x_val = T.einsum('b, ba->a', v_val, A_val)
        assert isinstance(transposed_vjp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(transposed_vjp_x_val,
                             expected_transposed_vjp_x_val)
def test_jacobian_einsum(backendopt):
    """Jacobians of a 3-D einsum contraction w.r.t. both operands."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[3, 3, 3])
        x2 = ad.Variable(name="x2", shape=[3, 3, 3])
        y = ad.einsum("ikl,jkl->ijk", x1, x2)
        jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
        executor = ad.Executor([y, jacobian_x1, jacobian_x2])
        x1_val = T.random((3, 3, 3))
        x2_val = T.random((3, 3, 3))
        y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
        })
        I = T.identity(3)
        # dy_ijk/dx1_mno: identity on the uncontracted axes of x1,
        # the other operand on the contracted axis.
        expected_jacobian_x1_val = T.einsum("im,kn,jno->ijkmno", I, I, x2_val)
        expected_jacobian_x2_val = T.einsum("jm,kn,ino->ijkmno", I, I, x1_val)
        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.einsum("ikl,jkl->ijk", x1_val, x2_val))
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_mul_jacobian(backendopt):
    """Jacobian of elementwise multiply: a diagonal scaled by the other factor."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        y = x1 * x2
        jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
        executor = ad.Executor([y, jacobian_x1, jacobian_x2])
        x1_val = T.tensor([[1., 2.], [3., 4.]])
        x2_val = T.tensor([[5., 6.], [7., 8.]])
        y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })
        I = T.identity(2)
        # d(x1*x2)_ab/d(x1)_ij = delta_ai delta_bj * (x2)_ij, and symmetrically.
        expected_jacobian_x1_val = T.einsum("ai,bj,ij->abij", I, I, x2_val)
        expected_jacobian_x2_val = T.einsum("ai,bj,ij->abij", I, I, x1_val)
        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_mul_jacobian_one_scalar(backendopt):
    """Jacobians when one multiply operand is a scalar (both operand orders)."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        # test both cases of left and right multiply a scalar
        for y in [x1 * x2, x2 * x1]:
            jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
            executor = ad.Executor([y, jacobian_x1, jacobian_x2])
            x1_val = T.tensor(2.)
            x2_val = T.tensor([[5., 6.], [7., 8.]])
            y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
                x1: x1_val,
                x2: x2_val
            })
            I = T.identity(2)
            # d(y)/d(scalar) is just x2; d(y)/d(x2) is scalar * identity pair.
            expected_jacobian_x1_val = T.einsum("ai,bj,ij->ab", I, I, x2_val)
            expected_jacobian_x2_val = x1_val * T.einsum("ai,bj->abij", I, I)
            assert isinstance(y, ad.Node)
            assert T.array_equal(y_val, x1_val * x2_val)
            assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
            assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_sub_jacobian(backendopt):
    """Jacobian of subtraction: +identity for the left operand, -identity for the right."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        y = x1 - x2
        jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
        executor = ad.Executor([y, jacobian_x1, jacobian_x2])
        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })
        I = T.identity(2)
        expected_jacobian_x1_val = T.einsum("ac,bd->abcd", I, I)
        expected_jacobian_x2_val = -T.einsum("ac,bd->abcd", I, I)
        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val - x2_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_jtjvps(backendopt):
    """J^T J v for the linear map y = A x should equal (A^T A) v."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[2])
        y = ad.einsum('ab, b->a', A, x)
        jtjvp_x, = ad.jtjvps(y, [x], [v])
        executor = ad.Executor([y, jtjvp_x])
        x_val = T.tensor([1., 2.])
        A_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v_val = T.tensor([3., 4.])
        y_val, jtjvp_x_val = executor.run(feed_dict={
            x: x_val,
            A: A_val,
            v: v_val
        })
        expected_yval = T.einsum('ab, b->a', A_val, x_val)
        # Reference: form A^T A explicitly, then apply it to v.
        expected_jtjvp_x_val = T.einsum('ba, ac->bc', T.transpose(A_val),
                                        A_val)
        expected_jtjvp_x_val = T.einsum('ab, b->a', expected_jtjvp_x_val,
                                        v_val)
        assert isinstance(jtjvp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(jtjvp_x_val, expected_jtjvp_x_val)
def test_s2s_hvp(backendopt):
    """Source-to-source generated forward/gradient/HVP code for y = x^T H x."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[3])
        H = ad.Variable(name="H", shape=[3, 3])
        v = ad.Variable(name="v", shape=[3])
        y = ad.einsum("a,ab,b->", x, H, x)
        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])
        x_val = T.tensor([1., 2., 3.])  # 3
        v_val = T.tensor([1., 2., 3.])  # 3
        H_val = T.tensor([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]])  # 3x3
        expected_yval = T.einsum("a,ab,b->", x_val, H_val, x_val)
        # H is symmetric here, so grad = 2 H x and hvp = 2 H v.
        expected_grad_x_val = 2 * T.einsum("ab,b->a", H_val, x_val)
        expected_hv_val = T.tensor([4., 8., 12.])
        # Generate python source for each computation, import it as a
        # module, and run the generated functions on concrete values.
        StS = SourceToSource()
        forward_str = StS.forward([y], backend=datatype)
        m = import_code(forward_str)
        y_val_s2s, = m.forward([x_val, H_val])
        grad_str = StS.gradients(y, [x], backend=datatype)
        m = import_code(grad_str)
        grad_x_val_s2s, = m.gradients([x_val, H_val])
        hvp_str = StS.hvp(y, [x], [v], backend=datatype)
        m = import_code(hvp_str)
        Hv_val_s2s, = m.hvp([x_val, H_val, v_val])
        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val_s2s, expected_yval)
        assert T.array_equal(grad_x_val_s2s, expected_grad_x_val)
        assert T.array_equal(Hv_val_s2s, expected_hv_val)
def test_cpd_shared_exec(backendopt):
    """One sweep of CP-ALS with shared execution matches the closed-form update.

    `size` and `rank` come from module scope (not visible in this block).
    """
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)
        input_val = init_rand_cp(dim, size, rank)
        A_list, input_tensor_val = input_val
        A_val, B_val, C_val = A_list
        outputs = cpd_als_shared_exec(dim, size, rank, 1, input_val)
        # expected values
        # NOTE: each update below deliberately reuses the factors updated
        # above it, mirroring one sequential ALS sweep (A, then B, then C).
        A_val = T.einsum(
            "abc,bk,ck->ak", input_tensor_val, B_val, C_val) @ T.inv(
                (T.transpose(B_val) @ B_val) * (T.transpose(C_val) @ C_val))
        B_val = T.einsum(
            "abc,ak,ck->bk", input_tensor_val, A_val, C_val) @ T.inv(
                (T.transpose(A_val) @ A_val) * (T.transpose(C_val) @ C_val))
        C_val = T.einsum(
            "abc,ak,bk->ck", input_tensor_val, A_val, B_val) @ T.inv(
                (T.transpose(A_val) @ A_val) * (T.transpose(B_val) @ B_val))
        assert T.norm(outputs[0] - A_val) < 1e-8
        assert T.norm(outputs[1] - B_val) < 1e-8
        assert T.norm(outputs[2] - C_val) < 1e-8
def test_cpd_hessian_optimize_diag(backendopt):
    """Optimize the diagonal Hessian blocks of the CPD loss and check both
    the operation count after optimization and the numerical values.

    Note: `node = optimize(node)` does NOT write back into `hessian_diag`;
    the executor below intentionally runs the unoptimized graphs, while the
    optimized node is only inspected inside the loop.
    """
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)
        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list
        hessian = ad.hessian(loss, [A, B, C])
        hessian_diag = [hessian[0][0], hessian[1][1], hessian[2][2]]
        for node in hessian_diag:
            node = optimize(node)
            assert isinstance(node, ad.AddNode)
            num_operations = len(
                list(
                    filter(lambda x: isinstance(x, ad.OpNode),
                           find_topo_sort([node]))))
            """
            Use this assertion to test the optimize function.
            5 operations:
            1. T.einsum('ca,cb->ab',A,A),
            2. T.einsum('ca,cb->ab',B,B),
            3. T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),
            4. T.einsum('bd,ac->abcd',T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),T.identity(10)),
            5. (T.einsum('bd,ac->abcd',T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),T.identity(10))+
            T.einsum('bd,ac->abcd',T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),T.identity(10)))
            """
            assert num_operations == 5
        executor = ad.Executor(hessian_diag)
        hes_diag_vals = executor.run(feed_dict={
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        })
        expected_hes_diag_val = [
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', B_val, B_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, B_val, B_val,
                         T.identity(size))
        ]
        assert T.norm(hes_diag_vals[0] - expected_hes_diag_val[0]) < 1e-8
        assert T.norm(hes_diag_vals[1] - expected_hes_diag_val[1]) < 1e-8
        assert T.norm(hes_diag_vals[2] - expected_hes_diag_val[2]) < 1e-8
def expect_jtjvp_val(A, B, C, v_A, v_B, v_C):
    """Reference J^T J v values for the three CP factor matrices.

    Returns the three JtJvp tensors [for A, for B, for C], each the sum of
    the contributions from the direction vectors v_A, v_B, v_C.
    """
    # Contributions to the A-block, one per direction vector.
    term_A_from_vA = T.einsum('ia,ja,ka,kb,jb->ib', v_A, B, C, C, B)
    term_A_from_vB = T.einsum('ja,ia,ka,kb,jb->ib', v_B, A, C, C, B)
    term_A_from_vC = T.einsum('ka,ia,ja,kb,jb->ib', v_C, A, B, C, B)
    jtjvp_A = term_A_from_vA + term_A_from_vB + term_A_from_vC

    # Contributions to the B-block.
    term_B_from_vA = T.einsum('ia,ja,ka,kb,ib->jb', v_A, B, C, C, A)
    term_B_from_vB = T.einsum('ja,ia,ka,kb,ib->jb', v_B, A, C, C, A)
    term_B_from_vC = T.einsum('ka,ia,ja,kb,ib->jb', v_C, A, B, C, A)
    jtjvp_B = term_B_from_vA + term_B_from_vB + term_B_from_vC

    # Contributions to the C-block.
    term_C_from_vA = T.einsum('ia,ja,ka,ib,jb->kb', v_A, B, C, A, B)
    term_C_from_vB = T.einsum('ja,ia,ka,ib,jb->kb', v_B, A, C, A, B)
    term_C_from_vC = T.einsum('ka,ia,ja,ib,jb->kb', v_C, A, B, A, B)
    jtjvp_C = term_C_from_vA + term_C_from_vB + term_C_from_vC

    return [jtjvp_A, jtjvp_B, jtjvp_C]
def test_sub_jacobian_w_chain(backendopt):
    """Chained subtraction z = x3 - (x1 - x2): dz/dx2 is +identity."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 - x2
        z = x3 - y
        jacobian_x2, = ad.jacobians(z, [x2])
        executor = ad.Executor([z, jacobian_x2])
        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        x3_val = T.tensor([[1, 1], [1, 1]])
        z_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })
        I = T.identity(2)
        # The two minus signs cancel: dz/dx2 = +I (x) I.
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)
        assert isinstance(z, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(z_val, x3_val - x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_s2s_jtjvp(backendopt):
    """Source-to-source generated code for J^T J v of y = A x."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[2])
        y = ad.einsum("ab,b->a", A, x)
        jtjvp_x, = ad.jtjvps(y, [x], [v])
        x_val = T.tensor([1., 2.])
        A_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v_val = T.tensor([3, 4])
        # A^T A v in one einsum.
        expected_jtjvp_x_val = T.einsum("ba,bc,c->a", A_val, A_val, v_val)
        StS = SourceToSource()
        forward_str = StS.forward([jtjvp_x],
                                  function_name='jtjvp',
                                  backend=datatype)
        m = import_code(forward_str)
        # The generated function only needs A and v (x drops out of JtJv).
        jtjvp_x_val_s2s, = m.jtjvp([A_val, v_val])
        assert isinstance(jtjvp_x, ad.Node)
        assert T.array_equal(jtjvp_x_val_s2s, expected_jtjvp_x_val)
def fast_hessian_contract(self, delta, regularization, hvps):
    """Combine Hessian-vector products with a Tikhonov-style term.

    For each factor n, returns hvps[n] plus `regularization` times the
    diagonal of self.gamma[n] applied elementwise to delta[n].
    """
    result = []
    for idx in range(len(self.A)):
        # diag(gamma) applied row-wise to delta, scaled by the regularizer.
        reg_term = regularization * T.einsum('jj,ij->ij', self.gamma[idx],
                                             delta[idx])
        result.append(T.zeros(self.A[idx].shape) + reg_term + hvps[idx])
    return result
def test_tucker_als_shared_exec(backendopt):
    """One sweep of Tucker-ALS with shared execution matches the HOOI update.

    `dim`, `size` and `rank` come from module scope (not visible here).
    """
    for datatype in backendopt:
        T.set_backend(datatype)
        input_val = init_rand_tucker(dim, size, rank)
        A_val_list, _, X_val = input_val
        A_val_list_ad, core_val_ad, _ = tucker_als_shared_exec(
            dim, size, rank, 1, input_val)
        A1_val, A2_val, A3_val = A_val_list
        # expected values
        # ttmc: tensor times matrix chain
        # Each factor update uses the already-updated factors before it,
        # mirroring one sequential sweep.
        ttmc = T.einsum("abc,bk,cl->akl", X_val, A2_val, A3_val)
        ttmc_inner = T.einsum("akl,bkl->ab", ttmc, ttmc)
        mat, _, _ = T.svd(ttmc_inner)
        A1_val = mat[:, :rank]
        ttmc = T.einsum("abc,ak,cl->kbl", X_val, A1_val, A3_val)
        ttmc_inner = T.einsum("kbl,kcl->bc", ttmc, ttmc)
        mat, _, _ = T.svd(ttmc_inner)
        A2_val = mat[:, :rank]
        ttmc = T.einsum("abc,ak,bl->klc", X_val, A1_val, A2_val)
        ttmc_inner = T.einsum("klc,kld->cd", ttmc, ttmc)
        mat, _, _ = T.svd(ttmc_inner)
        A3_val = mat[:, :rank]
        # Core: project the tensor onto all three updated factors.
        core_val = T.einsum("abc,ak,bl,cm->klm", X_val, A1_val, A2_val,
                            A3_val)
        assert T.norm(A_val_list_ad[0] - A1_val) < 1e-8
        assert T.norm(A_val_list_ad[1] - A2_val) < 1e-8
        assert T.norm(A_val_list_ad[2] - A3_val) < 1e-8
        assert T.norm(core_val_ad - core_val) < 1e-8
def test_jacobian_summation_einsum_2(backendopt):
    """Jacobian of an einsum that fully sums over one operand's indices."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2, 2])
        y = ad.Variable(name="y", shape=[2, 2])
        # 'ij,ab->ab' sums x to a scalar and scales y by it.
        out = ad.einsum('ij,ab->ab', x, y)
        grad_x, = ad.jacobians(out, [x])
        executor = ad.Executor([out, grad_x])
        x_val = T.tensor([[1., 2.], [3., 4.]])
        y_val = T.tensor([[5., 6.], [7., 8.]])
        out_val, grad_x_val = executor.run(feed_dict={x: x_val, y: y_val})
        expected_out_val = T.einsum('ij,ab->ab', x_val, y_val)
        # d(out_ab)/d(x_ij) = y_ab for every (i, j).
        expected_grad_x_val = T.einsum('ij,ab->abij', T.ones(x_val.shape),
                                       y_val)
        assert T.array_equal(out_val, expected_out_val)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
def jtjvp(inputs):
    """Hand-written fast J^T J v kernel for a 3-factor CP decomposition.

    inputs is the flat list [v_A, B, C, v_B, A, v_C]: the three factor
    matrices A, B, C and the three direction vectors v_A, v_B, v_C.
    Returns the three JtJvp tensors, one per factor.
    """
    v_A = inputs[0]
    B0 = inputs[1]
    C0 = inputs[2]
    v_B = inputs[3]
    A0 = inputs[4]
    v_C = inputs[5]
    A = [A0, B0, C0]
    v = [v_A, v_B, v_C]
    # compute G: Gram matrices G[i] = A[i]^T A[i]
    G = []
    for i in range(3):
        G.append(T.einsum("ij,ik->jk", A[i], A[i]))
    # compute gamma: symmetric table of coefficient matrices; only the
    # upper triangle is computed, the lower triangle reuses it.
    gamma = []
    for i in range(3):
        gamma.append([])
        for j in range(3):
            if j >= i:
                M = compute_coefficient_matrix(i, j, G)
                gamma[i].append(M)
            else:
                M = gamma[j][i]
                gamma[i].append(M)
    # fast hessian contract: accumulate each block's contribution from
    # every direction vector, using the Gram/gamma structure instead of
    # forming the full Jacobian.
    ret = []
    for n in range(3):
        ret.append(T.zeros(A[n].shape))
        for p in range(3):
            M = gamma[n][p]
            if n == p:
                # Diagonal block: direct contraction with v[p].
                ret[n] += T.einsum("iz,zr->ir", v[p], M)
            else:
                # Off-diagonal block: goes through the cross term B.
                B = T.einsum("jr,jz->rz", A[p], v[p])
                ret[n] += T.einsum("iz,zr,rz->ir", A[n], M, B)
    return [ret[0], ret[1], ret[2]]
def n_mode_eigendec(node, tensor_val, rank):
    """
    Eigendecomposition of mode-n unfolding of a input node.
    Used in Tucker decomposition to update the core tensor
    and one factor matrix.

    Parameters
    ----------
    node: the input einsum node. Note that it must be the EinsumNode
        of the core tensor node and one factor matrix node.
    tensor_val: the value of the input node
    rank: Tucker decomposition rank

    Returns
    -------
    1. the core tensor
    2. the corresponding factor matrix
    """
    assert isinstance(node, ad.EinsumNode)
    assert len(node.inputs) == 2

    in_subs, out_subs, _ = _parse_einsum_input(
        (node.einsum_subscripts, *node.inputs))
    core_subs, A_subs = in_subs.split(',')

    assert len(A_subs) == 2

    # The factor matrix contributes exactly one index not present in the
    # output; that is the contracted (rank) index.
    contracted_char = list(set(A_subs) - set(out_subs))[0]

    # Relabel the factor's output index with the contracted one to build
    # the second operand of the Gram-style product below.
    out_subs_2 = "".join(
        [char if char not in A_subs else contracted_char for char in out_subs])
    # used for tensor_val.T @ tensor_val in its matricized form
    einstr = out_subs + "," + out_subs_2 + "->" + A_subs

    Y = T.einsum(einstr, tensor_val, tensor_val)
    # Leading `rank` singular vectors of the (symmetric) matricized Gram
    # matrix give the updated factor.
    U, _, _ = T.svd(Y)
    U = U[:, :rank]

    # Project the tensor onto the new factor to get the updated core.
    einstr = out_subs + "," + A_subs + "->" + core_subs
    core = T.einsum(einstr, tensor_val, U)
    return core, U
def test_gauge_transform_left(backendopt):
    """Left gauge transform of an MPS: value preserved, left tensors orthogonal."""
    for datatype in backendopt:
        T.set_backend(datatype)
        tensors_input = rand_mps(num=4, rank=4, size=2)
        tensors = gauge_transform_mps(tensors_input, right=False)
        # make sure the transformation will not change the mps results
        mps = T.einsum('ab,acd,cef,eg->bdfg', *tensors_input)
        mps_gauge = T.einsum('ab,acd,cef,eg->bdfg', *tensors)
        assert T.norm(mps - mps_gauge) < 1e-8
        dim = len(tensors_input)
        # test all tensors except the right one's orthogonality
        inner = T.einsum("ab,cb->ac", tensors[0], tensors[0])
        assert T.norm(inner - T.identity(inner.shape[0])) < 1e-8
        for i in range(1, dim - 1):
            inner = T.einsum("abc,adc->bd", tensors[i], tensors[i])
            assert T.norm(inner - T.identity(inner.shape[0])) < 1e-8
def test_jvps(backendopt):
    """JVP of y = A1 x1 + A2 x2 w.r.t. both inputs: A1 v1 + A2 v2."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2])
        A1 = ad.Variable(name="A1", shape=[3, 2])
        x2 = ad.Variable(name="x2", shape=[2])
        A2 = ad.Variable(name="A2", shape=[3, 2])
        v1 = ad.Variable(name="v1", shape=[2])
        v2 = ad.Variable(name="v2", shape=[2])
        y = ad.einsum('ab, b->a', A1, x1) + ad.einsum('ab, b->a', A2, x2)
        # NOTE(review): despite the local name, this is a forward-mode
        # Jacobian-vector product (ad.jvps), not a transposed vjp.
        transposed_vjp_x = ad.jvps(y, [x1, x2], [v1, v2])
        executor = ad.Executor([y, transposed_vjp_x])
        x1_val = T.tensor([1., 2.])
        A1_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v1_val = T.tensor([3., 4.])
        x2_val = T.tensor([1., 2.])
        A2_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v2_val = T.tensor([3., 4.])
        y_val, transposed_vjp_x_val = executor.run(feed_dict={
            x1: x1_val,
            A1: A1_val,
            v1: v1_val,
            x2: x2_val,
            A2: A2_val,
            v2: v2_val
        })
        expected_yval = T.einsum('ab, b->a', A1_val, x1_val) + T.einsum(
            'ab, b->a', A2_val, x2_val)
        expected_transposed_vjp_x_val = T.einsum(
            'ab, b->a', A1_val, v1_val) + T.einsum('ab, b->a', A2_val, v2_val)
        assert isinstance(transposed_vjp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(transposed_vjp_x_val,
                             expected_transposed_vjp_x_val)
def test_mul_const_jacobian(backendopt):
    """Jacobian of constant multiplication: 2 * identity pair."""
    for datatype in backendopt:
        T.set_backend(datatype)
        # NOTE(review): the local variable is x1 but the graph node is
        # named "x2" — harmless here, but inconsistent with sibling tests.
        x1 = ad.Variable(name="x2", shape=[2, 2])
        jacobian_x1, = ad.jacobians(2 * x1, [x1])
        executor = ad.Executor([jacobian_x1])
        x1_val = T.tensor([[5., 6.], [7., 8.]])
        jacobian_x1_val, = executor.run(feed_dict={x1: x1_val})
        I = T.identity(2)
        expected_jacobian_x1_val = 2 * T.einsum("ai,bj->abij", I, I)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
def test_einsum_3op(backendopt):
    """Gradients through a three-operand einsum (matrix chain product)."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x2 = ad.Variable(name="x2", shape=[3, 2])
        x3 = ad.Variable(name="x3", shape=[2, 3])
        x4 = ad.Variable(name="x4", shape=[3, 2])
        matmul = ad.einsum('ik,kj,jl->il', x2, x3, x4)
        y = ad.sum(matmul)
        grad_x2, grad_x3, grad_x4 = ad.gradients(y, [x2, x3, x4])
        executor = ad.Executor([y, grad_x2, grad_x3, grad_x4])
        x2_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        x3_val = T.tensor([[7, 8, 9], [10, 11, 12]])  # 2x3
        x4_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        y_val, grad_x2_val, grad_x3_val, grad_x4_val = executor.run(feed_dict={
            x2: x2_val,
            x3: x3_val,
            x4: x4_val
        })
        # d(sum)/d(matmul) is all-ones; each gradient contracts that with
        # the other two operands.
        expected_grad_sum = T.ones_like(T.dot(T.dot(x2_val, x3_val), x4_val))
        expected_yval = T.sum(T.dot(T.dot(x2_val, x3_val), x4_val))
        expected_grad_x2_val = T.einsum("il, kj, jl->ik", expected_grad_sum,
                                        x3_val, x4_val)
        expected_grad_x3_val = T.einsum("ik, il, jl->kj", x2_val,
                                        expected_grad_sum, x4_val)
        expected_grad_x4_val = T.einsum("ik, kj, il->jl", x2_val, x3_val,
                                        expected_grad_sum)
        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x2_val, expected_grad_x2_val)
        assert T.array_equal(grad_x3_val, expected_grad_x3_val)
        assert T.array_equal(grad_x4_val, expected_grad_x4_val)
def test_tensor_transpose_einsum(backendopt):
    """Transposed VJP through a pure-permutation einsum is the same permutation of v."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2, 2, 2])
        y = ad.einsum("kij->jik", x)
        v = ad.Variable(name="v", shape=[2, 2, 2])
        v_val = T.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])  # 2 x 2 x 2
        grad_x, = ad.transposed_vjps(y, [x], v)
        executor = ad.Executor([y, grad_x])
        x_val = T.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])  # 2 x 2 x 2
        y_val, grad_x_val = executor.run(feed_dict={x: x_val, v: v_val})
        expected_yval = T.einsum("kij->jik", x_val)
        # NOTE: "kij->jik" swaps axes 0 and 2, which is its own inverse,
        # so the vjp applies the same subscripts to v.
        expected_grad_x_val = T.einsum("kij->jik", v_val)
        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
def test_cpd_hessian_simplify(backendopt):
    """Simplify the diagonal Hessian blocks of the CPD loss and check the
    simplified structure plus the numerical values.

    Note: `node = simplify(node)` does NOT write back into `hessian_diag`;
    the executor below intentionally runs the unsimplified graphs, while
    the simplified node is only inspected inside the loop.
    """
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)
        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list
        hessian = ad.hessian(loss, [A, B, C])
        # TODO (issue #101): test the off-diagonal elements
        hessian_diag = [hessian[0][0], hessian[1][1], hessian[2][2]]
        for node in hessian_diag:
            node = simplify(node)
            input_node = node.inputs[0]
            # The simplified block collapses into a single 5-operand einsum.
            assert len(input_node.inputs) == 5
        executor = ad.Executor(hessian_diag)
        hes_diag_vals = executor.run(feed_dict={
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        })
        expected_hes_diag_val = [
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', B_val, B_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, B_val, B_val,
                         T.identity(size))
        ]
        assert T.norm(hes_diag_vals[0] - expected_hes_diag_val[0]) < 1e-8
        assert T.norm(hes_diag_vals[1] - expected_hes_diag_val[1]) < 1e-8
        assert T.norm(hes_diag_vals[2] - expected_hes_diag_val[2]) < 1e-8
def test_tucker(backendopt):
    """Residual of the Tucker graph matches the explicit reconstruction."""
    for datatype in backendopt:
        T.set_backend(datatype)
        tg = TuckerGraph(dim, size, rank)
        executor = ad.Executor([tg.residual])
        A_val_list, core_val, X_val = init_rand_tucker(dim, size, rank)
        feed_dict = dict(zip(tg.A_list, A_val_list))
        feed_dict.update({tg.core: core_val, tg.X: X_val})
        residual_val, = executor.run(feed_dict=feed_dict)
        # residual = (factors x core) - X
        expect_residual_val = T.einsum('ae,bf,cg,efg->abc', *A_val_list,
                                       core_val) - X_val
        assert T.norm(residual_val - expect_residual_val) < 1e-8
def test_HinverseG(backendopt):
    """Conjugate gradient solves A x = b for an SPD matrix A."""
    for datatype in backendopt:
        T.set_backend(datatype)
        N = 10
        T.seed(1224)
        A = T.random([N, N])
        # A^T A + I is symmetric positive definite, so CG applies.
        A = T.transpose(A) @ A
        A = A + T.identity(N)
        b = T.random([N])

        def hess_fn(x):
            # Hessian-vector product oracle: x is a one-element list.
            return [T.einsum("ab,b->a", A, x[0])]

        error_tol = 1e-9
        x, = conjugate_gradient(hess_fn, [b], error_tol)
        # Verify the residual of the solve, with a looser tolerance than
        # the CG stopping criterion.
        assert (T.norm(T.abs(T.einsum("ab,b->a", A, x) - b)) <= 1e-4)
def test_jacobian_trace_einsum(backendopt):
    """Jacobian of the trace is the identity matrix."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2, 2])
        trace = ad.einsum('ii->', x)
        grad_x, = ad.jacobians(trace, [x])
        executor = ad.Executor([trace, grad_x])
        x_val = T.tensor([[1., 2.], [3., 4.]])
        trace_val, grad_x_val = executor.run(feed_dict={x: x_val})
        expected_trace_val = T.einsum('ii->', x_val)
        # d(tr X)/dX_ij = delta_ij.
        expected_grad_x_val = T.identity(2)
        assert T.array_equal(trace_val, expected_trace_val)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
def test_cpd_grad(backendopt):
    """Gradients of the CPD least-squares loss w.r.t. the three factors."""
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)
        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        grad_A, grad_B, grad_C = ad.gradients(loss, [A, B, C])
        executor = ad.Executor([loss, grad_A, grad_B, grad_C])
        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list
        loss_val, grad_A_val, grad_B_val, grad_C_val = executor.run(
            feed_dict={
                input_tensor: input_tensor_val,
                A: A_val,
                B: B_val,
                C: C_val
            })
        # loss = ||[[A, B, C]] - X||^2; gradient of each factor contracts
        # the doubled residual with the other two factors.
        expected_output_tensor = T.einsum("ia,ja,ka->ijk", A_val, B_val,
                                          C_val)
        expected_residual = expected_output_tensor - input_tensor_val
        expected_norm_error = T.norm(expected_residual)
        expected_loss = expected_norm_error * expected_norm_error
        expected_contract_residual_A = 2 * T.einsum("ijk,ia->ajk",
                                                    expected_residual, A_val)
        expected_contract_residual_B = 2 * T.einsum("ijk,ja->iak",
                                                    expected_residual, B_val)
        expected_contract_residual_C = 2 * T.einsum("ijk,ka->ija",
                                                    expected_residual, C_val)
        expected_grad_A = T.einsum("iak,ka->ia", expected_contract_residual_B,
                                   C_val)
        expected_grad_B = T.einsum("ajk,ka->ja", expected_contract_residual_A,
                                   C_val)
        expected_grad_C = T.einsum("ajk,ja->ka", expected_contract_residual_A,
                                   B_val)
        assert abs(loss_val - expected_loss) < 1e-8
        assert T.norm(grad_A_val - expected_grad_A) < 1e-8
        assert T.norm(grad_B_val - expected_grad_B) < 1e-8
        assert T.norm(grad_C_val - expected_grad_C) < 1e-8
def test_chainjacobian(backendopt):
    """Chaining two Jacobian tensors contracts over their shared middle indices."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2, 2])
        # Declare how many leading axes are "input" indices for the chain.
        x1.set_in_indices_length(1)
        x2.set_in_indices_length(2)
        y = ad.chainjacobian(x1, x2)
        executor = ad.Executor([y])
        x1_val = T.tensor([[[1, 1], [1, 1]], [[1, 1], [1, 1]]])
        x2_val = T.tensor([[[1, 1], [1, 1]], [[1, 1], [1, 1]]])
        y_val, = executor.run(feed_dict={x1: x1_val, x2: x2_val})
        expected_y_val = T.einsum("abc,bcd->ad", x1_val, x2_val)
        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_y_val)
def test_add_jacobian_w_chain(backendopt):
    """Chained addition z = (x1 + x2) + x3: dz/dx2 is the identity pair."""
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 + x2
        z = y + x3
        jacobian_x2, = ad.jacobians(z, [x2])
        executor = ad.Executor([z, jacobian_x2])
        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        x3_val = T.tensor([[1, 1], [1, 1]])
        z_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })
        I = T.identity(2)
        # Derivation of the chained Jacobian:
        # jacobian_z_y = T.einsum("ae,bf->abef", I, I)
        # jacobian_y_x2 = T.einsum("ec,fd->efcd", I, I)
        # jacobian_z_x2 = T.einsum("abef,efcd->abcd", jacobian_z_y, jacobian_y_x2)
        #               = T.einsum("ae,bf,ec,fd->abcd", I, I, I, I)
        #               = T.einsum("ac,bd->abcd", I, I)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)
        assert isinstance(z, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(z_val, x1_val + x2_val + x3_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def hess_fn(x):
    """Hessian-vector product oracle: return [A @ x[0]] for the vector list x.

    NOTE(review): relies on a matrix `A` from the enclosing scope —
    presumably the SPD system matrix built by the caller; confirm.
    """
    return [T.einsum("ab,b->a", A, x[0])]