def test_cpd_hessian_optimize_diag(backendopt):
    """Check ``optimize`` on the diagonal Hessian blocks of the CPD loss.

    For each backend in ``backendopt`` the test builds the graph returned by
    ``cpd_graph``, takes the three diagonal blocks of ``ad.hessian``, and
    asserts that ``optimize`` turns each block into an ``ad.AddNode`` whose
    topological sort contains exactly five ``ad.OpNode`` instances. It then
    evaluates the diagonal blocks and compares them with closed-form einsum
    expressions.

    ``size`` and ``rank`` are free names resolved outside this function.
    """
    dim = 3

    def expected_block(first, second):
        # Closed-form value used as the reference for one diagonal block.
        return 2 * T.einsum('eb,ed,fb,fd,ac->abcd', first, first, second,
                            second, T.identity(size))

    for backend_name in backendopt:
        T.set_backend(backend_name)

        factor_nodes, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = factor_nodes
        factor_vals, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = factor_vals

        hessian = ad.hessian(loss, [A, B, C])
        hessian_diag = [hessian[i][i] for i in range(3)]

        for diag_block in hessian_diag:
            optimized = optimize(diag_block)
            assert isinstance(optimized, ad.AddNode)

            # Expected operations after optimization (5 in total):
            #   1. AA = T.einsum('ca,cb->ab', A, A)
            #   2. BB = T.einsum('ca,cb->ab', B, B)
            #   3. H  = T.einsum('ab,ab->ab', AA, BB)
            #   4. K  = T.einsum('bd,ac->abcd', H, T.identity(10))
            #   5. K + K
            op_nodes = [
                visited for visited in find_topo_sort([optimized])
                if isinstance(visited, ad.OpNode)
            ]
            assert len(op_nodes) == 5

        # NOTE(review): the executor is built from ``hessian_diag`` itself;
        # the values returned by ``optimize`` above are not passed to it.
        executor = ad.Executor(hessian_diag)
        feeds = {
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        }
        hes_diag_vals = executor.run(feed_dict=feeds)

        expected_hes_diag_val = [
            expected_block(B_val, C_val),
            expected_block(A_val, C_val),
            expected_block(A_val, B_val),
        ]
        for idx in range(3):
            assert T.norm(hes_diag_vals[idx] -
                          expected_hes_diag_val[idx]) < 1e-8
def test_jacobian_einsum(backendopt):
    """Check the Jacobians of ``ad.einsum("ikl,jkl->ijk", x1, x2)``.

    For each backend, the einsum output and its Jacobians with respect to both
    inputs are evaluated on random 3x3x3 inputs and compared with reference
    values built from identity matrices and the opposite input.
    """
    subscripts = "ikl,jkl->ijk"
    for backend_name in backendopt:
        T.set_backend(backend_name)

        x1 = ad.Variable(name="x1", shape=[3, 3, 3])
        x2 = ad.Variable(name="x2", shape=[3, 3, 3])
        y = ad.einsum(subscripts, x1, x2)

        jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
        executor = ad.Executor([y, jacobian_x1, jacobian_x2])

        x1_val = T.random((3, 3, 3))
        x2_val = T.random((3, 3, 3))
        feeds = {x1: x1_val, x2: x2_val}
        y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict=feeds)

        eye = T.identity(3)
        expected_jacobian_x1_val = T.einsum("im,kn,jno->ijkmno", eye, eye,
                                            x2_val)
        expected_jacobian_x2_val = T.einsum("jm,kn,ino->ijkmno", eye, eye,
                                            x1_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.einsum(subscripts, x1_val, x2_val))
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_mul_jacobian_one_scalar(backendopt):
    """Check Jacobians of a product between a scalar and a 2x2 variable.

    Both operand orders (``x1 * x2`` and ``x2 * x1``) are exercised for every
    backend in ``backendopt``; ``x1`` is declared with an empty shape and
    ``x2`` with shape ``[2, 2]``.
    """
    for backend_name in backendopt:
        T.set_backend(backend_name)

        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[2, 2])

        # Cover the scalar on the left as well as on the right of the product.
        for y in (x1 * x2, x2 * x1):
            jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
            executor = ad.Executor([y, jacobian_x1, jacobian_x2])

            x1_val = T.tensor(2.)
            x2_val = T.tensor([[5., 6.], [7., 8.]])
            feeds = {x1: x1_val, x2: x2_val}
            y_val, jacobian_x1_val, jacobian_x2_val = executor.run(
                feed_dict=feeds)

            eye = T.identity(2)
            expected_jacobian_x1_val = T.einsum("ai,bj,ij->ab", eye, eye,
                                                x2_val)
            expected_jacobian_x2_val = x1_val * T.einsum(
                "ai,bj->abij", eye, eye)

            assert isinstance(y, ad.Node)
            assert T.array_equal(y_val, x1_val * x2_val)
            assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
            assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_three_mul_jacobian(backendopt):
    """Check the Jacobian of ``x1 * x2 * x3`` with respect to ``x1``.

    For each backend the product and its Jacobian are evaluated on fixed 2x2
    inputs and compared with an einsum-based reference value.
    """
    for backend_name in backendopt:
        T.set_backend(backend_name)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 * x2 * x3

        # ad.jacobians returns one entry per requested variable.
        (jacobian_x1,) = ad.jacobians(y, [x1])
        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor([[1., 2.], [3., 4.]])
        x2_val = T.tensor([[5., 6.], [7., 8.]])
        x3_val = T.tensor([[9., 10.], [11., 12.]])
        feeds = {x1: x1_val, x2: x2_val, x3: x3_val}
        y_val, jacobian_x1_val = executor.run(feed_dict=feeds)

        eye = T.identity(2)
        expected_jacobian_x1_val = T.einsum("ai,bj,ij,ij->abij", eye, eye,
                                            x2_val, x3_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
def test_sub_jacobian_w_chain(backendopt):
    """Check the Jacobian of ``x3 - (x1 - x2)`` with respect to ``x2``.

    The two chained subtractions cancel their signs on ``x2``, so the
    reference Jacobian is ``T.einsum("ac,bd->abcd", I, I)`` with a 2x2
    identity ``I``.
    """
    for backend_name in backendopt:
        T.set_backend(backend_name)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 - x2
        z = x3 - y

        (jacobian_x2,) = ad.jacobians(z, [x2])
        executor = ad.Executor([z, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        x3_val = T.tensor([[1, 1], [1, 1]])
        feeds = {x1: x1_val, x2: x2_val, x3: x3_val}
        z_val, jacobian_x2_val = executor.run(feed_dict=feeds)

        eye = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", eye, eye)

        assert isinstance(z, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(z_val, x3_val - x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_add_jacobian(backendopt):
    """Check the Jacobian of ``x1 + x2`` with respect to ``x2``.

    For each backend the sum and its Jacobian are evaluated on 2x2 inputs and
    the Jacobian is compared with ``T.einsum("ac,bd->abcd", I, I)`` for a 2x2
    identity ``I``.
    """
    for backend_name in backendopt:
        T.set_backend(backend_name)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        y = x1 + x2

        (jacobian_x2,) = ad.jacobians(y, [x2])
        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        feeds = {x1: x1_val, x2: x2_val}
        y_val, jacobian_x2_val = executor.run(feed_dict=feeds)

        eye = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", eye, eye)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_gauge_transform_left(backendopt):
    """Check ``gauge_transform_mps`` called with ``right=False``.

    Two properties are asserted for each backend:
      * contracting the transformed tensors yields the same result as
        contracting the input tensors (within 1e-8 in norm);
      * every transformed tensor except the last one contracts with itself
        to an identity matrix (within 1e-8 in norm).
    """
    contraction = 'ab,acd,cef,eg->bdfg'
    for backend_name in backendopt:
        T.set_backend(backend_name)

        tensors_input = rand_mps(num=4, rank=4, size=2)
        tensors = gauge_transform_mps(tensors_input, right=False)

        # The transformation must leave the contracted MPS unchanged.
        mps = T.einsum(contraction, *tensors_input)
        mps_gauge = T.einsum(contraction, *tensors)
        assert T.norm(mps - mps_gauge) < 1e-8

        num_sites = len(tensors_input)

        # Orthogonality check for every tensor but the right-most one.
        # The first tensor has two indices, the interior ones have three.
        gram = T.einsum("ab,cb->ac", tensors[0], tensors[0])
        assert T.norm(gram - T.identity(gram.shape[0])) < 1e-8

        for site in range(1, num_sites - 1):
            gram = T.einsum("abc,adc->bd", tensors[site], tensors[site])
            assert T.norm(gram - T.identity(gram.shape[0])) < 1e-8
def test_mul_const_jacobian(backendopt):
    """Check the Jacobian of ``2 * x1`` with respect to ``x1``.

    For each backend the Jacobian is evaluated on a fixed 2x2 input and
    compared with ``2 * T.einsum("ai,bj->abij", I, I)`` for a 2x2 identity
    ``I``.
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        # The node name now matches the Python variable it is bound to
        # (it was previously created with name="x2").
        x1 = ad.Variable(name="x1", shape=[2, 2])

        # ad.jacobians returns one entry per requested variable.
        jacobian_x1, = ad.jacobians(2 * x1, [x1])
        executor = ad.Executor([jacobian_x1])

        x1_val = T.tensor([[5., 6.], [7., 8.]])
        jacobian_x1_val, = executor.run(feed_dict={x1: x1_val})

        I = T.identity(2)
        expected_jacobian_x1_val = 2 * T.einsum("ai,bj->abij", I, I)

        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
def test_cpd_hessian_simplify(backendopt):
    """Check ``simplify`` on the diagonal Hessian blocks of the CPD loss.

    For each backend in ``backendopt`` the three diagonal blocks of
    ``ad.hessian`` are passed through ``simplify``; the first input of each
    simplified node must itself have exactly five inputs. The diagonal blocks
    are then evaluated and compared with closed-form einsum expressions.

    ``size`` and ``rank`` are free names resolved outside this function.
    """
    dim = 3

    def expected_block(first, second):
        # Closed-form value used as the reference for one diagonal block.
        return 2 * T.einsum('eb,ed,fb,fd,ac->abcd', first, first, second,
                            second, T.identity(size))

    for backend_name in backendopt:
        T.set_backend(backend_name)

        factor_nodes, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = factor_nodes
        factor_vals, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = factor_vals

        hessian = ad.hessian(loss, [A, B, C])
        # TODO (issue #101): test the off-diagonal elements
        hessian_diag = [hessian[i][i] for i in range(3)]

        for diag_block in hessian_diag:
            simplified = simplify(diag_block)
            first_input = simplified.inputs[0]
            assert len(first_input.inputs) == 5

        # NOTE(review): the executor is built from ``hessian_diag`` itself;
        # the values returned by ``simplify`` above are not passed to it.
        executor = ad.Executor(hessian_diag)
        feeds = {
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        }
        hes_diag_vals = executor.run(feed_dict=feeds)

        expected_hes_diag_val = [
            expected_block(B_val, C_val),
            expected_block(A_val, C_val),
            expected_block(A_val, B_val),
        ]
        for idx in range(3):
            assert T.norm(hes_diag_vals[idx] -
                          expected_hes_diag_val[idx]) < 1e-8
def test_executor_retain(backendopt):
    """Check that the executor keeps node values between ``run`` calls.

    ``y = ad.sum(x2)`` is evaluated first with ``reset_graph=False``. A second
    ``run`` with an empty ``feed_dict`` then evaluates ``z = y * 2``; this can
    only succeed when the value of ``y`` from the first call was retained.
    The result is also checked against twice the value returned for ``y`` so
    that a wrong retained value does not go unnoticed.
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        x2 = ad.Variable(name="x2", shape=[3, 3])
        y = ad.sum(x2)
        z = y * 2
        x2_val = T.identity(3)

        executor = ad.Executor([y, z])
        y_val, = executor.run(feed_dict={x2: x2_val},
                              reset_graph=False,
                              out_nodes=[y])

        # This can only be run if y values are retained.
        z_val, = executor.run(feed_dict={},
                              reset_graph=False,
                              out_nodes=[z])

        # z is defined as y * 2, so the retained value must give exactly
        # twice the value returned for y.
        assert T.array_equal(z_val, 2 * y_val)
def test_HinverseG(backendopt):
    """Check ``conjugate_gradient`` on a random linear system.

    The matrix is built as ``M^T M + I`` from a seeded random 10x10 ``M``.
    The solver receives a callable that applies this matrix to the first
    entry of its argument list, and the residual norm of the returned
    solution must not exceed 1e-4.
    """
    for backend_name in backendopt:
        T.set_backend(backend_name)

        N = 10
        T.seed(1224)

        random_matrix = T.random([N, N])
        gram = T.transpose(random_matrix) @ random_matrix
        A = gram + T.identity(N)
        b = T.random([N])

        def hess_fn(x):
            # Matrix-vector product wrapped in a one-element list.
            product = T.einsum("ab,b->a", A, x[0])
            return [product]

        error_tol = 1e-9
        (solution,) = conjugate_gradient(hess_fn, [b], error_tol)

        residual = T.einsum("ab,b->a", A, solution) - b
        assert T.norm(T.abs(residual)) <= 1e-4
def test_jacobian_trace_einsum(backendopt):
    """Check the Jacobian of ``ad.einsum('ii->', x)`` with respect to ``x``.

    For each backend the einsum output must equal ``T.einsum('ii->', x_val)``
    and the Jacobian must equal the 2x2 identity.
    """
    for backend_name in backendopt:
        T.set_backend(backend_name)

        x = ad.Variable(name="x", shape=[2, 2])
        trace = ad.einsum('ii->', x)

        (grad_x,) = ad.jacobians(trace, [x])
        executor = ad.Executor([trace, grad_x])

        x_val = T.tensor([[1., 2.], [3., 4.]])
        trace_val, grad_x_val = executor.run(feed_dict={x: x_val})

        references = (
            (trace_val, T.einsum('ii->', x_val)),
            (grad_x_val, T.identity(2)),
        )
        for computed, expected in references:
            assert T.array_equal(computed, expected)
def test_add_jacobian_w_chain(backendopt):
    """Check the Jacobian of ``(x1 + x2) + x3`` with respect to ``x2``.

    For each backend the chained sum and its Jacobian are evaluated on 2x2
    inputs; the Jacobian is compared with ``T.einsum("ac,bd->abcd", I, I)``
    for a 2x2 identity ``I``.
    """
    for backend_name in backendopt:
        T.set_backend(backend_name)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 + x2
        z = y + x3

        (jacobian_x2,) = ad.jacobians(z, [x2])
        executor = ad.Executor([z, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        x3_val = T.tensor([[1, 1], [1, 1]])
        feeds = {x1: x1_val, x2: x2_val, x3: x3_val}
        z_val, jacobian_x2_val = executor.run(feed_dict=feeds)

        eye = T.identity(2)
        # Derivation of the reference value via the chain rule:
        #   jacobian_z_y  = T.einsum("ae,bf->abef", I, I)
        #   jacobian_y_x2 = T.einsum("ec,fd->efcd", I, I)
        #   jacobian_z_x2 = T.einsum("abef,efcd->abcd",
        #                            jacobian_z_y, jacobian_y_x2)
        #                 = T.einsum("ae,bf,ec,fd->abcd", I, I, I, I)
        #                 = T.einsum("ac,bd->abcd", I, I)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", eye, eye)

        assert isinstance(z, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(z_val, x1_val + x2_val + x3_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)