def test_einsum_multitier(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        input_nodes1, zs1 = get_tree("set1")
        input_nodes2, zs2 = get_tree("set2")
        out1 = zs1 + zs2

        input_nodes3, zs3 = get_tree("set3")
        input_nodes4, zs4 = get_tree("set4")
        out2 = zs3 + zs4

        out = ad.einsum("ij, jk->ik", out1, out2)
        input_nodes = input_nodes1 + input_nodes2 + input_nodes3 + input_nodes4
        generated_feed_dict = gen_dict(input_nodes)

        executor = ad.Executor([out])
        z_val, = executor.run(feed_dict=generated_feed_dict)

        with OutputInjectedModeP(find_topo_sort_p([PseudoNode(out)])):
            trees = find_sub_einsumtree(PseudoNode(out))
            for tree in trees:
                out_node, in_nodes = tree
                new_z = fuse_einsums(out_node.node, in_nodes)
                replace_node(out_node, new_z)

        executor = ad.Executor([out])
        z_new_val, = executor.run(feed_dict=generated_feed_dict)

        assert float_eq(z_val, z_new_val)


def cpd_newton(size, rank):
    dim = 3

    for datatype in BACKEND_TYPES:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        v_A = ad.Variable(name="v_A", shape=[size, rank])
        v_B = ad.Variable(name="v_B", shape=[size, rank])
        v_C = ad.Variable(name="v_C", shape=[size, rank])

        grads = ad.gradients(loss, [A, B, C])
        Hvps = ad.hvp(output_node=loss,
                      node_list=[A, B, C],
                      vector_list=[v_A, v_B, v_C])

        executor_grads = ad.Executor([loss] + grads)
        executor_Hvps = ad.Executor(Hvps)

        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list

        for i in range(100):

            def hess_fn(v):
                return executor_Hvps.run(
                    feed_dict={
                        A: A_val,
                        B: B_val,
                        C: C_val,
                        input_tensor: input_tensor_val,
                        v_A: v[0],
                        v_B: v[1],
                        v_C: v[2]
                    })

            loss_val, grad_A_val, grad_B_val, grad_C_val = executor_grads.run(
                feed_dict={
                    A: A_val,
                    B: B_val,
                    C: C_val,
                    input_tensor: input_tensor_val
                })

            delta = conjugate_gradient(
                hess_fn=hess_fn,
                grads=[grad_A_val, grad_B_val, grad_C_val],
                error_tol=1e-9,
                max_iters=250)

            A_val -= delta[0]
            B_val -= delta[1]
            C_val -= delta[2]
            print(f'At iteration {i} the loss is: {loss_val}')


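# Hypothetical driver call for the Newton routine above (the sizes are
# illustrative only, not taken from this file):
#
#   cpd_newton(size=20, rank=5)
#
# Each iteration approximately solves the Newton system with conjugate
# gradient, using Hessian-vector products (ad.hvp) rather than forming the
# full Hessian explicitly.

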
def tree_eq(out, new_out, input_nodes, tol=1e-8):
    """Compares whether two outputs (based on the same set of inputs) are equal."""
    feed_dict = gen_dict(input_nodes)
    executor = ad.Executor([out])
    out_val, = executor.run(feed_dict=feed_dict)
    executor = ad.Executor([new_out])
    new_out_val, = executor.run(feed_dict=feed_dict)
    return float_eq(out_val, new_out_val, tol)


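# Usage note: tree_eq is the numerical equality check used by the graph tests
# in this suite, e.g.
#
#   assert tree_eq(mps_graph.output, expect_mps, mps_graph.inputs)
#
# Both graphs are evaluated on the same feed_dict produced by gen_dict, so
# the comparison is numerical (up to tol), not symbolic.

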
def cpd_als_shared_exec(dim, size, rank, num_iter, input_val=[]):
    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]
    new_A_list = generate_sequential_optimal_tree(new_A_list, A_list)

    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val == []:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for iter in range(num_iter):
        t0 = time.time()

        # als iterations
        for i in range(len(A_list)):

            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})

            if i == 0:
                A_val_list[0], = executor.run(feed_dict=feed_dict,
                                              out_nodes=[new_A_list[0]])
            else:
                A_val_list[i], = executor.run(feed_dict=feed_dict,
                                              reset_graph=False,
                                              evicted_inputs=[A_list[i - 1]],
                                              out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)

        print(f'At iteration {iter} the loss is: {loss_val}')
        t1 = time.time()
        print(f"[ {iter} ] Sweep took {t1 - t0} seconds")

    return A_val_list


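# Hypothetical usage sketch for the shared-execution ALS driver above
# (sizes are illustrative, not taken from this file):
#
#   A_val_list = cpd_als_shared_exec(dim=3, size=10, rank=5, num_iter=5)
#
# Passing input_val=(A_val_list, input_tensor_val) reuses an existing set of
# factors and target tensor instead of drawing random ones via init_rand_cp.

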
def test_jtjvps():
    for datatype in backends:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[2])
        y = ad.einsum('ab, b->a', A, x)

        jtjvp_x, = ad.jtjvps(y, [x], [v])

        executor = ad.Executor([y, jtjvp_x])
        x_val = T.tensor([1., 2.])
        A_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v_val = T.tensor([3., 4.])

        y_val, jtjvp_x_val = executor.run(feed_dict={
            x: x_val,
            A: A_val,
            v: v_val
        })

        expected_yval = T.einsum('ab, b->a', A_val, x_val)
        expected_jtjvp_x_val = T.einsum('ba, ac->bc', T.transpose(A_val),
                                        A_val)
        expected_jtjvp_x_val = T.einsum('ab, b->a', expected_jtjvp_x_val,
                                        v_val)

        assert isinstance(jtjvp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(jtjvp_x_val, expected_jtjvp_x_val)


def test_inner_product_hvp():
    for datatype in backends:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[3, 1])
        v = ad.Variable(name="v", shape=[3, 1])
        y = ad.sum(ad.einsum("ab,bc->ac", ad.transpose(x), x))

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])

        executor = ad.Executor([y, grad_x, Hv])
        x_val = T.tensor([[1.], [2.], [3]])  # 3x1
        v_val = T.tensor([[1.], [2.], [3]])  # 3x1

        y_val, grad_x_val, Hv_val = executor.run(feed_dict={
            x: x_val,
            v: v_val
        })

        expected_yval = T.sum(T.dot(T.transpose(x_val), x_val))
        expected_grad_x_val = 2 * x_val
        expected_hv_val = 2 * v_val

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
        assert T.array_equal(Hv_val, expected_hv_val)


def cpd_gradient_descent(size, rank, learning_rate):
    dim = 3

    for datatype in BACKEND_TYPES:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        grad_A, grad_B, grad_C = ad.gradients(loss, [A, B, C])
        executor = ad.Executor([loss, grad_A, grad_B, grad_C])

        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list

        for i in range(100):
            loss_val, grad_A_val, grad_B_val, grad_C_val = executor.run(
                feed_dict={
                    input_tensor: input_tensor_val,
                    A: A_val,
                    B: B_val,
                    C: C_val
                })

            A_val -= learning_rate * grad_A_val
            B_val -= learning_rate * grad_B_val
            C_val -= learning_rate * grad_C_val
            print(f'At iteration {i} the loss is: {loss_val}')


def test_large_matmul_chain(backendopt):
    n = 60
    size = 3

    for datatype in backendopt:
        T.set_backend(datatype)

        # build the graph of x_1 @ ... @ x_n
        x_list = [
            ad.Variable(name=f"x{i}", shape=[size, size]) for i in range(n)
        ]

        prev_char = chr(192)
        left_char = prev_char
        for i in range(n):
            new_char = chr(ord(prev_char) + 1)
            x_list[i].subscripts = f"{prev_char}{new_char}"
            prev_char = new_char
        right_char = prev_char

        input_subs = ','.join([node.subscripts for node in x_list])
        einsum_subscripts = input_subs + '->' + left_char + right_char

        out = ad.einsum(einsum_subscripts, *x_list)

        # decompose the large einsum, and rewrite the einsum expression of the
        # generated einsum tree so there's no unicode character
        out = optimize(out)

        executor = ad.Executor([out])
        x_val_list = [T.random([size, size]) for _ in range(n)]
        out_val, = executor.run(feed_dict=dict(zip(x_list, x_val_list)))

        out_val_matmul = x_val_list[0]
        for i in range(1, n):
            out_val_matmul = out_val_matmul @ x_val_list[i]

        assert float_eq(out_val, out_val_matmul, tol=1e-2)


def test_cpd_hessian_optimize_offdiag(backendopt):
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list

        hessian = ad.hessian(loss, [A, B, C])
        hessian_offdiag = [hessian[0][1], hessian[1][0]]
        for node in hessian_offdiag:
            optimize(node)
            assert isinstance(node, ad.AddNode)
            num_operations = len(
                list(
                    filter(lambda x: isinstance(x, ad.OpNode),
                           find_topo_sort([node]))))
            # This is currently non-deterministic.
            # assert num_operations == 14

        executor = ad.Executor(hessian_offdiag)
        hes_diag_vals = executor.run(feed_dict={
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        })


def test_vjps():
    for datatype in backends:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[3])
        y = ad.einsum('ab, b->a', A, x)

        transposed_vjp_x, = ad.transposed_vjps(y, [x], v)

        executor = ad.Executor([y, transposed_vjp_x])
        x_val = T.tensor([1., 2.])  # vector of length 2
        A_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v_val = T.tensor([1., 2., 3.])

        y_val, transposed_vjp_x_val = executor.run(feed_dict={
            x: x_val,
            A: A_val,
            v: v_val
        })

        expected_yval = T.einsum('ab, b->a', A_val, x_val)
        expected_transposed_vjp_x_val = T.einsum('b, ba->a', v_val, A_val)

        assert isinstance(transposed_vjp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(transposed_vjp_x_val,
                             expected_transposed_vjp_x_val)


def test_sub_jacobian_w_chain(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 - x2
        z = x3 - y

        jacobian_x2, = ad.jacobians(z, [x2])

        executor = ad.Executor([z, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        x3_val = T.tensor([[1, 1], [1, 1]])
        z_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        I = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)

        assert isinstance(z, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(z_val, x3_val - x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)


def test_hvp2(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[3, 1])
        H = ad.Variable(name="H", shape=[3, 3])
        v = ad.Variable(name="v", shape=[3, 1])
        y = ad.sum(
            ad.einsum("ab,bc->ac",
                      ad.einsum("ab,bc->ac", ad.transpose(x), H), x))

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])

        executor = ad.Executor([y, grad_x, Hv])
        x_val = T.tensor([[1.], [2.], [3]])  # 3x1
        v_val = T.tensor([[1.], [2.], [3]])  # 3x1
        H_val = T.tensor([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]])  # 3x3

        y_val, grad_x_val, Hv_val = executor.run(feed_dict={
            x: x_val,
            H: H_val,
            v: v_val
        })

        Hx = T.dot(H_val, x_val)
        expected_yval = T.sum(T.dot(T.transpose(x_val), Hx))
        expected_grad_x_val = 2 * Hx
        expected_hv_val = T.tensor([[4.], [8.], [12.]])

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
        assert T.array_equal(Hv_val, expected_hv_val)


def test_jacobian_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[3, 3, 3])
        x2 = ad.Variable(name="x2", shape=[3, 3, 3])
        y = ad.einsum("ikl,jkl->ijk", x1, x2)

        jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
        executor = ad.Executor([y, jacobian_x1, jacobian_x2])

        x1_val = T.random((3, 3, 3))
        x2_val = T.random((3, 3, 3))
        y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
        })

        I = T.identity(3)
        expected_jacobian_x1_val = T.einsum("im,kn,jno->ijkmno", I, I, x2_val)
        expected_jacobian_x2_val = T.einsum("jm,kn,ino->ijkmno", I, I, x1_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.einsum("ikl,jkl->ijk", x1_val, x2_val))
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)


def test_einsum():
    for datatype in backends:
        T.set_backend(datatype)
        x2 = ad.Variable(name="x2", shape=[3, 2])
        x3 = ad.Variable(name="x3", shape=[2, 3])
        matmul = ad.einsum('ik,kj->ij', x2, x3)
        y = ad.sum(matmul)

        grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

        executor = ad.Executor([y, grad_x2, grad_x3])
        x2_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        x3_val = T.tensor([[7, 8, 9], [10, 11, 12]])  # 2x3

        y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
            x2: x2_val,
            x3: x3_val
        })

        expected_grad_sum = T.ones_like(T.dot(x2_val, x3_val))
        expected_yval = T.sum(T.dot(x2_val, x3_val))
        expected_grad_x2_val = T.dot(expected_grad_sum, T.transpose(x3_val))
        expected_grad_x3_val = T.dot(T.transpose(x2_val), expected_grad_sum)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x2_val, expected_grad_x2_val)
        assert T.array_equal(grad_x3_val, expected_grad_x3_val)


def test_mul_jacobian_one_scalar(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[2, 2])

        # test both cases of left and right multiply a scalar
        for y in [x1 * x2, x2 * x1]:
            jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
            executor = ad.Executor([y, jacobian_x1, jacobian_x2])

            x1_val = T.tensor(2.)
            x2_val = T.tensor([[5., 6.], [7., 8.]])
            y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
                x1: x1_val,
                x2: x2_val
            })

            I = T.identity(2)
            expected_jacobian_x1_val = T.einsum("ai,bj,ij->ab", I, I, x2_val)
            expected_jacobian_x2_val = x1_val * T.einsum("ai,bj->abij", I, I)

            assert isinstance(y, ad.Node)
            assert T.array_equal(y_val, x1_val * x2_val)
            assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
            assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)


def test_three_mul_jacobian(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 * x2 * x3

        jacobian_x1, = ad.jacobians(y, [x1])

        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor([[1., 2.], [3., 4.]])
        x2_val = T.tensor([[5., 6.], [7., 8.]])
        x3_val = T.tensor([[9., 10.], [11., 12.]])
        y_val, jacobian_x1_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        I = T.identity(2)
        expected_jacobian_x1_val = T.einsum("ai,bj,ij,ij->abij", I, I, x2_val,
                                            x3_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)


def test_cpd_jtjvp_optimized(benchmark):
    for datatype in BACKEND_TYPES:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        v_A = ad.Variable(name="v_A", shape=[size, rank])
        v_B = ad.Variable(name="v_B", shape=[size, rank])
        v_C = ad.Variable(name="v_C", shape=[size, rank])

        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list
        v_A_list, _ = init_rand_cp(dim, size, rank)
        v_A_val, v_B_val, v_C_val = v_A_list

        JtJvps = ad.jtjvps(output_node=residual,
                           node_list=[A, B, C],
                           vector_list=[v_A, v_B, v_C])

        JtJvps = [optimize(JtJvp) for JtJvp in JtJvps]
        dedup(*JtJvps)
        for node in JtJvps:
            assert isinstance(node, ad.AddNode)

        executor_JtJvps = ad.Executor(JtJvps)

        jtjvp_val = benchmark(executor_JtJvps.run,
                              feed_dict={
                                  A: A_val,
                                  B: B_val,
                                  C: C_val,
                                  input_tensor: input_tensor_val,
                                  v_A: v_A_val,
                                  v_B: v_B_val,
                                  v_C: v_C_val
                              })


def test_add_jacobian(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        y = x1 + x2

        jacobian_x2, = ad.jacobians(y, [x2])

        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        y_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })

        I = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)


def test_add_mul_mix_3(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x2 = ad.Variable(name="x2", shape=[3])
        x3 = ad.Variable(name="x3", shape=[3])
        z = x2 * x2 + x2 + x3 + 3
        y = ad.sum(z * z + x3)

        grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

        executor = ad.Executor([y, grad_x2, grad_x3])
        x2_val = 2 * T.ones(3)
        x3_val = 3 * T.ones(3)
        y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
            x2: x2_val,
            x3: x3_val
        })

        z_val = x2_val * x2_val + x2_val + x3_val + 3
        expected_yval = z_val * z_val + x3_val
        expected_grad_x2_val = 2 * (x2_val * x2_val + x2_val + x3_val +
                                    3) * (2 * x2_val + 1)
        expected_grad_x3_val = 2 * (x2_val * x2_val + x2_val + x3_val + 3) + 1

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(expected_yval))
        assert T.array_equal(grad_x2_val, expected_grad_x2_val)
        assert T.array_equal(grad_x3_val, expected_grad_x3_val)


def test_add_mul_mix_2(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[3])
        x2 = ad.Variable(name="x2", shape=[3])
        x3 = ad.Variable(name="x3", shape=[3])
        x4 = ad.Variable(name="x4", shape=[3])
        y = ad.sum(x1 + x2 * x3 * x4)

        grad_x1, grad_x2, grad_x3, grad_x4 = ad.gradients(y, [x1, x2, x3, x4])

        executor = ad.Executor([y, grad_x1, grad_x2, grad_x3, grad_x4])
        x1_val = 1 * T.ones(3)
        x2_val = 2 * T.ones(3)
        x3_val = 3 * T.ones(3)
        x4_val = 4 * T.ones(3)
        y_val, grad_x1_val, grad_x2_val, grad_x3_val, grad_x4_val = executor.run(
            feed_dict={
                x1: x1_val,
                x2: x2_val,
                x3: x3_val,
                x4: x4_val
            })

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(x1_val + x2_val * x3_val * x4_val))
        assert T.array_equal(grad_x1_val, T.ones_like(x1_val))
        assert T.array_equal(grad_x2_val, x3_val * x4_val)
        assert T.array_equal(grad_x3_val, x2_val * x4_val)
        assert T.array_equal(grad_x4_val, x2_val * x3_val)


def test_executor_dependent(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        A = ad.Variable(name="A", shape=[3, 3])
        B = ad.Variable(name="B", shape=[3, 3])

        AA = ad.einsum('ab,ab->', A, A)
        BB = ad.einsum('ab,ab->', B, B)
        AB = ad.einsum('ab,ab->', A, B)

        out_A = AA + AB
        out_B = AB + AA

        executor = ad.Executor({out_A, out_B})

        data = gen_dict([A, B])
        A_val, = executor.run(feed_dict=data,
                              reset_graph=False,
                              out_nodes=[out_A])

        data2 = gen_dict([A])
        data2.update({B: data[B]})
        B_val, = executor.run(feed_dict=data2, out_nodes=[out_B])

        # Check that A's cached value is not reused in the computation of B_val.
        assert A_val != B_val


def test_add_jacobian_scalar(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        y = x1 + x2

        jacobian_x2, = ad.jacobians(y, [x2])

        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(1.)
        y_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })

        expected_jacobian_x2_val = T.tensor(1.)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)


def test_three_mul_jacobian_scalars(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        x3 = ad.Variable(name="x3", shape=[])
        y = x1 * x2 * x3

        jacobian_x1, = ad.jacobians(y, [x1])

        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(2.)
        x3_val = T.tensor(3.)
        y_val, jacobian_x1_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        expected_jacobian_x1_val = x2_val * x3_val

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)


def dmrg_shared_exec(mpo_tensors,
                     init_mps_tensors,
                     max_mps_rank,
                     num_iter=1,
                     sequence='R'):
    """
    Perform DMRG iterations with shared executions.
    """
    if sequence != "R":
        raise NotImplementedError

    num = len(mpo_tensors)
    size = mpo_tensors[0].shape[1]
    mpo_ranks = [mpo_tensors[i].shape[0] for i in range(1, len(mpo_tensors))]

    mps_tensors = copy.deepcopy(init_mps_tensors)
    mps_ranks = [mps_tensors[i].shape[0] for i in range(1, len(mps_tensors))]

    dg = DmrgGraph.create(num, mpo_ranks, mps_ranks, size)
    for i, hes in enumerate(dg.hessians):
        dg.hessians[i] = simplify(hes)
        assert isinstance(hes, ad.EinsumNode)
    dg.hessians = generate_sequential_optimal_tree(dg.hessians, dg.mps_inputs)
    executor = ad.Executor(dg.hessians)

    # sequence is R
    for iter in range(num_iter):

        mps_tensors = gauge_transform_mps(mps_tensors, right=True)
        mps_ranks = [
            mps_tensors[i].shape[0] for i in range(1, len(mps_tensors))
        ]

        for i in range(num - 1):

            dg.update_graph(num, mpo_ranks, mps_ranks, size)

            feed_dict = dict(zip(dg.mpo_inputs, mpo_tensors))
            feed_dict.update(dict(zip(dg.mps_inputs, mps_tensors)))

            hes_val, = executor.run(feed_dict=feed_dict,
                                    out_nodes=[dg.hessians[i]])

            # get the smallest eigenvalue and the corresponding eigenvector of hes_val
            eigvec_shape = dg.intermediates[i].shape
            eig_val, eigvec = get_smallest_eigenpair(hes_val, eigvec_shape)

            # Update the two sites of the mps
            mps_tensors[i], mps_tensors[i + 1] = dmrg_local_update(
                dg.intermediates[i], eigvec, max_mps_rank)

            # update the rank
            mps_ranks[i] = mps_tensors[i + 1].shape[0]

        print(f'At iteration {iter} the smallest eigenvalue is: {eig_val}')

    return mps_tensors, eig_val


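# Hypothetical usage sketch for the DMRG driver above (the tensor lists are
# placeholders and must follow the MPO/MPS conventions of this module):
#
#   mps_tensors, eig_val = dmrg_shared_exec(mpo_tensors,
#                                           init_mps_tensors,
#                                           max_mps_rank=16,
#                                           num_iter=2)
#
# Only the right sweep (sequence='R') is implemented; any other value raises
# NotImplementedError.

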
def test_einsum_multiuse(backendopt):
    """
        An einsum graph like
        A    B   inputs
        |\   |
        | \  |
        |  \ |
        |   C
        |  /
        | /
        output

        will produce

        An einsum graph like
        A    B   inputs
        |\   |
        | A1 |
        |  \ |
        A2   C
        |   /
        |  /
        output
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a1", shape=[3, 2])
        b = ad.Variable(name="b", shape=[2, 3])

        c = ad.einsum('ik,kj->ij', a, b)
        output = ad.einsum('ik,ij->kj', a, c)

        feed_dict = gen_dict([a, b])

        executor = ad.Executor([output])
        out_val, = executor.run(feed_dict=feed_dict)

        linearize(output)
        executor = ad.Executor([output])
        out_new_val, = executor.run(feed_dict=feed_dict)

        assert T.array_equal(out_val, out_new_val)


def tucker_als_graph_shared_exec(dim, size, rank):
    """
    Build the graph used for Tucker ALS with shared execution.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of each dimension of the input tensor
    rank: the rank of the decomposition

    Returns
    -------
    tg: a TuckerGraph object
    executor_updates: a shared executor for the update graphs
    executor_loss: an executor for the Tucker loss
    loss: the optimized graph for the Tucker loss
    updates: a list containing the update graph for each dimension
    intermediates: list of einsum nodes. Each node is the objective
        that each Tucker ALS step optimizes for.
    """
    tg = TuckerGraph(dim, size, rank)

    updates = []
    for i in range(dim):

        core_A = tg.intermediates[i]
        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]
        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [[i + dim for i in range(dim)], [i for i in range(dim)]])

        updates.append(simplify(new_core_A))

    loss = simplify(tg.losses[0])
    for i in range(1, len(tg.losses)):
        assert loss.name == simplify(tg.losses[i]).name

    updates = generate_sequential_optimal_tree(updates, tg.A_list)
    executor_updates = ad.Executor(updates)
    executor_loss = ad.Executor([loss])

    return tg, executor_updates, executor_loss, loss, updates, tg.intermediates


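# Hypothetical call sketch for the Tucker ALS graph builder above; the
# unpacking follows the return statement, while how the executors are driven
# afterwards is an assumption (mirroring the ALS loop in cpd_als_shared_exec):
#
#   tg, executor_updates, executor_loss, loss, updates, intermediates = \
#       tucker_als_graph_shared_exec(dim=3, size=10, rank=5)
#
# executor_updates evaluates one factor-update graph per dimension per sweep,
# and executor_loss evaluates the shared Tucker loss.

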
def test_tensorinv_matrix():
    for datatype in backends:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[3, 3])
        inv_x = ad.tensorinv(x)
        executor = ad.Executor([inv_x])

        x_val = T.random([3, 3])
        inv_x_val, = executor.run(feed_dict={x: x_val})
        assert T.array_equal(inv_x_val, T.inv(x_val))


def test_executor_debug_orthonormal(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        A = ad.Matrix(name="A", shape=[3, 3], orthonormal='row')
        out = ad.einsum("ab,bc->ac", A, A)

        A_val, _, _ = T.svd(T.random((3, 3)))

        executor = ad.Executor([out])
        executor.run(feed_dict={A: A_val}, debug=True)


def test_mps(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        mps_graph = MpsGraph.create(4, ranks=[5, 6, 7])
        executor = ad.Executor([mps_graph.output])

        expect_mps = ad.einsum('ab,acd,cef,eg->bdfg', *mps_graph.inputs)

        assert tree_eq(mps_graph.output, expect_mps, mps_graph.inputs)


def test_mpo(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        mpo_graph = MpoGraph.create(4, ranks=[5, 6, 7])
        executor = ad.Executor([mpo_graph.output])

        expect_mpo = ad.einsum('abc,adef,dghi,gjk->behjcfik',
                               *mpo_graph.inputs)

        assert tree_eq(mpo_graph.output, expect_mpo, mpo_graph.inputs)