def test_einsum():
    """Check that parsing an einsum node's name yields a node with the same name."""
    lhs = ad.Variable(name="A", shape=[3, 2])
    rhs = ad.Variable(name="B", shape=[2, 3])
    product = ad.einsum('ik,kj->ij', lhs, rhs)

    reparsed = AutodiffParser.parse(product.name, [lhs, rhs])
    assert reparsed.name == product.name
def test_einsum_multiuse_auto_copy(backendopt):
    r"""Test autolinearization followed by automatic einsum fusion.

    The graph under test uses input A twice::

        A   B   inputs
        |\  |
        | \ |
        |  \|
        |   C
        |  /
        | /
        output

    Next: we would need to autoprune.
    """
    # NOTE: the docstring is a raw string because the diagram contains
    # backslashes that would otherwise be parsed as (invalid) escape sequences.
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a1", shape=[3, 2])
        b = ad.Variable(name="b", shape=[2, 3])
        c = ad.einsum('ik,kj->ij', a, b)
        output = ad.einsum('ik,ij->kj', a, c)

        linearize(output)
        all_nodes = find_topo_sort([output])
        # The clone nodes found after linearize() are used as the fusion
        # inputs in place of the original variable `a`.
        cloned_nodes = [
            node for node in all_nodes if isinstance(node, ad.CloneNode)
        ]

        out_new = fuse_einsums(output, [*cloned_nodes, b])

        # After fusion, no input of the new node may itself be an einsum.
        assert all(
            not isinstance(x, ad.EinsumNode) for x in out_new.inputs)
        assert tree_eq(output, out_new, [*cloned_nodes, b])
def __init__(self, dim, size, rank):
    """Build the symbolic graph that approximates X by factor matrices and a core.

    Creates an input tensor ``X`` with ``dim`` modes of length ``size``, a
    ``core`` tensor with ``dim`` modes of length ``rank``, and one
    ``size x rank`` matrix ``A{i}`` per mode. The einsum of all matrices with
    the core is stored as ``self.output`` and ``self.output - self.X`` as
    ``self.residual``. For every mode, ``_build_graph_w_intermediate`` supplies
    an (intermediate, loss) pair that is collected in ``self.intermediates``
    and ``self.losses``.
    """
    char_source = CharacterGetter()

    # Subscript characters are drawn for X first and for the core second.
    self.X = ad.Variable(name='X', shape=[size] * dim)
    X_subscripts = "".join(char_source.getchar() for _ in range(dim))

    self.core = ad.Variable(name='core', shape=[rank] * dim)
    core_subscripts = "".join(char_source.getchar() for _ in range(dim))

    self.A_list = [
        ad.Matrix(name=f'A{mode}', shape=[size, rank], orthonormal='row')
        for mode in range(dim)
    ]
    # Factor `mode` pairs the mode-th index of X with the mode-th core index.
    factor_subscripts = [
        f"{X_subscripts[mode]}{core_subscripts[mode]}" for mode in range(dim)
    ]

    input_subs = ','.join(factor_subscripts + [core_subscripts])
    self.einsum_subscripts = input_subs + '->' + X_subscripts

    self.output = ad.einsum(self.einsum_subscripts, *self.A_list, self.core)
    self.residual = self.output - self.X

    self.intermediates = []
    self.losses = []
    for mode in range(dim):
        intermediate, loss = self._build_graph_w_intermediate(mode)
        self.intermediates.append(intermediate)
        self.losses.append(loss)
def test_tree_distribution_ppE(dist_op, backendopt):
    """[Distributive] ((A + B) + C) * G should produce AG + BG + CG.

    Here ``+`` stands for ``dist_op``. Note that (A + B) has the parent
    (A + B) + C.
    """
    for backend in backendopt:
        T.set_backend(backend)

        a = ad.Variable(name="a", shape=[3, 2])
        b = ad.Variable(name="b", shape=[3, 2])
        c = ad.Variable(name="c", shape=[3, 2])
        g = ad.Variable(name="g", shape=[2, 2])

        inner = dist_op(a, b)
        outer = dist_op(inner, c)
        output = ad.einsum('ik,kk->ik', outer, g)

        new_output = distribute_tree(output)

        # The distributed tree must be rooted at dist_op and stay equivalent.
        assert isinstance(new_output, dist_op)
        assert tree_eq(output, new_output, [a, b, c, g])
def test_grad_of_grad():
    """Check first- and second-order gradients of y = x2 * x2 + x2 * x3."""
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = x2 * x2 + x2 * x3

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])
    # Differentiate the first gradient again to get second-order terms.
    grad_x2_x2, grad_x2_x3 = ad.gradients(grad_x2, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3, grad_x2_x2, grad_x2_x3])
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    outputs = executor.run(feed_dict={x2: x2_val, x3: x3_val})
    y_val, grad_x2_val, grad_x3_val, grad_x2_x2_val, grad_x2_x3_val = outputs

    assert isinstance(y, ad.Node)

    # (actual, expected) pairs, checked in the same order as they were run.
    checks = [
        (y_val, x2_val * x2_val + x2_val * x3_val),
        (grad_x2_val, 2 * x2_val + x3_val),
        (grad_x3_val, x2_val),
        (grad_x2_x2_val, 2 * np.ones_like(x2_val)),
        (grad_x2_x3_val, 1 * np.ones_like(x2_val)),
    ]
    for actual, expected in checks:
        assert np.array_equal(actual, expected)
def test_add_mul_mix_2():
    """Check the value and all four gradients of y = x1 + x2 * x3 * x4."""
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    x4 = ad.Variable(name="x4")
    y = x1 + x2 * x3 * x4

    grad_x1, grad_x2, grad_x3, grad_x4 = ad.gradients(y, [x1, x2, x3, x4])
    executor = ad.Executor([y, grad_x1, grad_x2, grad_x3, grad_x4])

    x1_val = 1 * np.ones(3)
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    x4_val = 4 * np.ones(3)
    feed = {x1: x1_val, x2: x2_val, x3: x3_val, x4: x4_val}
    outputs = executor.run(feed_dict=feed)
    y_val, grad_x1_val, grad_x2_val, grad_x3_val, grad_x4_val = outputs

    assert isinstance(y, ad.Node)

    # Each gradient of the product term is the product of the other factors.
    checks = [
        (y_val, x1_val + x2_val * x3_val * x4_val),
        (grad_x1_val, np.ones_like(x1_val)),
        (grad_x2_val, x3_val * x4_val),
        (grad_x3_val, x2_val * x4_val),
        (grad_x4_val, x2_val * x3_val),
    ]
    for actual, expected in checks:
        assert np.array_equal(actual, expected)
def test_matmul_two_vars():
    """Check matmul_op's value and gradients against NumPy for a 3x2 by 2x3 product."""
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = ad.matmul_op(x2, x3)

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])
    executor = ad.Executor([y, grad_x2, grad_x3])

    x2_val = np.array([[1, 2], [3, 4], [5, 6]])  # 3x2
    x3_val = np.array([[7, 8, 9], [10, 11, 12]])  # 2x3
    feed = {x2: x2_val, x3: x3_val}
    y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict=feed)

    expected_yval = x2_val @ x3_val
    # The expected gradients use an all-ones tensor shaped like the output.
    ones = np.ones_like(expected_yval)
    expected_grad_x2_val = ones @ x3_val.T
    expected_grad_x3_val = x2_val.T @ ones

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_yval)
    assert np.array_equal(grad_x2_val, expected_grad_x2_val)
    assert np.array_equal(grad_x3_val, expected_grad_x3_val)
def test_einsum_gen_corner_case(backendopt):
    """Check that the generated tree only contains pairwise contractions.

    Note: Numpy contraction path cannot find the opt path for this
    expression. It will output the same expression as the input.

    Network under test::

        ----------- E -----------
        |       |       |       |
        a       b       c       d
        |       |       |       |
        A - e - B - f - C - g - D
        |       |       |       |
        h       i       j       k
        |       |       |       |
    """
    size = 5
    A = ad.Variable(name="A", shape=[size] * 3)
    B = ad.Variable(name="B", shape=[size] * 4)
    C = ad.Variable(name="C", shape=[size] * 4)
    D = ad.Variable(name="D", shape=[size] * 3)
    E = ad.Variable(name="E", shape=[size] * 4)

    output = ad.einsum('aeh,bfie,cgjf,dgk,abcd->hijk', A, B, C, D, E)
    new_output = generate_optimal_tree(output)

    for node in find_topo_sort([new_output]):
        if isinstance(node, ad.VariableNode):
            continue
        # Every non-variable node must take exactly two inputs.
        assert len(node.inputs) == 2
def test_add_jacobian(backendopt):
    """Check the Jacobian of y = x1 + x2 w.r.t. x2 for 2x2 inputs on each backend."""
    for backend in backendopt:
        T.set_backend(backend)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        y = x1 + x2

        (jacobian_x2,) = ad.jacobians(y, [x2])
        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        feed = {x1: x1_val, x2: x2_val}
        y_val, jacobian_x2_val = executor.run(feed_dict=feed)

        # Expected Jacobian: outer product of two 2x2 identities.
        eye = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", eye, eye)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def run_problem4():
    """Print an autodiff-vs-NumPy comparison and a gradient check for problem 4.

    Evaluates a scalar expression of three variables with ``ad.func`` and with
    NumPy, then compares the finite difference ``(V(w + t*v) - V(w)) / t``
    against the inner product ``<dV(w), v>`` of the autodiff gradient with a
    perturbation ``v``. Uses the step size ``t`` and the helper ``noise_like``,
    both of which are defined outside this function.
    """
    print("-" * 18 + " Problem 4 " + "-" * 18)
    x1 = ad.Variable()
    x2 = ad.Variable()
    x3 = ad.Variable()
    y = ((ad.sin(x1 + 1) + ad.cos(2 * x2)) * ad.tan(ad.log(x3)) +
         (ad.sin(x2 + 1) + ad.cos(2 * x1)) * ad.exp(1 + ad.sin(x3)))

    # Evaluation point, followed by one perturbation direction per input.
    x1_f = np.random.rand()
    x2_f = np.random.rand()
    x3_f = np.random.rand()
    x1_v = noise_like(x1_f)
    x2_v = noise_like(x2_f)
    x3_v = noise_like(x3_f)

    f, grad = ad.func(y, {x1: x1_f, x2: x2_f, x3: x3_f}, get_gradient=True)
    f_np = ((np.sin(x1_f + 1) + np.cos(2 * x2_f)) * np.tan(np.log(x3_f)) +
            (np.sin(x2_f + 1) + np.cos(2 * x1_f)) * np.exp(1 + np.sin(x3_f)))
    print("Function value by autodiff =", f)
    print("Function value by numpy =", f_np)

    perturbed = {
        x1: x1_f + t * x1_v,
        x2: x2_f + t * x2_v,
        x3: x3_f + t * x3_v
    }
    lhs = (ad.func(y, perturbed) - f) / t
    rhs = (np.sum(grad[x1] * x1_v) + np.sum(grad[x2] * x2_v) +
           np.sum(grad[x3] * x3_v))
    print("(V(w + tv)-V(w)) / t =", lhs)
    print("<dV(w), v> =", rhs)
    # The label previously read "|lfs - rhs|", a typo for "lhs".
    print("|lhs - rhs| / |lhs| =", np.abs(lhs - rhs) / np.abs(lhs))
def test_get_common_ancestor(backendopt):
    """Check that the common ancestor of both uses of A has inputs A, A and X3."""
    A = ad.Variable(name="A", shape=[3, 2])
    X1 = ad.Variable(name="X1", shape=[3, 2, 2])
    X2 = ad.Variable(name="X2", shape=[3, 3, 2, 2])
    X3 = ad.Variable(name="X3", shape=[3, 2, 2])

    # The network and indices positions are as follows:
    #
    #                   g - A
    #                       |
    #     c        d        e
    #     |        |        |
    #     X1 - a - X2 - b - X3
    #     |        |        |
    #     h        i        j
    #                       |
    #                   l - A
    einsum_node = ad.einsum('lj,ge,bej,abdi,ach->cdhigl', A, A, X3, X2, X1)
    opt_einsum = generate_optimal_tree(einsum_node)
    sub_einsum = get_common_ancestor(opt_einsum, einsum_node.inputs, A)

    def by_name(node):
        return node.name

    # Compare as name-sorted lists so the input order does not matter.
    actual_inputs = sorted(get_all_inputs(sub_einsum), key=by_name)
    expected_inputs = sorted([A, A, X3], key=by_name)
    assert actual_inputs == expected_inputs
def run_problem3():
    """Print an autodiff-vs-NumPy comparison and a gradient check for problem 3.

    The expression is ``average(matmul(relu(matmul(x, w1)), w2) + x)``. It is
    evaluated with ``ad.func`` and with NumPy, and the finite difference
    ``(V(w + t*v) - V(w)) / t`` is compared against ``<dV(w), v>`` computed
    from the autodiff gradient. Uses the step size ``t`` and the helper
    ``noise_like``, both of which are defined outside this function.
    """
    print("-" * 18 + " Problem 3 " + "-" * 18)
    x = ad.Variable()
    w1 = ad.Variable()
    w2 = ad.Variable()
    y = ad.average(ad.matmul(ad.relu(ad.matmul(x, w1)), w2) + x)

    # Evaluation point, followed by one perturbation direction per input.
    x_f = np.random.randn(1, 64)
    w1_f = np.random.randn(64, 128)
    w2_f = np.random.randn(128, 64)
    x_v = noise_like(x_f)
    w1_v = noise_like(w1_f)
    w2_v = noise_like(w2_f)

    f, grad = ad.func(y, {x: x_f, w1: w1_f, w2: w2_f}, get_gradient=True)
    f_np = np.average(
        np.matmul(np.maximum(np.matmul(x_f, w1_f), 0), w2_f) + x_f)
    print("Function value by autodiff =", f)
    print("Function value by numpy =", f_np)

    perturbed = {
        x: x_f + t * x_v,
        w1: w1_f + t * w1_v,
        w2: w2_f + t * w2_v
    }
    lhs = (ad.func(y, perturbed) - f) / t
    rhs = (np.sum(grad[x] * x_v) + np.sum(grad[w1] * w1_v) +
           np.sum(grad[w2] * w2_v))
    print("(V(w + tv)-V(w)) / t =", lhs)
    print("<dV(w), v> =", rhs)
    # The label previously read "|lfs - rhs|", a typo for "lhs".
    print("|lhs - rhs| / |lhs| =", np.abs(lhs - rhs) / np.abs(lhs))
def auto_diff_lr():
    """Fit a logistic-regression weight vector using autodiff gradients.

    Builds the log-likelihood ``L = y*log(h) + (1-y)*log(1-h)`` with
    ``h = 1 / (1 + exp(-sum(w * x)))``, then performs per-sample gradient
    ascent on ``w`` for a fixed number of passes over data from
    ``gen_2d_data``. Accuracy is reported and the data are plotted before and
    after training via the ``test_accuracy`` and ``plot`` helpers.
    """
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    # Note: this formulation can have large numerical error in some cases,
    # so real systems usually provide higher-level operators to reduce it.
    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    # Built once; the original constructed an identical executor twice.
    executor = ad.Executor([L, w_grad])

    N = 100
    X_val, Y_val = gen_2d_data(N)
    w_val = np.ones(3)

    plot(N, X_val, Y_val, w_val)
    test_accuracy(w_val, X_val, Y_val)

    alpha = 0.01
    max_iters = 300
    for iteration in range(max_iters):
        acc_L_val = 0
        for i in range(N):
            x_val = X_val[i]
            y_val = np.array(Y_val[i])
            L_val, w_grad_val = executor.run(
                feed_dict={w: w_val, x: x_val, y: y_val})
            # Ascent step: L is a log-likelihood, so move along the gradient.
            w_val += alpha * w_grad_val
            acc_L_val += L_val
        print("iter = %d, likelihood = %s, w = %s" %
              (iteration, acc_L_val, w_val))

    test_accuracy(w_val, X_val, Y_val)
    plot(N, X_val, Y_val, w_val, True)
def test_executor_dependent(backendopt):
    """Check that a second executor run with different data gives a different value."""
    for backend in backendopt:
        T.set_backend(backend)

        A = ad.Variable(name="A", shape=[3, 3])
        B = ad.Variable(name="B", shape=[3, 3])

        AA = ad.einsum('ab,ab->', A, A)
        # BB is not used below; it is kept so graph construction is unchanged.
        BB = ad.einsum('ab,ab->', B, B)
        AB = ad.einsum('ab,ab->', A, B)

        out_A = AA + AB
        out_B = AB + AA

        executor = ad.Executor({out_A, out_B})

        first_feed = gen_dict([A, B])
        (A_val,) = executor.run(feed_dict=first_feed,
                                reset_graph=False,
                                out_nodes=[out_A])

        # Second feed: data for A from a new gen_dict call, B carried over.
        second_feed = gen_dict([A])
        second_feed.update({B: first_feed[B]})
        (B_val,) = executor.run(feed_dict=second_feed, out_nodes=[out_B])

        # This is checking A's val is not reused in B_val computation.
        assert A_val != B_val
def setUp(self):
    """Build the same one-hidden-layer sigmoid perceptron in TensorFlow and in `ad`."""
    np.random.seed(1337)
    batch_size, input_size, hidden_size, output_size = 16, 20, 40, 5

    # Input and weight values shared by both graphs (drawn in this order).
    self.x_val = np.random.randn(batch_size, input_size)
    self.w1_val = np.random.randn(input_size, hidden_size)
    self.w2_val = np.random.randn(hidden_size, output_size)

    # TensorFlow graph.
    self.tf_x = tf.constant(self.x_val)
    self.tf_w1 = tf.constant(self.w1_val)
    self.tf_w2 = tf.constant(self.w2_val)
    hidden_pre_tf = self.tf_x @ self.tf_w1
    self.tf_h = tf.nn.sigmoid(hidden_pre_tf)
    output_pre_tf = self.tf_h @ self.tf_w2
    self.tf_o = tf.nn.sigmoid(output_pre_tf)

    # Graph built with the library under test.
    self.my_x = ad.Variable(self.x_val, name="x_val")
    self.my_w1 = ad.Variable(self.w1_val, name="w1_val")
    self.my_w2 = ad.Variable(self.w2_val, name="w2_val")
    hidden_pre = self.my_x @ self.my_w1
    self.var_h = ad.Sigmoid(hidden_pre)
    output_pre = self.var_h @ self.my_w2
    self.var_o = ad.Sigmoid(output_pre)

    self.my_graph = self.var_o
    self.tf_graph = self.tf_o
def test_sub_jacobian_w_chain(backendopt):
    """Check the Jacobian of z = x3 - (x1 - x2) w.r.t. x2 on each backend."""
    for backend in backendopt:
        T.set_backend(backend)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 - x2
        z = x3 - y

        (jacobian_x2,) = ad.jacobians(z, [x2])
        executor = ad.Executor([z, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        x3_val = T.tensor([[1, 1], [1, 1]])
        feed = {x1: x1_val, x2: x2_val, x3: x3_val}
        z_val, jacobian_x2_val = executor.run(feed_dict=feed)

        # The two subtractions cancel, so the expected Jacobian is positive.
        eye = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", eye, eye)

        assert isinstance(z, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(z_val, x3_val - x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def setUp(self):
    r"""Build a single sigmoid layer in TensorFlow and in `ad`.

    Graph looks like this (a bias ``b_val`` is added to the MatMul result
    before the sigmoid)::

        x_val      w_val
            \      /
             MatMul
               |
            Sigmoid

    x_val.shape = (2, 3)
    w_val.shape = (3, 5)
    b_val.shape = (5,)
    MatMul.shape = (2, 5)
    Sigmoid.shape = (2, 5)
    """
    # NOTE: the docstring is a raw string because the diagram contains a
    # backslash that would otherwise be parsed as an invalid escape sequence.
    np.random.seed(1337)
    self.x_val = np.random.randn(2, 3)
    self.w_val = np.random.randn(3, 5)
    self.b_val = np.random.randn(5)

    # TensorFlow graph, pinned to float64.
    self.tf_x = tf.constant(self.x_val, dtype=tf.float64)
    self.tf_w = tf.constant(self.w_val, dtype=tf.float64)
    self.tf_b = tf.constant(self.b_val, dtype=tf.float64)
    self.tf_mul = self.tf_x @ self.tf_w + self.tf_b
    self.tf_graph = tf.nn.sigmoid(self.tf_mul)

    # Same graph built with the library under test.
    self.my_x = ad.Variable(self.x_val, name="x_val")
    self.my_w = ad.Variable(self.w_val, name="w_val")
    self.my_b = ad.Variable(self.b_val, name="b_val")
    self.var_mul = self.my_x @ self.my_w + self.my_b
    self.my_graph = ad.Sigmoid(self.var_mul)
def test_three_mul_jacobian(backendopt):
    """Check the Jacobian of y = x1 * x2 * x3 w.r.t. x1 for 2x2 inputs."""
    for backend in backendopt:
        T.set_backend(backend)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 * x2 * x3

        (jacobian_x1,) = ad.jacobians(y, [x1])
        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor([[1., 2.], [3., 4.]])
        x2_val = T.tensor([[5., 6.], [7., 8.]])
        x3_val = T.tensor([[9., 10.], [11., 12.]])
        feed = {x1: x1_val, x2: x2_val, x3: x3_val}
        y_val, jacobian_x1_val = executor.run(feed_dict=feed)

        # Expected Jacobian: identities scaled elementwise by x2 * x3.
        eye = T.identity(2)
        expected_jacobian_x1_val = T.einsum("ai,bj,ij,ij->abij", eye, eye,
                                            x2_val, x3_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
def test_add_mul_mix_3():
    """Check value and gradients of y = z * z + x3 with z = x2 * x2 + x2 + x3 + 3."""
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    z = x2 * x2 + x2 + x3 + 3
    y = z * z + x3

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])
    executor = ad.Executor([y, grad_x2, grad_x3])

    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    feed = {x2: x2_val, x3: x3_val}
    y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict=feed)

    z_val = x2_val * x2_val + x2_val + x3_val + 3
    expected_yval = z_val * z_val + x3_val
    # Chain rule: dy/dx2 = 2 * z * (2 * x2 + 1), dy/dx3 = 2 * z + 1.
    expected_grad_x2_val = 2 * (x2_val * x2_val + x2_val + x3_val +
                                3) * (2 * x2_val + 1)
    expected_grad_x3_val = 2 * (x2_val * x2_val + x2_val + x3_val + 3) + 1

    assert isinstance(y, ad.Node)
    for actual, expected in (
        (y_val, expected_yval),
        (grad_x2_val, expected_grad_x2_val),
        (grad_x3_val, expected_grad_x3_val),
    ):
        assert np.array_equal(actual, expected)
def test_three_mul_jacobian_scalars(backendopt):
    """Check the Jacobian of y = x1 * x2 * x3 w.r.t. x1 for scalar inputs."""
    for backend in backendopt:
        T.set_backend(backend)

        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        x3 = ad.Variable(name="x3", shape=[])
        y = x1 * x2 * x3

        (jacobian_x1,) = ad.jacobians(y, [x1])
        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(2.)
        x3_val = T.tensor(3.)
        feed = {x1: x1_val, x2: x2_val, x3: x3_val}
        y_val, jacobian_x1_val = executor.run(feed_dict=feed)

        # For scalars the Jacobian is the product of the other two factors.
        expected_jacobian_x1_val = x2_val * x3_val

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
def test_tree_distribution_two_layers(dist_op, backendopt):
    """[Distributive] ((A + B) * G) * C should produce AGC + BGC.

    Here ``+`` stands for ``dist_op``. Note that (A + B) * G is contracted
    first.
    """
    for backend in backendopt:
        T.set_backend(backend)

        a = ad.Variable(name="a", shape=[3, 2])
        b = ad.Variable(name="b", shape=[3, 2])
        g = ad.Variable(name="g", shape=[2, 2])
        c = ad.Variable(name="c", shape=[2, 3])

        combined = dist_op(a, b)
        interm = ad.einsum('ik, kk->ik', combined, g)
        output = ad.einsum('ik,kj->ij', interm, c)

        new_output = distribute_tree(output)

        # The distributed tree must be rooted at dist_op and stay equivalent.
        assert isinstance(new_output, dist_op)
        assert tree_eq(output, new_output, [a, b, c, g])
def test_mul_jacobian_one_scalar(backendopt):
    """Check Jacobians of a scalar-times-matrix product in both operand orders."""
    for backend in backendopt:
        T.set_backend(backend)

        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[2, 2])

        # test both cases of left and right multiply a scalar
        products = [x1 * x2, x2 * x1]
        for y in products:
            jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
            executor = ad.Executor([y, jacobian_x1, jacobian_x2])

            x1_val = T.tensor(2.)
            x2_val = T.tensor([[5., 6.], [7., 8.]])
            feed = {x1: x1_val, x2: x2_val}
            y_val, jacobian_x1_val, jacobian_x2_val = executor.run(
                feed_dict=feed)

            eye = T.identity(2)
            expected_jacobian_x1_val = T.einsum("ai,bj,ij->ab", eye, eye,
                                                x2_val)
            expected_jacobian_x2_val = x1_val * T.einsum(
                "ai,bj->abij", eye, eye)

            assert isinstance(y, ad.Node)
            assert T.array_equal(y_val, x1_val * x2_val)
            assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
            assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_prune_identity(backendopt):
    """Check that prune_identity_nodes removes redundant identity inputs from an einsum."""
    for backend in backendopt:
        T.set_backend(backend)

        a1 = ad.Variable(name="a1", shape=[3, 3])
        a2 = ad.Variable(name="a2", shape=[3, 3])
        i1 = ad.identity(3)
        i2 = ad.identity(3)
        i3 = ad.identity(3)

        out = ad.einsum("ab,cd,ac,be,ef->abdf", a1, a2, i1, i2, i3)
        prune_identity_nodes(out)

        # Explanation of the einsum above:
        # The identity node i1 means that a and c should be the same dim, so
        # i1 can be dropped and the expression rewritten as
        # ad.einsum("ab,ad,be,ef->abdf", a1, a2, i2, i3).
        # i2 and i3 can also be combined because e is useless, which gives
        # ad.einsum("ab,ad,bf->abdf", a1, a2, i2).
        out_expect = ad.einsum("ab,ad,bf->abdf", a1, a2, i2)

        assert len(out.inputs) == 3
        assert tree_eq(out, out_expect, [a1, a2])
def test_jacobian_einsum(backendopt):
    """Check both Jacobians of y = einsum("ikl,jkl->ijk", x1, x2) on each backend."""
    for backend in backendopt:
        T.set_backend(backend)

        x1 = ad.Variable(name="x1", shape=[3, 3, 3])
        x2 = ad.Variable(name="x2", shape=[3, 3, 3])
        y = ad.einsum("ikl,jkl->ijk", x1, x2)

        jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
        executor = ad.Executor([y, jacobian_x1, jacobian_x2])

        x1_val = T.random((3, 3, 3))
        x2_val = T.random((3, 3, 3))
        feed = {x1: x1_val, x2: x2_val}
        y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict=feed)

        # Each Jacobian pairs identities on the kept indices with the other
        # operand.
        eye = T.identity(3)
        expected_jacobian_x1_val = T.einsum("im,kn,jno->ijkmno", eye, eye,
                                            x2_val)
        expected_jacobian_x2_val = T.einsum("jm,kn,ino->ijkmno", eye, eye,
                                            x1_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.einsum("ikl,jkl->ijk", x1_val, x2_val))
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_einsum_multiuse(backendopt):
    r"""Test manual fusion of two einsums that share an input.

    ::

        A   B   inputs
        |\  |
        | \ |
        |  \|
        |   C
        |  /
        | /
        output

    Note that here we assume A is split into 2 vars by some other operations.
    """
    # NOTE: the docstring is a raw string because the diagram contains
    # backslashes that would otherwise be parsed as (invalid) escape sequences.
    for datatype in backendopt:
        T.set_backend(datatype)

        # `a` and `a_copy` are the two variables standing in for input A.
        a = ad.Variable(name="a1", shape=[3, 2])
        a_copy = ad.Variable(name="a2", shape=[3, 2])
        b = ad.Variable(name="b", shape=[2, 3])

        c = ad.einsum('ik,kj->ij', a, b)
        output = ad.einsum('ik,ij->kj', a_copy, c)

        # New graph
        out_new = fuse_einsums(output, [a, a_copy, b])
        assert tree_eq(output, out_new, [a, a_copy, b])
def test_add_jacobian_scalar(backendopt):
    """Check the Jacobian of y = x1 + x2 w.r.t. x2 for scalar inputs on each backend."""
    for backend in backendopt:
        T.set_backend(backend)

        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        y = x1 + x2

        (jacobian_x2,) = ad.jacobians(y, [x2])
        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(1.)
        feed = {x1: x1_val, x2: x2_val}
        y_val, jacobian_x2_val = executor.run(feed_dict=feed)

        # For scalar addition the expected Jacobian is the constant 1.
        expected_jacobian_x2_val = T.tensor(1.)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_einsum_fuse_graph(backendopt):
    r"""[Fuse einsum used twice]

    This case is rather subtle. We want to auto fuse::

        A   B   C
        |    \ /
        |     es
        |    /|
        |  /  |
        es    |
          \   |
            es

    Here es is einsum.
    """
    # NOTE: the docstring is a raw string because the diagram contains
    # backslashes that would otherwise be parsed as (invalid) escape sequences.
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a", shape=[3, 3])
        b = ad.Variable(name="b", shape=[3, 2])
        c = ad.Variable(name="c", shape=[2, 3])

        BC = ad.einsum('ik, kj->ij', b, c)  # 3x3
        ABC = ad.einsum('ik, kj->ij', a, BC)  # 3x3
        out = ad.einsum('jk, ki->ji', ABC, BC)  # 3x3

        linearize(out)
        # Exactly one einsum subtree is expected; the unpacking enforces it.
        tree, = find_sub_einsumtree(PseudoNode(out))
        # Use fresh names instead of rebinding `out` to a different object.
        tree_out, tree_ins = tree
        new_z = fuse_einsums(tree_out.node, tree_ins)

        assert tree_eq(tree_out.node, new_z, [a, b, c])
def test_logistic_loss():
    """Check the logistic log-likelihood and its gradient w.r.t. w against NumPy.

    The graph computes ``L = y*log(h) + (1-y)*log(1-h)`` with
    ``h = 1 / (1 + exp(-sum(w * x)))``. Both the loss and the gradient are
    compared with a 1e-9 absolute tolerance, because the autodiff and NumPy
    results are floating-point values computed along different code paths.
    """
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    executor = ad.Executor([L, w_grad])

    y_val = 0
    x_val = np.array([2, 3, 4])
    w_val = np.random.random(3)
    L_val, w_grad_val = executor.run(
        feed_dict={x: x_val, y: y_val, w: w_val})

    logistic = 1 / (1 + np.exp(-np.sum(w_val * x_val)))
    expected_L_val = (y_val * np.log(logistic) +
                      (1 - y_val) * np.log(1 - logistic))
    expected_w_grad = (y_val - logistic) * x_val

    print(L_val)
    print(expected_L_val)
    print(expected_w_grad)
    print(w_grad_val)

    # The loss was previously compared with exact `==`; use the same
    # tolerance as the gradient check so rounding noise cannot fail the test.
    assert np.sum(np.abs(expected_L_val - L_val)) < 1E-9
    assert np.sum(np.abs(expected_w_grad - w_grad_val)) < 1E-9
def __init__(self, number_of_units, number_of_layers, input_dim, output_dim):
    """Create the parameter variables and the list of lstm_layer objects.

    Args:
        number_of_units: int, at least 2.
        number_of_layers: int, at least 0; this many layers are created in
            addition to the first one.
        input_dim: positive int.
        output_dim: positive int.

    Raises:
        AssertionError: if any argument violates the constraints above.
    """
    # Checks run in the same order as the original single assert.
    assert isinstance(number_of_units, int) and number_of_units >= 2
    assert number_of_layers >= 0 and isinstance(number_of_layers, int)
    assert isinstance(input_dim, int) and input_dim > 0
    assert isinstance(output_dim, int) and output_dim > 0

    self.number_of_units = number_of_units
    self.number_of_layers = number_of_layers
    self.input_dim = input_dim
    self.output_dim = output_dim

    # Parameter variables; values come from the external `xavier` helper.
    self._W = ad.Variable(xavier(self.input_dim, self.number_of_units),
                          name="W")
    self._B = ad.Variable(xavier(1, self.number_of_units), name="B")
    self._Wf = ad.Variable(xavier(self.number_of_units, self.output_dim),
                           name="Wf")
    self._Bf = ad.Variable(xavier(1, self.output_dim), name="Bf")

    self.layer1 = lstm_layer(self.input_dim, self.number_of_units)
    self.layers = [self.layer1]
    # NOTE(review): the extra layers are also built with input_dim as their
    # first argument - confirm this is intended for stacked layers.
    self.layers.extend(
        lstm_layer(self.input_dim, self.number_of_units)
        for _ in range(self.number_of_layers))
def test_add_3():
    """Check that parsing the name of A + B + B yields a node with the same name."""
    first = ad.Variable(name="A", shape=[3])
    second = ad.Variable(name="B", shape=[3])
    total = first + second + second

    reparsed = AutodiffParser.parse(total.name, [first, second])
    assert reparsed.name == total.name