Example 1
def test_einsum():

    A = ad.Variable(name="A", shape=[3, 2])
    B = ad.Variable(name="B", shape=[2, 3])
    y = ad.einsum('ik,kj->ij', A, B)

    assert AutodiffParser.parse(y.name, [A, B]).name == y.name
Example 2
def test_einsum_multiuse_auto_copy(backendopt):
    """
        Test auto-linearization and auto-fusion.
        A    B   inputs 
        |\   |
        | \  |
        |  \ |
        |   C
        |  / 
        | /
        output

        Next: we would need to autoprune.
    """

    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a1", shape=[3, 2])
        b = ad.Variable(name="b", shape=[2, 3])

        c = ad.einsum('ik,kj->ij', a, b)
        output = ad.einsum('ik,ij->kj', a, c)

        linearize(output)
        all_nodes = find_topo_sort([output])
        cloned_nodes = [
            tmp for tmp in all_nodes if isinstance(tmp, ad.CloneNode)
        ]

        out_new = fuse_einsums(output, [*cloned_nodes, b])
        # Check that every input has now been fused: none of out_new's inputs is an einsum node.
        assert all([not isinstance(x, ad.EinsumNode) for x in out_new.inputs])

        assert tree_eq(output, out_new, [*cloned_nodes, b])
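For reference, here is a minimal NumPy-only sketch of what the fusion above computes; the fused subscripts 'ik,il,lj->kj' are derived by hand for this sketch and are not taken from the library:

import numpy as np

a = np.random.randn(3, 2)
b = np.random.randn(2, 3)

# two-step contraction, as built in the test above
two_step = np.einsum('ik,ij->kj', a, np.einsum('ik,kj->ij', a, b))
# single fused contraction over both uses of a
fused = np.einsum('ik,il,lj->kj', a, a, b)
assert np.allclose(two_step, fused)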
Example 3
    def __init__(self, dim, size, rank):

        cg = CharacterGetter()

        self.X = ad.Variable(name='X', shape=[size for _ in range(dim)])
        X_subscripts = "".join([cg.getchar() for _ in range(dim)])

        self.core = ad.Variable(name='core', shape=[rank for _ in range(dim)])
        core_subscripts = "".join([cg.getchar() for _ in range(dim)])

        self.A_list = []
        A_list_subscripts = []
        for i in range(dim):
            node = ad.Matrix(name=f'A{i}',
                             shape=[size, rank],
                             orthonormal='row')
            self.A_list.append(node)
            A_list_subscripts.append(f"{X_subscripts[i]}{core_subscripts[i]}")

        input_subs = ','.join([
            subscripts for subscripts in A_list_subscripts + [core_subscripts]
        ])
        self.einsum_subscripts = input_subs + '->' + X_subscripts

        self.output = ad.einsum(self.einsum_subscripts,
                                *(self.A_list + [self.core]))

        self.residual = self.output - self.X

        self.intermediates, self.losses = [], []
        for i in range(dim):
            intermediate, loss = self._build_graph_w_intermediate(i)
            self.intermediates.append(intermediate)
            self.losses.append(loss)
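As a sanity sketch of the subscripts this constructor assembles (a Tucker-style contraction of the core with one factor matrix per mode), here is a hypothetical dim == 3 instance in plain NumPy; the concrete index letters are an assumption for illustration:

import numpy as np

dim, size, rank = 3, 4, 2
core = np.random.randn(*([rank] * dim))
A = [np.random.randn(size, rank) for _ in range(dim)]
# each factor maps a size index to a rank index; the core joins the rank indices
X = np.einsum('ia,jb,kc,abc->ijk', *A, core)
assert X.shape == (size,) * dim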
Example 4
def test_tree_distribution_ppE(dist_op, backendopt):
    """
        [Distributive] ((A + B) + C) * G

        will produce
        
        AG + BG + CG

        Note that (A+B) has parent (A + B) + C.
    """

    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a", shape=[3, 2])
        b = ad.Variable(name="b", shape=[3, 2])
        c = ad.Variable(name="c", shape=[3, 2])
        g = ad.Variable(name="g", shape=[2, 2])

        output = ad.einsum('ik,kk->ik', dist_op(dist_op(a, b), c), g)

        new_output = distribute_tree(output)
        assert isinstance(new_output, dist_op)

        assert tree_eq(output, new_output, [a, b, c, g])
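The rewrite is justified by the multilinearity of einsum; a quick NumPy check of the identity for this test's shapes, using addition as the dist_op:

import numpy as np

a, b, c = (np.random.randn(3, 2) for _ in range(3))
g = np.random.randn(2, 2)
# einsum is linear in each operand, so the contraction distributes over sums
lhs = np.einsum('ik,kk->ik', a + b + c, g)
rhs = sum(np.einsum('ik,kk->ik', x, g) for x in (a, b, c))
assert np.allclose(lhs, rhs)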
Example 5
def test_grad_of_grad():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = x2 * x2 + x2 * x3

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])
    grad_x2_x2, grad_x2_x3 = ad.gradients(grad_x2, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3, grad_x2_x2, grad_x2_x3])
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    y_val, grad_x2_val, grad_x3_val, grad_x2_x2_val, grad_x2_x3_val = executor.run(
        feed_dict={x2: x2_val, x3: x3_val})

    expected_yval = x2_val * x2_val + x2_val * x3_val
    expected_grad_x2_val = 2 * x2_val + x3_val
    expected_grad_x3_val = x2_val
    expected_grad_x2_x2_val = 2 * np.ones_like(x2_val)
    expected_grad_x2_x3_val = 1 * np.ones_like(x2_val)

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_yval)
    assert np.array_equal(grad_x2_val, expected_grad_x2_val)
    assert np.array_equal(grad_x3_val, expected_grad_x3_val)
    assert np.array_equal(grad_x2_x2_val, expected_grad_x2_x2_val)
    assert np.array_equal(grad_x2_x3_val, expected_grad_x2_x3_val)
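The expected second derivatives above follow by hand: for y = x2*x2 + x2*x3, dy/dx2 = 2*x2 + x3, so d2y/dx2^2 = 2 and d2y/(dx2 dx3) = 1. A throwaway finite-difference check of that derivation:

grad_x2 = lambda x2, x3: 2 * x2 + x3  # dy/dx2, derived by hand
eps = 1e-6
x2, x3 = 2.0, 3.0
print((grad_x2(x2 + eps, x3) - grad_x2(x2, x3)) / eps)  # ~2.0
print((grad_x2(x2, x3 + eps) - grad_x2(x2, x3)) / eps)  # ~1.0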
Example 6
def test_add_mul_mix_2():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    x4 = ad.Variable(name="x4")
    y = x1 + x2 * x3 * x4

    grad_x1, grad_x2, grad_x3, grad_x4 = ad.gradients(y, [x1, x2, x3, x4])

    executor = ad.Executor([y, grad_x1, grad_x2, grad_x3, grad_x4])
    x1_val = 1 * np.ones(3)
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    x4_val = 4 * np.ones(3)
    y_val, grad_x1_val, grad_x2_val, grad_x3_val, grad_x4_val = executor.run(
        feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val,
            x4: x4_val
        })

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x1_val + x2_val * x3_val * x4_val)
    assert np.array_equal(grad_x1_val, np.ones_like(x1_val))
    assert np.array_equal(grad_x2_val, x3_val * x4_val)
    assert np.array_equal(grad_x3_val, x2_val * x4_val)
    assert np.array_equal(grad_x4_val, x2_val * x3_val)
Example 7
def test_matmul_two_vars():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = ad.matmul_op(x2, x3)

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3])
    x2_val = np.array([[1, 2], [3, 4], [5, 6]])  # 3x2
    x3_val = np.array([[7, 8, 9], [10, 11, 12]])  # 2x3

    y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x2: x2_val,
        x3: x3_val
    })

    expected_yval = np.matmul(x2_val, x3_val)
    expected_grad_x2_val = np.matmul(
        np.ones_like(expected_yval), np.transpose(x3_val))
    expected_grad_x3_val = np.matmul(
        np.transpose(x2_val), np.ones_like(expected_yval))

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_yval)
    assert np.array_equal(grad_x2_val, expected_grad_x2_val)
    assert np.array_equal(grad_x3_val, expected_grad_x3_val)
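A finite-difference check of the reverse-mode matmul formula used above (with an all-ones upstream gradient, dL/dx2 = ones @ x3.T); a standalone NumPy sketch:

import numpy as np

x2 = np.array([[1., 2.], [3., 4.], [5., 6.]])
x3 = np.array([[7., 8., 9.], [10., 11., 12.]])
analytic = np.ones((3, 3)) @ x3.T
eps = 1e-6
fd = np.zeros_like(x2)
for i in range(x2.shape[0]):
    for j in range(x2.shape[1]):
        d = np.zeros_like(x2)
        d[i, j] = eps
        # directional derivative of sum(x2 @ x3) along one basis direction
        fd[i, j] = (np.sum((x2 + d) @ x3) - np.sum(x2 @ x3)) / eps
assert np.allclose(analytic, fd, atol=1e-3)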
Example 8
def test_einsum_gen_corner_case(backendopt):
    """
    Note: NumPy's contraction-path search cannot find the optimal path for
        this expression; it will output the same expression as the input.
    --------    E    --------
    |       |       |       |
    a       b       c       d
    |       |       |       |
    A - e - B - f - C - g - D
    |       |       |       |
    h       i       j       k
    |       |       |       |
    """
    size = 5
    A = ad.Variable(name="A", shape=[size, size, size])
    B = ad.Variable(name="B", shape=[size, size, size, size])
    C = ad.Variable(name="C", shape=[size, size, size, size])
    D = ad.Variable(name="D", shape=[size, size, size])
    E = ad.Variable(name="E", shape=[size, size, size, size])

    output = ad.einsum('aeh,bfie,cgjf,dgk,abcd->hijk', A, B, C, D, E)
    new_output = generate_optimal_tree(output)

    for node in find_topo_sort([new_output]):
        if not isinstance(node, ad.VariableNode):
            assert (len(node.inputs) == 2)
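For comparison with the docstring's claim, NumPy's own path search can be inspected with np.einsum_path; whether its proposed path matches the hand-built binary tree is exactly what this test guards. A sketch with the same shapes:

import numpy as np

size = 5
shapes = [(size,) * 3, (size,) * 4, (size,) * 4, (size,) * 3, (size,) * 4]
ops = [np.random.randn(*s) for s in shapes]
path, info = np.einsum_path('aeh,bfie,cgjf,dgk,abcd->hijk', *ops,
                            optimize='optimal')
print(path)  # e.g. ['einsum_path', (0, 1), ...]; each step pairs two operands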
Example 9
def test_add_jacobian(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        y = x1 + x2

        jacobian_x2, = ad.jacobians(y, [x2])

        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        y_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })

        I = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
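The expected Jacobian here is the rank-4 identity delta(a,c)*delta(b,d): contracted with any perturbation of x2 it returns the perturbation unchanged, since y = x1 + x2 is the identity map in x2. A NumPy check:

import numpy as np

I = np.eye(2)
J = np.einsum('ac,bd->abcd', I, I)
dx = np.random.randn(2, 2)
assert np.allclose(np.einsum('abcd,cd->ab', J, dx), dx)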
Example 10
def run_problem4():
    print("-" * 18 + " Problem 4 " + "-" * 18)
    x1 = ad.Variable()
    x2 = ad.Variable()
    x3 = ad.Variable()
    y = ((ad.sin(x1 + 1) + ad.cos(2 * x2)) * ad.tan(ad.log(x3)) +
         (ad.sin(x2 + 1) + ad.cos(2 * x1)) * ad.exp(1 + ad.sin(x3)))

    x1_f = np.random.rand()
    x2_f = np.random.rand()
    x3_f = np.random.rand()

    x1_v = noise_like(x1_f)
    x2_v = noise_like(x2_f)
    x3_v = noise_like(x3_f)

    f, grad = ad.func(y, {x1: x1_f, x2: x2_f, x3: x3_f}, get_gradient=True)
    f_np = ((np.sin(x1_f + 1) + np.cos(2 * x2_f)) * np.tan(np.log(x3_f)) +
            (np.sin(x2_f + 1) + np.cos(2 * x1_f)) * np.exp(1 + np.sin(x3_f)))
    print("Function value by autodiff =", f)
    print("Function value by numpy    =", f_np)

    lhs = (ad.func(y, {
        x1: x1_f + t * x1_v,
        x2: x2_f + t * x2_v,
        x3: x3_f + t * x3_v
    }) - f) / t
    rhs = (np.sum(grad[x1] * x1_v) + np.sum(grad[x2] * x2_v) +
           np.sum(grad[x3] * x3_v))
    print("(V(w + tv)-V(w)) / t       =", lhs)
    print("<dV(w), v>                 =", rhs)
    print("|lfs - rhs| / |lhs|        =", np.abs(lhs - rhs) / np.abs(lhs))
Example 11
def test_get_common_ancestor(backendopt):

    A = ad.Variable(name="A", shape=[3, 2])

    X1 = ad.Variable(name="X1", shape=[3, 2, 2])
    X2 = ad.Variable(name="X2", shape=[3, 3, 2, 2])
    X3 = ad.Variable(name="X3", shape=[3, 2, 2])
    """
        The network and indices positions are as follows:

                      g - A
                          |
        c        d        e
        |        |        |
        X1 - a - X2 - b - X3
        |        |        |
        h        i        j
                          |
                      l - A

    """
    einsum_node = ad.einsum('lj,ge,bej,abdi,ach->cdhigl', A, A, X3, X2, X1)
    opt_einsum = generate_optimal_tree(einsum_node)
    sub_einsum = get_common_ancestor(opt_einsum, einsum_node.inputs, A)

    assert sorted(get_all_inputs(sub_einsum),
                  key=lambda node: node.name) == sorted(
                      [A, A, X3], key=lambda node: node.name)
Example 12
def run_problem3():
    print("-" * 18 + " Problem 3 " + "-" * 18)
    x = ad.Variable()
    w1 = ad.Variable()
    w2 = ad.Variable()
    y = ad.average(ad.matmul(ad.relu(ad.matmul(x, w1)), w2) + x)

    x_f = np.random.randn(1, 64)
    w1_f = np.random.randn(64, 128)
    w2_f = np.random.randn(128, 64)

    x_v = noise_like(x_f)
    w1_v = noise_like(w1_f)
    w2_v = noise_like(w2_f)

    f, grad = ad.func(y, {x: x_f, w1: w1_f, w2: w2_f}, get_gradient=True)
    f_np = np.average(
        np.matmul(np.maximum(np.matmul(x_f, w1_f), 0), w2_f) + x_f)
    print("Function value by autodiff =", f)
    print("Function value by numpy    =", f_np)

    lhs = (ad.func(y, {
        x: x_f + t * x_v,
        w1: w1_f + t * w1_v,
        w2: w2_f + t * w2_v
    }) - f) / t
    rhs = (np.sum(grad[x] * x_v) + np.sum(grad[w1] * w1_v) +
           np.sum(grad[w2] * w2_v))
    print("(V(w + tv)-V(w)) / t       =", lhs)
    print("<dV(w), v>                 =", rhs)
    print("|lfs - rhs| / |lhs|        =", np.abs(lhs - rhs) / np.abs(lhs))
Example 13
def auto_diff_lr():
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    # Note: the implementation below can incur large numerical error in some
    # cases, so real systems usually provide higher-order operators to
    # reduce numerical error.

    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    executor = ad.Executor([L, w_grad])

    N = 100
    X_val, Y_val = gen_2d_data(N)
    w_val = np.ones(3)

    plot(N, X_val, Y_val, w_val)
    test_accuracy(w_val, X_val, Y_val)
    alpha = 0.01
    max_iters = 300
    for iteration in range(max_iters):
        acc_L_val = 0
        for i in range(N):
            x_val = X_val[i]
            y_val = np.array(Y_val[i])
            L_val, w_grad_val = executor.run(feed_dict={
                w: w_val,
                x: x_val,
                y: y_val
            })
            w_val += alpha * w_grad_val
            acc_L_val += L_val
        print("iter = %d, likelihood = %s, w = %s" % (iteration, acc_L_val, w_val))
    test_accuracy(w_val, X_val, Y_val)
    plot(N, X_val, Y_val, w_val, True)
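The math behind the loop: with h = sigmoid(w . x) and L = y*log(h) + (1-y)*log(1-h), the gradient is dL/dw = (y - h) * x, so w_val += alpha * w_grad_val ascends the log-likelihood. A plain NumPy rendering of one step:

import numpy as np

w = np.ones(3)
x = np.array([2., 3., 4.])
y = 1.0
h = 1 / (1 + np.exp(-np.dot(w, x)))  # sigmoid
grad = (y - h) * x                   # dL/dw, derived by hand
w = w + 0.01 * grad                  # one gradient-ascent step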
Example 14
def test_executor_dependent(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        A = ad.Variable(name="A", shape=[3, 3])
        B = ad.Variable(name="B", shape=[3, 3])
        AA = ad.einsum('ab,ab->', A, A)
        BB = ad.einsum('ab,ab->', B, B)
        AB = ad.einsum('ab,ab->', A, B)

        out_A = AA + AB
        out_B = AB + AA

        executor = ad.Executor({out_A, out_B})

        data = gen_dict([A, B])
        A_val, = executor.run(feed_dict=data,
                              reset_graph=False,
                              out_nodes=[out_A])
        data2 = gen_dict([A])
        data2.update({B: data[B]})
        B_val, = executor.run(feed_dict=data2, out_nodes=[out_B])
        # Check that A's value is not reused in the computation of B_val.
        assert A_val != B_val
Example 15
    def setUp(self):
        """
        Creates a true multi-layer perceptron with one hidden layer.

        """
        np.random.seed(1337)

        batch_size = 16
        input_size = 20
        hidden_size = 40
        output_size = 5
        self.x_val = np.random.randn(batch_size, input_size)
        self.w1_val = np.random.randn(input_size, hidden_size)
        self.w2_val = np.random.randn(hidden_size, output_size)

        self.tf_x = tf.constant(self.x_val)
        self.tf_w1 = tf.constant(self.w1_val)
        self.tf_w2 = tf.constant(self.w2_val)
        self.tf_h = tf.nn.sigmoid(self.tf_x @ self.tf_w1)
        self.tf_o = tf.nn.sigmoid(self.tf_h @ self.tf_w2)

        self.my_x = ad.Variable(self.x_val, name="x_val")
        self.my_w1 = ad.Variable(self.w1_val, name="w1_val")
        self.my_w2 = ad.Variable(self.w2_val, name="w2_val")
        self.var_h = ad.Sigmoid(self.my_x @ self.my_w1)
        self.var_o = ad.Sigmoid(self.var_h @ self.my_w2)

        self.my_graph = self.var_o
        self.tf_graph = self.tf_o
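The reference computation both graphs encode, as a minimal NumPy sketch (shapes taken from setUp above):

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

x = np.random.randn(16, 20)
w1 = np.random.randn(20, 40)
w2 = np.random.randn(40, 5)
out = sigmoid(sigmoid(x @ w1) @ w2)  # hidden layer, then output layer
assert out.shape == (16, 5)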
Example 16
def test_sub_jacobian_w_chain(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 - x2
        z = x3 - y

        jacobian_x2, = ad.jacobians(z, [x2])

        executor = ad.Executor([z, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        x3_val = T.tensor([[1, 1], [1, 1]])
        z_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        I = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)

        assert isinstance(z, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(z_val, x3_val - x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
Example 17
    def setUp(self):
        """
        Graph looks like this:
        x_val    w_val
         \  /
         MatMul
          |
        Sigmoid

        x_val.shape = (2, 3)
        w_val.shape = (3, 5)
        MatMul.shape = (2, 5)
        Sigmoid.shape = (2, 5)

        """
        np.random.seed(1337)
        self.x_val = np.random.randn(2, 3)
        self.w_val = np.random.randn(3, 5)
        self.b_val = np.random.randn(5)

        self.tf_x = tf.constant(self.x_val, dtype=tf.float64)
        self.tf_w = tf.constant(self.w_val, dtype=tf.float64)
        self.tf_b = tf.constant(self.b_val, dtype=tf.float64)
        self.tf_mul = self.tf_x @ self.tf_w + self.tf_b
        self.tf_graph = tf.nn.sigmoid(self.tf_mul)

        self.my_x = ad.Variable(self.x_val, name="x_val")
        self.my_w = ad.Variable(self.w_val, name="w_val")
        self.my_b = ad.Variable(self.b_val, name="b_val")

        self.var_mul = self.my_x @ self.my_w + self.my_b
        self.my_graph = ad.Sigmoid(self.var_mul)
Example 18
def test_three_mul_jacobian(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 * x2 * x3

        jacobian_x1, = ad.jacobians(y, [x1])
        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor([[1., 2.], [3., 4.]])
        x2_val = T.tensor([[5., 6.], [7., 8.]])
        x3_val = T.tensor([[9., 10.], [11., 12.]])

        y_val, jacobian_x1_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        I = T.identity(2)
        expected_jacobian_x1_val = T.einsum("ai,bj,ij,ij->abij", I, I, x2_val,
                                            x3_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
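The expected Jacobian delta(a,i)*delta(b,j)*x2[i,j]*x3[i,j] says that a perturbation of x1 is scaled elementwise by x2 * x3, which is the hand derivative of y = x1 * x2 * x3. A NumPy check of that action:

import numpy as np

I = np.eye(2)
x2 = np.array([[5., 6.], [7., 8.]])
x3 = np.array([[9., 10.], [11., 12.]])
J = np.einsum('ai,bj,ij,ij->abij', I, I, x2, x3)
dx = np.random.randn(2, 2)
assert np.allclose(np.einsum('abij,ij->ab', J, dx), dx * x2 * x3)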
Example 19
def test_add_mul_mix_3():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    z = x2 * x2 + x2 + x3 + 3
    y = z * z + x3

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3])
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x2: x2_val,
        x3: x3_val
    })

    z_val = x2_val * x2_val + x2_val + x3_val + 3
    expected_yval = z_val * z_val + x3_val
    expected_grad_x2_val = 2 * (x2_val * x2_val + x2_val + x3_val + 3) * (
        2 * x2_val + 1)
    expected_grad_x3_val = 2 * (x2_val * x2_val + x2_val + x3_val + 3) + 1
    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_yval)
    assert np.array_equal(grad_x2_val, expected_grad_x2_val)
    assert np.array_equal(grad_x3_val, expected_grad_x3_val)
Example 20
def test_three_mul_jacobian_scalars(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        x3 = ad.Variable(name="x3", shape=[])
        y = x1 * x2 * x3

        jacobian_x1, = ad.jacobians(y, [x1])
        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(2.)
        x3_val = T.tensor(3.)

        y_val, jacobian_x1_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        expected_jacobian_x1_val = x2_val * x3_val

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
Example 21
def test_tree_distribution_two_layers(dist_op, backendopt):
    """
        [Distributive] ((A + B) * G) * C

        will produce
        
        AGC + BGC

        Note that (A+B) * G is contracted first.
    """

    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a", shape=[3, 2])
        b = ad.Variable(name="b", shape=[3, 2])
        g = ad.Variable(name="g", shape=[2, 2])
        c = ad.Variable(name="c", shape=[2, 3])

        interm = ad.einsum('ik, kk->ik', dist_op(a, b), g)
        output = ad.einsum('ik,kj->ij', interm, c)

        new_output = distribute_tree(output)
        assert isinstance(new_output, dist_op)

        assert tree_eq(output, new_output, [a, b, c, g])
Example 22
def test_mul_jacobian_one_scalar(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[2, 2])

        # test both cases: multiplying by the scalar on the left and on the right
        for y in [x1 * x2, x2 * x1]:

            jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
            executor = ad.Executor([y, jacobian_x1, jacobian_x2])

            x1_val = T.tensor(2.)
            x2_val = T.tensor([[5., 6.], [7., 8.]])
            y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
                x1: x1_val,
                x2: x2_val
            })

            I = T.identity(2)
            expected_jacobian_x1_val = T.einsum("ai,bj,ij->ab", I, I, x2_val)
            expected_jacobian_x2_val = x1_val * T.einsum("ai,bj->abij", I, I)

            assert isinstance(y, ad.Node)
            assert T.array_equal(y_val, x1_val * x2_val)
            assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
            assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
Example 23
def test_prune_identity(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        a1 = ad.Variable(name="a1", shape=[3, 3])
        a2 = ad.Variable(name="a2", shape=[3, 3])
        i1 = ad.identity(3)
        i2 = ad.identity(3)
        i3 = ad.identity(3)

        out = ad.einsum("ab,cd,ac,be,ef->abdf", a1, a2, i1, i2, i3)
        prune_identity_nodes(out)
        """
        Explanation to the einsum above:
        The identity node i1 means that a and c should be the same dim.
        we can get rid of i1 and rewrite the expr as 
        ad.einsum("ab,ad,be,ef->abdf", a1, a2, i2, i3).
        we can also combine i2 and i3 cuz e is useless. Therefore,
        we can rewrite the expr as
        ad.einsum("ab,ad,bf->abdf", a1, a2, i2).
        """
        out_expect = ad.einsum("ab,ad,bf->abdf", a1, a2, i2)
        assert len(out.inputs) == 3

        assert tree_eq(out, out_expect, [a1, a2])
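The docstring's rewrite can be verified directly in NumPy for these shapes:

import numpy as np

a1 = np.random.randn(3, 3)
a2 = np.random.randn(3, 3)
I = np.eye(3)
full = np.einsum("ab,cd,ac,be,ef->abdf", a1, a2, I, I, I)
pruned = np.einsum("ab,ad,bf->abdf", a1, a2, I)
assert np.allclose(full, pruned)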
Example 24
def test_jacobian_einsum(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        x1 = ad.Variable(name="x1", shape=[3, 3, 3])
        x2 = ad.Variable(name="x2", shape=[3, 3, 3])
        y = ad.einsum("ikl,jkl->ijk", x1, x2)

        jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
        executor = ad.Executor([y, jacobian_x1, jacobian_x2])

        x1_val = T.random((3, 3, 3))
        x2_val = T.random((3, 3, 3))
        y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
        })

        I = T.identity(3)
        expected_jacobian_x1_val = T.einsum("im,kn,jno->ijkmno", I, I, x2_val)
        expected_jacobian_x2_val = T.einsum("jm,kn,ino->ijkmno", I, I, x1_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.einsum("ikl,jkl->ijk", x1_val, x2_val))
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
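A NumPy check that the expected Jacobian of y = einsum("ikl,jkl->ijk", x1, x2) w.r.t. x1 acts on a perturbation the way the (linear-in-x1) forward map does:

import numpy as np

I = np.eye(3)
x1 = np.random.randn(3, 3, 3)
x2 = np.random.randn(3, 3, 3)
J1 = np.einsum("im,kn,jno->ijkmno", I, I, x2)
dx = np.random.randn(3, 3, 3)
assert np.allclose(np.einsum("ijkmno,mno->ijk", J1, dx),
                   np.einsum("ikl,jkl->ijk", dx, x2))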
Example 25
def test_einsum_multiuse(backendopt):
    """
        Test manual fusion.
        A    B   inputs 
        |\   |
        | \  |
        |  \ |
        |   C
        |  / 
        | /
        output

        Note that here we assume A has been split into two variables by some other operation.
    """

    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a1", shape=[3, 2])
        a_copy = ad.Variable(name="a2", shape=[3, 2])
        b = ad.Variable(name="b", shape=[2, 3])

        c = ad.einsum('ik,kj->ij', a, b)
        output = ad.einsum('ik,ij->kj', a_copy, c)
        # New graph
        out_new = fuse_einsums(output, [a, a_copy, b])
        assert tree_eq(output, out_new, [a, a_copy, b])
Example 26
def test_add_jacobian_scalar(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        y = x1 + x2

        jacobian_x2, = ad.jacobians(y, [x2])

        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(1.)
        y_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })

        expected_jacobian_x2_val = T.tensor(1.)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
Example 27
def test_einsum_fuse_graph(backendopt):
    """
        [Fuse einsum used twice]
        This case is rather subtle.
        We want to auto-fuse
            A   B   C 
            |    \ /  
            |     es  
            |    /|   
            |  /  |   
            es    |   
              \   |   
                es
        Here es is einsum.
    """

    for datatype in backendopt:
        T.set_backend(datatype)
        a = ad.Variable(name="a", shape=[3, 3])
        b = ad.Variable(name="b", shape=[3, 2])
        c = ad.Variable(name="c", shape=[2, 3])

        BC = ad.einsum('ik, kj->ij', b, c)  # 3x3

        ABC = ad.einsum('ik, kj->ij', a, BC)  # 3x3

        out = ad.einsum('jk, ki->ji', ABC, BC)  # 3x3

        linearize(out)
        tree, = find_sub_einsumtree(PseudoNode(out))
        out, ins = tree
        new_z = fuse_einsums(out.node, ins)

        assert tree_eq(out.node, new_z, [a, b, c])
Example 28
def test_logistic_loss():
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    executor = ad.Executor([L, w_grad])

    y_val = 0
    x_val = np.array([2, 3, 4])
    w_val = np.random.random(3)

    L_val, w_grad_val = executor.run(feed_dict={x: x_val, y: y_val, w: w_val})

    logistic = 1 / (1 + np.exp(-np.sum(w_val * x_val)))
    expected_L_val = y_val * np.log(logistic) + (1 - y_val) * np.log(1 - logistic)
    expected_w_grad = (y_val - logistic) * x_val

    print(L_val)
    print(expected_L_val)
    print(expected_w_grad)
    print(w_grad_val)

    assert np.isclose(expected_L_val, L_val)
    assert np.sum(np.abs(expected_w_grad - w_grad_val)) < 1E-9
Example 29
    def __init__(self, number_of_units, number_of_layers, input_dim,
                 output_dim):
        assert isinstance(number_of_units, int) and number_of_units >= 2
        assert isinstance(number_of_layers, int) and number_of_layers >= 0
        assert isinstance(input_dim, int) and input_dim > 0
        assert isinstance(output_dim, int) and output_dim > 0

        self.number_of_units = number_of_units
        self.number_of_layers = number_of_layers
        self.input_dim = input_dim
        self.output_dim = output_dim
        #self.X=X

        self._W = ad.Variable(xavier(self.input_dim, self.number_of_units),
                              name="W")
        self._B = ad.Variable(xavier(1, self.number_of_units), name="B")
        self._Wf = ad.Variable(xavier(self.number_of_units, self.output_dim),
                               name="Wf")
        self._Bf = ad.Variable(xavier(1, self.output_dim), name="Bf")

        self.layer1 = lstm_layer(self.input_dim, self.number_of_units)
        self.layers = []
        self.layers.append(self.layer1)

        for i in range(self.number_of_layers):
            self.layers.append(lstm_layer(self.input_dim,
                                          self.number_of_units))
Example 30
def test_add_3():

    A = ad.Variable(name="A", shape=[3])
    B = ad.Variable(name="B", shape=[3])
    y = A + B + B

    assert AutodiffParser.parse(y.name, [A, B]).name == y.name