Example #1
def test_einsum_bkwd1(num, optimize, data):
    x = mg.random.rand(num)
    y_shape = data.draw(broadcastable_shapes(x.shape, min_dims=1, max_dims=1))
    y = Tensor(np.random.rand(*y_shape))

    grad = data.draw(st.floats(-100, 100))
    o = einsum("i, i", x, y, optimize=optimize)
    o.backward(grad)

    def f(x, y):
        return np.einsum("i, i", x, y)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-5, rtol=1e-5)
    assert_allclose(y.grad, dy, atol=1e-5, rtol=1e-5)

    o.null_gradients()
    assert x.grad is None
    assert y.grad is None

    # test broadcasting in reverse direction
    o = einsum("i, i", y, x, optimize=optimize)
    o.backward(grad)

    assert x.grad is not None
    assert y.grad is not None

    dy, dx = numerical_gradient_full(f, y.data, x.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-5, rtol=1e-5)
    assert_allclose(y.grad, dy, atol=1e-5, rtol=1e-5)

    o.null_gradients()
Example #2
def compare_backprop(*operands, atol=1e-5, rtol=1e-5, optimize=False):
    """ Compare back-propagation through mygrad-einsum, and compare
        against numerical derivative"""
    if isinstance(operands[0], str):
        # operands form: "ijk, ijk", x, y
        script = operands[0]
        vars = operands[1:]
        vars = tuple(np.asarray(i).astype(float) for i in vars)
        tensors = tuple(Tensor(i) for i in vars)

        def f(*args):
            return np.einsum(script, *args)

        out = einsum(script, *tensors, optimize=optimize)
    else:
        # operands form: op0, sublist0, op1, sublist1, ..., [sublistout]
        end = -1 if len(operands) % 2 else None  # -1 if sublistout is included
        vars = tuple(np.asarray(i).astype(float) for i in operands[:end:2])
        tensors = tuple(Tensor(i) for i in vars)

        def f(*args):
            x = tuple(chain.from_iterable(zip(args, operands[1::2])))
            if end is not None:
                x += (operands[-1], )
            return np.einsum(*x)

        x = tuple(chain.from_iterable(zip(tensors, operands[1::2])))
        if end is not None:
            x += (operands[-1], )
        out = einsum(*x, optimize=optimize)

    grad = np.random.rand(*out.shape)
    #    grad = np.ones(out.shape)
    out.backward(grad)

    numerical_derivs = numerical_gradient_full(f,
                                               *vars,
                                               back_grad=grad,
                                               as_decimal=False)

    for n, (dnum, tensor) in enumerate(zip(numerical_derivs, tensors)):
        assert dnum.shape == tensor.grad.shape
        assert_allclose(
            dnum,
            tensor.grad,
            atol=atol,
            rtol=rtol,
            err_msg="The numerical and mygrad derivatives disagree for "
            "variable index {}".format(n))
Example #3
def test_einsum_bkwd3(shape, optimize, data):
    script = "ia, ia, i -> a"
    x = mg.random.rand(*shape)

    y_shape = data.draw(broadcastable_shapes(shape, min_dims=2, max_dims=2),
                        label="y_shape")
    y = mg.random.rand(*y_shape)

    z_shape = data.draw(broadcastable_shapes(x.shape[:1],
                                             min_dims=1,
                                             max_dims=1),
                        label="z_shape")
    z = mg.random.rand(*z_shape)

    try:
        o = einsum(script, x, y, z, optimize=optimize)
    except ValueError:
        assume(False)  # skip over invalid einsum shapes
        return

    grad = np.random.rand(*o.shape)
    o.backward(grad)

    def f(x, y, z):
        return np.einsum(script, x, y, z)

    dx, dy, dz = numerical_gradient_full(f,
                                         x.data,
                                         y.data,
                                         z.data,
                                         back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)
    assert_allclose(z.grad, dz, atol=1e-6)
Example #4
def test_einsum_bkwd3(shape, optimize, data):
    script = "ia, ia, i -> a"
    x = Tensor(np.random.rand(*shape))

    y_shape = data.draw(broadcastable_shape(shape, min_dim=2, max_dim=2))
    y = Tensor(np.random.rand(*y_shape))

    z_shape = data.draw(broadcastable_shape(x.shape[:1], min_dim=1, max_dim=1))
    z = Tensor(np.random.rand(*z_shape))

    grad = np.random.rand(x.shape[1])

    o = einsum(script, x, y, z, optimize=optimize)
    o.backward(grad)

    def f(x, y, z):
        return np.einsum(script, x, y, z)

    dx, dy, dz = numerical_gradient_full(f,
                                         x.data,
                                         y.data,
                                         z.data,
                                         back_grad=grad,
                                         as_decimal=False)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)
    assert_allclose(z.grad, dz, atol=1e-6)
Example #5
def test_einsum_bkwd5(optimize):
    x = mg.random.rand(5, 3, 4, 6)
    y = mg.random.rand(1, 5, 6, 2)
    grad = np.random.rand(1, 3, 4, 2)

    def f(x, y):
        return np.einsum("iBCj, aijd -> aBCd", x, y)

    o = einsum("iBCj, aijd -> aBCd", x, y, optimize=optimize)
    o.backward(grad)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)
Example #6
def test_einsum_bkwd6(shape, optimize):
    sig = "ijk, -> j"
    x = mg.random.rand(*shape)
    y = mg.random.rand(1)[0]
    grad = np.random.rand(x.shape[1])

    o = einsum(sig, x, y, optimize=optimize)
    o.backward(grad)

    def f(x, y):
        return np.einsum(sig, x, y)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)
Example #7
def test_einsum_bkwd2(num, optimize, data):
    y = Tensor(np.random.rand(num))

    # flip so that leading dim of x is broadcastable with y
    x_shape = data.draw(broadcastable_shape(y.shape, min_dim=2, max_dim=2))[::-1]
    x = Tensor(np.random.rand(*x_shape))
    grad = np.random.rand(x.shape[-1])

    o = einsum("ia, i -> a", x, y, optimize=optimize)
    o.backward(grad)

    def f(x, y):
        return np.einsum("ia, i -> a", x, y)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)
Example #8
def test_einsum_bkwd4(shape, optimize, data):
    script = "ia, i -> "

    x = Tensor(np.random.rand(*shape))

    y_shape = data.draw(broadcastable_shape(x.shape[:1], min_dim=1, max_dim=1))
    y = Tensor(np.random.rand(*y_shape))

    grad = np.random.rand(1).item()

    o = einsum(script, x, y, optimize=optimize)
    o.backward(grad)

    def f(x, y):
        return np.einsum(script, x, y)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)
Example #9
def compare_einsum(*operands, optimize=False):
    mygrad_out = einsum(*operands)
    assert isinstance(mygrad_out, Tensor)
    operands = tuple(i.data if isinstance(i, Tensor) else i for i in operands)
    assert_allclose(np.einsum(*operands),
                    einsum(*operands, optimize=optimize).data)
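As a quick illustration (hypothetical arrays, not drawn from the test suite), the helper can be pointed at any einsum expression to check forward-pass agreement between mygrad and numpy:

import numpy as np

a = np.random.rand(4, 4)
b = np.random.rand(2, 4, 3)
c = np.random.rand(2, 3, 5)

compare_einsum("ii", a)                  # trace
compare_einsum("ii -> i", a)             # main diagonal
compare_einsum("nij, njk -> nik", b, c)  # batched matrix product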
Example #10
def test_redundant_args():
    """
    Test behavior for when einsum receives redundant inputs. An optimization
    was added such that einsum will only compute the gradient for such an entry
    once and scale it accordingly.
    """
    a = mg.arange(4).reshape(2, 2)
    a_copy = copy(a)

    # check standard summation
    o = einsum("ij,ij", a, a)
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum("ij,ij", a_copy, a_copy * 1)
    assert len(o.creator.cache) == 2
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check standard summation using alt signature
    o = einsum(a, [0, 1], a, [0, 1])
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum(a_copy, [0, 1], a_copy * 1, [0, 1])
    assert len(o.creator.cache) == 2
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check matmul (no redundant indices)
    o = einsum("ij,jk", a, a)
    assert len(o.creator.cache) == 2
    o.sum().backward()

    o = a_copy @ a_copy
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check traces
    o = einsum("ii,ii", a, a)
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum("ii,ii", a_copy, a_copy * 1)
    assert len(o.creator.cache) == 2
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    b = Tensor(-1 * np.arange(2).reshape(2, 1))
    b_copy = copy(b)

    # check broadcasting and multiply-redundant input tensors
    # with distinct einsum labels
    o = einsum("ii,ii,i...,i...,...i,...i", a, a, b, b, a, a)
    assert len(o.creator.cache) == 3
    o.sum().backward()

    o = einsum(
        "ii,ii,i...,i...,...i,...i",
        a_copy,
        a_copy * 1,
        b_copy,
        b_copy * 1,
        a_copy,
        1 * a_copy,
    )
    assert len(o.creator.cache) == 6
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)
    assert_allclose(b.grad, b_copy.grad)