def test_einsum_bkwd1(num, optimize, data):
    x = mg.random.rand(num)
    y_shape = data.draw(broadcastable_shapes(x.shape, min_dims=1, max_dims=1))
    y = Tensor(np.random.rand(*y_shape))
    grad = data.draw(st.floats(-100, 100))

    o = einsum("i, i", x, y, optimize=optimize)
    o.backward(grad)

    def f(x, y):
        return np.einsum("i, i", x, y)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-5, rtol=1e-5)
    assert_allclose(y.grad, dy, atol=1e-5, rtol=1e-5)

    o.null_gradients()
    assert x.grad is None
    assert y.grad is None

    # test broadcasting in reverse direction
    o = einsum("i, i", y, x, optimize=optimize)
    o.backward(grad)

    assert x.grad is not None
    assert y.grad is not None

    dy, dx = numerical_gradient_full(f, y.data, x.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-5, rtol=1e-5)
    assert_allclose(y.grad, dy, atol=1e-5, rtol=1e-5)

    o.null_gradients()

def compare_backprop(*operands, atol=1e-5, rtol=1e-5, optimize=False):
    """Compare back-propagation through mygrad's einsum against the
    numerical derivative."""
    if isinstance(operands[0], str):
        # operands form: "ijk, ijk", x, y
        script = operands[0]
        vars = operands[1:]
        vars = tuple(np.asarray(i).astype(float) for i in vars)
        tensors = tuple(Tensor(i) for i in vars)

        def f(*args):
            return np.einsum(script, *args)

        out = einsum(script, *tensors, optimize=optimize)
    else:
        # operands form: op0, sublist0, op1, sublist1, ..., [sublistout]
        end = -1 if len(operands) % 2 else None  # -1 if sublistout is included
        vars = tuple(np.asarray(i).astype(float) for i in operands[:end:2])
        tensors = tuple(Tensor(i) for i in vars)

        def f(*args):
            x = tuple(chain.from_iterable(zip(args, operands[1::2])))
            if end is not None:
                x += (operands[-1],)
            return np.einsum(*x)

        x = tuple(chain.from_iterable(zip(tensors, operands[1::2])))
        if end is not None:
            x += (operands[-1],)
        out = einsum(*x, optimize=optimize)

    grad = np.random.rand(*out.shape)
    out.backward(grad)

    numerical_derivs = numerical_gradient_full(f, *vars, back_grad=grad, as_decimal=False)

    for n, (dnum, tensor) in enumerate(zip(numerical_derivs, tensors)):
        assert dnum.shape == tensor.grad.shape
        assert_allclose(
            dnum,
            tensor.grad,
            atol=atol,
            rtol=rtol,
            err_msg="The numerical and mygrad derivatives disagree for "
            "variable index {}".format(n),
        )

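# A minimal usage sketch for compare_backprop (not part of the test suite): it
# accepts either the string-subscript form or numpy's interleaved sublist form.
# The shapes below are illustrative assumptions, not fixtures from this module.
#
#   compare_backprop("ij, jk", np.random.rand(3, 4), np.random.rand(4, 5))
#   compare_backprop(np.random.rand(3, 4), [0, 1], np.random.rand(4, 5), [1, 2], [0, 2])
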
def test_einsum_bkwd3(shape, optimize, data): script = "ia, ia, i -> a" x = mg.random.rand(*shape) y_shape = data.draw(broadcastable_shapes(shape, min_dims=2, max_dims=2), label="y_shape") y = mg.random.rand(*y_shape) z_shape = data.draw(broadcastable_shapes(x.shape[:1], min_dims=1, max_dims=1), label="z_shape") z = mg.random.rand(*z_shape) try: o = einsum(script, x, y, z, optimize=optimize) except ValueError: assume(False) # skip over invalid einsum shapes return grad = np.random.rand(*o.shape) o.backward(grad) def f(x, y, z): return np.einsum(script, x, y, z) dx, dy, dz = numerical_gradient_full(f, x.data, y.data, z.data, back_grad=grad) assert_allclose(x.grad, dx, atol=1e-6) assert_allclose(y.grad, dy, atol=1e-6) assert_allclose(z.grad, dz, atol=1e-6)
def test_einsum_bkwd3(shape, optimize, data): script = "ia, ia, i -> a" x = Tensor(np.random.rand(*shape)) y_shape = data.draw(broadcastable_shape(shape, min_dim=2, max_dim=2)) y = Tensor(np.random.rand(*y_shape)) z_shape = data.draw(broadcastable_shape(x.shape[:1], min_dim=1, max_dim=1)) z = Tensor(np.random.rand(*z_shape)) grad = np.random.rand(x.shape[1]) o = einsum(script, x, y, z, optimize=optimize) o.backward(grad) def f(x, y, z): return np.einsum(script, x, y, z) dx, dy, dz = numerical_gradient_full(f, x.data, y.data, z.data, back_grad=grad, as_decimal=False) assert_allclose(x.grad, dx, atol=1e-6) assert_allclose(y.grad, dy, atol=1e-6) assert_allclose(z.grad, dz, atol=1e-6)
def test_einsum_bkwd5(optimize):
    x = mg.random.rand(5, 3, 4, 6)
    y = mg.random.rand(1, 5, 6, 2)
    grad = np.random.rand(1, 3, 4, 2)

    def f(x, y):
        return np.einsum("iBCj, aijd -> aBCd", x, y)

    o = einsum("iBCj, aijd -> aBCd", x, y, optimize=optimize)
    o.backward(grad)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)

def test_einsum_bkwd6(shape, optimize):
    sig = "ijk, -> j"
    x = mg.random.rand(*shape)
    y = mg.random.rand(1)[0]
    grad = np.random.rand(x.shape[1])

    o = einsum(sig, x, y, optimize=optimize)
    o.backward(grad)

    def f(x, y):
        return np.einsum(sig, x, y)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)

def test_einsum_bkwd2(num, optimize, data):
    y = Tensor(np.random.rand(num))

    # flip so that the leading dim of x is broadcastable with y
    x_shape = data.draw(broadcastable_shape(y.shape, min_dim=2, max_dim=2))[::-1]
    x = Tensor(np.random.rand(*x_shape))

    grad = np.random.rand(x.shape[-1])

    o = einsum("ia, i -> a", x, y, optimize=optimize)
    o.backward(grad)

    def f(x, y):
        return np.einsum("ia, i -> a", x, y)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)

def test_einsum_bkwd4(shape, optimize, data):
    script = "ia, i -> "

    x = Tensor(np.random.rand(*shape))

    y_shape = data.draw(broadcastable_shape(x.shape[:1], min_dim=1, max_dim=1))
    y = Tensor(np.random.rand(*y_shape))

    grad = np.random.rand(1).item()

    o = einsum(script, x, y, optimize=optimize)
    o.backward(grad)

    def f(x, y):
        return np.einsum(script, x, y)

    dx, dy = numerical_gradient_full(f, x.data, y.data, back_grad=grad)

    assert_allclose(x.grad, dx, atol=1e-6)
    assert_allclose(y.grad, dy, atol=1e-6)

def compare_einsum(*operands, optimize=False):
    """Check that mygrad's einsum returns a Tensor whose forward pass
    matches numpy's einsum for the given operands."""
    mygrad_out = einsum(*operands)
    assert isinstance(mygrad_out, Tensor)
    operands = tuple(i.data if isinstance(i, Tensor) else i for i in operands)
    assert_allclose(np.einsum(*operands), einsum(*operands, optimize=optimize).data)

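# Usage sketch for compare_einsum (forward-pass check only); the operands and
# shapes here are illustrative assumptions, not fixtures from this module:
#
#   compare_einsum("ii", np.random.rand(4, 4))                              # trace
#   compare_einsum("ij, jk -> ik", Tensor(np.random.rand(2, 3)), np.random.rand(3, 4))
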
def test_redundant_args():
    """Test einsum's behavior when it receives redundant inputs. An
    optimization was added so that einsum computes the gradient for such
    an entry only once and scales it accordingly."""
    a = mg.arange(4).reshape(2, 2)
    a_copy = copy(a)

    # check standard summation
    o = einsum("ij,ij", a, a)
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum("ij,ij", a_copy, a_copy * 1)
    assert len(o.creator.cache) == 2
    o.sum().backward()

    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check standard summation using alt signature
    o = einsum(a, [0, 1], a, [0, 1])
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum(a_copy, [0, 1], a_copy * 1, [0, 1])
    assert len(o.creator.cache) == 2
    o.sum().backward()

    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check matmul (no redundant indices)
    o = einsum("ij,jk", a, a)
    assert len(o.creator.cache) == 2
    o.sum().backward()

    o = a_copy @ a_copy
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check traces
    o = einsum("ii,ii", a, a)
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum("ii,ii", a_copy, a_copy * 1)
    assert len(o.creator.cache) == 2
    o.sum().backward()

    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    b = Tensor(-1 * np.arange(2).reshape(2, 1))
    b_copy = copy(b)

    # check broadcasting and multiply-redundant input tensors
    # with distinct einsum labels
    o = einsum("ii,ii,i...,i...,...i,...i", a, a, b, b, a, a)
    assert len(o.creator.cache) == 3
    o.sum().backward()

    o = einsum(
        "ii,ii,i...,i...,...i,...i",
        a_copy,
        a_copy * 1,
        b_copy,
        b_copy * 1,
        a_copy,
        1 * a_copy,
    )
    assert len(o.creator.cache) == 6
    o.sum().backward()

    assert_allclose(a.grad, a_copy.grad)
    assert_allclose(b.grad, b_copy.grad)
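

# Worked illustration of the redundancy optimization exercised above (a sketch,
# not an additional test): for o = einsum("ij,ij", a, a) = sum(a * a), the
# gradient with respect to `a` is 2 * a, so a single cached backward pass can
# be computed once and scaled rather than accumulated separately per argument.
#
#   a = np.arange(4.).reshape(2, 2)
#   t = Tensor(a)
#   einsum("ij,ij", t, t).backward()
#   assert_allclose(t.grad, 2 * a)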