def he_normal(*shape, gain=1):
    r""" Initialize a :class:`mygrad.Tensor` according to the normal initialization
    procedure described by He et al.

    Parameters
    ----------
    shape : Sequence[int]
        The shape of the output Tensor. Note that ``shape`` must be at least two-dimensional.

    gain : Real, optional (default=1)
        The gain (scaling factor) to apply.

    Returns
    -------
    mygrad.Tensor, shape=``shape``
        A Tensor, with values initialized according to the He normal initialization.

    Extended Description
    --------------------
    He, Zhang, Ren, and Sun put forward this initialization in the paper
    "Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification"
    https://arxiv.org/abs/1502.01852

    A Tensor :math:`W` initialized in this way should be drawn from a distribution about

    .. math::
        \mathcal{N}(0, \sqrt{\frac{2}{(1+a^2)n_l}})

    where :math:`a` is the slope of the rectifier following this layer, which is
    incorporated using the `gain` variable above.
    """
    assert len(shape) >= 2, "He Normal initialization requires at least two dimensions"
    tensor = np.empty(shape)
    std = gain / np.sqrt(shape[1] * tensor[0, 0].size)  # gain / sqrt(fan-in)
    return Tensor(np.random.normal(0, std, shape))

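# Illustrative usage (a sketch, not part of the library source): `np` and `Tensor`
# are assumed to be in scope as above, and the gain value sqrt(2) (the ReLU gain)
# is an assumption chosen for this example.
w = he_normal(64, 32, gain=np.sqrt(2))
assert w.shape == (64, 32)
# the empirical std should fall near gain / sqrt(fan-in) = sqrt(2 / 32)
print(float(w.data.std()), np.sqrt(2 / 32))
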
from typing import Optional

import hypothesis.extra.numpy as hnp
import hypothesis.strategies as st
import numpy as np
import pytest
from hypothesis import given, settings
from numpy.testing import assert_array_equal

from mygrad import Tensor

real_types = (hnp.integer_dtypes()
              | hnp.unsigned_integer_dtypes()
              | hnp.floating_dtypes())


@given(
    tensor=st.tuples(
        hnp.arrays(shape=hnp.array_shapes(), dtype=real_types),
        st.booleans(),
    ).map(lambda x: Tensor(x[0], constant=x[1])),
    dest_type=real_types,
    constant=st.booleans() | st.none(),
)
def test_astype(tensor: Tensor, dest_type: type, constant: Optional[bool]):
    tensor = tensor * 1  # give tensor a creator
    new_tensor = tensor.astype(dest_type, constant=constant)

    assert new_tensor.constant is (tensor.constant if constant is None else constant)
    assert tensor.creator is not None
    assert new_tensor.creator is None

    assert new_tensor.dtype is dest_type
    assert new_tensor.shape == tensor.shape

    if new_tensor.dtype is tensor.dtype:
        # casting to the same dtype should preserve the underlying values
        assert_array_equal(new_tensor.data, tensor.data)

def test_repr():
    assert repr(Tensor(1)) == 'Tensor(1)'
    assert repr(Tensor([1])) == 'Tensor([1])'
    assert repr(Tensor([1, 2])) == 'Tensor([1, 2])'

    tmp_rep = 'Tensor([[0, 1, 2],\n        [3, 4, 5],\n        [6, 7, 8]])'
    assert repr(mg.arange(9).reshape((3, 3))) == tmp_rep

def test_pos(x: np.ndarray, constant: bool):
    x = Tensor(x, constant=constant)
    y = +x
    assert y.creator.variables[0] is x
    assert_array_equal(y.data, x.data)
    assert y.constant is x.constant

def test_invalid_gradient_raises(constant: bool):
    x = Tensor(3, constant=constant) * 2
    with (pytest.raises(InvalidGradient) if not constant else does_not_raise()):
        x.backward("bad")

def wrapper(x, data):
    arrs = [x]  # list of drawn arrays to feed to functions

    # draw additional arrays according to `num_arrays`
    for i in range(1, self.num_arrays):
        arrs.append(
            data.draw(self.gen_other_array(x, i), label="array-{}".format(i)))

    arrs = tuple(Tensor(arr) for arr in arrs)
    arr_copies = tuple(copy(arr) for arr in arrs)

    if callable(self.kwargs):
        kwargs = data.draw(self.kwargs(*arrs), label="kwargs")
        if not isinstance(kwargs, dict):
            raise TypeError(
                "`kwargs` was a search strategy. This needs to draw dictionaries, "
                "instead drew: {}".format(kwargs))
    else:
        # The keyword args to be passed to `self.op`. If any provided argument is callable
        # it is assumed to be a hypothesis search strategy, and all of the drawn arrays will
        # be passed to the strategy, in order to draw a value for that keyword argument.
        # Otherwise the provided value is used as-is.
        kwargs = {
            k: (data.draw(v(*arrs), label="kwarg: {}".format(k)) if callable(v) else v)
            for k, v in self.kwargs.items()
        }

    if self.assumptions is not None:
        assume(self.assumptions(*arrs, **kwargs))

    # assure arrays don't contain forbidden values
    for i, arr in enumerate(arrs):
        for value in self.index_to_no_go.get(i, ()):
            assume(np.all(arr != value))

    # forward pass of the function
    out = self.op(*arrs, **kwargs)

    # gradient to be backpropped through this operation
    grad = data.draw(
        hnp.arrays(
            shape=out.shape,
            dtype=float,
            elements=st.floats(-10, 10),
            unique=True,
        ),
        label="grad",
    )
    grad_copy = copy(grad)  # keep a copy to check for later mutations

    # compute analytic derivatives via mygrad-backprop
    if any(out.shape != i.shape for i in arrs):
        # Broadcasting occurred
        # Must reduce `out` to scalar
        # first multiply by `grad` to simulate non-trivial back-prop
        (grad * out).sum().backward()
    else:
        out.backward(grad)

    if not self.finite_difference:
        # compute derivatives via numerical approximation of the derivative
        # using the complex-step method
        numerical_grad = (numerical_gradient_full
                          if self.vary_each_element else numerical_gradient)
    else:
        numerical_grad = finite_difference

    grads_numerical = numerical_grad(self.true_func,
                                     *(i.data for i in arrs),
                                     back_grad=grad,
                                     kwargs=kwargs)

    # check that the analytic and numeric derivatives match
    for n, (arr, d_num) in enumerate(zip(arrs, grads_numerical)):
        assert_allclose(
            arr.grad,
            d_num,
            **self.tolerances,
            err_msg="arr-{}: mygrad derivative and numerical derivative do not match".format(n))

        # check that none of the set derivatives is a view of `grad`
        assert not np.shares_memory(
            arr.grad, grad), "arr-{}.grad stores a view of grad".format(n)

    # check that none of the set derivatives are views of one another
    for arr_i, arr_j in combinations(arrs, 2):
        assert not np.shares_memory(
            arr_i.grad, arr_j.grad
        ), "two input arrays were propagated views of the same gradient"

    # verify that null_gradients works
    out.null_gradients()
    assert all(i.grad is None for i in arrs), "null_gradients failed"

    # check if any of the input-arrays were mutated
    for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
        assert_array_equal(
            arr.data,
            arr_copy.data,
            err_msg="arr-{} was mutated during backward prop".format(n),
        )

    # check if `grad` was mutated
    assert_array_equal(grad, grad_copy,
                       err_msg="`grad` was mutated during backward prop")

def test_init_data_rand(x):
    assert_equal(actual=Tensor(x).data, desired=x)

def test_0d_iter():
    x = Tensor(3)
    with pytest.raises(TypeError):
        sum(x)

def wrapper(shapes: hnp.BroadcastableShapes, constant, data: st.DataObject):
    self.index_to_arr_shapes.update(
        (k, v) for k, v in zip(sorted(self.missing_shapes), shapes.input_shapes))

    # list of drawn arrays to feed to functions
    arrs = data.draw(
        st.tuples(*(self.arrays(i) for i in range(self.num_arrays))),
        label="arrays",
    )

    # list of array-copies to check for mutation
    arr_copies = tuple(copy(arr) for arr in arrs)

    if callable(self.kwargs):
        kwargs = data.draw(self.kwargs(*arrs))
        if not isinstance(kwargs, dict):
            raise TypeError(
                "`kwargs` was a search strategy. This needs to draw dictionaries, "
                "instead drew: {}".format(kwargs))
    else:
        # set or draw keyword args to be passed to functions
        kwargs = {
            k: (data.draw(v(*arrs), label="kwarg: {}".format(k)) if callable(v) else v)
            for k, v in self.kwargs.items()
        }

    if self.assumptions is not None:
        assume(self.assumptions(*arrs, **kwargs))

    # assure arrays don't contain forbidden values
    for i, arr in enumerate(arrs):
        for value in self.index_to_no_go.get(i, ()):
            assume(np.all(arr != value))

    # execute mygrad and "true" functions. Compare outputs and check mygrad behavior
    o = self.op(*(Tensor(i) for i in arrs), **kwargs, constant=constant)
    tensor_out = o.data
    true_out = self.true_func(*arrs, **kwargs)

    assert isinstance(
        o, Tensor
    ), "`mygrad_func` returned type {}, should return `mygrad.Tensor`".format(type(o))
    assert (
        o.constant is constant
    ), "`mygrad_func` returned tensor.constant={}, should be constant={}".format(
        o.constant, constant)

    assert_allclose(
        actual=tensor_out,
        desired=true_out,
        err_msg="`mygrad_func(x)` and `true_func(x)` produce different results",
        **self.tolerances,
    )

    for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
        assert_array_equal(
            arr,
            arr_copy,
            err_msg="arr-{} was mutated during forward prop".format(n),
        )

def __init__(self, slope=0.1):
    """ Parameters
    ----------
    slope : Real, optional (default=0.1)
        The initial value to use for the slope."""
    self.slope = Tensor(slope)

def uniform(*shape, lower_bound=0, upper_bound=1, dtype=np.float32, constant=False):
    """ Initialize a ``Tensor`` by drawing from a uniform distribution.

    Parameters
    ----------
    shape : Sequence[int]
        The output shape.

    lower_bound : Real, optional (default=0)
        Lower bound on the output interval, inclusive.

    upper_bound : Real, optional (default=1)
        Upper bound on the output interval, exclusive.

    dtype : data-type, optional (default=float32)
        The data type of the output tensor; must be a floating-point type.

    constant : bool, optional (default=False)
        If `True`, the returned tensor is a constant (it does not back-propagate a gradient).

    Returns
    -------
    mygrad.Tensor, shape=``shape``
        A Tensor, with values drawn uniformly from [lower_bound, upper_bound).

    Examples
    --------
    >>> from mygrad.nnet.initializers import uniform
    >>> uniform(2, 3)
    Tensor([[0.8731087 , 0.30872548, 0.75528544],
            [0.55404514, 0.7652222 , 0.4955769 ]], dtype=float32)

    >>> uniform(2, 2, lower_bound=-1, upper_bound=3)
    Tensor([[ 1.9151938 , -0.28968155],
            [-0.01240687, -0.24448799]], dtype=float32)

    >>> uniform(5, dtype="float16", constant=True)
    Tensor([0.5186, 0.1481, 0.3745, 0.941 , 0.331 ], dtype=float16)
    """
    if lower_bound >= upper_bound:
        raise ValueError("Uniform lower bound must be less than upper bound")
    if not np.issubdtype(dtype, np.floating):
        raise ValueError("Uniform initialization requires a floating-point dtype")

    if len(shape) == 1:
        shape = shape[0]

    if isinstance(lower_bound, Tensor):
        lower_bound = lower_bound.item()
    if isinstance(upper_bound, Tensor):
        upper_bound = upper_bound.item()

    return Tensor(np.random.uniform(lower_bound, upper_bound, shape),
                  dtype=dtype,
                  constant=constant)

def test_he_normal(shape, gain, dtype, constant):
    tensor = he_normal(shape, gain=Tensor(gain), dtype=dtype, constant=constant)
    assert tensor.shape == shape
    assert tensor.dtype == dtype
    assert tensor.constant == constant

def test_all_tensor_creation(constant, dtype):
    x = np.array([1, 2, 3])

    e = empty((3, 2), dtype=dtype, constant=constant)
    assert e.shape == (3, 2)
    assert e.constant is constant

    e = empty_like(e, dtype=dtype, constant=constant)
    assert e.shape == (3, 2)
    assert e.constant is constant

    check_tensor_array(eye(3, dtype=dtype, constant=constant),
                       np.eye(3, dtype=dtype), constant)

    check_tensor_array(
        identity(3, dtype=dtype, constant=constant),
        np.identity(3, dtype=dtype),
        constant,
    )

    check_tensor_array(
        ones((4, 5, 6), dtype=dtype, constant=constant),
        np.ones((4, 5, 6), dtype=dtype),
        constant,
    )

    check_tensor_array(
        ones_like(x, dtype=dtype, constant=constant),
        np.ones_like(x, dtype=dtype),
        constant,
    )

    check_tensor_array(
        ones_like(Tensor(x), dtype=dtype, constant=constant),
        np.ones_like(x, dtype=dtype),
        constant,
    )

    check_tensor_array(
        zeros((4, 5, 6), dtype=dtype, constant=constant),
        np.zeros((4, 5, 6), dtype=dtype),
        constant,
    )

    check_tensor_array(
        zeros_like(x, dtype=dtype, constant=constant),
        np.zeros_like(x, dtype=dtype),
        constant,
    )

    check_tensor_array(
        zeros_like(Tensor(x), dtype=dtype, constant=constant),
        np.zeros_like(x, dtype=dtype),
        constant,
    )

    check_tensor_array(
        full((4, 5, 6), 5.0, dtype=dtype, constant=constant),
        np.full((4, 5, 6), 5.0, dtype=dtype),
        constant,
    )

    check_tensor_array(
        full_like(x, 5.0, dtype=dtype, constant=constant),
        np.full_like(x, 5.0, dtype=dtype),
        constant,
    )

    check_tensor_array(
        full_like(Tensor(x), 5.0, dtype=dtype, constant=constant),
        np.full_like(x, 5.0, dtype=dtype),
        constant,
    )

    check_tensor_array(
        arange(3, 7, dtype=dtype, constant=constant),
        np.arange(3, 7, dtype=dtype),
        constant,
    )

    check_tensor_array(
        linspace(3, 7, dtype=dtype, constant=constant),
        np.linspace(3, 7, dtype=dtype),
        constant,
    )

    check_tensor_array(
        logspace(3, 7, dtype=dtype, constant=constant),
        np.logspace(3, 7, dtype=dtype),
        constant,
    )

    check_tensor_array(
        geomspace(3, 7, dtype=dtype, constant=constant),
        np.geomspace(3, 7, dtype=dtype),
        constant,
    )

def test_contains():
    t = Tensor([[0, 1, 2], [3, 4, 5]])
    assert 0 in t and 0 in t.data
    assert [0, 1, 2] in t and [0, 1, 2] in t.data
    assert -1 not in t and -1 not in t.data

def create_node(self, value, constant):
    n = Node(value, constant=constant)
    t = Tensor(value, constant=constant)
    self.node_list.append((n, t))
    return n, t

def wrapper(shapes: hnp.BroadcastableShapes, constant, data: st.DataObject):
    self.index_to_arr_shapes.update(
        (k, v) for k, v in zip(sorted(self.missing_shapes), shapes.input_shapes))

    # list of drawn arrays to feed to functions
    arrs = data.draw(
        st.tuples(*(self.arrays(i) for i in range(self.num_arrays))),
        label="arrays",
    )

    # list of array-copies to check for mutation
    arr_copies = tuple(copy(arr) for arr in arrs)

    if callable(self.kwargs):
        kwargs = data.draw(self.kwargs(*arrs))
        if not isinstance(kwargs, dict):
            raise TypeError(
                f"`kwargs` was a search strategy. This needs to draw dictionaries, "
                f"instead drew: {kwargs}")
    else:
        # The keyword args to be passed to `self.op`. If any provided argument is callable
        # it is assumed to be a hypothesis search strategy, and all of the drawn arrays will
        # be passed to the strategy, in order to draw a value for that keyword argument.
        # Otherwise the provided value is used as-is.
        kwargs = {
            k: (data.draw(v(*arrs), label=f"kwarg: {k}") if callable(v) else v)
            for k, v in self.kwargs.items()
        }

    if self.assumptions is not None:
        assume(self.assumptions(*arrs, **kwargs))

    # assure arrays don't contain forbidden values
    for i, arr in enumerate(arrs):
        for value in self.index_to_no_go.get(i, ()):
            assume(np.all(arr != value))

    if self.permit_0d_array_as_float:
        # potentially cast a 0D array as a float
        arrs = tuple(
            arr.item()
            if arr.ndim == 0 and data.draw(st.booleans(), label=f"arr-{n} to float")
            else arr
            for n, arr in enumerate(arrs))

    # execute mygrad and "true" functions. Compare outputs and check mygrad behavior
    tensor_constants = data.draw(st.tuples(*[st.booleans()] * len(arrs)),
                                 label="tensor_constants")
    o = self.op(
        *(Tensor(i, constant=c) for i, c in zip(arrs, tensor_constants)),
        **kwargs,
        constant=constant,
    )
    tensor_out = o.data
    true_out = self.true_func(*arrs, **kwargs)

    assert isinstance(
        o, Tensor
    ), f"`mygrad_func` returned type {type(o)}, should return `mygrad.Tensor`"
    assert o.constant is constant or bool(sum(tensor_constants)), (
        f"`mygrad_func` returned tensor.constant={o.constant}, "
        f"should be constant={constant or bool(sum(tensor_constants))}")

    assert_allclose(
        actual=tensor_out,
        desired=true_out,
        err_msg="`mygrad_func(x)` and `true_func(x)` produce different results",
        **self.tolerances,
    )

    for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
        assert_array_equal(
            arr,
            arr_copy,
            err_msg=f"arr-{n} was mutated during forward prop",
        )

def wrapper(x, constant, data):
    arrs = [x]  # list of drawn arrays to feed to functions

    # draw additional arrays according to `num_arrays`
    for i in range(1, self.num_arrays):
        y = data.draw(self.gen_other_array(x, i), label="array-{}".format(i))
        arrs.append(y)

    # list of array-copies to check for mutation
    arr_copies = [copy(arr) for arr in arrs]

    if callable(self.kwargs):
        kwargs = data.draw(self.kwargs(*arrs))
        if not isinstance(kwargs, dict):
            raise TypeError(
                "`kwargs` was a search strategy. This needs to draw dictionaries, "
                "instead drew: {}".format(kwargs))
    else:
        # set or draw keyword args to be passed to functions
        kwargs = {
            k: (data.draw(v(*arrs), label="kwarg: {}".format(k)) if callable(v) else v)
            for k, v in self.kwargs.items()
        }

    if self.assumptions is not None:
        assume(self.assumptions(*arrs, **kwargs))

    # assure arrays don't contain forbidden values
    for i, arr in enumerate(arrs):
        for value in self.index_to_no_go.get(i, ()):
            assume(np.all(arr != value))

    # execute mygrad and "true" functions. Compare outputs and check mygrad behavior
    o = self.op(*(Tensor(i) for i in arrs), **kwargs, constant=constant)
    tensor_out = o.data
    true_out = self.true_func(*arrs, **kwargs)

    assert isinstance(
        o, Tensor
    ), "`mygrad_func` returned type {}, should return `mygrad.Tensor`".format(type(o))
    assert (
        o.constant is constant
    ), "`mygrad_func` returned tensor.constant={}, should be constant={}".format(
        o.constant, constant)

    assert_allclose(
        actual=tensor_out,
        desired=true_out,
        err_msg="`mygrad_func(x)` and `true_func(x)` produce different results",
        atol=1e-7,
    )

    for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
        assert_array_equal(
            arr,
            arr_copy,
            err_msg="arr-{} was mutated during forward prop".format(n),
        )

def wrapper(shapes: hnp.BroadcastableShapes, data: st.DataObject):
    self.index_to_arr_shapes.update(
        (k, v) for k, v in zip(sorted(self.missing_shapes), shapes.input_shapes))

    # list of drawn arrays to feed to functions
    arrs = data.draw(
        st.tuples(*(self.arrays(i).map(Tensor)
                    for i in range(self.num_arrays)
                    if i not in self.arrs_from_kwargs)).map(list),
        label="arrays",
    )

    if callable(self.kwargs):
        kwargs = data.draw(self.kwargs(*arrs), label="kwargs")
        if not isinstance(kwargs, dict):
            raise TypeError(
                f"`kwargs` was a search strategy. This needs to draw dictionaries, "
                f"instead drew: {kwargs}")
    else:
        # The keyword args to be passed to `self.op`. If any provided argument is callable
        # it is assumed to be a hypothesis search strategy, and all of the drawn arrays will
        # be passed to the strategy, in order to draw a value for that keyword argument.
        # Otherwise the provided value is used as-is.
        kwargs = {
            k: (data.draw(v(*arrs), label=f"kwarg: {k}") if callable(v) else v)
            for k, v in self.kwargs.items()
        }

    if not set(self.arrs_from_kwargs.values()) <= set(kwargs):
        raise ValueError(
            f"`arrs_from_kwargs` specifies kwargs that aren't present: "
            f"{', '.join(v for v in self.arrs_from_kwargs.values() if v not in kwargs)}")

    for arr_id, key in sorted(self.arrs_from_kwargs.items(), key=lambda x: x[0]):
        v = kwargs.pop(key)
        if not isinstance(v, (np.ndarray, Tensor)):
            raise ValueError(
                f"kwarg {key} is to be used as array-{arr_id}, but is neither "
                f"an array nor a tensor, got {v}")
        arrs.insert(arr_id, Tensor(v))

    arrs = tuple(arrs)
    arr_copies = tuple(copy(arr) for arr in arrs)

    if self.assumptions is not None:
        assume(self.assumptions(*arrs, **kwargs))

    # assure arrays don't contain forbidden values
    for i, arr in enumerate(arrs):
        for value in self.index_to_no_go.get(i, ()):
            assume(np.all(arr != value))

    # forward pass of the function
    out = self.op(*arrs, **kwargs)

    # gradient to be backpropped through this operation
    grad = data.draw(
        hnp.arrays(
            shape=out.shape,
            dtype=float,
            elements=st.floats(-10, 10),
            unique=True,
        ),
        label="grad",
    )
    grad_copy = copy(grad)  # keep a copy to check for later mutations

    # compute analytic derivatives via mygrad-backprop
    if any(out.shape != i.shape for i in arrs):
        # Broadcasting occurred
        # Must reduce `out` to scalar
        # first multiply by `grad` to simulate non-trivial back-prop
        (grad * out).sum().backward()
    else:
        out.backward(grad)

    if not self.use_finite_difference:
        # compute derivatives via numerical approximation of the derivative
        # using the complex-step method
        numerical_grad = (numerical_gradient_full
                          if self.vary_each_element else numerical_gradient)
    else:
        numerical_grad = finite_difference

    grads_numerical = numerical_grad(self.true_func,
                                     *(i.data for i in arrs),
                                     back_grad=grad,
                                     kwargs=kwargs)

    # check that the analytic and numeric derivatives match
    for n, (arr, d_num) in enumerate(zip(arrs, grads_numerical)):
        assert arr.grad is not None, f"arr-{n} grad is None, expected {d_num}"
        assert_allclose(
            arr.grad,
            d_num,
            **self.tolerances,
            err_msg=f"arr-{n}: mygrad derivative and numerical derivative do not match",
        )

        # check that none of the set derivatives is a view of `grad`
        assert not np.shares_memory(
            arr.grad, grad), f"arr-{n}.grad stores a view of grad"

    # check that none of the set derivatives are views of one another
    for arr_i, arr_j in combinations(arrs, 2):
        assert not np.shares_memory(
            arr_i.grad, arr_j.grad
        ), "two input arrays were propagated views of the same gradient"

    # verify that null_gradients works
    out.null_gradients()
    assert all(i.grad is None for i in arrs), "null_gradients failed"

    # check if any of the input-arrays were mutated
    for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
        assert_array_equal(
            arr.data,
            arr_copy.data,
            err_msg=f"arr-{n} was mutated during backward prop",
        )

    # check if `grad` was mutated
    assert_array_equal(grad, grad_copy,
                       err_msg="`grad` was mutated during backward prop")

    with raises(TypeError):
        int(nd_tensor)

    with raises(ValueError):
        nd_tensor.item()

    for size1_tensor in (Tensor(1), Tensor([[1]])):
        assert float(size1_tensor) == 1.0
        assert int(size1_tensor) == 1
        assert size1_tensor.item() == 1.0


@pytest.mark.parametrize(
    ("tensor", "repr_"),
    [
        (Tensor(1), "Tensor(1)"),
        (Tensor([1]), "Tensor([1])"),
        (Tensor([1, 2]), "Tensor([1, 2])"),
        (
            mg.arange(9).reshape((3, 3)),
            "Tensor([[0, 1, 2],\n        [3, 4, 5],\n        [6, 7, 8]])",
        ),
    ],
)
def test_repr(tensor, repr_):
    assert repr(tensor) == repr_


@given(constant=st.booleans())
def test_invalid_gradient_raises(constant: bool):
    x = Tensor(3, constant=constant) * 2

def test_batchnorm(x, data):
    # optionally draw affine parameters
    gamma = data.draw(
        st.one_of(
            hnp.arrays(shape=x.shape[1:2], dtype=float, elements=st.floats(-10, 10)),
            st.none()),
        label="gamma")
    beta = data.draw(
        st.one_of(
            hnp.arrays(shape=x.shape[1:2], dtype=float, elements=st.floats(-10, 10)),
            st.none()),
        label="beta")

    x_orig = np.copy(x)
    gamma_orig = np.copy(gamma) if gamma is not None else None
    beta_orig = np.copy(beta) if beta is not None else None

    t1 = Tensor(x)
    t2 = Tensor(x)
    g1 = Tensor(gamma) if gamma is not None else None
    g2 = Tensor(gamma) if gamma is not None else None
    b1 = Tensor(beta) if beta is not None else None
    b2 = Tensor(beta) if beta is not None else None

    y1 = simple_batchnorm(t1, g1, b1, eps=1e-7)
    y2 = batchnorm(t2, gamma=g2, beta=b2, eps=1e-7)
    assert_allclose(actual=y2.data, desired=y1.data, atol=1e-7, rtol=1e-7)

    grad = data.draw(
        hnp.arrays(shape=y2.shape, dtype=t2.dtype, elements=st.floats(-10, 10)),
        label='grad')
    grad_orig = np.copy(grad)

    y1.backward(grad)
    y2.backward(grad)

    assert_allclose(actual=t2.grad, desired=t1.grad, atol=1e-4, rtol=1e-4)

    if beta is not None:
        assert_allclose(actual=b2.grad, desired=b1.grad, atol=1e-4, rtol=1e-4)
    else:
        assert b2 is None

    if gamma is not None:
        assert_allclose(actual=g2.grad, desired=g1.grad, atol=1e-4, rtol=1e-4)
    else:
        assert g2 is None

    for n, (o, c) in enumerate(
            zip((x, gamma, beta, grad), (x_orig, gamma_orig, beta_orig, grad_orig))):
        if o is None or c is None:
            assert o is c, f"{('x', 'gamma', 'beta', 'grad')[n]}"
        else:
            assert_array_equal(o, c, err_msg=f"{('x', 'gamma', 'beta', 'grad')[n]}")

    if gamma is not None and beta is not None:
        assert not np.shares_memory(g2.grad, b2.grad)
    assert not np.shares_memory(grad, t2.grad)

    y2.null_gradients()
    assert t2.grad is None
    if gamma is not None:
        assert g2.grad is None
    if beta is not None:
        assert b2.grad is None

def test_input_type_checking(data, constant, creator):
    with raises(TypeError):
        Tensor(data, constant=constant, _creator=creator)

def test_redundant_args():
    """ Test behavior for when einsum receives redundant inputs. An optimization
    was added such that einsum will only compute the gradient for such an entry
    once and scale it accordingly."""
    a = mg.arange(4).reshape(2, 2)
    a_copy = copy(a)

    # check standard summation
    o = einsum("ij,ij", a, a)
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum("ij,ij", a_copy, a_copy * 1)
    assert len(o.creator.cache) == 2
    o.sum().backward()

    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check standard summation using alt signature
    o = einsum(a, [0, 1], a, [0, 1])
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum(a_copy, [0, 1], a_copy * 1, [0, 1])
    assert len(o.creator.cache) == 2
    o.sum().backward()

    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check matmul (no redundant indices)
    o = einsum("ij,jk", a, a)
    assert len(o.creator.cache) == 2
    o.sum().backward()

    o = a_copy @ a_copy
    o.sum().backward()

    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check traces
    o = einsum("ii,ii", a, a)
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum("ii,ii", a_copy, a_copy * 1)
    assert len(o.creator.cache) == 2
    o.sum().backward()

    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    b = Tensor(-1 * np.arange(2).reshape(2, 1))
    b_copy = copy(b)

    # check broadcasting and multiply-redundant input tensors
    # with distinct einsum labels
    o = einsum("ii,ii,i...,i...,...i,...i", a, a, b, b, a, a)
    assert len(o.creator.cache) == 3
    o.sum().backward()

    o = einsum(
        "ii,ii,i...,i...,...i,...i",
        a_copy,
        a_copy * 1,
        b_copy,
        b_copy * 1,
        a_copy,
        1 * a_copy,
    )
    assert len(o.creator.cache) == 6
    o.sum().backward()

    assert_allclose(a.grad, a_copy.grad)
    assert_allclose(b.grad, b_copy.grad)

def test_clear_graph(x, y, z):
    x_orig = x
    y_orig = y
    z_orig = z

    x = Tensor(x)
    y = Tensor(y)
    z = Tensor(z)

    f = x * y + z
    g = x + z * f * f

    # check side effects
    unused = 2 * g - f
    w = 1 * f
    assert unused is not None

    g.backward()
    assert_allclose(f.grad, 2 * z.data * f.data)
    assert_allclose(x.grad, 1 + 2 * z.data * f.data * y.data)
    assert_allclose(y.grad, 2 * z.data * f.data * x.data)
    assert_allclose(z.grad, f.data ** 2 + z.data * 2 * f.data)
    assert w.grad is None

    assert_array_equal(x.data, x_orig, err_msg="x was mutated during the operation")
    assert_array_equal(y.data, y_orig, err_msg="y was mutated during the operation")
    assert_array_equal(z.data, z_orig, err_msg="z was mutated during the operation")

    # null-gradients without clearing the graph, confirm that backprop still works
    g.null_gradients(clear_graph=False)
    g.backward()
    assert_allclose(f.grad, 2 * z.data * f.data)
    assert_allclose(x.grad, 1 + 2 * z.data * f.data * y.data)
    assert_allclose(y.grad, 2 * z.data * f.data * x.data)
    assert_allclose(z.grad, f.data ** 2 + z.data * 2 * f.data)
    assert w.grad is None

    assert_array_equal(x.data, x_orig, err_msg="x was mutated during the operation")
    assert_array_equal(y.data, y_orig, err_msg="y was mutated during the operation")
    assert_array_equal(z.data, z_orig, err_msg="z was mutated during the operation")

    g.null_gradients(clear_graph=False)
    w.backward()
    assert_allclose(x.grad, y.data)
    assert_allclose(y.grad, x.data)
    assert_allclose(z.grad, np.array(1.0))

    w.clear_graph()
    assert_allclose(x.grad, y.data)
    assert_allclose(y.grad, x.data)
    assert_allclose(z.grad, np.array(1.0))

    assert len(g._ops) > 0
    assert g.creator is not None

    assert len(x._ops) == 0
    assert len(y._ops) == 0
    assert len(z._ops) == 0
    assert len(f._ops) == 0

    assert x.creator is None
    assert y.creator is None
    assert z.creator is None
    assert f.creator is None

    with raises(InvalidBackprop):
        g.backward()

def wrapper(data, x):
    """ Performs hypothesis unit test for checking back-propagation
    through a `mygrad` op.

    Raises
    ------
    AssertionError"""
    y = data.draw(
        hnp.arrays(shape=broadcastable_shape(x.shape),
                   dtype=float,
                   elements=st.floats(*self.ybnds)))

    for value in self.x_no_go:
        assume(np.all(x != value))

    for value in self.y_no_go:
        assume(np.all(y != value))

    S = st.SearchStrategy
    kwargs = {
        k: (data.draw(v) if isinstance(v, S) else v)
        for k, v in self.kwargs
    }

    # gradient to be backpropped through this operation
    x = Tensor(x)
    y = Tensor(y)
    out = self.op(x, y)

    grad = data.draw(
        hnp.arrays(shape=out.shape, dtype=float, elements=st.floats(1, 10)))

    x_copy = copy(x)
    y_copy = copy(y)
    grad_copy = copy(grad)

    if any(out.shape != i.shape for i in (x, y)):
        # broadcasting occurred, must reduce `out` to scalar
        # first multiply by `grad` to simulate non-trivial back-prop
        (grad * out).sum().backward()
    else:
        out.backward(grad)

    numerical_grad = (numerical_gradient
                      if self.func_is_mapping else numerical_gradient_full)
    if self.func_is_mapping:
        dx, dy = numerical_grad(self.func,
                                x.data,
                                y.data,
                                back_grad=grad,
                                kwargs=kwargs)
    else:
        dx, dy = numerical_gradient_full(self.func,
                                         x.data,
                                         y.data,
                                         back_grad=grad,
                                         kwargs=kwargs,
                                         as_decimal=self.as_decimal)

    assert_allclose(
        x.grad,
        dx,
        **self.tolerances,
        err_msg="x: numerical derivative and mygrad derivative do not match")
    assert_allclose(
        y.grad,
        dy,
        **self.tolerances,
        err_msg="y: numerical derivative and mygrad derivative do not match")

    assert not np.shares_memory(x.grad, grad), "A view of `grad` was back-propped"
    assert not np.shares_memory(y.grad, grad), "A view of `grad` was back-propped"

    out.null_gradients()
    assert all(i.grad is None for i in (x, y)), "null_gradients failed"

    assert_array_equal(x, x_copy, err_msg="`x` was mutated during backward prop")
    assert_array_equal(y, y_copy, err_msg="`y` was mutated during backward prop")
    assert_array_equal(grad, grad_copy,
                       err_msg="`grad` was mutated during backward prop")

def test_contains(element):
    t = Tensor([[0, 1, 2], [3, 4, 5]])
    assert (element in t) is (element in t.data)

def test_input_validation():
    x = Tensor([[1, 2]])

    with raises(TypeError):
        transpose(x, (0,), 1)

def test_conv_ND_bkwd(data, shape, num_filters, num_batch, num_channel):
    """ Test conv-backprop 1D-3D with various strides and dilations."""
    img_shape = (num_batch, num_channel) + shape
    padding = data.draw(
        st.integers(0, 2) | st.tuples(*[st.integers(0, 2)] * len(shape)),
        label="padding",
    )
    if isinstance(padding, tuple):
        shape = tuple(s + 2 * p for s, p in zip(shape, padding))
    else:
        shape = tuple(s + 2 * padding for s in shape)

    win_dim = len(shape)
    shape = (num_batch, num_channel) + shape
    win_shape = data.draw(
        st.tuples(*(st.integers(1, s) for s in shape[-win_dim:])),
        label="win_shape")
    kernel_shape = (num_filters, shape[1], *win_shape)

    stride = data.draw(
        st.tuples(*(st.integers(1, s) for s in shape[-win_dim:])),
        label="stride")

    max_dilation = np.array(shape[-win_dim:]) // win_shape
    dilation = data.draw(
        st.tuples(*(st.integers(1, s) for s in max_dilation)),
        label="dilation")
    conf = dict(stride=stride, dilation=dilation, padding=padding)

    # skip invalid data/kernel/stride/dilation combinations
    assume(
        get_outshape(shape[2:], kernel_shape[2:], stride, dilation) is not None)

    kernels = data.draw(
        hnp.arrays(dtype=float, shape=kernel_shape, elements=st.floats(-10, 10)),
        label="kernels",
    )

    x = data.draw(
        hnp.arrays(dtype=float, shape=img_shape, elements=st.floats(-10, 10)),
        label="x")

    x = Tensor(x)
    kernels = Tensor(kernels)
    out = conv_nd(x, kernels, **conf)

    grad = data.draw(
        hnp.arrays(shape=out.shape, dtype=float, elements=st.floats(-10, 10), unique=True),
        label="grad",
    )
    out.backward(grad)

    grads_numerical = numerical_gradient_full(
        _conv_nd, *(i.data for i in (x, kernels)), back_grad=grad, kwargs=conf)

    for n, (arr, d_num) in enumerate(zip((x, kernels), grads_numerical)):
        assert_allclose(
            arr.grad,
            d_num,
            atol=1e-4,
            rtol=1e-4,
            err_msg="arr-{}: numerical derivative and mygrad derivative do not match".format(n),
        )

data = np.array(data)

ytrain = data[:, -1]
oneHotEnc = np.zeros((len(ytrain), 5))
for i in range(len(ytrain)):
    oneHotEnc[i][ytrain[i] - 1] = 1

ytest = oneHotEnc[10000:]
ytrain = oneHotEnc[:10000]

xtrain = data[:, :-1]
xtest = xtrain[10000:]
xtrain = xtrain[:10000]

del data

D = len(xtrain[0])
K = 5

W = Tensor(np.random.randn(D, K))
b = Tensor(np.zeros((K,), dtype=W.dtype))

l = []
acc = []
params = [b, W]
rate = .1

y = np.argmax(ytrain, axis=1)

for i in range(1000):
    o = dense(xtrain, W) + b
    loss = multiclass_hinge(o, y)
    l.append(loss.data.item())
    loss.backward()
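    # --- Sketch only (not part of the original fragment): the loop above is cut off
    # after the backward pass. A plain SGD step like the one below would typically
    # follow it, using the `rate` and `params` defined earlier; the exact update
    # used originally is not shown.
    for param in params:
        param.data -= rate * param.grad
    loss.null_gradients()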