Example #1
def he_normal(*shape, gain=1):
    r''' Initialize a :class:`mygrad.Tensor` according to the normal initialization procedure
    described by He et al.

    Parameters
    ----------
    shape : Sequence[int]
        The shape of the output Tensor. Note that `shape` must be at least two-dimensional.

    gain : Real, optional (default=1)
        The gain (scaling factor) to apply.

    Returns
    -------
    mygrad.Tensor, shape=`shape`
        A Tensor, with values initialized according to the He normal initialization.

    Extended Description
    --------------------
    He, Zhang, Ren, and Sun put forward this initialization in the paper
        "Delving Deep into Rectifiers: Surpassing Human-Level Performance
        on ImageNet Classification"
    https://arxiv.org/abs/1502.01852

    A Tensor :math:`W` initialized in this way should be drawn from a distribution about

    .. math::
        \mathcal{N}(0, \sqrt{\frac{2}{(1+a^2)n_l}})

    where :math:`a` is the slope of the rectifier following this layer, which is incorporated
    using the `gain` variable above.
    '''
    assert len(shape) >= 2, 'He Normal initialization requires at least two dimensions!'

    tensor = np.empty(shape)
    std = gain / np.sqrt(shape[1] * tensor[0, 0].size)
    return Tensor(np.random.normal(0, std, shape))
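
A minimal usage sketch for the initializer above (illustrative, not taken from the source); per the code, a 2D ``shape=(m, n)`` gives a standard deviation of roughly ``gain / sqrt(n)``:

W = he_normal(512, 256, gain=np.sqrt(2))  # weights for a 512 -> 256 dense layer
print(W.shape)              # (512, 256)
print(float(W.data.std()))  # roughly sqrt(2) / sqrt(256) ≈ 0.088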
Example #2
from typing import Optional

import hypothesis.extra.numpy as hnp
import hypothesis.strategies as st
import numpy as np
import pytest
from hypothesis import given, settings
from numpy.testing import assert_array_equal

from mygrad import Tensor

real_types = (hnp.integer_dtypes() | hnp.unsigned_integer_dtypes()
              | hnp.floating_dtypes())


@given(
    tensor=st.tuples(
        hnp.arrays(shape=hnp.array_shapes(), dtype=real_types),
        st.booleans(),
    ).map(lambda x: Tensor(x[0], constant=x[1])),
    dest_type=real_types,
    constant=st.booleans() | st.none(),
)
def test_astype(tensor: Tensor, dest_type: type, constant: Optional[bool]):
    tensor = tensor * 1  # give tensor a creator
    new_tensor = tensor.astype(dest_type, constant=constant)

    assert new_tensor.constant is (tensor.constant
                                   if constant is None else constant)
    assert tensor.creator is not None
    assert new_tensor.creator is None
    assert new_tensor.dtype is dest_type
    assert new_tensor.shape == tensor.shape

    if new_tensor.dtype is tensor.dtype:
Example #3
def test_repr():
    assert repr(Tensor(1)) == 'Tensor(1)'
    assert repr(Tensor([1])) == 'Tensor([1])'
    assert repr(Tensor([1, 2])) == 'Tensor([1, 2])'
    tmp_rep = 'Tensor([[0, 1, 2],\n        [3, 4, 5],\n        [6, 7, 8]])'
    assert repr(mg.arange(9).reshape((3, 3))) == tmp_rep
Example #4
def test_pos(x: np.ndarray, constant: bool):
    x = Tensor(x, constant=constant)
    y = +x
    assert y.creator.variables[0] is x
    assert_array_equal(y.data, x.data)
    assert y.constant is x.constant
Example #5
def test_invalid_gradient_raises(constant: bool):
    x = Tensor(3, constant=constant) * 2
    with (pytest.raises(InvalidGradient)
          if not constant else does_not_raise()):
        x.backward("bad")
Example #6
        def wrapper(x, data):
            arrs = [x]  # list of drawn arrays to feed to functions
            # draw additional arrays according to `num_arrays`
            for i in range(1, self.num_arrays):
                arrs.append(
                    data.draw(self.gen_other_array(x, i),
                              label="array-{}".format(i)))

            arrs = tuple(Tensor(arr) for arr in arrs)
            arr_copies = tuple(copy(arr) for arr in arrs)

            if callable(self.kwargs):
                kwargs = data.draw(self.kwargs(*arrs), label="kwargs")
                if not isinstance(kwargs, dict):
                    raise TypeError(
                        "`kwargs` was a search strategy. This needs to draw dictionaries, "
                        "instead drew: {}".format(kwargs))
            else:
                # The keyword args to be passed to `self.op`. If any provided argument is callable
                # it is assumed to be a hypothesis search strategy, and all of the drawn arrays will
                # be passed to the strategy, in order to draw a value for that keyword argument.
                # Otherwise the provided value is used as-is.
                kwargs = {
                    k: (data.draw(v(*arrs), label="kwarg: {}".format(k))
                        if callable(v) else v)
                    for k, v in self.kwargs.items()
                }

            if self.assumptions is not None:
                assume(self.assumptions(*arrs, **kwargs))

            for i, arr in enumerate(
                    arrs):  # ensure arrays don't contain forbidden values
                for value in self.index_to_no_go.get(i, ()):
                    assume(np.all(arr != value))

            # forward pass of the function
            out = self.op(*arrs, **kwargs)

            # gradient to be backpropped through this operation
            grad = data.draw(
                hnp.arrays(
                    shape=out.shape,
                    dtype=float,
                    elements=st.floats(-10, 10),
                    unique=True,
                ),
                label="grad",
            )
            grad_copy = copy(grad)  # keep a copy to check for later mutations

            # compute analytic derivatives via mygrad-backprop
            if any(out.shape != i.shape for i in arrs):
                # Broadcasting occurred
                # Must reduce `out` to scalar
                # first multiply by `grad` to simulate non-trivial back-prop
                (grad * out).sum().backward()
            else:
                out.backward(grad)

            if not self.finite_difference:
                # compute derivatives via numerical approximation of derivative
                # using the complex-step method
                numerical_grad = (numerical_gradient_full
                                  if self.vary_each_element else
                                  numerical_gradient)

            else:
                numerical_grad = finite_difference
            grads_numerical = numerical_grad(self.true_func,
                                             *(i.data for i in arrs),
                                             back_grad=grad,
                                             kwargs=kwargs)

            # check that the analytic and numeric derivatives match
            for n, (arr, d_num) in enumerate(zip(arrs, grads_numerical)):
                assert_allclose(
                    arr.grad,
                    d_num,
                    **self.tolerances,
                    err_msg=
                    "arr-{}: mygrad derivative and numerical derivative do not match"
                    .format(n))

                # check that none of the set derivatives is a view of `grad`
                assert not np.shares_memory(
                    arr.grad,
                    grad), "arr-{}.grad stores a view of grad".format(n)

            # check that none of the set derivatives are views of one another
            for arr_i, arr_j in combinations(arrs, 2):
                assert not np.shares_memory(
                    arr_i.grad, arr_j.grad
                ), "two input arrays were propagated views of the same gradient"

            # verify that null_gradients works
            out.null_gradients()
            assert all(i.grad is None for i in arrs), "null_gradients failed"

            # check if any of the input-arrays were mutated
            for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
                assert_array_equal(
                    arr.data,
                    arr_copy.data,
                    err_msg="arr-{} was mutated during backward prop".format(
                        n),
                )

            # check if `grad` was mutated
            assert_array_equal(
                grad,
                grad_copy,
                err_msg="`grad` was mutated during backward prop")
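
The comments in the wrapper above note that a ``kwargs`` entry may itself be a Hypothesis search strategy built from the drawn arrays. A minimal hypothetical illustration of such a mapping (the ``axis``/``keepdims`` names are illustrative only, not taken from the source):

import hypothesis.strategies as st

# hypothetical kwargs for such a factory: a callable value is treated as a
# strategy-builder and receives the drawn arrays; a plain value is passed as-is
example_kwargs = {
    "axis": lambda *arrs: st.integers(0, max(arrs[0].ndim - 1, 0)),
    "keepdims": True,
}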
Example #7
def test_init_data_rand(x):
    assert_equal(actual=Tensor(x).data, desired=x)
Example #8
def test_0d_iter():
    x = Tensor(3)
    with pytest.raises(TypeError):
        sum(x)
Example #9
        def wrapper(shapes: hnp.BroadcastableShapes, constant,
                    data: st.DataObject):
            self.index_to_arr_shapes.update((k, v) for k, v in zip(
                sorted(self.missing_shapes), shapes.input_shapes))

            # list of drawn arrays to feed to functions
            arrs = data.draw(
                st.tuples(*(self.arrays(i) for i in range(self.num_arrays))),
                label="arrays",
            )

            # list of array-copies to check for mutation
            arr_copies = tuple(copy(arr) for arr in arrs)

            if callable(self.kwargs):
                kwargs = data.draw(self.kwargs(*arrs))
                if not isinstance(kwargs, dict):
                    raise TypeError(
                        "`kwargs` was a search strategy. This needs to draw dictionaries, "
                        "instead drew: {}".format(kwargs))
            else:
                # set or draw keyword args to be passed to functions
                kwargs = {
                    k: (data.draw(v(*arrs), label="kwarg: {}".format(k))
                        if callable(v) else v)
                    for k, v in self.kwargs.items()
                }

            if self.assumptions is not None:
                assume(self.assumptions(*arrs, **kwargs))

            for i, arr in enumerate(
                    arrs):  # ensure arrays don't contain forbidden values
                for value in self.index_to_no_go.get(i, ()):
                    assume(np.all(arr != value))

            # execute mygrad and "true" functions. Compare outputs and check mygrad behavior
            o = self.op(*(Tensor(i) for i in arrs),
                        **kwargs,
                        constant=constant)
            tensor_out = o.data
            true_out = self.true_func(*arrs, **kwargs)

            assert isinstance(
                o, Tensor
            ), "`mygrad_func` returned type {}, should return `mygrad.Tensor`".format(
                type(o))
            assert (
                o.constant is constant
            ), "`mygrad_func` returned tensor.constant={}, should be constant={}".format(
                o.constant, constant)

            assert_allclose(
                actual=tensor_out,
                desired=true_out,
                err_msg=
                "`mygrad_func(x)` and `true_func(x)` produce different results",
                **self.tolerances,
            )

            for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
                assert_array_equal(
                    arr,
                    arr_copy,
                    err_msg="arr-{} was mutated during forward prop".format(n),
                )
Example #10
    def __init__(self, slope=0.1):
        """ Parameters
            ----------
            slope : Real, optional (default=0.1)
                The initial value to use for the slope."""
        self.slope = Tensor(slope)
Example #11
def uniform(*shape,
            lower_bound=0,
            upper_bound=1,
            dtype=np.float32,
            constant=False):
    """ Initialize a ``Tensor`` by drawing from a uniform distribution.

    Parameters
    ----------
    shape : Sequence[int]
        The output shape.

    lower_bound : Real, optional (default=0)
        Lower bound on the output interval, inclusive.

    upper_bound : Real, optional (default=1)
        Upper bound on the output interval, exclusive.

    dtype : data-type, optional (default=float32)
        The data type of the output tensor; must be a floating-point type.

    constant : bool, optional (default=False)
        If `True`, the returned tensor is a constant (it does not
        back-propagate a gradient).

    Returns
    -------
    mygrad.Tensor, shape=``shape``
        A Tensor, with values drawn uniformly from [lower_bound, upper_bound).

    Examples
    --------
    >>> from mygrad.nnet.initializers import uniform
    >>> uniform(2, 3)
    Tensor([[0.8731087 , 0.30872548, 0.75528544],
            [0.55404514, 0.7652222 , 0.4955769 ]], dtype=float32)

    >>> uniform(2, 2, lower_bound=-1, upper_bound=3)
    Tensor([[ 1.9151938 , -0.28968155],
            [-0.01240687, -0.24448799]], dtype=float32)

    >>> uniform(5, dtype="float16", constant=True)
    Tensor([0.5186, 0.1481, 0.3745, 0.941 , 0.331 ], dtype=float16)
    """
    if lower_bound >= upper_bound:
        raise ValueError(
            "Uniform lower bound must be less than the upper bound")
    if not np.issubdtype(dtype, np.floating):
        raise ValueError(
            "Uniform initialization requires a floating-point dtype")

    if len(shape) == 1:
        shape = shape[0]

    if isinstance(lower_bound, Tensor):
        lower_bound = lower_bound.item()
    if isinstance(upper_bound, Tensor):
        upper_bound = upper_bound.item()

    return Tensor(np.random.uniform(lower_bound, upper_bound, shape),
                  dtype=dtype,
                  constant=constant)
Example #12
def test_glorot_normal(shape, gain, dtype, constant):
    tensor = he_normal(shape, gain=Tensor(gain), dtype=dtype, constant=constant)
    assert tensor.shape == shape
    assert tensor.dtype == dtype
    assert tensor.constant == constant
Example #13
def test_all_tensor_creation(constant, dtype):
    x = np.array([1, 2, 3])

    e = empty((3, 2), dtype=dtype, constant=constant)
    assert e.shape == (3, 2)
    assert e.constant is constant

    e = empty_like(e, dtype=dtype, constant=constant)
    assert e.shape == (3, 2)
    assert e.constant is constant

    check_tensor_array(eye(3, dtype=dtype, constant=constant),
                       np.eye(3, dtype=dtype), constant)

    check_tensor_array(
        identity(3, dtype=dtype, constant=constant),
        np.identity(3, dtype=dtype),
        constant,
    )

    check_tensor_array(
        ones((4, 5, 6), dtype=dtype, constant=constant),
        np.ones((4, 5, 6), dtype=dtype),
        constant,
    )

    check_tensor_array(
        ones_like(x, dtype=dtype, constant=constant),
        np.ones_like(x, dtype=dtype),
        constant,
    )

    check_tensor_array(
        ones_like(Tensor(x), dtype=dtype, constant=constant),
        np.ones_like(x, dtype=dtype),
        constant,
    )

    check_tensor_array(
        zeros((4, 5, 6), dtype=dtype, constant=constant),
        np.zeros((4, 5, 6), dtype=dtype),
        constant,
    )

    check_tensor_array(
        zeros_like(x, dtype=dtype, constant=constant),
        np.zeros_like(x, dtype=dtype),
        constant,
    )

    check_tensor_array(
        zeros_like(Tensor(x), dtype=dtype, constant=constant),
        np.zeros_like(x, dtype=dtype),
        constant,
    )

    check_tensor_array(
        full((4, 5, 6), 5.0, dtype=dtype, constant=constant),
        np.full((4, 5, 6), 5.0, dtype=dtype),
        constant,
    )

    check_tensor_array(
        full_like(x, 5.0, dtype=dtype, constant=constant),
        np.full_like(x, 5.0, dtype=dtype),
        constant,
    )

    check_tensor_array(
        full_like(Tensor(x), 5.0, dtype=dtype, constant=constant),
        np.full_like(x, 5.0, dtype=dtype),
        constant,
    )

    check_tensor_array(
        arange(3, 7, dtype=dtype, constant=constant),
        np.arange(3, 7, dtype=dtype),
        constant,
    )

    check_tensor_array(
        linspace(3, 7, dtype=dtype, constant=constant),
        np.linspace(3, 7, dtype=dtype),
        constant,
    )

    check_tensor_array(
        logspace(3, 7, dtype=dtype, constant=constant),
        np.logspace(3, 7, dtype=dtype),
        constant,
    )

    check_tensor_array(
        geomspace(3, 7, dtype=dtype, constant=constant),
        np.geomspace(3, 7, dtype=dtype),
        constant,
    )
Example #14
def test_contains():
    t = Tensor([[0, 1, 2], [3, 4, 5]])
    assert 0 in t and 0 in t.data
    assert [0, 1, 2] in t and [0, 1, 2] in t.data
    assert -1 not in t and -1 not in t.data
Example #15
    def create_node(self, value, constant):
        n = Node(value, constant=constant)
        t = Tensor(value, constant=constant)
        self.node_list.append((n, t))
        return n, t
Example #16
        def wrapper(shapes: hnp.BroadcastableShapes, constant,
                    data: st.DataObject):
            self.index_to_arr_shapes.update((k, v) for k, v in zip(
                sorted(self.missing_shapes), shapes.input_shapes))

            # list of drawn arrays to feed to functions
            arrs = data.draw(
                st.tuples(*(self.arrays(i) for i in range(self.num_arrays))),
                label="arrays",
            )

            # list of array-copies to check for mutation
            arr_copies = tuple(copy(arr) for arr in arrs)

            if callable(self.kwargs):
                kwargs = data.draw(self.kwargs(*arrs))
                if not isinstance(kwargs, dict):
                    raise TypeError(
                        "`kwargs` was a search strategy. This needs to draw dictionaries, "
                        f"instead drew: {kwargs}")
            else:
                # The keyword args to be passed to `self.op`. If any provided argument is callable
                # it is assumed to be a hypothesis search strategy, and all of the drawn arrays will
                # be passed to the strategy, in order to draw a value for that keyword argument.
                # Otherwise the provided value is used as-is.
                kwargs = {
                    k: (data.draw(v(
                        *arrs), label=f"kwarg: {k}") if callable(v) else v)
                    for k, v in self.kwargs.items()
                }

            if self.assumptions is not None:
                assume(self.assumptions(*arrs, **kwargs))

            for i, arr in enumerate(
                    arrs):  # ensure arrays don't contain forbidden values
                for value in self.index_to_no_go.get(i, ()):
                    assume(np.all(arr != value))

            if self.permit_0d_array_as_float:
                # potentially cast a 0D array as a float
                arrs = tuple(
                    arr.item()
                    if arr.ndim == 0
                    and data.draw(st.booleans(), label=f"arr-{n} to float")
                    else arr
                    for n, arr in enumerate(arrs))

            # execute mygrad and "true" functions. Compare outputs and check mygrad behavior
            tensor_constants = data.draw(st.tuples(*[st.booleans()] *
                                                   len(arrs)),
                                         label="tensor_constants")
            o = self.op(
                *(Tensor(i, constant=c)
                  for i, c in zip(arrs, tensor_constants)),
                **kwargs,
                constant=constant,
            )
            tensor_out = o.data
            true_out = self.true_func(*arrs, **kwargs)

            assert isinstance(
                o, Tensor
            ), f"`mygrad_func` returned type {type(o)}, should return `mygrad.Tensor`"
            assert o.constant is constant or bool(sum(tensor_constants)), (
                f"`mygrad_func` returned tensor.constant={o.constant}, "
                f"should be constant={constant or  bool(sum(tensor_constants))}"
            )

            assert_allclose(
                actual=tensor_out,
                desired=true_out,
                err_msg=
                "`mygrad_func(x)` and `true_func(x)` produce different results",
                **self.tolerances,
            )

            for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
                assert_array_equal(
                    arr,
                    arr_copy,
                    err_msg=f"arr-{n} was mutated during forward prop",
                )
Example #17
        def wrapper(x, constant, data):
            arrs = [x]  # list of drawn arrays to feed to functions

            for i in range(
                    1, self.num_arrays
            ):  # draw additional arrays according to `num_arrays`
                y = data.draw(self.gen_other_array(x, i),
                              label="array-{}".format(i))
                arrs.append(y)

            # list of array-copies to check for mutation
            arr_copies = [copy(arr) for arr in arrs]

            if callable(self.kwargs):
                kwargs = data.draw(self.kwargs(*arrs))
                if not isinstance(kwargs, dict):
                    raise TypeError(
                        "`kwargs` was a search strategy. This needs to draw dictionaries, "
                        "instead drew: {}".format(kwargs))
            else:
                # set or draw keyword args to be passed to functions
                kwargs = {
                    k: (data.draw(v(*arrs), label="kwarg: {}".format(k))
                        if callable(v) else v)
                    for k, v in self.kwargs.items()
                }

            if self.assumptions is not None:
                assume(self.assumptions(*arrs, **kwargs))

            for i, arr in enumerate(
                    arrs):  # ensure arrays don't contain forbidden values
                for value in self.index_to_no_go.get(i, ()):
                    assume(np.all(arr != value))

            # execute mygrad and "true" functions. Compare outputs and check mygrad behavior
            o = self.op(*(Tensor(i) for i in arrs),
                        **kwargs,
                        constant=constant)
            tensor_out = o.data
            true_out = self.true_func(*arrs, **kwargs)

            assert isinstance(
                o, Tensor
            ), "`mygrad_func` returned type {}, should return `mygrad.Tensor`".format(
                type(o))
            assert (
                o.constant is constant
            ), "`mygrad_func` returned tensor.constant={}, should be constant={}".format(
                o.constant, constant)

            assert_allclose(
                actual=tensor_out,
                desired=true_out,
                err_msg=
                "`mygrad_func(x)` and `true_func(x)` produce different results",
                atol=1e-7,
            )

            for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
                assert_array_equal(
                    arr,
                    arr_copy,
                    err_msg="arr-{} was mutated during forward prop".format(n),
                )
Example #18
        def wrapper(shapes: hnp.BroadcastableShapes, data: st.DataObject):
            self.index_to_arr_shapes.update((k, v) for k, v in zip(
                sorted(self.missing_shapes), shapes.input_shapes))

            # list of drawn arrays to feed to functions
            arrs = data.draw(
                st.tuples(*(self.arrays(i).map(Tensor)
                            for i in range(self.num_arrays)
                            if i not in self.arrs_from_kwargs)).map(list),
                label="arrays",
            )

            if callable(self.kwargs):
                kwargs = data.draw(self.kwargs(*arrs), label="kwargs")
                if not isinstance(kwargs, dict):
                    raise TypeError(
                        "`kwargs` was a search strategy. This needs to draw dictionaries, "
                        f"instead drew: {kwargs}")
            else:
                # The keyword args to be passed to `self.op`. If any provided argument is callable
                # it is assumed to be a hypothesis search strategy, and all of the drawn arrays will
                # be passed to the strategy, in order to draw a value for that keyword argument.
                # Otherwise the provided value is used as-is.
                kwargs = {
                    k: (data.draw(v(
                        *arrs), label=f"kwarg: {k}") if callable(v) else v)
                    for k, v in self.kwargs.items()
                }

            if not set(self.arrs_from_kwargs.values()) <= set(kwargs):
                raise ValueError(
                    f"`arrs_from_kwargs` specifies kwargs that aren't present: "
                    f"{', '.join(v for v in self.arrs_from_kwargs.values() if v not in kwargs)}"
                )

            for arr_id, key in sorted(self.arrs_from_kwargs.items(),
                                      key=lambda x: x[0]):
                v = kwargs.pop(key)
                if not isinstance(v, (np.ndarray, Tensor)):
                    raise ValueError(
                        f"kwarg {key} is to be used as array-{arr_id}, but is neither "
                        f"an array nor a tensor, got {v}")

                arrs.insert(arr_id, Tensor(v))

            arrs = tuple(arrs)

            arr_copies = tuple(copy(arr) for arr in arrs)

            if self.assumptions is not None:
                assume(self.assumptions(*arrs, **kwargs))

            for i, arr in enumerate(
                    arrs):  # ensure arrays don't contain forbidden values
                for value in self.index_to_no_go.get(i, ()):
                    assume(np.all(arr != value))

            # forward pass of the function
            out = self.op(*arrs, **kwargs)

            # gradient to be backpropped through this operation
            grad = data.draw(
                hnp.arrays(
                    shape=out.shape,
                    dtype=float,
                    elements=st.floats(-10, 10),
                    unique=True,
                ),
                label="grad",
            )
            grad_copy = copy(grad)  # keep a copy to check for later mutations

            # compute analytic derivatives via mygrad-backprop
            if any(out.shape != i.shape for i in arrs):
                # Broadcasting occurred
                # Must reduce `out` to scalar
                # first multiply by `grad` to simulate non-trivial back-prop
                (grad * out).sum().backward()
            else:
                out.backward(grad)

            if not self.use_finite_difference:
                # compute derivatives via numerical approximation of derivative
                # using the complex-step method
                numerical_grad = (numerical_gradient_full
                                  if self.vary_each_element else
                                  numerical_gradient)

            else:
                numerical_grad = finite_difference
            grads_numerical = numerical_grad(self.true_func,
                                             *(i.data for i in arrs),
                                             back_grad=grad,
                                             kwargs=kwargs)

            # check that the analytic and numeric derivatives match
            for n, (arr, d_num) in enumerate(zip(arrs, grads_numerical)):
                assert arr.grad is not None, f"arr-{n} grad is None, expected {d_num}"
                assert_allclose(
                    arr.grad,
                    d_num,
                    **self.tolerances,
                    err_msg=
                    f"arr-{n}: mygrad derivative and numerical derivative do not match",
                )

                # check that none of the set derivatives is a view of `grad`
                assert not np.shares_memory(
                    arr.grad, grad), f"arr-{n}.grad stores a view of grad"

            # check that none of the set derivatives are views of one another
            for arr_i, arr_j in combinations(arrs, 2):
                assert not np.shares_memory(
                    arr_i.grad, arr_j.grad
                ), "two input arrays were propagated views of the same gradient"

            # verify that null_gradients works
            out.null_gradients()
            assert all(i.grad is None for i in arrs), "null_gradients failed"

            # check if any of the input-arrays were mutated
            for n, (arr, arr_copy) in enumerate(zip(arrs, arr_copies)):
                assert_array_equal(
                    arr.data,
                    arr_copy.data,
                    err_msg=f"arr-{n} was mutated during backward prop",
                )

            # check if `grad` was mutated
            assert_array_equal(
                grad,
                grad_copy,
                err_msg="`grad` was mutated during backward prop")
Example #19
    with raises(TypeError):
        int(nd_tensor)

    with raises(ValueError):
        nd_tensor.item()

    for size1_tensor in (Tensor(1), Tensor([[1]])):
        assert float(size1_tensor) == 1.0
        assert int(size1_tensor) == 1
        assert size1_tensor.item() == 1.0


@pytest.mark.parametrize(
    ("tensor", "repr_"),
    [
        (Tensor(1), "Tensor(1)"),
        (Tensor([1]), "Tensor([1])"),
        (Tensor([1, 2]), "Tensor([1, 2])"),
        (
            mg.arange(9).reshape((3, 3)),
            "Tensor([[0, 1, 2],\n        [3, 4, 5],\n        [6, 7, 8]])",
        ),
    ],
)
def test_repr(tensor, repr_):
    assert repr(tensor) == repr_


@given(constant=st.booleans())
def test_invalid_gradient_raises(constant: bool):
    x = Tensor(3, constant=constant) * 2
Example #20
def test_batchnorm(x, data):
    # optionally draw affine parameters
    gamma = data.draw(st.one_of(
        hnp.arrays(shape=x.shape[1:2],
                   dtype=float,
                   elements=st.floats(-10, 10)), st.none()),
                      label="gamma")
    beta = data.draw(st.one_of(
        hnp.arrays(shape=x.shape[1:2],
                   dtype=float,
                   elements=st.floats(-10, 10)), st.none()),
                     label="beta")
    x_orig = np.copy(x)

    gamma_orig = np.copy(gamma) if gamma is not None else None
    beta_orig = np.copy(beta) if beta is not None else None

    t1 = Tensor(x)
    t2 = Tensor(x)

    g1 = Tensor(gamma) if gamma is not None else None
    g2 = Tensor(gamma) if gamma is not None else None

    b1 = Tensor(beta) if beta is not None else None
    b2 = Tensor(beta) if beta is not None else None

    y1 = simple_batchnorm(t1, g1, b1, eps=1e-7)
    y2 = batchnorm(t2, gamma=g2, beta=b2, eps=1e-7)

    assert_allclose(actual=y2.data, desired=y1.data, atol=1e-7, rtol=1e-7)
    grad = data.draw(hnp.arrays(shape=y2.shape,
                                dtype=t2.dtype,
                                elements=st.floats(-10, 10)),
                     label='grad')
    grad_orig = np.copy(grad)

    y1.backward(grad)
    y2.backward(grad)

    assert_allclose(actual=t2.grad, desired=t1.grad, atol=1e-4, rtol=1e-4)

    if beta is not None:
        assert_allclose(actual=b2.grad, desired=b1.grad, atol=1e-4, rtol=1e-4)
    else:
        assert b2 is None

    if gamma is not None:
        assert_allclose(actual=g2.grad, desired=g1.grad, atol=1e-4, rtol=1e-4)
    else:
        assert g2 is None

    for n, (o, c) in enumerate(
            zip((x, gamma, beta, grad),
                (x_orig, gamma_orig, beta_orig, grad_orig))):
        if o is None or c is None:
            assert o is c, f"{('x', 'gamma', 'beta', 'grad')[n]}"
        else:
            assert_array_equal(o,
                               c,
                               err_msg=f"{('x', 'gamma', 'beta', 'grad')[n]}")

    if gamma is not None and beta is not None:
        assert not np.shares_memory(g2.grad, b2.grad)
    assert not np.shares_memory(grad, t2.grad)

    y2.null_gradients()
    assert t2.grad is None

    if gamma is not None:
        assert g2.grad is None

    if beta is not None:
        assert b2.grad is None
Example #21
def test_input_type_checking(data, constant, creator):
    with raises(TypeError):
        Tensor(data, constant=constant, _creator=creator)
Example #22
def test_redundant_args():
    """
    Test behavior for when einsum receives redundant inputs. An optimization
    was added such that einsum will only compute the gradient for such an entry
    once and scale it accordingly.
    """
    a = mg.arange(4).reshape(2, 2)
    a_copy = copy(a)

    # check standard summation
    o = einsum("ij,ij", a, a)
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum("ij,ij", a_copy, a_copy * 1)
    assert len(o.creator.cache) == 2
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check standard summation using alt signature
    o = einsum(a, [0, 1], a, [0, 1])
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum(a_copy, [0, 1], a_copy * 1, [0, 1])
    assert len(o.creator.cache) == 2
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check matmul (no redundant indices)
    o = einsum("ij,jk", a, a)
    assert len(o.creator.cache) == 2
    o.sum().backward()

    o = a_copy @ a_copy
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    # check traces
    o = einsum("ii,ii", a, a)
    assert len(o.creator.cache) == 1
    o.sum().backward()

    o = einsum("ii,ii", a_copy, a_copy * 1)
    assert len(o.creator.cache) == 2
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)

    a = Tensor(np.arange(4).reshape(2, 2))
    a_copy = copy(a)

    b = Tensor(-1 * np.arange(2).reshape(2, 1))
    b_copy = copy(b)

    # check broadcasting and multiply-redundant input tensors
    # with distinct einsum labels
    o = einsum("ii,ii,i...,i...,...i,...i", a, a, b, b, a, a)
    assert len(o.creator.cache) == 3
    o.sum().backward()

    o = einsum(
        "ii,ii,i...,i...,...i,...i",
        a_copy,
        a_copy * 1,
        b_copy,
        b_copy * 1,
        a_copy,
        1 * a_copy,
    )
    assert len(o.creator.cache) == 6
    o.sum().backward()
    assert_allclose(a.grad, a_copy.grad)
    assert_allclose(b.grad, b_copy.grad)
Example #23
def test_clear_graph(x, y, z):
    x_orig = x
    y_orig = y
    z_orig = z

    x = Tensor(x)
    y = Tensor(y)
    z = Tensor(z)

    f = x * y + z
    g = x + z * f * f

    # check side effects
    unused = 2 * g - f
    w = 1 * f
    assert unused is not None

    g.backward()
    assert_allclose(f.grad, 2 * z.data * f.data)
    assert_allclose(x.grad, 1 + 2 * z.data * f.data * y.data)
    assert_allclose(y.grad, 2 * z.data * f.data * x.data)
    assert_allclose(z.grad, f.data**2 + z.data * 2 * f.data)
    assert w.grad is None

    assert_array_equal(x.data,
                       x_orig,
                       err_msg="x was mutated during the operation")
    assert_array_equal(y.data,
                       y_orig,
                       err_msg="y was mutated during the operation")
    assert_array_equal(z.data,
                       z_orig,
                       err_msg="z was mutated during the operation")

    # null-gradients without clearing the graph, confirm that backprop still works
    g.null_gradients(clear_graph=False)
    g.backward()
    assert_allclose(f.grad, 2 * z.data * f.data)
    assert_allclose(x.grad, 1 + 2 * z.data * f.data * y.data)
    assert_allclose(y.grad, 2 * z.data * f.data * x.data)
    assert_allclose(z.grad, f.data**2 + z.data * 2 * f.data)
    assert w.grad is None

    assert_array_equal(x.data,
                       x_orig,
                       err_msg="x was mutated during the operation")
    assert_array_equal(y.data,
                       y_orig,
                       err_msg="y was mutated during the operation")
    assert_array_equal(z.data,
                       z_orig,
                       err_msg="z was mutated during the operation")

    g.null_gradients(clear_graph=False)
    w.backward()
    assert_allclose(x.grad, y.data)
    assert_allclose(y.grad, x.data)
    assert_allclose(z.grad, np.array(1.0))

    w.clear_graph()
    assert_allclose(x.grad, y.data)
    assert_allclose(y.grad, x.data)
    assert_allclose(z.grad, np.array(1.0))
    assert len(g._ops) > 0
    assert g.creator is not None
    assert len(x._ops) == 0
    assert len(y._ops) == 0
    assert len(z._ops) == 0
    assert len(f._ops) == 0
    assert x.creator is None
    assert y.creator is None
    assert z.creator is None
    assert f.creator is None

    with raises(InvalidBackprop):
        g.backward()
Example #24
        def wrapper(data, x):
            """ Performs hypothesis unit test for checking back-propagation
                through a `mygrad` op.

                Raises
                ------
                AssertionError"""

            y = data.draw(
                hnp.arrays(shape=broadcastable_shape(x.shape),
                           dtype=float,
                           elements=st.floats(*self.ybnds)))

            for value in self.x_no_go:
                assume(np.all(x != value))

            for value in self.y_no_go:
                assume(np.all(y != value))

            S = st.SearchStrategy
            kwargs = {
                k: (data.draw(v) if isinstance(v, S) else v)
                for k, v in self.kwargs.items()
            }

            # gradient to be backpropped through this operation
            x = Tensor(x)
            y = Tensor(y)
            out = self.op(x, y)

            grad = data.draw(
                hnp.arrays(shape=out.shape,
                           dtype=float,
                           elements=st.floats(1, 10)))

            x_copy = copy(x)
            y_copy = copy(y)
            grad_copy = copy(grad)
            if any(out.shape != i.shape for i in (x, y)):
                # broadcasting occurred, must reduce `out` to scalar
                # first multiply by `grad` to simulate non-trivial back-prop
                (grad * out).sum().backward()
            else:
                out.backward(grad)

            numerical_grad = numerical_gradient if self.func_is_mapping else numerical_gradient_full
            if self.func_is_mapping:
                dx, dy = numerical_grad(self.func,
                                        x.data,
                                        y.data,
                                        back_grad=grad,
                                        kwargs=kwargs)
            else:
                dx, dy = numerical_gradient_full(self.func,
                                                 x.data,
                                                 y.data,
                                                 back_grad=grad,
                                                 kwargs=kwargs,
                                                 as_decimal=self.as_decimal)

            assert_allclose(
                x.grad,
                dx,
                **self.tolerances,
                err_msg=
                "x: numerical derivative and mygrad derivative do not match")
            assert_allclose(
                y.grad,
                dy,
                **self.tolerances,
                err_msg=
                "y: numerical derivative and mygrad derivative do not match")

            assert not np.shares_memory(
                x.grad, grad), "A view of `grad` was back-propped"
            assert not np.shares_memory(
                y.grad, grad), "A view of `grad` was back-propped"
            out.null_gradients()
            assert all(i.grad is None for i in (x, y)), "null_gradients failed"

            assert_array_equal(x,
                               x_copy,
                               err_msg="`x` was mutated during backward prop")
            assert_array_equal(y,
                               y_copy,
                               err_msg="`y` was mutated during backward prop")
            assert_array_equal(
                grad,
                grad_copy,
                err_msg="`grad` was mutated during backward prop")
Example #25
def test_contains(element):
    t = Tensor([[0, 1, 2], [3, 4, 5]])
    assert (element in t) is (element in t.data)
Example #26
def test_input_validation():
    x = Tensor([[1, 2]])

    with raises(TypeError):
        transpose(x, (0, ), 1)
Example #27
def test_conv_ND_bkwd(data, shape, num_filters, num_batch, num_channel):
    """ Test conv-backprop 1D-3D with various strides and dilations."""
    img_shape = (num_batch, num_channel) + shape

    padding = data.draw(
        st.integers(0, 2) | st.tuples(*[st.integers(0, 2)] * len(shape)),
        label="padding",
    )

    if isinstance(padding, tuple):
        shape = tuple(s + 2 * p for s, p in zip(shape, padding))
    else:
        shape = tuple(s + 2 * padding for s in shape)

    win_dim = len(shape)
    shape = (num_batch, num_channel) + shape
    win_shape = data.draw(st.tuples(*(st.integers(1, s)
                                      for s in shape[-win_dim:])),
                          label="win_shape")
    kernel_shape = (num_filters, shape[1], *win_shape)

    stride = data.draw(st.tuples(*(st.integers(1, s)
                                   for s in shape[-win_dim:])),
                       label="stride")

    max_dilation = np.array(shape[-win_dim:]) // win_shape
    dilation = data.draw(st.tuples(*(st.integers(1, s) for s in max_dilation)),
                         label="dilation")
    conf = dict(stride=stride, dilation=dilation, padding=padding)

    # skip invalid data/kernel/stride/dilation combinations
    assume(
        get_outshape(shape[2:], kernel_shape[2:], stride, dilation)
        is not None)

    kernels = data.draw(
        hnp.arrays(dtype=float,
                   shape=kernel_shape,
                   elements=st.floats(-10, 10)),
        label="kernels",
    )
    x = data.draw(hnp.arrays(dtype=float,
                             shape=img_shape,
                             elements=st.floats(-10, 10)),
                  label="x")

    x = Tensor(x)
    kernels = Tensor(kernels)

    out = conv_nd(x, kernels, **conf)
    grad = data.draw(
        hnp.arrays(shape=out.shape,
                   dtype=float,
                   elements=st.floats(-10, 10),
                   unique=True),
        label="grad",
    )

    out.backward(grad)
    grads_numerical = numerical_gradient_full(_conv_nd,
                                              *(i.data for i in (x, kernels)),
                                              back_grad=grad,
                                              kwargs=conf)

    for n, (arr, d_num) in enumerate(zip((x, kernels), grads_numerical)):
        assert_allclose(
            arr.grad,
            d_num,
            atol=1e-4,
            rtol=1e-4,
            err_msg=
            "arr-{}: numerical derivative and mygrad derivative do not match".
            format(n),
        )
Example #28
data = np.array(data)
ytrain = data[:, -1].astype(int)  # class labels, assumed to lie in {1, ..., 5}
oneHotEnc = np.zeros((len(ytrain), 5))
for i in range(len(ytrain)):
    oneHotEnc[i][ytrain[i] - 1] = 1  # one-hot encode the labels
ytest = oneHotEnc[10000:]
ytrain = oneHotEnc[:10000]
xtrain = data[:, :-1]
xtest = xtrain[10000:]
xtrain = xtrain[:10000]
del data

D = len(xtrain[0])
K = 5

W = Tensor(np.random.randn(D, K))
b = Tensor(np.zeros((K, ), dtype=W.dtype))

l = []
acc = []

params = [b, W]
rate = .1
y = np.argmax(ytrain, axis=1)
for i in range(1000):
    o = dense(xtrain, W) + b

    loss = multiclass_hinge(o, y)

    l.append(loss.data.item())
    loss.backward()
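
    # Assumed continuation (not part of the excerpt above): a plain
    # gradient-descent update using the `rate` and `params` defined earlier.
    for param in params:
        param.data -= rate * param.grad  # in-place SGD step on the raw array
    loss.null_gradients()  # clear gradients before the next iteration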