def negative(a, where=True, constant=False):
    """Negate a tensor element-wise: ``f(a) -> -a``.

    Parameters
    ----------
    a : array_like
        The operand to negate.

    where : numpy.ndarray
        A boolean array that is broadcast together with the operand. The
        function is calculated wherever the value is True; positions holding
        False leave the corresponding output value alone.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor
    """
    op_kwargs = {"where": where}
    return Tensor._op(Negative, a, op_kwargs=op_kwargs, constant=constant)
def logaddexp(a, b, constant=False):
    """Logarithm of the sum of exponentials: ``f(a, b) -> log(exp(a) + exp(b))``.

    Parameters
    ----------
    a : array_like

    b : array_like

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor

    Notes
    -----
    In statistics, the probabilities of events can be so small that they fall
    outside the range of normal floating point numbers, so their logarithms
    are stored instead. This function adds probabilities that are stored in
    that logarithmic form.
    """
    operands = (a, b)
    return Tensor._op(Logaddexp, *operands, constant=constant)
def relu(x, constant=False):
    """Apply the rectified linear unit activation function, element-wise::

        f(x) = {x,  x > 0
                0,  x <= 0 }

    Parameters
    ----------
    x : array_like
        The input; relu acts on each element of ``x`` independently.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor

    Examples
    --------
    >>> import mygrad as mg
    >>> from mygrad.nnet import relu
    >>> x = mg.linspace(-5, 5, 5)
    >>> x
    Tensor([-5. , -2.5,  0. ,  2.5,  5. ])
    >>> relu(x)
    Tensor([-0. , -0. ,  0. ,  2.5,  5. ])
    >>> relu(x).backward()
    >>> x.grad  # d(relu(x))/dx
    array([0., 0., 0., 1., 1.])
    """
    activated = Tensor._op(ReLu, x, constant=constant)
    return activated
def add_sequence(*variables, constant=False):
    """Sum a sequence of N tensors: ``f(a, b, ...) -> a + b + ...``.

    Parameters
    ----------
    variables : array_like
        The operands to be added together; at least two are required.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor

    Raises
    ------
    ValueError
        Fewer than two inputs were provided.

    Notes
    -----
    Back-propagating through this single operation is more efficient than
    back-propagating through a computational graph built from the N-1
    corresponding pairwise additions."""
    num_inputs = len(variables)
    if num_inputs >= 2:
        return Tensor._op(AddSequence, *variables, constant=constant)
    raise ValueError(
        f"`add_sequence` requires at least two inputs, got {num_inputs} inputs"
    )
def test_transpose(x, data):
    """Compare `transpose` against `np.transpose` for the forward pass and
    against a numerically-computed gradient for the backward pass."""
    axes = data.draw(valid_axes(x.ndim), label="axes")
    # `axes` may be None; otherwise only keep draws that name every axis of `x`
    assume(axes is None or len(axes) == x.ndim)

    x_arr = Tensor(np.copy(x))
    o = transpose(x_arr, axes, constant=False)

    grad_strategy = hnp.arrays(
        shape=o.shape, dtype=float, elements=st.floats(1, 10), unique=True
    )
    grad = data.draw(grad_strategy, label="grad")
    o.backward(grad)

    def numpy_transpose(x):
        return np.transpose(x, axes)

    # forward pass
    assert_allclose(o.data, numpy_transpose(x))

    # backward pass
    (dx,) = numerical_gradient_full(numpy_transpose, x, back_grad=grad)
    assert_allclose(x_arr.grad, dx)

    # `constant=True` must yield a constant output; `x_arr` stays non-constant
    out = transpose(x, constant=True)
    assert out.constant and not x_arr.constant
def arange(stop, start=0, step=1, dtype=None, constant=False):
    """ Return a Tensor with evenly-spaced values within a given interval.

        Values are generated within [start, stop). Note that for non-integer steps,
        results may be inconsistent; you are better off using `linspace` instead.

        The two bounds are treated as an unordered pair: they are swapped, if
        needed, so that the sequence proceeds in the direction given by the sign
        of `step`. Thus ``arange(2, 5)`` and ``arange(5, 2)`` both produce
        ``[2, 3, 4]``, while ``arange(5, 0, -1)`` produces ``[5, 4, 3, 2, 1]``.

        Parameters
        ----------
        stop : Real
            The end of the interval, exclusive.

        start : Real, optional, default=0
            The start of the interval, inclusive.

        step : Real, optional (default=1)
            The spacing between successive values. A negative value produces a
            descending sequence.

        dtype : data-type, optional (default=None)
            The data type of the output Tensor, or None to infer from the inputs.

        constant : bool, optional (default=False)
            Whether the output Tensor is a constant Tensor.

        Returns
        -------
        Tensor
            A Tensor of evenly-spaced values in [start, stop).
    """
    # A descending sequence needs start > stop, an ascending one start < stop.
    # Previously the bounds were always ordered ascending, so a negative step
    # could only ever yield an empty Tensor.
    descending = step is not None and step < 0
    out_of_order = start < stop if descending else start > stop
    if out_of_order:
        start, stop = stop, start
    return Tensor(np.arange(start, stop, step, dtype), constant=constant)
def eye(rows, cols=None, diag_idx=0, dtype=np.float32, constant=False):
    """ Create a 2D Tensor that is 1 along a chosen diagonal and 0 everywhere else.

        Parameters
        ----------
        rows : int
            The number of rows in the output Tensor.

        cols : int, optional (default=None)
            The number of columns in the output, or None to match `rows`.

        diag_idx : int, optional (default=0)
            Which diagonal to fill. 0 is the main diagonal; a positive value
            selects an upper diagonal, a negative value a lower diagonal.

        dtype : data-type, optional (default=numpy.float32)
            The data type of the output Tensor.

        constant : bool, optional (default=False)
            Whether the output Tensor is a constant Tensor.

        Returns
        -------
        Tensor
            A tensor whose elements are 0, except for the :math:`k`-th diagonal,
            whose values are 1.
    """
    data = np.eye(rows, M=cols, k=diag_idx, dtype=dtype)
    return Tensor(data, constant=constant)
def test_scalar_only_op(a_const, a_scalar_only, b_const, b_scalar_only):
    """ op produces scalar_only result unless result is scalar. """
    a = Tensor(0, constant=a_const, _scalar_only=a_scalar_only)
    b = Tensor(0, constant=b_const, _scalar_only=b_scalar_only)
    out = Tensor._op(ScalarOnlyOp, a, b)

    # the output is expected to be scalar-only exactly when it is not a constant
    expect_scalar_only = not out.constant
    assert expect_scalar_only is out.scalar_only

    # check out.backward()
    if not expect_scalar_only:
        out.backward()  # a, b, out are const (nothing computed)
        return

    with raises(Exception):
        out.backward()
def test_identical_inputs():
    """Back-propagate through additions whose two operands are the same tensor."""
    base = Tensor(2.0, constant=False)
    doubled = base + base
    quadrupled = doubled + doubled  # quadrupled = 4 * base
    quadrupled.backward(1.0)

    assert quadrupled.data.item() == 8.0
    assert base.grad.item() == 4.0
def identity(n, dtype=np.float32, constant=False):
    """ Create the identity Tensor: a square Tensor holding 1s along its main
        diagonal and 0s everywhere else.

        Parameters
        ----------
        n : int
            The number of rows and columns in the output Tensor.

        dtype : data-type, optional (default=numpy.float32)
            The data type of the output Tensor.

        constant : bool, optional (default=False)
            If ``True``, the returned tensor is a constant (it does not
            back-propagate a gradient)

        Returns
        -------
        Tensor
            A square Tensor whose main diagonal is 1 and all other elements are 0.

        Examples
        --------
        >>> import mygrad as mg
        >>> mg.identity(3)
        Tensor([[ 1.,  0.,  0.],
                [ 0.,  1.,  0.],
                [ 0.,  0.,  1.]])
    """
    data = np.identity(n, dtype=dtype)
    return Tensor(data, constant=constant)
def test_moveaxis(x, data):
    """Compare `moveaxis` against `np.moveaxis` for the forward pass and
    against a numerically-computed gradient for the backward pass."""
    axis_strategy = valid_axes(x.ndim, permit_none=False)
    src = data.draw(axis_strategy, label="source")
    dest = data.draw(valid_axes(x.ndim, permit_none=False), label="destination")
    # source and destination must name the same number of axes
    assume(len(src) == len(dest))

    x_arr = Tensor(np.copy(x))
    o = moveaxis(x_arr, src, dest, constant=False)

    grad_strategy = hnp.arrays(
        shape=o.shape, dtype=float, elements=st.floats(1, 10), unique=True
    )
    grad = data.draw(grad_strategy, label="grad")
    o.backward(grad)

    def numpy_moveaxis(x):
        return np.moveaxis(x, src, dest)

    assert_allclose(o.data, numpy_moveaxis(x))

    (dx,) = numerical_gradient_full(
        numpy_moveaxis, x, back_grad=grad, as_decimal=True
    )
    assert_allclose(x_arr.grad, dx)
def add(a, b, constant=False):
    """Add two tensors: ``f(a, b) -> a + b``.

    Parameters
    ----------
    a : array_like

    b : array_like

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor

    Examples
    --------
    >>> import mygrad as mg
    >>> mg.add(1.0, 4.0)
    Tensor(5.0)
    >>> x1 = mg.arange(9.0).reshape((3, 3))
    >>> x2 = mg.arange(3.0)
    >>> mg.add(x1, x2)  # equivalent to `x1 + x2`
    Tensor([[  0.,   2.,   4.],
            [  3.,   5.,   7.],
            [  6.,   8.,  10.]])"""
    operands = (a, b)
    return Tensor._op(Add, *operands, constant=constant)
def test_setitem_multiple_input():
    """
    Ensures proper backprop through computational graph
    in which variable that is set on serves as multiple
    inputs to a single operation.

    Ensures that null-gradient and clear-graph works properly.
    """
    from mygrad import add_sequence

    x = Tensor([1.0])
    y = x + 0
    assert_array_equal(y.data, np.array([1.0]))

    # `y` feeds the same operation three times, and is only then set on
    o = add_sequence(y, y, y)
    y[0] = 4
    assert_array_equal(y.data, np.array([4.0]))

    f = o * y  # 3 * 4
    f.backward()

    expectations = (
        (lambda: o.data, 3.0),
        (lambda: f.data, 12.0),
        (lambda: x.grad, 12.0),
        (lambda: o.grad, 4.0),
        (lambda: y.grad, 3.0),
    )
    for fetch, expected in expectations:
        assert_array_equal(fetch(), np.array([expected]))

    f.null_gradients()
    for tensor in (x, y, o, f):
        assert tensor.grad is None and not tensor._ops and not tensor._accum_ops
def mean(x, axis=None, keepdims=False, constant=False):
    """
    Mean of tensor elements over a given axis.

    Parameters
    ----------
    x : array_like

    axis : Optional[int, Tuple[ints, ...]]
        Axis or axes along which a mean is performed.  The default,
        axis=None, will mean all of the elements of the input tensor.  If
        axis is negative it counts from the last to the first axis.

        If axis is a tuple of ints, a mean is performed on all of the axes
        specified in the tuple instead of a single axis or all the axes as
        before.

    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the input tensor.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    mean_along_axis : Tensor
        A Tensor with the same shape as `x`, with the specified
        axis/axes removed. If `x` is a 0-d tensor, or if `axis` is None,
        a 0-dim Tensor is returned.

    Examples
    --------
    >>> import mygrad as mg
    >>> import numpy as np
    >>> a = mg.Tensor([[1, 2],
    ...                [3, 4]])
    >>> mg.mean(a)
    Tensor(2.5)
    >>> mg.mean(a, axis=0)
    Tensor([ 2.,  3.])
    >>> mg.mean(a, axis=1)
    Tensor([ 1.5,  3.5])

    In single precision, `mean` can be inaccurate:

    >>> a = mg.zeros((2, 512*512), dtype=np.float32)
    >>> a[0, :] = 1.0
    >>> a[1, :] = 0.1
    >>> mg.mean(a)
    Tensor(0.54999924)

    `mean` does not accept a ``dtype`` argument. Computing the mean in
    float64 is more accurate, which is achieved by casting the input
    before taking the mean:

    >>> mg.mean(a.data.astype(np.float64))
    Tensor(0.55000000074505806)
    """
    return Tensor._op(Mean, x, op_args=(axis, keepdims), constant=constant)
def multiclass_hinge(x, y_true, hinge=1.0, constant=False):
    """Compute the average multiclass hinge loss of class scores against true labels.

    Parameters
    ----------
    x : array_like, shape=(N, K)
        The K class scores for each of the N pieces of data.

    y_true : array_like, shape=(N,)
        The correct class-indices, in [0, K), for each datum.

    hinge : float
        The size of the "hinge" outside of which a nonzero loss
        is incurred.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    The average multiclass hinge loss

    Raises
    ------
    TypeError
        `y_true` must be an integer-type array-like object

    ValueError
        `x` must be a 2-dimensional array-like object
        `y_true` must be a shape-(N,) array-like object
    """
    loss_args = (y_true, hinge)
    return Tensor._op(MulticlassHinge, x, op_args=loss_args, constant=constant)
def mean(x, axis=None, keepdims=False, constant=False):
    """
    Average the elements of a tensor along the given axis or axes.

    Parameters
    ----------
    x : array_like

    axis : Optional[int, Tuple[ints, ...]]
        Axis or axes along which the mean is taken. With the default,
        axis=None, the mean is taken over every element of the input
        tensor. A negative axis counts from the last to the first axis.

        When a tuple of ints is given, the mean is taken over all of the
        axes named in the tuple rather than over a single axis or over
        all axes.

    keepdims : bool, optional
        When True, the reduced axes are retained in the result as
        dimensions of size one, so that the result broadcasts correctly
        against the input tensor.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    mean_along_axis : Tensor
        A Tensor shaped like `x`, but with the specified axis/axes
        removed. If `x` is a 0-d tensor, or if `axis` is None, a 0-dim
        Tensor is returned."""
    reduction_args = (axis, keepdims)
    return Tensor._op(Mean, x, op_args=reduction_args, constant=constant)
def prod(a, axis=None, keepdims=False, constant=False):
    """
    Multiply together the elements of a tensor over the given axes.

    Parameters
    ----------
    a : array_like
        Input data.

    axis : Optional[int, Tuple[int, ...]]
        Axis or axes along which to operate. By default, flattened input is used.

    keepdims : bool, optional (default=False)
        When True, the reduced axes are retained in the result as
        dimensions of size one, so that the result broadcasts correctly
        against the input array.

    constant : bool, optional (default=False)
        Forwarded to the underlying operation as its ``constant`` flag.

    Returns
    -------
    product_along_axis : mygrad.Tensor
        A tensor shaped as `a` but with the specified axis removed."""
    op_kwargs = {"axis": axis, "keepdims": keepdims}
    return Tensor._op(Prod, a, op_kwargs=op_kwargs, constant=constant)
def min(x, axis=None, keepdims=False, constant=False):
    """
    Find the minimum of a tensor, either overall or along its axes.

    Parameters
    ----------
    x : array_like
        The tensor whose minimum is sought.

    axis : Optional[int, Tuple[int, ...]]
        Axis or axes along which to operate. By default, flattened input is used.

    keepdims : bool, optional
        When True, the reduced axes are retained in the result as
        dimensions of size one, so that the result broadcasts correctly
        against the original `x`.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    min : mygrad.Tensor
        Minimum of `x`. If `axis` is None, the result is a 0-D tensor."""
    op_kwargs = {"axis": axis, "keepdims": keepdims, "maxmin": "min"}
    return Tensor._op(MaxMin, x, op_kwargs=op_kwargs, constant=constant)
def test_1d_case():
    """Check `max_pool` with a length-3 pool and stride 1 against hand-written
    forward and backward results, and check that `constant` is honored."""
    raw = np.array(
        [
            [
                [17, 10, 15, 28, 25, 23],
                [44, 26, 18, 16, 39, 34],
                [5, 42, 36, 0, 2, 46],
                [30, 20, 1, 31, 35, 43],
            ],
            [
                [6, 7, 45, 27, 11, 8],
                [37, 4, 41, 22, 9, 33],
                [47, 3, 13, 32, 21, 38],
                [19, 12, 40, 24, 14, 29],
            ],
        ]
    )
    x = Tensor(raw)
    pool = (3,)
    stride = (1,)

    out = max_pool(x, pool, stride)
    # back-propagate a distinct value into each output element
    upstream = np.arange(out.data.size).reshape(out.shape)
    out.backward(upstream)

    fwd_ans = np.array(
        [
            [
                [17, 28, 28, 28],
                [44, 26, 39, 39],
                [42, 42, 36, 46],
                [30, 31, 35, 43],
            ],
            [
                [45, 45, 45, 27],
                [41, 41, 41, 33],
                [47, 32, 32, 38],
                [40, 40, 40, 29],
            ],
        ]
    )

    bkc_ans = np.array(
        [
            [
                [0., 0., 0., 6., 0., 0.],
                [4., 5., 0., 0., 13., 0.],
                [0., 17., 10., 0., 0., 11.],
                [12., 0., 0., 13., 14., 15.],
            ],
            [
                [0., 0., 51., 19., 0., 0.],
                [0., 0., 63., 0., 0., 23.],
                [24., 0., 0., 51., 0., 27.],
                [0., 0., 87., 0., 0., 31.],
            ],
        ]
    )

    assert isinstance(out, Tensor)
    assert_allclose(fwd_ans, out.data)
    assert_allclose(bkc_ans, x.grad)

    for flag in (True, False):
        assert max_pool(x, pool, stride, constant=flag).constant is flag
def subtract(a, b, constant=False):
    """ ``f(a, b) -> a - b``

        Parameters
        ----------
        a : array_like

        b : array_like

        constant : bool, optional(default=False)
            If ``True``, the returned tensor is a constant (it does not
            back-propagate a gradient)

        Returns
        -------
        mygrad.Tensor

        Examples
        --------
        >>> import mygrad as mg
        >>> mg.subtract(1.0, 4.0, constant=True)  # resulting tensor will not back-propagate a gradient
        Tensor(-3.0)
        >>> x1 = mg.arange(9.0).reshape((3, 3))
        >>> x2 = mg.arange(3.0)
        >>> mg.subtract(x1, x2)  # equivalent to `x1 - x2`
        Tensor([[ 0.,  0.,  0.],
                [ 3.,  3.,  3.],
                [ 6.,  6.,  6.]])
        """
    return Tensor._op(Subtract, a, b, constant=constant)
def empty_like(other, dtype=None, constant=False):
    """ Return a new Tensor of the same shape and type as the given array.

        Parameters
        ----------
        other : Union[Tensor, ArrayLike]
            The Tensor or array whose shape and datatype should be mirrored.

        dtype : data-type, optional (default=None)
            Override the data type of the returned Tensor with this value, or None to not override.

        constant : bool, optional (default=False)
            If ``True``, the returned tensor is a constant (it does not
            back-propagate a gradient)

        Returns
        -------
        Tensor
            A tensor of uninitialized data whose shape and type match `other`.

        Examples
        --------
        >>> import mygrad as mg
        >>> x = mg.arange(4.0).reshape((2, 2))
        >>> mg.empty_like(x, constant=True)
        Tensor([[ -9.74499359e+001,   6.69583040e-309],
                [  2.13182611e-314,   3.06959433e-309]])         #random

        >>> mg.empty_like(x, dtype=int)
        Tensor([[-1073741821, -1067949133],
                [  496041986,    19249760]])                     #random
    """
    return Tensor(np.empty_like(asarray(other), dtype=dtype), constant=constant)
def test_setitem_bool_axes_index(x, data):
    """ index consists of boolean arrays specified for each axis """
    bool_axis = hnp.arrays(shape=(3,), dtype=bool)
    index = data.draw(st.tuples(bool_axis, bool_axis))

    # a drawn index that numpy itself rejects ends the test early
    try:
        o = np.asarray(x[index])
    except IndexError:
        return None

    y_strategy = hnp.arrays(
        shape=broadcastable_shape(o.shape, max_dim=o.ndim),
        dtype=float,
        elements=st.floats(-10.0, 10.0),
    )
    y = data.draw(y_strategy, label="y")

    grad_strategy = hnp.arrays(
        shape=x.shape, dtype=float, elements=st.floats(1, 10), unique=True
    )
    grad = data.draw(grad_strategy, label="grad")

    x0, y0 = np.copy(x), np.copy(y)
    x_arr, y_arr = Tensor(np.copy(x)), Tensor(np.copy(y))

    # set-item on a view of `x_arr`, then back-propagate `grad` through it
    x1_arr = x_arr[:]
    x1_arr[index] = y_arr
    (x1_arr * grad).sum().backward()

    # numpy reference for the forward pass
    x0[index] = y0
    assert_allclose(x1_arr.data, x0)
    assert_allclose(y_arr.data, y0)

    dx, dy = numerical_gradient_full(
        setitem, x, y, back_grad=grad, kwargs={"index": index}
    )
    assert_allclose(x_arr.grad, dx)
    assert_allclose(y_arr.grad, dy)
def test_setitem_bool_basic_index(x, data):
    """ index mixes boolean and basic indexing"""
    row_mask = np.array([False, True, False, True])
    index = (row_mask, np.newaxis, slice(None))
    o = np.asarray(x[index])

    y_strategy = hnp.arrays(
        shape=broadcastable_shapes(o.shape, max_dims=o.ndim),
        dtype=float,
        elements=st.floats(-10.0, 10.0),
    )
    y = data.draw(y_strategy, label="y")

    grad_strategy = hnp.arrays(
        shape=x.shape, dtype=float, elements=st.floats(1, 10), unique=True
    )
    grad = data.draw(grad_strategy, label="grad")

    x0, y0 = np.copy(x), np.copy(y)

    # numpy reference for the forward pass
    try:
        x0[index] = y0  # don't permit invalid set-items
    except ValueError:
        assume(False)
        return

    x_arr, y_arr = Tensor(np.copy(x)), Tensor(np.copy(y))

    # set-item on a view of `x_arr`, then back-propagate `grad` through it
    x1_arr = x_arr[:]
    x1_arr[index] = y_arr
    (x1_arr * grad).sum().backward()

    assert_allclose(x1_arr.data, x0)
    assert_allclose(y_arr.data, y0)

    dx, dy = numerical_gradient_full(
        setitem, x, y, back_grad=grad, kwargs={"index": index}
    )
    assert_allclose(x_arr.grad, dx)
    assert_allclose(y_arr.grad, dy)
def test_reshape_method_fwd(a):
    """Check that `Tensor.reshape` yields the same shape and data as
    reshaping the underlying numpy array."""
    new_shape = gen_shape(a.size)

    x = Tensor(a).reshape(new_shape)
    a = a.reshape(new_shape)

    assert x.shape == a.shape, "Tensor.reshape failed"
    # The message must be handed to `assert_allclose` itself; writing it after a
    # comma merely built a discarded tuple, so it never appeared on failure.
    # NOTE(review): assumes `assert_allclose` is numpy.testing's - confirm.
    assert_allclose(a, x.data, err_msg="Tensor.reshape failed")
def test_transpose_property():
    """Check the forward and backward pass of the `Tensor.T` property."""
    dat = np.arange(6).reshape(2, 3)
    expected_fwd = dat.T

    x = Tensor(dat)
    f = x.T
    # feed the transposed data in as the gradient, so that `x.grad` should equal `dat`
    f.backward(dat.T)

    assert_allclose(f.data, expected_fwd)
    assert_allclose(x.grad, dat)
def margin_ranking_loss(x1, x2, y, margin, constant=False):
    r"""Computes the average margin ranking loss.

    Equivalent to::

    >>> import mygrad as mg
    >>> mg.mean(mg.maximum(0, margin - y * (x1 - x2)))

    Parameters
    ----------
    x1 : array_like, shape=(N,) or (N, D)
        A batch of scores or descriptors to compare against those in `x2`

    x2 : array_like, shape=(N,) or (N, D)
        A batch of scores or descriptors to compare against those in `x1`

    y  : Union[int, array_like], scalar or shape=(N,)
        1 or -1. Specifies whether the margin is compared against `(x1 - x2)`
        or `(x2 - x1)`, for each of the N comparisons.

    margin : float
        A non-negative value to be used as the margin for the loss.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it does not
        back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor, shape=()
        The mean margin ranking loss.

    Raises
    ------
    ValueError
        `x1` is not 1- or 2-dimensional, `x1` and `x2` differ in shape,
        `margin` is not a non-negative real number, or `y` is neither a
        scalar nor a shape-(N,) array.

    TypeError
        `x1` or `x2` does not have a floating-point dtype.
    """
    if x1.ndim not in (1, 2):
        raise ValueError("`x1` must have shape (N,) or (N, D)")
    if x1.shape != x2.shape:
        raise ValueError("`x1` and `x2` must have the same shape")

    x1_is_float = np.issubdtype(x1.dtype, np.floating)
    if not x1_is_float:
        raise TypeError("`x1` must contain floats")
    x2_is_float = np.issubdtype(x2.dtype, np.floating)
    if not x2_is_float:
        raise TypeError("`x2` must contain floats")

    if not isinstance(margin, Real) or margin < 0:
        raise ValueError("`margin` must be a non-negative scalar")

    y = asarray(y)
    if y.size == 1:
        # collapse any single-element `y` down to a 0-d array
        y = np.array(y.item())

    is_scalar_or_batch = y.ndim == 0 or (y.ndim == 1 and len(y) == len(x1))
    if not is_scalar_or_batch:
        raise ValueError("`y` must be a scalar or shape-(N,) array of ones")

    if y.ndim and x1.ndim == 2:
        # make a shape-(N,) `y` a column, so it lines up against (N, D) inputs
        y = y.reshape(-1, 1)

    return Tensor._op(MarginRanking, x1, x2, op_args=(y, margin), constant=constant)
def test_setitem_adv_int_index(x, data):
    """ index consists of a tuple of integer-valued arrays """
    index = data.draw(adv_integer_index(x.shape), label="index")
    o = np.asarray(x[index])

    if o.shape and o.size:
        # non-empty selection: draw an array that broadcasts against it
        y_strategy = hnp.arrays(
            shape=broadcastable_shapes(
                o.shape, max_dims=o.ndim, max_side=max(o.shape)
            ),
            dtype=float,
            elements=st.floats(-10.0, 10.0),
        )
    else:
        # scalar or empty selection: draw a single 0-d value
        y_strategy = st.floats(-10.0, 10.0).map(lambda _x: np.array(_x))
    y = data.draw(y_strategy, label="y")

    grad_strategy = hnp.arrays(
        shape=x.shape, dtype=float, elements=st.floats(1, 10), unique=True
    )
    grad = data.draw(grad_strategy, label="grad")

    x0, y0 = np.copy(x), np.copy(y)
    x_arr, y_arr = Tensor(np.copy(x)), Tensor(np.copy(y))

    # numpy reference for the forward pass
    try:
        x0[index] = y0  # don't permit invalid set-items
    except ValueError:
        assume(False)
        return

    # set-item on a view of `x_arr`, then back-propagate `grad` through it
    x1_arr = x_arr[:]
    x1_arr[index] = y_arr
    (x1_arr * grad).sum().backward()

    assert_allclose(x1_arr.data, x0)
    assert_allclose(y_arr.data, y0)

    dx, dy = numerical_gradient_full(
        setitem, x, y, back_grad=grad, kwargs={"index": index}
    )
    assert_allclose(x_arr.grad, dx)
    assert_allclose(y_arr.grad, dy)
def test_setitem_broadcast_index(x, data):
    """ index is two broadcast-compatible integer arrays"""
    # test broadcast-compatible int-arrays
    rows = np.array([0, 3], dtype=np.intp)
    columns = np.array([0, 2], dtype=np.intp)
    index = np.ix_(rows, columns)
    o = np.asarray(x[index])

    y_strategy = hnp.arrays(
        shape=broadcastable_shape(o.shape, max_dim=o.ndim),
        dtype=float,
        elements=st.floats(-10.0, 10.0),
    )
    y = data.draw(y_strategy, label="y")

    grad_strategy = hnp.arrays(
        shape=x.shape, dtype=float, elements=st.floats(1, 10), unique=True
    )
    grad = data.draw(grad_strategy, label="grad")

    x0, y0 = np.copy(x), np.copy(y)
    x_arr, y_arr = Tensor(np.copy(x)), Tensor(np.copy(y))

    # set-item on a view of `x_arr`, then back-propagate `grad` through it
    x1_arr = x_arr[:]
    x1_arr[index] = y_arr
    (x1_arr * grad).sum().backward()

    # numpy reference for the forward pass
    x0[index] = y0
    assert_allclose(x1_arr.data, x0)
    assert_allclose(y_arr.data, y0)

    dx, dy = numerical_gradient_full(
        setitem, x, y, back_grad=grad, kwargs={"index": index}
    )
    assert_allclose(x_arr.grad, dx)
    assert_allclose(y_arr.grad, dy)
def test_reshape_fwd(a):
    """Check that the `reshape` function yields the same shape and data as
    reshaping the underlying numpy array."""
    new_shape = gen_shape(a.size)

    x = Tensor(a)
    x = reshape(x, new_shape, constant=True)

    a = a.reshape(new_shape)

    assert x.shape == a.shape, "Tensor.reshape failed"
    # The message must be handed to `assert_allclose` itself; writing it after a
    # comma merely built a discarded tuple, so it never appeared on failure.
    # NOTE(review): assumes `assert_allclose` is numpy.testing's - confirm.
    assert_allclose(a, x.data, err_msg="Tensor.reshape failed")
def test_chainrule_scalar(x, y, z):
    """Back-propagate through ``g = x + z * f * f`` with ``f = x * y + z`` and
    compare every gradient against its hand-derived expression."""
    x, y, z = Tensor(x), Tensor(y), Tensor(z)

    f = x * y + z
    g = x + z * f * f

    # check side effects: these tensors hang off the graph but are not part of
    # the path that is back-propagated through
    unused = 2 * g - f
    w = 1 * f

    g.backward()

    dg_df = 2 * z.data * f.data
    assert np.allclose(f.grad, dg_df)
    assert np.allclose(x.grad, 1 + dg_df * y.data)
    assert np.allclose(y.grad, dg_df * x.data)
    assert np.allclose(z.grad, f.data**2 + z.data * 2 * f.data)

    assert w.grad is None