Example #1
def huber_loss(x, y, *, delta=1):
    ''' Returns the Huber loss (smooth L1).

    Parameters
    ----------
    x : mygrad.Tensor, shape=(N, any)
        The output for each of the N pieces of data.

    y : Union[mygrad.Tensor, numpy.ndarray], shape=(N, any)
        The target for each datum.

    delta : Real > 0, optional (default=1)
        The threshold at or below which the squared-error branch of the loss is used.

    Returns
    -------
    mygrad.Tensor, shape=()
        The average Huber loss.

    Extended Description
    --------------------
    The Huber loss is given by

    .. math::
        L_\delta(x, y) = \frac{1}{N}\sum_{i=1}^{N} \begin{cases}
            \frac{(x_i - y_i)^2}{2} & |x_i - y_i| \leq \delta\\
            \delta|x_i - y_i| - \frac{\delta^2}{2} & |x_i - y_i| > \delta
        \end{cases}

    '''
    return Tensor._op(HuberLoss, x, op_args=(y, delta))
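
For reference, here is a minimal NumPy-only sketch of the formula in the docstring; the function name is illustrative and is not part of MyGrad.

import numpy as np

def huber_loss_reference(x, y, delta=1.0):
    """NumPy sketch of the mean Huber loss described above (no autograd)."""
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    abs_diff = np.abs(diff)
    quadratic = 0.5 * diff ** 2                    # used when |x_i - y_i| <= delta
    linear = delta * abs_diff - 0.5 * delta ** 2   # used when |x_i - y_i| > delta
    return np.mean(np.where(abs_diff <= delta, quadratic, linear))

# e.g. huber_loss_reference([0., 3.], [0., 0.], delta=1.) -> (0.0 + 2.5) / 2 = 1.25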
Example #2
def softmax_focal_loss(x, y, *, alpha=1, gamma=0, constant=False):
    """
    Parameters
    ----------
    x : mygrad.Tensor, shape=(N, C)
        The C class scores for each of the N pieces of data.

    y : array_like, shape=(N,)
        The correct class indices, in [0, C), for each datum.

    alpha : Real, optional (default=1)
        The ɑ weighting factor in the loss formulation.

    gamma : Real, optional (default=0)
        The ɣ focusing parameter. Note that for Ɣ=0 and ɑ=1, this is cross-entropy loss.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it
        does not back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor, shape=(N,)
        The per-datum focal loss.
    """
    return Tensor._op(SoftmaxFocalLoss,
                      x,
                      op_args=(y, alpha, gamma),
                      constant=constant)
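
The operation above can be sketched in plain NumPy as a softmax over the class scores followed by the focal weighting; the helper below is illustrative only, assuming the loss is reported per datum as the Returns section states.

import numpy as np

def softmax_focal_loss_reference(scores, targets, alpha=1.0, gamma=0.0):
    """NumPy sketch: softmax the scores, then -alpha*(1-p)**gamma*log(p) per datum."""
    scores = np.asarray(scores, dtype=float)
    shifted = scores - scores.max(axis=1, keepdims=True)    # for numerical stability
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    p_true = probs[np.arange(len(targets)), targets]        # probability of the correct class
    return -alpha * (1 - p_true) ** gamma * np.log(p_true)

# With gamma=0 and alpha=1 this reduces to the per-datum cross-entropy loss.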
Example #3
def batchnorm(x, *, gamma=None, beta=None, eps, constant=False):
    """
    Performs batch normalization on ``x``::

                 y(x) = (x - E[x]) / sqrt(Var[x] + eps)
                 batchnorm(x) = gamma * y(x) + beta

    Where :math:`E[x]` and :math:`Var[x]` represent the mean and variance computed for
    each entry along axis-1 (the channel axis) of ``x``, over the remaining axes. The
    subsequent affine transformation on ``y`` is optional.

    Parameters
    ----------
    x : array_like, shape=(N, C, ...)
        The batch to be normalized within each entry of C

    gamma : Optional[array_like], shape=(C,)
        Optional per-channel scaling factors to be applied after the
        normalization step.

    beta  : Optional[array_like], shape=(C,)
        Optional per-channel shift (bias) factors to be applied after the
        normalization step.

    eps : Real
       A small non-negative number.

    constant : bool, optional (default=False)
        If True, the resulting Tensor is a constant.

    Returns
    -------
    mygrad.Tensor
        The batch-normalized data.

    Examples
    --------
    >>> import mygrad as mg
    >>> from mygrad.nnet import batchnorm
    >>> x = mg.Tensor([1., 4., 1.]).reshape(3, 1)
    >>> batchnorm(x, eps=0)
    Tensor([[-0.70710678],
            [ 1.41421356],
            [-0.70710678]])
    """
    # pass gamma and beta as empty arrays if they are not supplied
    if gamma is None:
        gamma = np.array([])
    if beta is None:
        beta = np.array([])
    return Tensor._op(BatchNorm,
                      x,
                      gamma,
                      beta,
                      op_kwargs=dict(eps=eps),
                      constant=constant)
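
To illustrate the formula (not the MyGrad op itself), here is a NumPy sketch for a shape-(N, C) batch; it assumes the statistics are computed per channel over the batch axis, which is consistent with the docstring's example output.

import numpy as np

def batchnorm_reference(x, gamma=None, beta=None, eps=0.0):
    """NumPy sketch for a shape-(N, C) batch: normalize each channel over the batch axis."""
    x = np.asarray(x, dtype=float)
    y = (x - x.mean(axis=0)) / np.sqrt(x.var(axis=0) + eps)
    if gamma is not None:
        y = gamma * y
    if beta is not None:
        y = y + beta
    return y

# batchnorm_reference([[1.], [4.], [1.]], eps=0) -> [[-0.7071...], [1.4142...], [-0.7071...]]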
Example #4
def focal_loss(class_probs, targets, *, alpha=1, gamma=0, constant=False):
    r""" Return the per-datum focal loss.

    Parameters
    ----------
    class_probs : mygrad.Tensor, shape=(N, C)
        The C class probabilities for each of the N pieces of data.
        Each value is expected to lie on (0, 1]

    targets : Sequence[int], shape=(N,)
        The correct class indices, in [0, C), for each datum.

    alpha : Real, optional (default=1)
        The ɑ weighting factor in the loss formulation.

    gamma : Real, optional (default=0)
        The ɣ focusing parameter. Note that for Ɣ=0 and ɑ=1, this is cross-entropy loss.
        Must be a non-negative value.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it
        does not back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor, shape=(N,)
        The per-datum focal loss.

    Notes
    -----
    The focal loss formulation was introduced in https://arxiv.org/abs/1708.02002.
    It is given by -ɑ(1-p)ˠlog(p).


    The focal loss for datum-:math:`i` is given by

    .. math::
        -\alpha \hat{y}_i(1-p_i)^\gamma\log(p_i)

    where :math:`\hat{y}_i` is 1 at the entry corresponding to the datum's label and
    0 elsewhere. That is, if the label :math:`y_k` is 2 and there are four possible
    label values, then :math:`\hat{y}_k = (0, 0, 1, 0)`.

    It is recommended in the paper that you normalize by the number of foreground samples.
    """
    if not isinstance(gamma, Real) or gamma < 0:
        raise ValueError(
            f"`gamma` must be a non-negative number, got: {gamma}")

    return Tensor._op(FocalLoss,
                      class_probs,
                      op_args=(targets, alpha, gamma),
                      constant=constant)
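
To make the effect of the focusing parameter concrete, here is a small NumPy check of the -ɑ(1-p)ˠlog(p) formula on two true-class probabilities (the numbers are rounded and purely illustrative):

import numpy as np

p_true = np.array([0.95, 0.3])   # a well-classified datum and a poorly-classified one
for gamma in (0, 2):
    loss = -(1 - p_true) ** gamma * np.log(p_true)   # alpha = 1
    print(gamma, loss)
# gamma=0: [0.0513, 1.2040]  (plain cross-entropy)
# gamma=2: [0.00013, 0.5899] -- the easy datum is down-weighted ~400x, the hard one only ~2x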
Example #5
def softmax_focal_loss(scores, targets, *, alpha=1, gamma=0, constant=False):
    r"""
    Applies the softmax normalization to the input scores before computing the
    per-datum focal loss.

    Parameters
    ----------
    scores : mygrad.Tensor, shape=(N, C)
        The C class scores for each of the N pieces of data.

    targets : array_like, shape=(N,)
        The correct class indices, in [0, C), for each datum.

    alpha : Real, optional (default=1)
        The ɑ weighting factor in the loss formulation.

    gamma : Real, optional (default=0)
        The ɣ focusing parameter. Note that for Ɣ=0 and ɑ=1, this is cross-entropy loss.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it
        does not back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor, shape=(N,)
        The per-datum focal loss.

    Notes
    -----
    The focal loss formulation was introduced in https://arxiv.org/abs/1708.02002.
    It is given by -ɑ(1-p)ˠlog(p).

    The focal loss for datum-:math:`i` is given by

    .. math::
        -\alpha \hat{y}_i(1-p_i)^\gamma\log(p_i)

    where :math:`\hat{y}_i` is 1 at the entry corresponding to the datum's label and
    0 elsewhere. That is, if the label :math:`y_k` is 2 and there are four possible
    label values, then :math:`\hat{y}_k = (0, 0, 1, 0)`.

    It is recommended in the paper that you normalize by the number of foreground samples.
    """
    return Tensor._op(SoftmaxFocalLoss,
                      scores,
                      op_args=(targets, alpha, gamma),
                      constant=constant)
Example #6
def elu(x, alpha, constant=False):
    """ Returns the exponential linear activation (ELU) elementwise along x.

    The ELU is given by  `ɑ(exp(x) - 1) for x < 0 and x for x ≥ 0`.

    Parameters
    ----------
    x : mygrad.Tensor
        Input data.

    alpha : Real
        The multiplicative factor on the negative activation.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it
        does not back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor
        The ELU function applied to `x` elementwise.

    Examples
    --------
    >>> import mygrad as mg
    >>> from mygrad.nnet.activations import elu
    >>> x = mg.arange(-5, 6)
    >>> x
    Tensor([-5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5])
    >>> y = elu(x, alpha=0.1); y
    Tensor([-0.09932621, -0.09816844, -0.09502129, -0.08646647, -0.06321206,
             0.        ,  1.        ,  2.        ,  3.        ,  4.        ,
             5.        ])
    >>> y.backward()
    >>> x.grad
    array([6.73794700e-04, 1.83156389e-03, 4.97870684e-03, 1.35335283e-02,
           3.67879441e-02, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
           1.00000000e+00, 1.00000000e+00, 1.00000000e+00])
    """
    if isinstance(alpha, (np.ndarray, Tensor)):
        alpha = alpha.item()

    if not isinstance(alpha, Real):
        raise TypeError(
            f"`alpha` must be a real-valued scalar, got {alpha} (type {type(alpha)})"
        )

    return Tensor._op(ELU, x, op_args=(alpha,), constant=constant)
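
A plain-NumPy sketch of the ELU forward formula and its elementwise derivative, which can be checked against the example values above (illustrative helpers, not MyGrad API):

import numpy as np

def elu_reference(x, alpha):
    """NumPy sketch of the ELU: alpha*(exp(x) - 1) for x < 0, x for x >= 0."""
    x = np.asarray(x, dtype=float)
    return np.where(x < 0, alpha * (np.exp(x) - 1), x)

def elu_grad_reference(x, alpha):
    """Elementwise derivative of the ELU: alpha*exp(x) for x < 0, 1 for x >= 0."""
    x = np.asarray(x, dtype=float)
    return np.where(x < 0, alpha * np.exp(x), 1.0)

# elu_reference(np.arange(-5, 6), 0.1) matches the forward values shown in the example above,
# and elu_grad_reference(np.arange(-5, 6), 0.1) matches the x.grad array after y.backward().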
Example #7
File: elu.py Project: IanCoolidge0/MyNN
def elu(x, alpha):
    ''' Returns the exponential linear activation (ELU) elementwise along x. The ELU is given by 
    ɑ(exp(x) - 1) for x < 0 and x for x ≥ 0.

    Parameters
    ----------
    x : mygrad.Tensor
        Input data.

    alpha : Real
        The multiplicative factor on the negative activation.

    Returns
    -------
    mygrad.Tensor
        The ELU function applied to `x` elementwise.
    '''
    return Tensor._op(ELU, x, op_args=(alpha,))
Example #8
def softmax_focal_loss(x, y, *, alpha=1, gamma=0):
    """
    Parameters
    ----------
    x : mygrad.Tensor, shape=(N, C)
        The C class scores for each of the N pieces of data.

    y : Sequence[int], shape=(N,)
        The correct class indices, in [0, C), for each datum.

    alpha : Real, optional (default=1)
        The ɑ weighting factor in the loss formulation.

    gamma : Real, optional (default=0)
        The ɣ focusing parameter. Note that for Ɣ=0 and ɑ=1, this is cross-entropy loss.

    Returns
    -------
    mygrad.Tensor
        The average focal loss.
    """
    return Tensor._op(SoftmaxFocalLoss, x, op_args=(y, alpha, gamma))
Example #9
def selu(x):
    ''' Returns the scaled exponential linear activation (SELU) elementwise along x. The SELU is
    given by  λɑ(exp(x) - 1) for x < 0 and λx for x ≥ 0.

    Parameters
    ----------
    x : mygrad.Tensor
        Input data.

    Returns
    -------
    mygrad.Tensor
        The SELU function applied to `x` elementwise.

    Notes
    -----
    The SELU activation was proposed in the paper
        Self-Normalizing Neural Networks
        Günter Klambauer, Thomas Unterthiner, Andreas Mayr, Sepp Hochreiter
    at https://arxiv.org/abs/1706.02515
    '''
    return Tensor._op(SELU, x)
Example #10
def selu(x, constant=False):
    """ Returns the scaled exponential linear activation (SELU) elementwise along x.

    The SELU is given by  λɑ(exp(x) - 1) for x < 0 and λx for x ≥ 0.

    Parameters
    ----------
    x : mygrad.Tensor
        Input data.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it
        does not back-propagate a gradient)

    Returns
    -------
    mygrad.Tensor
        The SELU function applied to `x` elementwise.

    Notes
    -----
    The SELU activation was proposed in the paper
        Self-Normalizing Neural Networks
        Günter Klambauer, Thomas Unterthiner, Andreas Mayr, Sepp Hochreiter
    at https://arxiv.org/abs/1706.02515

    Examples
    --------
    >>> import mygrad as mg
    >>> from mygrad.nnet.activations import selu
    >>> x = mg.arange(-5, 6)
    >>> x
    Tensor([-5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5])
    >>> y = selu(x); y
    Tensor([-1.74625336, -1.72589863, -1.67056873, -1.52016647, -1.11133074,
             0.        ,  1.05070099,  2.10140197,  3.15210296,  4.20280395,
             5.25350494])
    """
    return Tensor._op(SELU, x, constant=constant)
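
For reference, a NumPy sketch of the SELU formula; the λ and ɑ values below are the fixed constants reported in the Self-Normalizing Neural Networks paper, and they are assumed here to match what MyGrad's ``SELU`` op uses (the example output above is consistent with them).

import numpy as np

_SELU_SCALE = 1.0507009873554805   # lambda from the paper
_SELU_ALPHA = 1.6732632423543772   # alpha from the paper

def selu_reference(x):
    """NumPy sketch of SELU: scale*alpha*(exp(x) - 1) for x < 0, scale*x for x >= 0."""
    x = np.asarray(x, dtype=float)
    return np.where(x < 0, _SELU_SCALE * _SELU_ALPHA * (np.exp(x) - 1), _SELU_SCALE * x)

# selu_reference(np.arange(-5, 6)) reproduces the values shown in the example above.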
Example #11
def batchnorm(x, *, gamma=None, beta=None, eps, constant=False):
    """
    Performs batch normalization on ``x``::

                 y(x) = (x - E[x]) / sqrt(Var[x] + eps)
                 batchnorm(x) = gamma * y(x) + beta

    Where E[x] and Var[x] represent the mean and variance computed for each entry
    along axis-1 (the channel axis) of ``x``, over the remaining axes. The subsequent
    affine transformation on ``y`` is optional.

    Parameters
    ----------
    x : array_like, shape=(N, C, ...)
        The batch to be normalized within each entry of C

    gamma : Optional[array_like], shape=(C,)
        Optional per-channel scaling factors to be applied after the
        normalization step.

    beta  : Optional[array_like], shape=(C,)
        Optional per-channel shift (bias) factors to be applied after the
        normalization step.

    eps : Real
       A small non-negative number.

    constant : bool, optional (default=False)
        If True, the resulting Tensor is a constant.

    Returns
    -------
    mygrad.Tensor
        The batch-normalized data.
    """
    # pass gamma and beta as empty arrays if they are not supplied
    if gamma is None:
        gamma = np.array([])
    if beta is None:
        beta = np.array([])
    return Tensor._op(BatchNorm, x, gamma, beta, op_kwargs=dict(eps=eps), constant=constant)
Example #12
def einsum(*operands, optimize=False, constant=False):
    r"""
    einsum(subscripts, *operands)

    Evaluates the Einstein summation convention on the operands. This implementation
    exactly mirrors that of ``numpy.einsum`` and supports back-propagation through
    all variety of tensor-products, sums, traces, and views that it can perform.

    The following docstring was adapted from the documentation for ``numpy.einsum``

    Using the Einstein summation convention, many common multi-dimensional
    array operations can be represented in a simple fashion.  This function
    provides a way to compute such summations. The best way to understand this
    function is to try the examples below, which show how many common NumPy/MyGrad
    functions can be implemented as calls to ``einsum``.

    Back-propagation via ``einsum`` is optimized such that any tensor that occurs
    redundantly within the summation will only have its gradient computed once.
    This optimization accommodates all number and combination of redundancies that can
    be encountered.

    E.g. back-propping through ``einsum('...,...->', x, x)`` will only incur a single
    computation/accumulation for ``x.grad`` rather than two. This permits users to
    leverage the efficiency of sum-reduction, where ``(x ** 2).sum()`` is sub-optimal,
    without being penalized during back-propagation.

    Parameters
    ----------
    subscripts : str
        Specifies the subscripts for summation.

    operands : array_like
        The tensors used in the summation.

    optimize : {False, True, 'greedy', 'optimal'}, optional (default=False)
        Controls if intermediate optimization should occur; also enables
        the use of BLAS where possible. This can produce significant speedups
        for computations like matrix multiplication.

        No optimization will occur if False and True will default to the 'greedy'
        algorithm. Also accepts an explicit contraction list from the
        ``np.einsum_path`` function. See ``np.einsum_path`` for more details.

    constant : bool, optional (default=False)
        If True, the resulting Tensor is a constant.

    Returns
    -------
    output : mygrad.Tensor
        The calculation based on the Einstein summation convention.

    Notes
    -----
    The subscripts string is a comma-separated list of subscript labels,
    where each label refers to a dimension of the corresponding operand.
    Repeated subscripts labels in one operand take the diagonal.  For example,
    ``einsum('ii', a)`` is equivalent to ``np.trace(a)`` (however, the former
    supports back-propagation).

    Whenever a label is repeated, it is summed, so ``einsum('i, i', a, b)``
    is equivalent to ``np.inner(a, b)``.  If a label appears only once,
    it is not summed, so ``einsum('i', a)`` produces a view of ``a``
    with no changes.

    The order of labels in the output is by default alphabetical.  This
    means that ``np.einsum('ij', a)`` doesn't affect a 2D tensor, while
    ``einsum('ji', a)`` takes its transpose.

    The output can be controlled by specifying output subscript labels
    as well.  This specifies the label order, and allows summing to
    be disallowed or forced when desired.  The call ``einsum('i->', a)``
    is like ``np.sum(a, axis=-1)``, and ``einsum('ii->i', a)``
    is like ``np.diag(a)``.  The difference is that `einsum` does not
    allow broadcasting by default.

    To enable and control broadcasting, use an ellipsis.  Default
    NumPy-style broadcasting is done by adding an ellipsis
    to the left of each term, like ``einsum('...ii->...i', a)``.
    To take the trace along the first and last axes,
    you can do ``einsum('i...i', a)``, or to do a matrix-matrix
    product with the left-most indices instead of rightmost, you can do
    ``einsum('ij...,jk...->ik...', a, b)``.

    When there is only one operand, no axes are summed, and no output
    parameter is provided, a view into the operand is returned instead
    of a new tensor.  Thus, taking the diagonal as ``einsum('ii->i', a)``
    produces a view.

    An alternative way to provide the subscripts and operands is as
    ``einsum(op0, sublist0, op1, sublist1, ..., [sublistout])``. The examples
    below have corresponding `einsum` calls with the two parameter methods.

    Examples
    --------
    >>> import mygrad as mg
    >>> import numpy as np
    >>> a = mg.arange(25).reshape(5,5)
    >>> b = mg.arange(5)
    >>> c = mg.arange(6).reshape(2,3)

    Compute the trace of ``a``, :math:`\sum_{i}{A_{ii}} = f`:

    >>> einsum('ii', a)
    Tensor(60)
    >>> einsum(a, [0, 0])
    Tensor(60)
    >>> np.trace(a.data)
    array(60)

    Return a view along the diagonal of ``a``, :math:`A_{ii} = F_{i}`:

    >>> einsum('ii->i', a)
    Tensor([ 0,  6, 12, 18, 24])
    >>> einsum(a, [0,0], [0])
    Tensor([ 0,  6, 12, 18, 24])
    >>> np.diag(a.data)
    array([ 0,  6, 12, 18, 24])

    Compute the matrix-vector product of ``a`` with ``b``, :math:`\sum_{j}{A_{ij} B_{j}} = F_{i}`:

    >>> einsum('ij,j', a, b)
    Tensor([ 30,  80, 130, 180, 230])
    >>> einsum(a, [0,1], b, [1])
    Tensor([ 30,  80, 130, 180, 230])
    >>> mg.matmul(a, b)
    Tensor([ 30,  80, 130, 180, 230])
    >>> einsum('...j,j', a, b)
    Tensor([ 30,  80, 130, 180, 230])

    Take the transpose of ``c``, :math:`C_{ji} = F_{ij}`:

    >>> einsum('ji', c)
    Tensor([[0, 3],
            [1, 4],
            [2, 5]])
    >>> einsum(c, [1, 0])
    Tensor([[0, 3],
            [1, 4],
            [2, 5]])
    >>> c.T
    Tensor([[0, 3],
            [1, 4],
            [2, 5]])

    Compute ``3 * c``:

    >>> einsum('..., ...', 3, c)
    Tensor([[ 0,  3,  6],
            [ 9, 12, 15]])
    >>> einsum(',ij', 3, c)
    Tensor([[ 0,  3,  6],
            [ 9, 12, 15]])
    >>> einsum(3, [Ellipsis], c, [Ellipsis])
    Tensor([[ 0,  3,  6],
            [ 9, 12, 15]])
    >>> 3 * c
    Tensor([[ 0,  3,  6],
            [ 9, 12, 15]])

    Compute the inner product of ``b`` with itself, :math:`\sum_{i}{B_{i} B_{i}} = f`:

    >>> einsum('i,i', b, b)
    Tensor(30)
    >>> einsum(b, [0], b, [0])
    Tensor(30)
    >>> np.inner(b.data, b.data)
    30

    Compute the outer product of ``array([1, 2])`` with ``b``, :math:`A_{i}B_{j} = F_{ij}`:

    >>> einsum('i,j', np.arange(2)+1, b)
    Tensor([[0, 1, 2, 3, 4],
           [0, 2, 4, 6, 8]])
    >>> einsum(np.arange(2)+1, [0], b, [1])
    Tensor([[0, 1, 2, 3, 4],
           [0, 2, 4, 6, 8]])
    >>> np.outer(np.arange(2)+1, b)
    array([[0, 1, 2, 3, 4],
           [0, 2, 4, 6, 8]])
    >>> einsum('i...->...', a)
    Tensor([50, 55, 60, 65, 70])
    >>> einsum(a, [0,Ellipsis], [Ellipsis])
    Tensor([50, 55, 60, 65, 70])
    >>> np.sum(a, axis=0)
    array([50, 55, 60, 65, 70])

    Compute the tensor product :math:`\sum_{ij}{A_{ijk} B_{jil}} = F_{kl}`

    >>> a = mg.arange(60.).reshape(3,4,5)
    >>> b = mg.arange(24.).reshape(4,3,2)
    >>> einsum('ijk,jil->kl', a, b)
    Tensor([[ 4400.,  4730.],
            [ 4532.,  4874.],
            [ 4664.,  5018.],
            [ 4796.,  5162.],
            [ 4928.,  5306.]])
    >>> einsum(a, [0,1,2], b, [1,0,3], [2,3])
    Tensor([[ 4400.,  4730.],
            [ 4532.,  4874.],
            [ 4664.,  5018.],
            [ 4796.,  5162.],
            [ 4928.,  5306.]])
    >>> np.tensordot(a,b, axes=([1,0],[0,1]))
    array([[ 4400.,  4730.],
            [ 4532.,  4874.],
            [ 4664.,  5018.],
            [ 4796.,  5162.],
            [ 4928.,  5306.]])

    Matrix multiply ``a.T`` with ``b.T``, :math:`\sum_{k}{A_{ki} B_{jk}} = F_{ij}`

    >>> a = mg.arange(6).reshape((3,2))
    >>> b = mg.arange(12).reshape((4,3))
    >>> einsum('ki,jk->ij', a, b)
    Tensor([[10, 28, 46, 64],
            [13, 40, 67, 94]])
    >>> einsum('ki,...k->i...', a, b)
    Tensor([[10, 28, 46, 64],
            [13, 40, 67, 94]])
    >>> einsum('k...,jk', a, b)
    Tensor([[10, 28, 46, 64],
            [13, 40, 67, 94]])

    Make an assignment to a view along the diagonal of ``a``:

    >>> a = mg.zeros((3, 3))
    >>> einsum('ii->i', a).data[:] = 1
    >>> a
    Tensor([[ 1.,  0.,  0.],
            [ 0.,  1.,  0.],
            [ 0.,  0.,  1.]])
    """

    # TODO: normalize error handling for invalid inputs
    operands = list(operands)
    if isinstance(operands[0], str):
        # operands form: "ijk, ijk", x, y
        variables = operands[1:]
        if any(isinstance(i, Tensor) for i in operands):
            operands[1:] = (
                var.data if isinstance(var, Tensor) else var for var in operands[1:]
            )
    else:
        # operands form: op0, sublist0, op1, sublist1, ..., [sublistout]
        end = -1 if len(operands) % 2 else None  # -1 if sublistout is included
        variables = operands[:end:2]
        if any(isinstance(i, Tensor) for i in operands):
            operands[:end:2] = (
                var.data if isinstance(var, Tensor) else var for var in operands[:end:2]
            )

    in_lbls, out_lbls, _ = _parse_einsum_input(operands)
    return Tensor._op(
        EinSum,
        *variables,
        op_kwargs=dict(in_lbls=in_lbls, out_lbls=out_lbls, optimize=optimize),
        constant=constant
    )
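
As a small illustration of the redundancy note in the docstring (a sketch; it assumes ``einsum`` is importable from the top-level ``mygrad`` namespace, as the examples suggest):

import mygrad as mg
from mygrad import einsum

x = mg.Tensor([1., 2., 3.])
einsum("...,...->", x, x).backward()   # sum of x*x, with x appearing twice in the summation
print(x.grad)                          # [2. 4. 6.], i.e. 2*x, accumulated in a single pass

x2 = mg.Tensor([1., 2., 3.])
(x2 ** 2).sum().backward()             # the equivalent (but sub-optimal) sum-reduction
print(x2.grad)                         # [2. 4. 6.]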
Example #13
def matmul(a, b, constant=False):
    r"""
    Matrix product of two tensors:

    ``matmul(x, y)`` is equivalent to ``x @ y``.

    This documentation was adapted from ``numpy.matmul``

    The behavior depends on the arguments in the following way.

    - If both arguments are 2-D they are multiplied like conventional
      matrices.
    - If either argument is N-D, N > 2, it is treated as a stack of
      matrices residing in the last two indexes and broadcast accordingly.
    - If the first argument is 1-D, it is promoted to a matrix by
      prepending a 1 to its dimensions. After matrix multiplication
      the prepended 1 is removed.
    - If the second argument is 1-D, it is promoted to a matrix by
      appending a 1 to its dimensions. After matrix multiplication
      the appended 1 is removed.

    Multiplication by a scalar is not allowed, use ``*`` instead. Note that
    multiplying a stack of matrices with a vector will result in a stack of
    vectors, but matmul will not recognize it as such.

    ``matmul`` differs from ``numpy.dot`` in two important ways.

    - Multiplication by scalars is not allowed.
    - Stacks of matrices are broadcast together as if the matrices
      were elements.


    Parameters
    ----------
    a : array_like
        The first operand; scalars are not allowed.

    b : array_like
        The second operand; scalars are not allowed.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it
        does not back-propagate a gradient)

    Returns
    -------
    output : mygrad.Tensor
        Returns the matrix product of `a` and `b`.  If `a` and `b` are both
        1-D arrays then a scalar is returned; otherwise an array is
        returned.


    Raises
    ------
    ValueError
        If the last dimension of `a` is not the same size as
        the second-to-last dimension of `b`.

        If a scalar value is passed.

    Notes
    -----
    The matmul function implements the semantics of the `@` operator introduced
    in Python 3.5 following PEP465.

    Examples
    --------
    For two 2D tensors, ``matmul(a, b)`` is the matrix product :math:`\sum_{j}{A_{ij} B_{jk}} = F_{ik}`:

    >>> import mygrad as mg
    >>> a = [[1, 0], [0, 1]]
    >>> b = [[4, 1], [2, 2]]
    >>> mg.matmul(a, b)
    Tensor([[4, 1],
            [2, 2]])

    For 2-D mixed with 1-D, the result is the matrix-vector product, :math:`\sum_{j}{A_{ij} B_{j}} = F_{i}`:

    >>> a = [[1, 0], [0, 1]]
    >>> b = [1, 2]
    >>> mg.matmul(a, b)
    Tensor([1, 2])

    Broadcasting is conventional for stacks of arrays. Here ``a`` is treated
    like a stack of three 5x6 matrices, and the 6x4 matrix ``b`` is broadcast
    matrix-multiplied against each one. This produces a shape-(3, 5, 4) tensor
    as a result.

    >>> a = mg.arange(3*5*6).reshape((3,5,6))
    >>> b = mg.arange(6*4).reshape((6,4))
    >>> mg.matmul(a,b).shape
    (3, 5, 4)

    Scalar multiplication raises an error.

    >>> mg.matmul(a, 3)
    Traceback (most recent call last):
    ...
    ValueError: Scalar operands are not allowed, use '*' instead"""
    return Tensor._op(MatMul, a, b, constant=constant)
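
A short usage sketch of the 1-D promotion rule described above, using two 1-D tensors (output formatting is approximate):

import mygrad as mg

v = mg.Tensor([1., 2., 3.])
w = mg.Tensor([4., 5., 6.])

# Both 1-D operands are promoted to matrices and the added dimensions are removed,
# leaving the zero-dimensional inner product described in the Returns section.
out = mg.matmul(v, w)
print(out)        # Tensor(32.)

out.backward()
print(v.grad)     # d(v . w)/dv = w -> [4. 5. 6.]
print(w.grad)     # d(v . w)/dw = v -> [1. 2. 3.]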
Example #14
def old_op(a):
    return Tensor._op(OldOperation, a)
Example #15
def where(condition, x=not_set, y=not_set, constant=False):
    """
    where(condition, [x, y])

    Return elements chosen from `x` or `y` depending on `condition`.

    .. note::
        When only ``condition`` is provided, this function is a shorthand for
        ``np.asarray(condition).nonzero()``. The rest of this
        documentation covers only the case where all three arguments are
        provided.

    This docstring was adapted from that of ``numpy.where``.

    Parameters
    ----------
    condition : array_like, bool
        Where True, yield `x`, otherwise yield ``y``. ``x``, ``y``
        and `condition` need to be broadcastable to some shape.

    x : array_like
        Values from which to choose where ``condition`` is ``True``.

    y : array_like
        Values from which to choose where ``condition`` is ``False``.

    constant : bool, optional(default=False)
        If ``True``, the returned tensor is a constant (it
        does not back-propagate a gradient)

    Returns
    -------
    out : mygrad.Tensor
        A tensor with elements from `x` where `condition` is True, and elements
        from `y` elsewhere.

    Examples
    --------
    >>> import mygrad as mg
    >>> a = mg.arange(10)
    >>> a
    Tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> mg.where(a < 5, a, 10*a)
    Tensor([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])

    This can be used on multidimensional tensors too:

    >>> mg.where([[True, False], [True, True]],
    ...          [[1, 2], [3, 4]],
    ...          [[9, 8], [7, 6]])
    Tensor([[1, 8],
            [3, 4]])

    The shapes of x, y, and the condition are broadcast together:

    >>> import numpy as np
    >>> x, y = np.ogrid[:3, :4]
    >>> mg.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
    Tensor([[10,  0,  0,  0],
            [10, 11,  1,  1],
            [10, 11, 12,  2]])

    >>> a = mg.Tensor([[0, 1, 2],
    ...                [0, 2, 4],
    ...                [0, 3, 6]])
    >>> mg.where(a < 4, a, -1)  # -1 is broadcast
    Tensor([[ 0,  1,  2],
            [ 0,  2, -1],
            [ 0,  3, -1]])
    """
    if x is not_set and y is not_set:
        return np.where(asarray(condition))

    if x is not_set or y is not_set:
        raise ValueError("either both or neither of x and y should be given")

    return Tensor._op(
        Where, x, y, op_kwargs=dict(condition=condition), constant=constant
    )
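
Two quick sketches related to the docstring: the single-argument shorthand, and how the gradient flows only through the branch that was selected (the gradient values shown are what one would expect mathematically; treat them as illustrative).

import numpy as np
import mygrad as mg

cond = [[True, False],
        [False, True]]

# Single-argument form: just the indices of the True entries,
# equivalent to np.asarray(cond).nonzero()
print(mg.where(cond))               # (array([0, 1]), array([0, 1]))
print(np.asarray(cond).nonzero())   # the same pair of index arrays

# Three-argument form: gradients flow only through the selected elements
x = mg.Tensor([1., -2., 3.])
y = mg.Tensor([10., 20., 30.])
out = mg.where([True, False, True], x, y)
out.backward()
print(x.grad)   # [1. 0. 1.]
print(y.grad)   # [0. 1. 0.]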