Beispiel #1
0
    def bprop(x, z, out, dout):
        """Back-propagation rule with local gradient accumulation.

        Args (graph-mode bprop convention):
            x: forward input whose gradient is produced.
            z: accumulation buffer; ``dout`` is added into it before the reduce.
            out: forward output (unused here).
            dout: incoming gradient w.r.t. the forward output.

        Uses closure variables from the enclosing scope: ``mean_flag``,
        ``do_mirror``, ``all_reduce``, ``dev_num``, ``mul``, ``cast``,
        ``zeros_like``.

        Returns:
            tuple: (dx, zeros_like(z)) — gradient for x, zero gradient for z.
        """
        if mean_flag:
            # Mean mode: after the (optional) accumulate + all-reduce,
            # rescale by 1 / dev_num so the aggregate is an average.
            if F.issubclass_(F.typeof(dout), mstype.tensor):
                if do_mirror:
                    # F.depend forces assign_add(z, dout) to execute before
                    # the all_reduce, so the reduce sees the accumulated value.
                    z = F.depend(z, F.assign_add(z, dout))
                    real_grad = all_reduce(z)
                    dx = real_grad
                else:
                    dx = dout
                float_one = F.scalar_cast(1.0, F.dtype(dx))
                num = F.scalar_cast(dev_num, F.dtype(dx))
                dx = mul(dx,
                         cast(F.scalar_to_array(float_one / num), F.dtype(dx)))
            else:
                dx = zeros_like(
                    x)  # Gradient accumulation does not support RowTensor yet.
        else:
            # Sum mode: same accumulate + all-reduce path, without rescaling.
            if F.issubclass_(F.typeof(dout), mstype.tensor):
                if do_mirror:
                    z = F.depend(z, F.assign_add(z, dout))
                    real_grad = all_reduce(z)
                    dx = real_grad
                else:
                    dx = dout
            else:
                dx = zeros_like(
                    x)  # Gradient accumulation does not support RowTensor yet.

        return (dx, zeros_like(z))
Beispiel #2
0
    def bprop(x, out, dout):
        """Back-propagation rule that synchronizes the gradient across devices.

        Dense gradients (``mstype.tensor``) are all-reduced; sparse RowTensor
        gradients are handled by all-gathering indices and values instead.
        When ``mean_flag`` is set, the result is additionally scaled by
        1 / dev_num so the aggregate is an average rather than a sum.

        Closure variables: ``mean_flag``, ``dev_num``, ``all_reduce``,
        ``all_gather``, ``mul``, ``cast``, ``RowTensor``.
        """
        if mean_flag:
            if F.issubclass_(F.typeof(dout), mstype.tensor):
                dx = all_reduce(dout)
                float_one = F.scalar_cast(1.0, F.dtype(dx))
                num = F.scalar_cast(dev_num, F.dtype(dx))
                dx = mul(dx,
                         cast(F.scalar_to_array(float_one / num), F.dtype(dx)))
            else:
                # Sparse path: gather every device's rows, then scale values.
                indices = all_gather(dout.indices)
                grad = all_gather(dout.values)
                float_one = F.scalar_cast(1.0, F.dtype(grad))
                num = F.scalar_cast(dev_num, F.dtype(grad))
                grad = mul(
                    grad,
                    cast(F.scalar_to_array(float_one / num), F.dtype(grad)))
                dx = RowTensor(indices, grad, dout.dense_shape)
        else:
            if F.issubclass_(F.typeof(dout), mstype.tensor):
                dx = all_reduce(dout)
            else:
                indices = all_gather(dout.indices)
                grad = all_gather(dout.values)
                dx = RowTensor(indices, grad, dout.dense_shape)

        return (dx, )
Beispiel #3
0
 def bprop(x, out, dout):
     """Synchronize the incoming gradient across devices.

     A dense gradient is all-reduced in place; a sparse RowTensor gradient
     is rebuilt from the all-gathered indices and values of every device.
     """
     if F.issubclass_(F.typeof(dout), mstype.tensor):
         dx = all_reduce_grad(dout)
     else:
         gathered_indices = all_gather(dout.indices)
         gathered_values = all_gather(dout.values)
         dx = RowTensor(gathered_indices, gathered_values, dout.dense_shape)
     return (dx, )
Beispiel #4
0
 def bprop(x, out, dout):
     """Synchronize the incoming gradient across devices.

     A dense gradient is all-reduced; a sparse gradient given as an
     ``(indices, values, dense_shape)`` tuple has its first two parts
     all-gathered while the dense shape is passed through unchanged.
     """
     if F.issubclass_(F.typeof(dout), mstype.tensor):
         dx = all_reduce_grad(dout)
     else:
         dx = (all_gather(dout[0]), all_gather(dout[1]), dout[2])
     return (dx,)
Beispiel #5
0
def concatenate(arrays, axis=0):
    """
    Join a sequence of arrays along an existing axis.

    Args:
        arrays: Union[Tensor, tuple(Tensor), list(Tensor)], a Tensor or a list
        of Tensor to be concatenated.

        axis (int, optional): The axis along which the arrays will be joined,
            if axis is None, arrays are flattened before use. Default is 0.

    Returns:
        Tensor, a Tensor concatenated from a Tensor or a list of Tensors.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore.numpy as np
        >>> x1 = np.ones((1,2,3))
        >>> x2 = np.ones((1,2,1))
        >>> x = np.concatenate((x1, x2), axis=-1)
        >>> print(x.shape)
        (1, 2, 4)
    """
    array_type = F.typeof(arrays)
    if _check_is_tensor(array_type):
        # A single tensor is treated as a tuple along the first dimension:
        # e.g. a tensor of shape (3,4,5) behaves like
        # tuple(tensor_1(4,5), tensor_2(4,5), tensor_3(4,5)).
        if axis is None:
            return ravel(arrays)
        arr_shape = F.shape(arrays)
        _check_axes_range((axis, ), len(arr_shape))
        # Move axis 0 to the designated position while keeping the other
        # axes' relative positions unchanged, then fold it away via reshape.
        new_axes, new_shape = _move_axes_for_concatenate(arr_shape, axis)
        arrays = transpose(arrays, new_axes)
        arrays = reshape(arrays, new_shape)
        return arrays

    if axis is None:
        # Flatten every input and join the 1-D pieces end to end.
        flattened_arrays = ()
        for arr in arrays:
            flattened_arrays += (ravel(arr), )
        return P.Concat(-1)(flattened_arrays)

    arr_shape = F.shape(arrays[0])
    _check_axes_range((axis, ), len(arr_shape))

    # A tuple/list holding a single tensor concatenates to that tensor.
    if len(arrays) == 1:
        return arrays[0]

    return P.Concat(axis)(arrays)
Beispiel #6
0
    def bprop(x, y, z, out, dout):
        """Back-propagation rule with gradient accumulation over micro-steps.

        Cross-device communication (all_reduce / all_gather) only happens on
        the step where ``y`` equals ``grad_accumulation_step`` (``do_mirror``
        true); otherwise the local gradient passes through. When
        ``mean_flag`` is set the result is scaled by 1 / dev_num.

        Closure variables: ``equal``, ``reshape``, ``grad_accumulation_step``,
        ``mean_flag``, ``dev_num``, ``all_reduce``, ``all_gather``, ``mul``,
        ``cast``, ``RowTensor``, ``zeros_like``.

        Returns:
            tuple: gradients for (x, y, z); y and z receive zeros.
        """
        do_mirror = equal(y, grad_accumulation_step)
        # (()) evaluates to the empty tuple (), i.e. reshape to a scalar so
        # the result can drive the branch conditions below.
        do_mirror = reshape(do_mirror, (()))
        if mean_flag:
            if F.issubclass_(F.typeof(dout), mstype.tensor):
                if do_mirror:
                    # NOTE(review): dx = all_reduce(z + dout) - z, i.e. the
                    # locally accumulated part z is subtracted back out after
                    # the reduce — presumably so z's contribution is only
                    # counted once; confirm against the caller's contract.
                    tmp = z + dout
                    real_grad = all_reduce(tmp)
                    dx = real_grad - z
                else:
                    dx = dout
                float_one = F.scalar_cast(1.0, F.dtype(dx))
                num = F.scalar_cast(dev_num, F.dtype(dx))
                dx = mul(dx, cast(F.scalar_to_array(float_one/num), F.dtype(dx)))
            else:
                # Sparse (RowTensor) path: gather rows only on the mirror step.
                if do_mirror:
                    indices = all_gather(dout.indices)
                    grad = all_gather(dout.values)
                else:
                    indices = dout.indices
                    grad = dout.values
                float_one = F.scalar_cast(1.0, F.dtype(grad))
                num = F.scalar_cast(dev_num, F.dtype(grad))
                grad = mul(grad, cast(F.scalar_to_array(float_one/num), F.dtype(grad)))
                dx = RowTensor(indices, grad, dout.dense_shape)
        else:
            # Sum mode: same paths as above, without the 1 / dev_num scaling.
            if F.issubclass_(F.typeof(dout), mstype.tensor):
                if do_mirror:
                    tmp = z + dout
                    real_grad = all_reduce(tmp)
                    dx = real_grad - z
                else:
                    dx = dout
            else:
                if do_mirror:
                    indices = all_gather(dout.indices)
                    grad = all_gather(dout.values)
                else:
                    indices = dout.indices
                    grad = dout.values
                dx = RowTensor(indices, grad, dout.dense_shape)

        return (dx, zeros_like(y), zeros_like(z))
Beispiel #7
0
    def bprop(x, out, dout):
        """Back-propagation rule that combines the gradient(s) with ``divisor``.

        Handles three gradient containers: a single tensor, a tuple of
        tensors, and (as the fallback) a list of tensors. Each element is
        passed through ``op`` together with ``divisor`` cast to the element's
        dtype (``op`` is a closure variable — presumably a division primitive
        given the name ``divisor``; confirm at the registration site).

        Closure variables: ``op``, ``cast``, ``dtype``, ``divisor``.
        """
        if F.issubclass_(F.typeof(dout), mstype.tensor):
            # bool / int gradients are passed through unscaled.
            if F.issubclass_(F.dtype(dout), mstype.bool_) or F.issubclass_(F.dtype(dout), mstype.int32) \
                                     or F.issubclass_(F.dtype(dout), mstype.int16):
                return (dout,)
            dx = op(dout, cast(F.scalar_to_array(divisor), dtype(dout)))
            return (dx,)

        if F.issubclass_(F.typeof(dout), mstype.tuple_):
            # Tuple gradient: apply op element-wise, rebuilding a tuple.
            dx = ()
            input_nums = F.tuple_len(dout)
            for i in range(input_nums):
                ele_grad = op(dout[i], cast(F.scalar_to_array(divisor), dtype(dout[i])))
                dx = dx + (ele_grad,)
            return (dx,)

        # Fallback: dout is a list — apply op element-wise into a new list.
        dx = []
        input_nums = F.list_len(dout)
        for i in range(input_nums):
            ele_grad = op(dout[i], cast(F.scalar_to_array(divisor), dtype(dout[i])))
            dx.append(ele_grad)
        return (dx,)
Beispiel #8
0
 def bprop(x, out, dout):
     """Synchronize the gradient across devices, masked to positions where
     the local input ``x`` equals the reduced output ``out``.

     Dense gradients are all-reduced; sparse RowTensor gradients have their
     indices and values all-gathered before masking.
     """
     if F.issubclass_(F.typeof(dout), mstype.tensor):
         synced = all_reduce_grad(dout)
         mask = cast(equal(x, out), dtype(synced))
         dx = mul(synced, mask)
     else:
         gathered_idx = all_gather(dout.indices)
         gathered_val = all_gather(dout.values)
         mask = cast(equal(x, out), dtype(gathered_val))
         gathered_val = mul(gathered_val, mask)
         dx = RowTensor(gathered_idx, gathered_val, dout.dense_shape)
     return (dx, )
Beispiel #9
0
 def bprop(x, out, dout):
     """Synchronize the gradient across devices, masked to positions where
     the local input ``x`` equals the reduced output ``out``.

     A sparse gradient arrives as an ``(indices, values, dense_shape)``
     tuple; its first two parts are all-gathered and the values masked,
     while the dense shape is passed through unchanged.
     """
     if F.issubclass_(F.typeof(dout), mstype.tensor):
         synced = all_reduce_grad(dout)
         mask = cast(equal(x, out), dtype(synced))
         dx = mul(synced, mask)
     else:
         gathered_idx = all_gather(dout[0])
         gathered_val = all_gather(dout[1])
         mask = cast(equal(x, out), dtype(gathered_val))
         dx = (gathered_idx, mul(gathered_val, mask), dout[2])
     return (dx,)