Example #1
    def sum_grad(self):
        """Sum the gradients of all parameters.

        Call this method after each ``backward`` pass:

        ```python
        x = torch.ones(1, requires_grad=True)
        optimizer = torch.optim.SGD([x], lr=0.1)
        for epoch in range(2):
            for step in range(3):
                y = x + 1
                y.backward()
                optimizer.sum_grad()
            optimizer.step()
        print(x)  # 0.4
        ```

        """
        current_ws = workspace.get_workspace()
        for group in self.param_groups:
            grads, sum_grads = [], []
            for param in group['params']:
                grad = self._get_grad(current_ws, param)
                if grad is not None:
                    grads.append(grad)
                    sum_grads.append(grad.id + '_sum')
            Function.apply(
                'Axpby', grads[0].device,
                grads, outputs=sum_grads,
                alpha=1., beta=1. if self._sums_grad else 0.)
        self._sums_grad = True
Example #2
def all_gather(tensor_list, tensor, group=None):
    """Gather the tensor across all nodes in a group.

    Parameters
    ----------
    tensor_list : Sequence[dragon.vm.torch.Tensor]
        The output tensor list.
    tensor : dragon.vm.torch.Tensor
        The tensor to be sent.
    group : ProcessGroup, optional
        The group for communication.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    group = group or distributed.get_group()
    if group is None:
        return tensor
    output_tensor = Function.apply('Collective',
                                   tensor.device, [tensor],
                                   operation='ALLGATHER',
                                   **group.arguments)
    if len(tensor_list) > 0:
        return Function.apply('Split',
                              output_tensor.device, [output_tensor],
                              outputs=[None] * len(tensor_list),
                              axis=0,
                              size_split=None,
                              copy=True)
    return output_tensor
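A minimal usage sketch, assuming a process group has been initialized via ``dragon.distributed`` and that two ranks participate (the setup here is illustrative); without a group the call is a no-op that returns the input:

```python
# Each rank contributes a (2, 3) tensor; the gathered result is
# concatenated along axis 0 into a (4, 3) tensor on every rank.
x = torch.ones(2, 3)
out = all_gather([], x)  # empty tensor_list: return the packed tensor
# Passing a non-empty tensor_list instead splits the packed result
# back into len(tensor_list) chunks along axis 0.
```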
Example #3
def transpose(input, dim0, dim1, out=None):
    """Return a new tensor with two dimensions swapped.

    Examples:

    ```python
    x = torch.ones(2, 3, 4)
    print(torch.transpose(x, 0, 2).shape)  # (4, 3, 2)
    ```

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    dim0 : int
        The first dimension to be transposed.
    dim1 : int
        The second dimension to be transposed.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    dims = list(range(input.ndimension()))
    dims[dim0], dims[dim1] = dims[dim1], dims[dim0]
    return Function.apply('Transpose',
                          input.device, [input],
                          outputs=[out],
                          ndim=len(dims),
                          perm=dims)
Example #4
def narrow(input, dimension, start, length):
    """Return a narrowed tensor of input.

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    dimension : int
        The dimension to slice.
    start : int
        The starting position.
    length : int
        The number of elements to include.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    sizes = list(input.shape[:])
    starts = [0] * len(sizes)
    starts[dimension], sizes[dimension] = start, length
    return Function.apply('Slice',
                          input.device, [input],
                          ndim=len(starts),
                          starts=starts,
                          sizes=sizes)
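A short example (a sketch assuming ``torch`` is the ``dragon.vm.torch`` module):

```python
x = torch.tensor([[1, 2, 3], [4, 5, 6]])
# Take 2 elements along dimension 1, starting at position 1.
print(torch.narrow(x, 1, 1, 2))  # [[2, 3], [5, 6]]
```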
Example #5
def multinomial(input, num_samples, out=None):
    """Return an index tensor sampled from the multinomial distribution.

    Examples:

    ```python
    input = torch.tensor([0.5, 0.5]).log()
    index = torch.multinomial(input, 1)
    ```

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    num_samples : int
        The number of samples in each row.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    return Function.apply('Multinomial',
                          input.device, [input],
                          outputs=[out],
                          sample_size=num_samples)
Example #6
def masked_fill(input, mask, value, out=None):
    """Fill tensor with the value where mask is true.

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    mask : dragon.vm.torch.Tensor
        The boolean mask.
    value : Union[number, dragon.vm.torch.Tensor]
        The value to fill.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    if not isinstance(value, Tensor):
        value = constant_ops.scalar(value, input.dtype, input.device)
    return Function.apply('Where',
                          input.device, [mask, value, input],
                          outputs=[out])
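A short example; since the implementation lowers to ``Where(mask, value, input)``, elements keep their original value wherever the mask is false:

```python
x = torch.tensor([1., 2., 3.])
mask = torch.tensor([True, False, True])
print(torch.masked_fill(x, mask, 0.))  # [0., 2., 0.]
```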
Example #7
def flip(input, dims):
    """Reverse elements along the given dimension.

    :attr:`dims` could be negative:

    ```python
    x = torch.tensor([[1, 2, 3], [4, 5, 6]])

    # A negative dimension is the last-k dimension
    print(torch.flip(x, dims=1))  # [[3, 2, 1], [6, 5, 4]]
    print(torch.flip(x, dims=-1))  # Equivalent

    # Also, dimension could be a sequence of integers
    print(torch.flip(x, dims=(0, 1)))  # [[6, 5, 4], [3, 2, 1]]
    ```

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    dims : Union[int, Sequence[int]]
        The dimension(s) to reverse.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    return Function.apply(
        'Reverse',
        input.device, [input],
        axes=nest.flatten(dims) if dims is not None else dims)
Example #8
def broadcast(tensor, src=0, group=None):
    """Broadcast the tensor from source node in a group.

    Parameters
    ----------
    tensor : dragon.vm.torch.Tensor
        The tensor to be sent.
    src : int
        The rank of the source node.
    group : ProcessGroup, optional
        The group for communication.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    group = group or distributed.get_group()
    if group is None:
        return tensor
    return Function.apply('Collective',
                          tensor.device, [tensor],
                          outputs=[tensor],
                          operation='BROADCAST',
                          root=src,
                          **group.arguments)
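A hedged sketch of the in-place semantics (the tensor is passed as both input and output), assuming a multi-rank group initialized elsewhere:

```python
# Hypothetical: `x` holds different values on each rank before the call.
broadcast(x, src=0)
# Afterwards every rank's `x` holds the values from rank 0.
```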
Example #9
def where(condition, x, y):
    r"""Select the elements from two branches under the condition.

    .. math::
        \text{out}_{i} =
            \begin{cases}
                \text{x}_{i}, & \text{ if } \text{condition}_{i} \\
                \text{y}_{i}, & \text{ otherwise }
            \end{cases}

    Parameters
    ----------
    condition : dragon.vm.torch.Tensor
        The condition tensor.
    x : dragon.vm.torch.Tensor
        The elements for ``True`` branch.
    y : dragon.vm.torch.Tensor
        The elements for ``False`` branch.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    return Function.apply('Where', condition.device, [condition, x, y])
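For example:

```python
condition = torch.tensor([True, False, True])
x = torch.tensor([1, 2, 3])
y = torch.tensor([4, 5, 6])
print(torch.where(condition, x, y))  # [1, 5, 3]
```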
Example #10
def zeros(*size, out=None, dtype='float32', device=None, requires_grad=False):
    r"""Return a tensor filled with zeros.

    .. math:: \text{out} \leftarrow 0

    Parameters
    ----------
    size : int...
        The output tensor shape.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.
    dtype : str, optional, default='float32'
        The data type of output tensor.
    device : dragon.vm.torch.device, optional
        The device of output tensor.
    requires_grad : bool, optional, default=False
        Record gradient for output tensor or not.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    size = nest.flatten(size)
    device = out.device if out else (device or cpp.device())
    out = Function.apply('Fill',
                         device, [],
                         outputs=[out],
                         dtype=dtype,
                         value=0.0,
                         ndim=len(size),
                         dims=size)
    out._requires_grad = requires_grad
    return out
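For example:

```python
print(torch.zeros(2, 3))                 # float32 zeros with shape (2, 3)
print(torch.zeros(2, 3, dtype='int64'))  # integer zeros
```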
Example #11
def zeros_like(input, dtype='float32', device=None, requires_grad=False):
    r"""Return a tensor of zeros with shape as the other.

    .. math:: \text{out} \leftarrow 0

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The tensor for indicating shape.
    dtype : str, optional, default='float32'
        The data type of output tensor.
    device : dragon.vm.torch.device, optional
        The device of output tensor.
    requires_grad : bool, optional, default=False
        Record gradient for output tensor or not.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    device = device or input.device
    out = Function.apply('Fill', device, [input], dtype=dtype, value=0.0)
    out._requires_grad = requires_grad
    return out
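For example:

```python
x = torch.ones(2, 3)
print(torch.zeros_like(x))  # zeros with shape (2, 3)
```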
Example #12
def clamp(input, min=None, max=None, out=None):
    r"""Compute the clipped input according to the given bounds.

    .. math:: \text{out} = \min(\max(\text{input}, low), high)

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    min : number, optional
        The min value.
    max : number, optional
        The max value.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    low = float(min) if min is not None else None
    high = float(max) if max is not None else None
    return Function.apply('Clip',
                          input.device, [input],
                          outputs=[out],
                          low=low,
                          high=high)
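For example:

```python
x = torch.tensor([-2., 0.5, 3.])
print(torch.clamp(x, min=0, max=1))  # [0., 0.5, 1.]
print(torch.clamp(x, min=0))         # [0., 0.5, 3.], upper bound unset
```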
Example #13
def argmin(input, dim, keepdim=False, out=None):
    """Return the index of minimum elements along the given dimension.

    :attr:`dim` could be negative:

    ```python
    # A negative dimension is the last-k dimension
    x = torch.tensor([[1, 2, 3], [4, 5, 6]])
    print(torch.argmin(x, dim=1))  # [0, 0]
    print(torch.argmin(x, dim=-1))  # Equivalent
    ```

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    dim : int
        The dimension to reduce.
    keepdim : bool, optional, default=False
        Keep the reduced dimension or not.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The index of minimum elements.

    """
    return Function.apply('ArgMin',
                          input.device, [input],
                          outputs=[out],
                          axis=dim,
                          keepdims=keepdim)
Example #14
def normal(mean, std, size, out=None):
    r"""Return a tensor initialized from the normal distribution.

    .. math:: \text{out} \sim \mathcal{N}(\mu, \sigma^{2})

    Parameters
    ----------
    mean : number
        The value to :math:`\mu`.
    std : number
        The value to :math:`\sigma`.
    size : Sequence[int]
        The output tensor shape.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    dtype = out.dtype if out else 'float32'
    device = out.device if out else cpp.device()
    return Function.apply(
        'RandomNormal', device, [], outputs=[out],
        dtype=dtype, mean=float(mean), std=float(std),
        ndim=len(size), dims=size)
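For example:

```python
x = torch.normal(0., 1., size=(2, 3))  # (2, 3) samples from N(0, 1)
```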
Example #15
def cumsum(input, dim, out=None):
    """Compute the cumulative sum of elements along the given dimension.

    :attr:`dim` could be negative:

    ```python
    # A negative dimension is the last-k dimension
    x = torch.tensor([[1, 2, 3], [4, 5, 6]])
    print(torch.cumsum(x, dim=1))  # [[1, 3, 6], [4, 9, 15]]
    print(torch.cumsum(x, dim=-1))  # Equivalent
    ```

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    dim : int
        The cumulative dimension.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    return Function.apply('CumSum',
                          input.device, [input],
                          outputs=[out],
                          axis=dim)
Example #16
def randperm(n, out=None, dtype='int64', device=None, requires_grad=False):
    """Return a tensor with value in the permuted range.

    Specify ``n`` to determine an interval :math:`[0, n)`:

    ```python
    print(torch.randperm(4))
    ```

    Parameters
    ----------
    n : int
        The end of the interval.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.
    dtype : str, optional, default='int64'
        The data type of output tensor.
    device : dragon.vm.torch.device, optional
        The device of output tensor.
    requires_grad : bool, optional, default=False
        Record gradient for output tensor or not.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    device = out.device if out else (device or cpp.device())
    out = Function.apply(
        'Permutation', device, [], outputs=[out],
        dtype=dtype, limit=n)
    out._requires_grad = requires_grad
    return out
Example #17
def uniform(low, high, size, out=None):
    r"""Return a tensor initialized from the uniform distribution.

    .. math:: \text{out} \sim \mathcal{U}(\alpha, \beta)

    Parameters
    ----------
    low : number
        The value to :math:`\alpha`.
    high : number
        The value to :math:`\beta`.
    size : Sequence[int]
        The output tensor shape.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    dtype = out.dtype if out else 'float32'
    device = out.device if out else cpp.device()
    return Function.apply(
        'RandomUniform', device, [], outputs=[out],
        dtype=dtype, low=float(low), high=float(high),
        ndim=len(size), dims=size)
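For example (a sketch assuming this helper is exposed at module level like the others):

```python
x = torch.uniform(-1., 1., size=(2, 3))  # (2, 3) samples from U(-1, 1)
```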
Example #18
def all_reduce(tensor, op='sum', group=None):
    """Reduce the tensor across all nodes in a group.

    Parameters
    ----------
    tensor : dragon.vm.torch.Tensor
        The tensor to reduce.
    op : str, optional
        The reduction op.
    group : ProcessGroup, optional
        The group for communication.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    group = group or distributed.get_group()
    if group is None:
        return tensor
    op = op.upper()
    if op not in ('MEAN', 'SUM'):
        raise ValueError('Unsupported reduction: ' + op)
    return Function.apply('Collective',
                          tensor.device, [tensor],
                          outputs=[tensor],
                          operation='ALLREDUCE',
                          reduction=op,
                          **group.arguments)
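A hedged sketch, assuming a two-rank group where one rank holds ``[1.]`` and the other ``[3.]``; the reduction happens in place since the tensor is also passed as the output:

```python
all_reduce(x, op='sum')   # every rank now holds [4.]
# With op='mean', every rank would instead hold [2.].
```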
Example #19
def flatten(input, start_dim=0, end_dim=-1, out=None):
    """Return a tensor with dimensions flattened.

    :attr:`start_dim` and :attr:`end_dim` could be negative:

    ```python
    # A negative dimension is the last-k dimension
    x = torch.tensor([[1, 2, 3], [4, 5, 6]])
    print(torch.flatten(x, start_dim=0, end_dim=1))
    print(torch.flatten(x, start_dim=0, end_dim=-1))  # Equivalent
    ```

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    start_dim : int, optional, default=0
        The start dimension to flatten.
    end_dim : int, optional, default=-1
        The end dimension to flatten.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    return Function.apply('Flatten',
                          input.device, [input],
                          outputs=[out],
                          axis=start_dim,
                          end_axis=end_dim)
Example #20
def addmm(input, mat1, mat2, beta=1, alpha=1, out=None):
    r"""Add input to the result of matrix-matrix multiplication.

    .. math:: \text{out} = \alpha (\text{mat1} \times \text{mat2}) + \beta \text{input}

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    mat1 : dragon.vm.torch.Tensor
        The first matrix.
    mat2 : dragon.vm.torch.Tensor
        The second matrix.
    beta : float, optional, default=1
        The value to :math:`\beta`.
    alpha : float, optional, default=1
        The value to :math:`\alpha`.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    return Function.apply('Gemm',
                          input.device, [mat1, mat2, input],
                          outputs=[out],
                          alpha=float(alpha),
                          beta=float(beta))
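For example:

```python
a = torch.ones(2, 3)
b = torch.ones(3, 4)
c = torch.zeros(2, 4)
print(torch.addmm(c, a, b))  # every element is 3: 1 * (a @ b) + 1 * c
```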
Example #21
def randn(*size, out=None, dtype='float32', device=None, requires_grad=False):
    """Return a tensor from the normal distribution of N(0, 1).

    Parameters
    ----------
    size : int...
        The output tensor shape.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.
    dtype : str, optional, default='float32'
        The data type of output tensor.
    device : dragon.vm.torch.device, optional
        The device of output tensor.
    requires_grad : bool, optional, default=False
        Record gradient for output tensor or not.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    size = nest.flatten(size)
    device = out.device if out else (device or cpp.device())
    out = Function.apply(
        'RandomNormal', device, [], outputs=[out],
        dtype=dtype, mean=0.0, std=1.0, ndim=len(size), dims=size)
    out._requires_grad = requires_grad
    return out
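For example:

```python
x = torch.randn(2, 3)                   # float32, shape (2, 3)
y = torch.randn(2, 3, dtype='float64')  # choose another data type
```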
Example #22
def split(tensor, split_size_or_sections, dim=0, copy=True):
    """Split input into chunks along the given dimension.

    Either the size of every chunk or the size of each chunk is accepted:

    ```python
    x = torch.tensor([1, 2, 3, 4, 5, 6])
    # Shape: (6,) -> (4,), (2,)
    print(torch.split(x, split_size_or_sections=4))
    # Shape: (6,) -> (5,), (1,)
    print(torch.split(x, split_size_or_sections=(5, 1)))
    ```

    :attr:`dim` can be negative:

    ```python
    x = torch.tensor([[1, 2, 3], [4, 5, 6]])
    print(torch.split(x, 2, dim=1))
    print(torch.split(x, 2, dim=-1))  # Equivalent
    ```

    Parameters
    ----------
    tensor : dragon.vm.torch.Tensor
        The input tensor.
    split_size_or_sections : Union[int, Sequence[int]]
        The number or size of chunks.
    dim : int, optional, default=0
        The dimension to split.
    copy : bool, optional, default=True
        ``True`` to copy the data, otherwise create views of the input.

    Returns
    -------
    Sequence[dragon.vm.torch.Tensor]
        The output tensors.

    """
    if nest.is_sequence(split_size_or_sections):
        size_splits = split_size_or_sections
        num_splits = len(split_size_or_sections)
    else:
        size = tensor.shape[dim]
        if size % split_size_or_sections == 0:
            num_splits = size // split_size_or_sections
            size_splits = [split_size_or_sections] * num_splits
        else:
            num_splits = size // split_size_or_sections + 1
            size_splits = [split_size_or_sections] * num_splits
            size_splits[-1] = size - (split_size_or_sections *
                                      (num_splits - 1))
    return Function.apply('Split',
                          tensor.device, [tensor],
                          outputs=[None] * num_splits,
                          axis=dim,
                          num_splits=num_splits,
                          split=size_splits,
                          copy=copy)
Example #23
def norm(input, p='fro', dim=None, keepdim=False, out=None, dtype=None):
    """Compute the norm value of elements along the given dimension.

    :attr:`dim` could be negative or ``None``:

    ```python
    x = torch.tensor([[1., 2., 3.], [4., 5., 6.]])

    # A negative dimension is the last-k dimension
    print(torch.norm(x, dim=1))
    print(torch.norm(x, dim=-1))  # Equivalent

    # If ``dim`` is None, the vector-style reduction
    # will be applied to return a scalar result
    print(torch.norm(x))  # 9.539

    # Also, ``dim`` could be a sequence of integers
    print(torch.norm(x, dim=(0, 1)))  # 9.539
    ```

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    p : {'fro', 1, 2}, optional
        The norm order.
    dim : Union[int, Sequence[int]], optional
        The dimension to reduce.
    keepdim : bool, optional, default=False
        Keep the reduced dimension or not.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.
    dtype : str, optional
        The data type to cast to.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    if p is None or p == 2 or p == 'fro':
        op_type = 'ReduceL2'
    elif p == 1:
        op_type = 'ReduceL1'
    else:
        raise ValueError('Unsupported norm order: ' + str(p))
    input = input.to(dtype=dtype)
    keepdim = keepdim if dim is not None else False
    dim = nest.flatten(dim) if dim is not None else dim
    return Function.apply(op_type,
                          input.device, [input],
                          outputs=[out],
                          axes=dim,
                          keepdims=keepdim)
Example #24
def unique(input, return_inverse=False, return_counts=False, **kwargs):
    """Return the unique elements of input.

    If ``return_inverse`` is ``True``, also return the index to which each input element maps:

    ```python
    x = torch.tensor([1, 2, 3, 2])
    y, index = torch.unique(x, return_inverse=True)
    print(y)  # [1, 2, 3]
    print(index)  # [0, 1, 2, 1]
    ```

    If ``return_counts`` is ``True``, also return the count of each unique element:

    ```python
    x = torch.tensor([1, 2, 3, 2])
    y, counts = torch.unique(x, return_counts=True)
    print(y)  # [1, 2, 3]
    print(counts)  # [1, 2, 1]
    ```

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    return_inverse : bool, optional, default=False
        Return the inverse index or not.
    return_counts : bool, optional, default=False
        Return the counts or not.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.
    dragon.vm.torch.Tensor, optional
        The inverse index tensor.
    dragon.vm.torch.Tensor, optional
        The counts tensor.

    """
    if 'sorted' in kwargs:
        kwargs.pop('sorted')
    num_outputs = 1
    if return_inverse:
        num_outputs += 1
    if return_counts:
        num_outputs += 1
    return Function.apply('Unique',
                          input.device, [input],
                          outputs=[None] * num_outputs,
                          return_inverse=return_inverse,
                          return_counts=return_counts)
Example #25
def matmul(input, other, out=None):
    r"""Compute the matrix multiplication.

    .. math:: \text{out} = \text{input} \times \text{other}

    The behavior depends on the shape of input tensors:

    * If both tensors are 1d, computes the vector product.
    * If the first tensor is 1d and the second is >= 2d, computes the vector-matrix multiplication.
    * If the first tensor is >= 2d and the second is 1d, computes the matrix-vector multiplication.
    * If both tensors are >= 2d, computes the matrix-matrix multiplication.
    * If one tensor is >= 3d, applies batching and broadcasting to the computation.

    Examples:

    ```python
    # Vector x Vector
    a = torch.ones(2)
    b = torch.ones(2)
    print(torch.matmul(a, b))
    # Vector x Matrix
    a = torch.ones(2)
    b = torch.ones(2, 3)
    print(torch.matmul(a, b))
    # Matrix x Vector
    a = torch.ones(3, 2)
    b = torch.ones(2)
    print(torch.matmul(a, b))
    # Matrix x Matrix
    a = torch.ones(2, 3)
    b = torch.ones(3, 2)
    print(torch.matmul(a, b))
    ```

    Parameters
    ----------
    input : dragon.vm.torch.Tensor
        The input tensor.
    other : dragon.vm.torch.Tensor
        The tensor to multiply.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    return Function.apply('MatMul',
                          input.device, [input, other],
                          outputs=[out])
Example #26
def eye(
    n,
    m=None,
    out=None,
    dtype='float32',
    device=None,
    requires_grad=False,
):
    r"""Return a tensor constructed as the identity matrix.

    .. math:: \text{out} \leftarrow \text{diag}(1, 1, ..., 1)

    The rows and columns of the matrix are determined by ``n`` and ``m``:

    ```python
    print(torch.eye(2))  # [[1., 0.], [0., 1.]]
    print(torch.eye(2, 3))  # [[1., 0., 0.], [0., 1., 0.]]
    ```

    Parameters
    ----------
    n : int
        The number of output rows.
    m : int, optional
        The number of output columns.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.
    dtype : str, optional, default='float32'
        The data type of output tensor.
    device : dragon.vm.torch.device, optional
        The device of output tensor.
    requires_grad : bool, optional, default=False
        Record gradient for output tensor or not.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    m = n if m is None else m
    device = out.device if out else (device or cpp.device())
    out = Function.apply('Eye',
                         device, [],
                         outputs=[out],
                         dtype=dtype,
                         ndim=2,
                         dims=(n, m))
    out._requires_grad = requires_grad
    return out
Example #27
    def _update_group(self, group):
        """Update parameters for the group."""
        execute_ws = workspace.get_workspace()

        # Collect params and grads.
        params_with_grad, grads = [], []
        for p in group['params']:
            g = self._get_grad(execute_ws, p, self._sums_grad)
            if g is not None:
                params_with_grad.append(p)
                grads.append(g)

        # Skip if grads are all missing.
        if len(params_with_grad) == 0:
            return

        # Update hyper from group values.
        for name in self._hyper.keys():
            group_name = group['name']
            impl_name, group_dict = self._hyper[name]
            if group_name not in group_dict:
                impl_name = group_name + '/' + impl_name
                group_dict[group_name] = execute_ws.create_tensor(impl_name)
            impl = group_dict[group_name]
            impl.FromNumpy(numpy.array(group[name], 'float32'), False)

        # Reduce grads in the process group.
        process_group = distributed.get_group()
        if process_group is not None:
            Function.apply('Collective', grads[0].device, grads,
                           outputs=grads, operation='ALLREDUCE',
                           reduction='MEAN', **process_group.arguments)

        # Apply updates.
        Function.apply(self._op_type, params_with_grad[0].device, grads,
                       outputs=params_with_grad, name=group['name'],
                       weight_decay=None)
Example #28
def full(
    size,
    fill_value,
    out=None,
    dtype='int64',
    device=None,
    requires_grad=False,
):
    """Return a tensor filled with a scalar.

    Examples:

    ```python
    print(torch.full((1, 2), 1))  # [[1, 1]]
    ```

    Parameters
    ----------
    size : int...
        The output shape.
    fill_value : number
        The scalar to fill.
    out : dragon.vm.torch.Tensor, optional
        The output tensor.
    dtype : str, optional, default='int64'
        The data type of output tensor.
    device : dragon.vm.torch.device, optional
        The device of output tensor.
    requires_grad : bool, optional, default=False
        Record gradient for output tensor or not.

    Returns
    -------
    dragon.vm.torch.Tensor
        The output tensor.

    """
    size = nest.flatten(size)
    device = out.device if out else (device or cpp.device())
    out = Function.apply('Fill',
                         device, [],
                         outputs=[out],
                         dtype=dtype,
                         value=float(fill_value),
                         ndim=len(size),
                         dims=size)
    out._requires_grad = requires_grad
    return out
Example #29
    def _set_parameter(self,
                       data,
                       layer_id=0,
                       param_id=0,
                       param_type='matrix'):
        """Set the data of a parameter."""
        return Function.apply('RNNParamSet',
                              data.device, [data],
                              outputs=[self.weights],
                              rnn_mode=self.mode,
                              bidirectional=self.bidirectional,
                              input_size=self.input_size,
                              hidden_size=self.hidden_size,
                              layer_id=layer_id,
                              param_id=param_id,
                              param_type=param_type)
Example #30
    def forward(self, input, hx=None):
        """Compute the outputs of the recurrent layers."""
        inputs = [input, self.weights]
        if hx is not None:
            inputs += nest.flatten(hx)
        # LSTM mode returns an extra cell state besides the hidden state.
        outputs = [None] * (3 if self.mode == 'lstm' else 2)
        outputs = Function.apply('Recurrent',
                                 input.device,
                                 inputs,
                                 outputs=outputs,
                                 rnn_mode=self.mode,
                                 bidirectional=self.bidirectional,
                                 input_size=self.input_size,
                                 hidden_size=self.hidden_size,
                                 dropout=self.dropout,
                                 phase='TRAIN' if self.training else 'TEST')
        output, hidden = outputs[0], outputs[1:]
        return output, hidden[0] if len(hidden) == 1 else hidden
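A hedged usage sketch for this ``forward``: assuming the surrounding class is an RNN-style module (the ``mode``, ``weights``, and size attributes referenced above are its fields; the instance names below are hypothetical), an ``'lstm'`` module returns the output plus an ``(h, c)`` pair, while other modes return a single hidden tensor:

```python
output, h = rnn_module(x)        # non-LSTM: one hidden state tensor
output, (h, c) = lstm_module(x)  # LSTM: hidden state plus cell state
```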