import numpy as np
import oneflow as flow


def _test_min(
    test_case, placement, sbp, np_out, np_out_grad, input_arr, shape, dim, keepdims
):
    # Build a global tensor and compare the oneflow result against the
    # precomputed numpy reference.
    global_x = flow.tensor(
        input_arr,
        dtype=flow.float32,
        requires_grad=True,
        placement=flow.env.all_device_placement("cpu"),
        sbp=flow.sbp.broadcast,
    )
    if dim is None:
        of_out = flow.min(global_x)
    else:
        of_out = flow.min(global_x, dim, keepdims)[0]
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
    of_out = of_out.sum()
    of_out.backward()

    test_case.assertTrue(
        np.allclose(global_x.grad.numpy(), np_out_grad, 0.0001, 0.0001)
    )
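The np_out and np_out_grad references passed into this helper are computed elsewhere; a minimal sketch of how they could be derived with NumPy, mirroring what Example #2 below does inline (the _make_min_reference name is hypothetical):

import numpy as np

def _make_min_reference(input_arr, dim, keepdims):
    # Forward reference: plain NumPy min reduction.
    np_out = np.amin(input_arr, axis=dim, keepdims=keepdims)
    # Backward reference: since the sum of the min values is backpropagated,
    # the gradient is 1 at the argmin position(s) and 0 everywhere else.
    np_out_grad = np.zeros_like(input_arr)
    if dim is None:
        np.put(np_out_grad, np.argmin(input_arr), 1)
    else:
        idx = np.expand_dims(np.argmin(input_arr, axis=dim), axis=dim)
        np.put_along_axis(np_out_grad, idx, 1, axis=dim)
    return np_out, np_out_grad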
Example #2
def _test_min(test_case, device, shape, dim, keepdims):
    input_arr = np.random.randn(*shape)
    np_out = np.amin(input_arr, axis=dim, keepdims=keepdims)
    x = flow.tensor(input_arr,
                    dtype=flow.float32,
                    device=flow.device(device),
                    requires_grad=True)
    of_out = flow.min(x, dim, keepdims)
    if dim is not None:
        of_out = of_out[0]

    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
    of_out = of_out.sum()
    of_out.backward()
    np_out_grad = np.zeros_like(input_arr)
    if dim is None:
        arg_min = np.argmin(input_arr)
        np.put(np_out_grad, arg_min, 1)
    else:
        arg_min = np.expand_dims(np.argmin(input_arr, axis=dim), axis=dim)
        np.put_along_axis(np_out_grad, arg_min, 1, axis=dim)
    test_case.assertTrue(
        np.allclose(x.grad.numpy(), np_out_grad, 0.0001, 0.0001))
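For a concrete view of the gradient mask built above, a small NumPy example with arbitrary values (per-row minimum, dim=1):

import numpy as np

a = np.array([[3.0, 1.0, 2.0],
              [0.5, 4.0, 0.2]])
grad = np.zeros_like(a)
idx = np.expand_dims(np.argmin(a, axis=1), axis=1)  # column index of each row's minimum
np.put_along_axis(grad, idx, 1, axis=1)
# grad is now:
# [[0., 1., 0.],
#  [0., 0., 1.]]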
Example #3
def _min(self, *args, **kwargs):
    return flow.min(self, *args, **kwargs)
Example #4
def _min(self, dim=None, keepdim=False):
    return flow.min(self, dim, keepdim)
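The two one-liners above bind flow.min as a tensor method. A small usage sketch of the call forms they delegate to; as in the tests above, the per-dimension form is indexed with [0] to obtain the reduced values:

import numpy as np
import oneflow as flow

x = flow.tensor(np.array([[2.0, 1.0, 3.0],
                          [0.5, 4.0, 0.2]]), dtype=flow.float32)

whole_min = flow.min(x)             # full reduction to a scalar tensor (0.2)
row_min = flow.min(x, 1, False)[0]  # reduce over dim 1; [0] selects the min values, as above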
Example #5
from typing import Iterable, Union

import oneflow as flow
from oneflow import Tensor

# Type alias matching the docstring: a single Tensor or an iterable of Tensors.
_tensor_or_tensors = Union[Tensor, Iterable[Tensor]]


def clip_grad_norm_(
    parameters: _tensor_or_tensors,
    max_norm: float,
    norm_type: float = 2.0,
    error_if_nonfinite: bool = False,
) -> Tensor:
    r"""Clips gradient norm of an iterable of parameters.
    The norm is computed over all gradients together, as if they were
    concatenated into a single vector.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        max_norm (float or int): max norm of the gradients
        norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.
        error_if_nonfinite (bool): if True, an error is thrown if the total
            norm of the gradients from :attr:`parameters` is ``nan``,
            ``inf``, or ``-inf``. Default: False (will switch to True in the future)

    Returns:
        Total norm of the parameter gradients (viewed as a single vector).
        The gradients of :attr:`parameters` are clipped in place.

    For example:

    .. code-block:: python

        >>> import oneflow as flow
        >>> import numpy as np
        >>> x1 = flow.tensor(np.array([[2, 3, 4], [1.5, 2.6, 3.7]]).astype(np.float32), requires_grad=True)
        >>> m1 = flow.nn.ReLU()
        >>> out1 = m1(x1)
        >>> out1 = out1.sum()
        >>> out1.backward()
        >>> norm1 = flow.nn.utils.clip_grad_norm_(x1, 0.6, 1.0)
        >>> norm1
        tensor(6., dtype=oneflow.float32)
        >>> x1.grad
        tensor([[0.1000, 0.1000, 0.1000],
                [0.1000, 0.1000, 0.1000]], dtype=oneflow.float32)
        >>> x2 = flow.tensor(np.array([[-2, -3, -4], [2.5, 0, 3.2]]).astype(np.float32), requires_grad=True)
        >>> out2 = flow.atan(x2)
        >>> out2 = out2.sum()
        >>> out2.backward()
        >>> norm2 = flow.nn.utils.clip_grad_norm_(x2, 0.5)
        >>> norm2
        tensor(1.0394, dtype=oneflow.float32)
        >>> x2.grad
        tensor([[0.0962, 0.0481, 0.0283],
                [0.0663, 0.4810, 0.0428]], dtype=oneflow.float32)

    """

    if isinstance(parameters, (Tensor, flow._oneflow_internal.Tensor)):
        parameters = [parameters]
    parameters = [p for p in parameters if p.grad is not None]
    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if len(parameters) == 0:
        return flow.tensor(0.0)

    if parameters[0].is_global:
        assert all(
            p.is_global for p in parameters
        ), "All parameters must be global tensors."
        sbp_broadcast = [flow.sbp.broadcast for _ in parameters[0].sbp]
        param0_placement = parameters[0].placement
        if norm_type == float("inf"):
            norms = [
                p.grad.detach().to_global(
                    sbp=sbp_broadcast).abs().max().to_global(
                        placement=param0_placement) for p in parameters
            ]
            total_norm = norms[0] if len(norms) == 1 else flow.max(
                flow.stack(norms))
        elif norm_type == float("-inf"):
            norms = [
                p.grad.detach().to_global(
                    sbp=sbp_broadcast).abs().min().to_global(
                        placement=param0_placement) for p in parameters
            ]
            total_norm = norms[0] if len(norms) == 1 else flow.min(
                flow.stack(norms))
        else:
            total_norm = flow.linalg.vector_norm(
                flow.stack([
                    flow.linalg.vector_norm(
                        p.grad.detach().to_global(sbp=sbp_broadcast),
                        norm_type).to_global(placement=param0_placement)
                    for p in parameters
                ]),
                norm_type,
            )
        if error_if_nonfinite and flow.logical_or(total_norm.isnan(),
                                                  total_norm.isinf()):
            raise RuntimeError(
                f"The total norm of order {norm_type} for gradients from "
                "`parameters` is non-finite, so it cannot be clipped. To disable "
                "this error and scale the gradients by the non-finite norm anyway, "
                "set `error_if_nonfinite=False`")
        clip_coef = max_norm / (total_norm + 1e-6)
        clip_coef_clamped = clip_coef.clamp(max=1.0)
        for p in parameters:
            p.grad.detach().mul_(
                clip_coef_clamped.to_global(placement=p.placement))
    else:
        device = parameters[0].grad.device
        if norm_type == float("inf"):
            norms = [
                p.grad.detach().abs().max().to(device) for p in parameters
            ]
            total_norm = norms[0] if len(norms) == 1 else flow.max(
                flow.stack(norms))
        elif norm_type == float("-inf"):
            norms = [
                p.grad.detach().abs().min().to(device) for p in parameters
            ]
            total_norm = norms[0] if len(norms) == 1 else flow.min(
                flow.stack(norms))
        else:
            total_norm = flow.linalg.vector_norm(
                flow.stack([
                    flow.linalg.vector_norm(p.grad.detach(),
                                            norm_type).to(device)
                    for p in parameters
                ]),
                norm_type,
            )
        if error_if_nonfinite and flow.logical_or(total_norm.isnan(),
                                                  total_norm.isinf()):
            raise RuntimeError(
                f"The total norm of order {norm_type} for gradients from "
                "`parameters` is non-finite, so it cannot be clipped. To disable "
                "this error and scale the gradients by the non-finite norm anyway, "
                "set `error_if_nonfinite=False`")
        clip_coef = max_norm / (total_norm + 1e-6)
        clip_coef_clamped = clip_coef.clamp(max=1.0)
        for p in parameters:
            p.grad.detach().mul_(clip_coef_clamped.to(p.grad.device))
    return total_norm
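A minimal training-step sketch for clip_grad_norm_ above, assuming a toy flow.nn.Linear model and flow.optim.SGD optimizer (the model and data here are illustrative only):

import oneflow as flow

model = flow.nn.Linear(4, 2)
optimizer = flow.optim.SGD(model.parameters(), lr=0.1)

x = flow.randn(8, 4)
loss = model(x).sum()
loss.backward()

# Rescale all parameter gradients in place so their combined 2-norm is at most 1.0,
# then step the optimizer with the clipped gradients.
total_norm = flow.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
optimizer.zero_grad()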