def first_order_grad(*inputs):
    xs = inputs[:n_x]
    gys = inputs[n_x:]

    y = _as_tuple(func(*xs))
    # Let all elements of y share the same creator.
    # See the comment in check_backward.
    y = identity.Identity().apply(y)

    _set_y_grad(y, gys)
    y[0].backward(enable_double_backprop=True)

    return tuple([x.grad_var for x in xs] + [p.grad_var for p in params])
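
This helper relies on names from its enclosing scope (``n_x``, ``func``,
``params`` and internal utilities such as ``_as_tuple`` and ``_set_y_grad``),
so it is not runnable on its own. A minimal NumPy-only sketch of the quantity
it computes, using a hypothetical one-input function ``my_func`` and
hypothetical test arrays:

import numpy

def my_func(x):
    # Hypothetical function under test: y = x ** 3 (elementwise).
    return x ** 3

def first_order_grad_np(x, gy):
    # Analytic gradient of sum(gy * my_func(x)) with respect to x; this is
    # what the helper above obtains via backward() with gys set as y's grad.
    return gy * 3 * x ** 2

x = numpy.array([1.0, 2.0])
gy = numpy.array([0.5, -1.0])
print(first_order_grad_np(x, gy))  # [  1.5 -12. ]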
Example #2
    def first_order_grad(*inputs):
        xs = inputs[:n_x]
        gys = inputs[n_x:]

        y = _as_tuple(func(*xs))
        # Let all elements of y share the same creator.
        # See the comment in check_backward.
        y = identity.Identity().apply(y)

        _set_y_grad(y, gys)
        y[0].backward()

        ret = tuple([x.grad_var for x in xs])
        for x in xs:
            x.grad_var = None
        return ret
def check_backward(func,
                   x_data,
                   y_grad,
                   params=(),
                   eps=1e-3,
                   atol=1e-5,
                   rtol=1e-4,
                   no_grads=None,
                   dtype=None):
    """Test backward procedure of a given function.

    This function automatically checks the backward process of a given
    function. For example, when you have a :class:`~chainer.Function` class
    ``MyFunc`` that takes two arguments and returns one value, you can write
    its test like this::

    >> def test_my_func(self):
    >>   func = MyFunc()
    >>   x1_data = xp.array(...)
    >>   x2_data = xp.array(...)
    >>   gy_data = xp.array(...)
    >>   check_backward(func, (x1_data, x2_data), gy_data)

    This method creates :class:`~chainer.Variable` objects from ``x_data``
    and calls ``func`` with those :class:`~chainer.Variable` s to get its
    result as a :class:`~chainer.Variable`.
    Then, it sets the ``y_grad`` array to the ``grad`` attribute of the
    result and calls the ``backward`` method to get gradients of the inputs.
    To check correctness of these gradients, the function calls
    :func:`numerical_grad` to compute the gradients numerically and compares
    them with :func:`chainer.testing.assert_allclose`.
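
    Conceptually, the check is roughly equivalent to the following sketch
    (not the exact implementation)::

    >>   x1 = chainer.Variable(x1_data)
    >>   x2 = chainer.Variable(x2_data)
    >>   y = func(x1, x2)
    >>   y.grad = gy_data
    >>   y.backward()
    >>   # x1.grad and x2.grad are then compared with numerical estimates.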

    To reduce computational time, it uses a directional derivative along a
    random vector. A function
    :math:`g: \\mathbb{R} \\rightarrow \\mathbb{R}^n` is defined as
    :math:`g(\\delta) = f(x + \\delta r)`, where
    :math:`\\delta \\in \\mathbb{R}`, :math:`r \\in \\mathbb{R}^n`
    is a random vector
    and :math:`f` is the function you want to test.
    Its gradient is

    .. math::
       g'(\\delta) = f'(x + \\delta r) \\cdot r.

    Therefore, :math:`g'(0) = f'(x) \\cdot r`.
    We can thus check the correctness of back propagation of :math:`f`
    indirectly by comparing the numerically computed gradient of :math:`g`
    at :math:`\\delta = 0` with :math:`f'(x) \\cdot r` computed by backprop.
    If :math:`r` is chosen from a uniform distribution, we can conclude with
    high probability that the gradient of :math:`f` itself is correct.
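
    For example, for :math:`f(x) = \\sum_i x_i^2` the backprop gradient is
    :math:`f'(x) = 2x`, so :math:`g'(0) = f'(x) \\cdot r = 2 x \\cdot r`;
    the check compares this value against the central difference
    :math:`(g(\\epsilon) - g(-\\epsilon)) / (2\\epsilon)` computed by
    :func:`numerical_grad`.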

    If input objects (``x1_data`` and/or ``x2_data`` in this example) represent
    integer variables, their gradients are ignored.

    You can simplify a test when ``MyFunc`` gets only one argument::

    >>   check_backward(func, x1_data, gy_data)

    If ``MyFunc`` is a loss function which returns a zero-dimensional
    array, pass ``None`` to ``gy_data``. In this case, the ``grad`` attribute
    of the result is set to ``1``::

    >>   check_backward(my_loss_func, (x1_data, x2_data), None)

    If ``MyFunc`` returns multiple outputs, pass all gradients for outputs
    as a tuple::

    >>   gy1_data = xp.array(...)
    >>   gy2_data = xp.array(...)
    >>   check_backward(func, x1_data, (gy1_data, gy2_data))

    You can also test a :class:`~chainer.Link`.
    To check gradients of the parameters of the link, pass a tuple of the
    parameters as the ``params`` argument::

    >>   check_backward(my_link, (x1_data, x2_data), gy_data,
    >>                  (my_link.W, my_link.b))

    Note that ``params`` are not ``ndarray`` s,
    but :class:`~chainer.Variable` s.

    Function objects are acceptable as ``func`` argument::

    >>   check_backward(lambda x1, x2: f(x1, x2),
    >>                  (x1_data, x2_data), gy_data)

    .. note::

       ``func`` is called many times to get numerical gradients for all inputs.
       This function does not work correctly when ``func`` behaves
       stochastically, because repeated calls then yield different gradients.


    Args:
        func (callable): A function which takes :class:`~chainer.Variable` s
            and returns :class:`~chainer.Variable` s. ``func`` must return
            a tuple of :class:`~chainer.Variable` s or one
            :class:`~chainer.Variable`. You can use a :class:`~chainer.Function`
            object, a :class:`~chainer.Link` object, or any function satisfying
            this condition.
        x_data (ndarray or tuple of ndarrays): A set of ``ndarray`` s to be
            passed to ``func``. If ``x_data`` is one ``ndarray`` object, it is
            treated as ``(x_data,)``.
        y_grad (ndarray or tuple of ndarrays or None):
            A set of ``ndarray`` s representing gradients of return-values of
            ``func``. If ``y_grad`` is one ``ndarray`` object, it is
            treated as ``(y_grad,)``. If ``func`` is a loss-function,
            ``y_grad`` should be set to ``None``.
        params (~chainer.Variable or tuple of ~chainer.Variable):
            A set of :class:`~chainer.Variable` s whose gradients are checked.
            When ``func`` is a :class:`~chainer.Link` object,
            set its parameters as ``params``.
            If ``params`` is one :class:`~chainer.Variable` object,
            it is treated as ``(params,)``.
        eps (float): Epsilon value to be passed to :func:`numerical_grad`.
        atol (float): Absolute tolerance to be passed to
            :func:`chainer.testing.assert_allclose`.
        rtol (float): Relative tolerance to be passed to
            :func:`chainer.testing.assert_allclose`.
        no_grads (list of bool): Flags to skip variables in the gradient
            assertion. It should have the same length as ``x_data``.
        dtype (~numpy.dtype): ``x_data``, ``y_grad`` and ``params`` are cast
            to this dtype when calculating numerical gradients. Only float
            types and ``None`` are allowed.

    .. seealso::
       :func:`numerical_grad`
    """
    if dtype is not None and numpy.dtype(dtype).kind != 'f':
        raise ValueError('`dtype` is allowed only float type')

    x_data = _as_tuple(x_data)
    if y_grad is not None:
        y_grad = _as_tuple(y_grad)
    params = _as_tuple(params)

    xs = [variable.Variable(x) for x in x_data]
    y = func(*xs)
    y = _as_tuple(y)

    # All creators of `y` need to be the same because we only call
    # `y[0].backward` to call the `backward` method of the creator.
    # To do so, we insert a dummy `Identity` function into the
    # computational graph.
    # Note that `func` may not be a `Function` object.
    y = identity.Identity().apply(y)

    y_grad = _set_y_grad(y, y_grad)

    # Clear gradients which may exist if func calls backward inside of itself.
    _clear_grads(xs)
    _clear_grads(params)

    # We only need to call `backward` for one result `Variable`.
    # `Variable.backward` method calls `Function.backward` of its creator.
    y[0].backward()

    if no_grads is None:
        no_grads = [x.dtype.kind != 'f' for x in xs]
    else:
        if len(no_grads) != len(xs):
            raise ValueError(
                'Length of no_grads param and xs should be same.\n'
                'Actual: {0} != {1}'.format(len(no_grads), len(xs)))

    for skip, x in six.moves.zip(no_grads, xs):
        if skip:
            if x.grad is not None:
                raise RuntimeError('gradient of int variable must be None')
        else:
            if x.grad is None:
                raise RuntimeError(
                    'gradients of some arguments are not calculated')

    if len(xs) - no_grads.count(True) + len(params) == 0:
        # When there are no float variables, we do not need to check gradient
        # values.
        return

    variables = _filter_list(xs, no_grads) + list(params)
    # Keep the gradient arrays of the variables, which may be overwritten
    # when func is called again during the numerical differentiation.
    grads = [x.grad for x in variables]

    if dtype is None:
        casted_data = [x.data for x in variables]
    else:
        if numpy.dtype(dtype).kind != 'f':
            raise ValueError('`dtype` is allowed only float type')
        casted_data = [x.data.astype(dtype, copy=False) for x in variables]

        # Even skipped variables must have the same dtype.
        for x, skip in six.moves.zip(xs, no_grads):
            if skip and x.data.dtype.kind == 'f':
                x.data = x.data.astype(dtype, copy=False)

    xp = cuda.get_array_module(*xs)
    directions = [xp.random.normal(size=x.shape) for x in variables]
    # Use unit vector
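    # (a unit-length direction keeps the finite-difference step `eps * r`
    # at magnitude `eps` regardless of the number and size of the inputs)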
    norm = math.sqrt(sum([xp.square(d).sum() for d in directions]))
    if norm != 0:
        # norm could be zero if input arrays are 0-sized.
        scale = 1. / norm
        directions = [d * scale for d in directions]

    delta = xp.array(0., 'd')

    def g():
        # This function is called twice in `numerical_grad`.
        # `delta` is `epsilon` or `-epsilon` in these calls.
        # See the document of `numerical_grad`.
        for x, data, direction in six.moves.zip(variables, casted_data,
                                                directions):
            # astype is required to store data with the given type
            data = (data.astype('d') + delta * direction).astype(data.dtype)
            if numpy.isscalar(data):
                data = xp.array(data)
            x.data = data

        # Clear gradients to support func that calls backward inside of itself.
        _clear_grads(xs)
        _clear_grads(params)

        ys = func(*xs)
        ys = _as_tuple(ys)
        ys_data = tuple(y.data for y in ys)
        for x, data in six.moves.zip(variables, casted_data):
            x.data = data
        return ys_data

    gx, = numerical_grad(g, (delta, ), y_grad, eps=eps)
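    # Accumulate the backprop-side directional derivative f'(x) . r from the
    # gradients saved above, before `func` could overwrite them.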
    gx_accum = 0
    for g, direction in six.moves.zip(grads, directions):
        gx_accum += (g.astype('d') * direction).sum()

    try:
        testing.assert_allclose(gx, gx_accum, atol=atol, rtol=rtol)
    except AssertionError as e:
        f = six.StringIO()
        f.write('check_backward failed (eps={} atol={} rtol={})\n'.format(
            eps, atol, rtol))
        for i, x_ in enumerate(xs):
            f.write('inputs[{}]:\n'.format(i))
            f.write('{}\n'.format(x_))
        for i, gy_ in enumerate(y_grad):
            f.write('grad_outputs[{}]:\n'.format(i))
            f.write('{}\n'.format(gy_))
        f.write('gradients (numeric):  {}\n'.format(gx))
        f.write('gradients (backward): {}\n'.format(gx_accum))
        f.write('\n')
        f.write(str(e))
        raise AssertionError(f.getvalue())
def check_backward(func, x_data, y_grad, params=(),
                   eps=1e-3, atol=1e-5, rtol=1e-4, no_grads=None, dtype=None):
    """Test backward procedure of a given function.

    This function automatically checks the backward process of a given
    function. For example, when you have a :class:`~chainer.Function` class
    ``MyFunc`` that takes two arguments and returns one value, you can write
    its test like this::

    >> def test_my_func(self):
    >>   func = MyFunc()
    >>   x1_data = xp.array(...)
    >>   x2_data = xp.array(...)
    >>   gy_data = xp.array(...)
    >>   check_backward(func, (x1_data, x2_data), gy_data)

    This method creates :class:`~chainer.Variable` objects from ``x_data``
    and calls ``func`` with those :class:`~chainer.Variable` s to get its
    result as a :class:`~chainer.Variable`.
    Then, it sets the ``y_grad`` array to the ``grad`` attribute of the
    result and calls the ``backward`` method to get gradients of the inputs.
    To check correctness of these gradients, the function calls
    :func:`numerical_grad` to compute the gradients numerically and compares
    them with :func:`chainer.testing.assert_allclose`.

    To reduce computational time, it uses a function
    :math:`g: \\mathbb{R} \\rightarrow \\mathbb{R}^n` defined as
    :math:`g(\\alpha) = f(\\alpha x)`, where :math:`\\alpha \\in \\mathbb{R}`
    and :math:`f` is the function you actually want to test.
    Its gradient is

    .. math::
       g'(\\alpha) = f'(\\alpha x) \\cdot x.

    When :math:`\\alpha = 1`, :math:`g'(1) = f'(x) \\cdot x`.
    So :math:`g'(1)` is calculated with :func:`numerical_grad` and
    compared with the dot product of the gradient of :math:`f` and
    :math:`x`.
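
    For example, for :math:`f(x) = \\sum_i x_i^2` we have
    :math:`f'(x) = 2x`, so :math:`g'(1) = 2 x \\cdot x = 2 \\|x\\|^2`,
    which is compared against the central difference
    :math:`(g(1 + \\epsilon) - g(1 - \\epsilon)) / (2\\epsilon)`.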

    If input objects (``x1_data`` and/or ``x2_data`` in this example) represent
    integer variables, their gradients are ignored.

    You can simplify a test when ``MyFunc`` gets only one argument::

    >>   check_backward(func, x1_data, gy_data)

    If ``MyFunc`` is a loss function which returns a zero-dimensional
    array, pass ``None`` to ``gy_data``. In this case, the ``grad`` attribute
    of the result is set to ``1``::

    >>   check_backward(my_loss_func, (x1_data, x2_data), None)

    If ``MyFunc`` returns multiple outputs, pass all gradients for outputs
    as a tuple::

    >>   gy1_data = xp.array(...)
    >>   gy2_data = xp.array(...)
    >>   check_backward(func, x1_data, (gy1_data, gy2_data))

    You can also test a :class:`~chainer.Link`.
    To check gradients of the parameters of the link, pass a tuple of the
    parameters as the ``params`` argument::

    >>   check_backward(my_link, (x1_data, x2_data), gy_data,
    >>                  (my_link.W, my_link.b))

    Note that ``params`` are not ``ndarray`` s,
    but :class:`~chainer.Variable` s.

    Function objects are acceptable as ``func`` argument::

    >>   check_backward(lambda x1, x2: f(x1, x2),
    >>                  (x1_data, x2_data), gy_data)

    .. note::

       ``func`` is called many times to get numerical gradients for all inputs.
       This function does not work correctly when ``func`` behaves
       stochastically, because repeated calls then yield different gradients.


    Args:
        func (callable): A function which takes :class:`~chainer.Variable` s
            and returns :class:`~chainer.Variable` s. ``func`` must return
            a tuple of :class:`~chainer.Variable` s or one
            :class:`~chainer.Variable`. You can use a :class:`~chainer.Function`
            object, a :class:`~chainer.Link` object, or any function satisfying
            this condition.
        x_data (ndarray or tuple of ndarrays): A set of ``ndarray`` s to be
            passed to ``func``. If ``x_data`` is one ``ndarray`` object, it is
            treated as ``(x_data,)``.
        y_grad (ndarray or tuple of ndarrays or None):
            A set of ``ndarray`` s representing gradients of return-values of
            ``func``. If ``y_grad`` is one ``ndarray`` object, it is
            treated as ``(y_grad,)``. If ``func`` is a loss-function,
            ``y_grad`` should be set to ``None``.
        params (~chainer.Variable or tuple of ~chainer.Variable):
            A set of :class:`~chainer.Variable` s whose gradients are checked.
            When ``func`` is a :class:`~chainer.Link` object,
            set its parameters as ``params``.
            If ``params`` is one :class:`~chainer.Variable` object,
            it is treated as ``(params,)``.
        eps (float): Epsilon value to be passed to :func:`numerical_grad`.
        atol (float): Absolute tolerance to be passed to
            :func:`chainer.testing.assert_allclose`.
        rtol (float): Relative tolerance to be passed to
            :func:`chainer.testing.assert_allclose`.
        no_grads (list of bool): Flags to skip variables in the gradient
            assertion. It should have the same length as ``x_data``.
        dtype (~numpy.dtype): ``x_data``, ``y_grad`` and ``params`` are cast
            to this dtype when calculating numerical gradients. Only float
            types and ``None`` are allowed.

    See:
       :func:`numerical_grad`
    """
    x_data = _as_tuple(x_data)
    if y_grad is not None:
        y_grad = _as_tuple(y_grad)
    params = _as_tuple(params)

    xs = [variable.Variable(x) for x in x_data]
    y = func(*xs)
    y = _as_tuple(y)

    # All creators of `y` need to be the same because we only call
    # `y[0].backward` to call the `backward` method of the creator.
    # To do so, we insert a dummy `Identity` function into the
    # computational graph.
    # Note that `func` may not be a `Function` object.
    y = identity.Identity().apply(y)

    y_grad = _set_y_grad(y, y_grad)

    # Clear gradients which may exist if func calls backward inside of itself.
    _clear_grads(xs)
    _clear_grads(params)

    # We only need to call `backward` for one result `Variable`.
    # `Variable.backward` method calls `Function.backward` of its creator.
    y[0].backward()

    param_data = [p.data for p in params]
    if dtype is None:
        casted_xs = [variable.Variable(x) for x in x_data]
    else:
        if numpy.dtype(dtype).kind != 'f':
            raise ValueError('`dtype` is allowed only float type')
        casted_xs = [variable.Variable(x.astype(dtype, copy=False)
                                       if x.dtype.kind == 'f' else x)
                     for x in x_data]

    if no_grads is None:
        no_grads = [x.dtype.kind != 'f' for x in xs]
    else:
        if len(no_grads) != len(xs):
            raise ValueError(
                'Length of no_grads param and xs should be same.')
    casted_data = [x.data.copy() for x in casted_xs]
    for skip, x in six.moves.zip(no_grads, xs):
        if skip:
            assert x.grad is None
        else:
            if x.grad is None:
                raise RuntimeError(
                    'gradients of some arguments are not calculated')

    # Keep the gradient arrays of params which may be overwritten by func
    params_grad = [param.grad for param in params]

    xp = cuda.get_array_module(*xs)
    one = xp.array(1., dtype)

    def g():
        # This function is called twice in `numerical_grad`.
        # `one` is `1 + epsilon` or `1 - epsilon` in these calls.
        # See the document of `numerical_grad`.
        for skip, cx, data in six.moves.zip(no_grads, casted_xs, casted_data):
            if skip:
                continue
            # astype is required to store data with the given type
            data = (one * data).astype(data.dtype)
            if numpy.isscalar(data):
                data = xp.array(data)
            cx.data = data
        for param, data in six.moves.zip(params, param_data):
            if dtype is not None:
                param_dtype = dtype
            else:
                param_dtype = param.dtype
            # The inner astype is required so that __mul__ is computed in
            # `param_dtype` when data is a low-precision float.
            # The outer one is required to store data with the given type.
            param.data = (one * data.astype(param_dtype)).astype(param_dtype)

        # Clear gradients to support func that calls backward inside of itself.
        _clear_grads(casted_xs)
        _clear_grads(params)

        ys = func(*casted_xs)
        ys = _as_tuple(ys)
        ys_data = tuple(y.data for y in ys)
        for skip, cx, data in six.moves.zip(no_grads, casted_xs, casted_data):
            if skip:
                continue
            cx.data = data
        for param, data in six.moves.zip(params, param_data):
            param.data = data
        return ys_data

    gx, = numerical_grad(g, (one,), y_grad, eps=eps)
    gx_accum = 0
    for skip, x, cx in six.moves.zip(no_grads, xs, casted_xs):
        if skip:
            continue
        gxi = x.grad.ravel()
        cxi = cx.data.ravel()
        if dtype is not None:
            gxi = gxi.astype(dtype, copy=False)
            cxi = cxi.astype(dtype, copy=False)
        gx_accum += gxi.dot(cxi)

    for p, gpi in six.moves.zip(params, params_grad):
        gpi = gpi.ravel()
        pi = p.data.ravel()
        if dtype is not None:
            gpi = gpi.astype(dtype, copy=False)
            pi = pi.astype(dtype, copy=False)
        gx_accum += gpi.dot(pi)

    testing.assert_allclose(gx, gx_accum, atol=atol, rtol=rtol)
Example #5
def check_backward(func,
                   x_data,
                   y_grad,
                   params=(),
                   eps=1e-3,
                   atol=1e-5,
                   rtol=1e-4,
                   no_grads=None,
                   dtype=None):
    """Test backward procedure of a given function.

    This function automatically checks the backward process of a given
    function. For example, when you have a :class:`~chainer.Function` class
    ``MyFunc`` that takes two arguments and returns one value, you can write
    its test like this::

    >> def test_my_func(self):
    >>   func = MyFunc()
    >>   x1_data = xp.array(...)
    >>   x2_data = xp.array(...)
    >>   gy_data = xp.array(...)
    >>   check_backward(func, (x1_data, x2_data), gy_data)

    This method creates :class:`~chainer.Variable` objects from ``x_data``
    and calls ``func`` with those :class:`~chainer.Variable` s to get its
    result as a :class:`~chainer.Variable`.
    Then, it sets the ``y_grad`` array to the ``grad`` attribute of the
    result and calls the ``backward`` method to get gradients of the inputs.
    To check correctness of these gradients, the function calls
    :func:`numerical_grad` to compute the gradients numerically and compares
    them with :func:`chainer.testing.assert_allclose`.
    If input objects (``x1_data`` and/or ``x2_data`` in this example) represent
    integer variables, their gradients are ignored.

    You can simplify a test when ``MyFunc`` gets only one argument::

    >>   check_backward(func, x1_data, gy_data)

    If ``MyFunc`` is a loss function which returns a zero-dimensional
    array, pass ``None`` to ``gy_data``. In this case, the ``grad`` attribute
    of the result is set to ``1``::

    >>   check_backward(my_loss_func, (x1_data, x2_data), None)

    If ``MyFunc`` returns multiple outputs, pass all gradients for outputs
    as a tuple::

    >>   gy1_data = xp.array(...)
    >>   gy2_data = xp.array(...)
    >>   check_backward(func, x1_data, (gy1_data, gy2_data))

    You can also test a :class:`~chainer.Link`.
    To check gradients of the parameters of the link, pass a tuple of the
    parameters as the ``params`` argument::

    >>   check_backward(my_link, (x1_data, x2_data), gy_data,
    >>                  (my_link.W, my_link.b))

    Note that ``params`` are not ``ndarray`` s,
    but :class:`~chainer.Variable` s.

    Function objects are acceptable as ``func`` argument::

    >>   check_backward(lambda x1, x2: f(x1, x2),
    >>                  (x1_data, x2_data), gy_data)

    .. note::

       ``func`` is called many times to get numerical gradients for all inputs.
       This function does not work correctly when ``func`` behaves
       stochastically, because repeated calls then yield different gradients.


    Args:
        func (callable): A function which takes :class:`~chainer.Variable` s
            and returns :class:`~chainer.Variable` s. ``func`` must return
            a tuple of :class:`~chainer.Variable` s or one
            :class:`~chainer.Variable`. You can use a :class:`~chainer.Function`
            object, a :class:`~chainer.Link` object, or any function satisfying
            this condition.
        x_data (ndarray or tuple of ndarrays): A set of ``ndarray`` s to be
            passed to ``func``. If ``x_data`` is one ``ndarray`` object, it is
            treated as ``(x_data,)``.
        y_grad (ndarray or tuple of ndarrays or None):
            A set of ``ndarray`` s representing gradients of return-values of
            ``func``. If ``y_grad`` is one ``ndarray`` object, it is
            treated as ``(y_grad,)``. If ``func`` is a loss-function,
            ``y_grad`` should be set to ``None``.
        params (~chainer.Variable or tuple of ~chainer.Variable):
            A set of :class:`~chainer.Variable` s whose gradients are checked.
            When ``func`` is a :class:`~chainer.Link` object,
            set its parameters as ``params``.
            If ``params`` is one :class:`~chainer.Variable` object,
            it is treated as ``(params,)``.
        eps (float): Epsilon value to be passed to :func:`numerical_grad`.
        atol (float): Absolute tolerance to be passed to
            :func:`chainer.testing.assert_allclose`.
        rtol (float): Relative tolerance to be passed to
            :func:`chainer.testing.assert_allclose`.
        no_grads (list of bool): Flags to skip variables in the gradient
            assertion. It should have the same length as ``x_data``.
        dtype (~numpy.dtype): ``x_data`` and ``y_grad`` are cast to this
            dtype when calculating numerical gradients. Only float types and
            ``None`` are allowed.

    See:
       :func:`numerical_grad`
    """
    x_data = _as_tuple(x_data)
    if y_grad is not None:
        y_grad = _as_tuple(y_grad)
    params = _as_tuple(params)

    xs = [variable.Variable(x) for x in x_data]
    y = func(*xs)
    y = _as_tuple(y)

    # All creators of `y` need to be the same because we only call
    # `y[0].backward` to call the `backward` method of the creator.
    # To do so, we insert a dummy `Identity` function into the
    # computational graph.
    # Note that `func` may not be a `Function` object.
    y = identity.Identity()(*y)
    y = _as_tuple(y)

    if y_grad is not None:
        if len(y) != len(y_grad):
            raise ValueError(
                '`y_grad` must have the same length as the output values')
        for iy, igy in six.moves.zip(y, y_grad):
            iy.grad = igy
    else:
        if len(y) != 1:
            raise ValueError(
                'When `y_grad` is `None`, the function must return a '
                'zero-dimensional array')
        y_grad = (1, )

    # We only need to call `backward` for one result `Variable`.
    # `Variable.backward` method calls `Function.backward` of its creator.
    y[0].backward()

    if dtype is None:
        casted_xs = [variable.Variable(x) for x in x_data]
    else:
        if numpy.dtype(dtype).kind != 'f':
            raise ValueError('`dtype` is allowed only float type')
        if len(params) > 0:
            raise ValueError('`dtype` is available only if `params` is empty')
        casted_xs = [
            variable.Variable(
                x.astype(dtype, copy=False) if x.dtype.kind == 'f' else x)
            for x in x_data
        ]

    def f():
        ys = func(*casted_xs)
        ys = _as_tuple(ys)
        return tuple(y.data for y in ys)

    if no_grads is None:
        no_grads = [x.dtype.kind != 'f' for x in xs]
    else:
        if len(no_grads) != len(xs):
            raise ValueError('Length of no_grads param and xs should be same.')
    for skip, x, cx in six.moves.zip(no_grads, xs, casted_xs):
        if skip:
            assert x.grad is None
            continue
        gx, = numerical_grad(f, (cx.data, ), y_grad, eps=eps)
        testing.assert_allclose(gx, x.grad, atol=atol, rtol=rtol)
        if dtype is None:
            assert gx.dtype == x.grad.dtype
        else:
            assert gx.dtype.kind == 'f' and gx.dtype == dtype

    for p in params:
        gp, = numerical_grad(f, (p.data, ), y_grad, eps=eps)
        testing.assert_allclose(gp, p.grad, atol=atol, rtol=rtol)
        assert gp.dtype is p.grad.dtype