Example #1
0
    def _directional_backward_gradients(self, xs, ys, params, directions):
        """Backprop through ``ys`` and project gradients onto ``directions``.

        Runs ``chainer.backward`` with copies of ``self.gys`` as upstream
        gradients, then returns the float64 sum of ``grad * direction``
        over all non-skipped inputs followed by ``params``.
        """
        no_gxs = self.no_gxs

        # Copy each upstream gradient so backprop cannot update the value
        # that the numerical gradient is later compared against.
        # TODO(niboshi): Preserve strides, for testing purpose.
        gys = []
        for gy in self.gys:
            if gy is None:
                gys.append(None)
            else:
                gys.append(chainer.Variable(gy.copy(), requires_grad=False))

        # Backward
        chainer.backward(ys, gys)

        # Inputs flagged in no_gxs (e.g. int inputs) must not get a gradient.
        for skip, x in six.moves.zip(no_gxs, xs):
            if skip and x.grad is not None:
                raise RuntimeError('gradient of int variable must be None')

        grads = [
            None if x is None else x.grad
            for x, skip in six.moves.zip(xs, no_gxs) if not skip
        ]
        grads += [p.grad for p in params]

        # Accumulate the directional derivative in float64 for accuracy.
        accum = 0
        assert len(grads) == len(directions)
        for grad, direction in six.moves.zip(grads, directions):
            if grad is not None:
                assert direction is not None
                accum += (grad.astype(numpy.float64) * direction).sum()

        return accum
Example #2
0
    def first_order_grad(*inputs):
        """Forward ``func`` and return its first-order gradients.

        ``inputs`` packs the function inputs followed by the upstream
        gradients; returns a tuple of input gradients (``None`` where no
        gradient was produced) followed by parameter gradients.
        """
        xs, gys = inputs[:n_x], inputs[n_x:]

        ys = _as_tuple(func(*xs))
        _check_outputs_and_grad_outputs(ys, gys)

        chainer.backward(ys, gys, enable_double_backprop=True)

        gxs = []
        errors = []
        for i, (skip, x) in enumerate(six.moves.zip(first_order_no_gxs, xs)):
            if skip:
                # Skipped inputs must not receive a gradient at all.
                if x.grad is not None:
                    errors.append(
                        '[{}]: Gradient was calculated while expected to not.'
                        .format(i))
            elif x.grad is None:
                gxs.append(None)
            else:
                gxs.append(x.grad_var)

        if errors:
            # Report every offending input index in a single error message.
            f = six.StringIO()
            f.write('There are errors retrieving first-order gradients:\n')
            f.write('Inputs: {}\n'.format(utils._format_array_props(xs)))
            f.write('Skip: {}\n'.format(
                ', '.join(str(skip) for skip in first_order_no_gxs)))
            f.write('Errors:\n')
            for error in errors:
                f.write('{}\n'.format(error))
            raise RuntimeError(f.getvalue())

        return tuple(gxs + [p.grad_var for p in params])
Example #3
0
 def test_duplicate_outputs(self):
     """Passing the same output twice warns and uses its .grad only once."""
     v = chainer.Variable(np.array(0, np.float32))
     out = chainer.functions.identity(v)
     out.grad = np.array(3, np.float32)
     with testing.assert_warns(RuntimeWarning):
         chainer.backward([out, out])
     # 6 might be expected, but the duplicated grad is counted once.
     testing.assert_allclose(v.grad, np.array(3, np.float32))
Example #4
0
 def test_warn_no_grad(self):
     """backward() warns when an output has no grad set."""
     v = chainer.Variable(np.array(4, np.float32))
     v.grad = np.array(3, np.float32)
     out = v * 2
     with testing.assert_warns(RuntimeWarning):
         chainer.backward([out])
     # The pre-set input grad is untouched and the output grad stays unset.
     testing.assert_allclose(v.grad, np.array(3, np.float32))
     assert out.grad is None
Example #5
0
 def check_multiple_output_2args(self, xp, skip_retain_grad_test=False):
     """backward() with explicit grad outputs accumulates through a shared node."""
     v = chainer.Variable(xp.array([1, 2], np.float32))
     shared = v * 2
     out0 = shared * 3
     out1 = shared * 4
     g0 = chainer.Variable(xp.array([1, 10], np.float32))
     g1 = chainer.Variable(xp.array([100, 1000], np.float32))
     chainer.backward([out0, out1], [g0, g1])
     testing.assert_allclose(v.grad, np.array([806, 8060], np.float32))
     if skip_retain_grad_test:
         return
     # By default the output grads are not retained after backward.
     assert out0.grad is None
     assert out1.grad is None
Example #6
0
    def test_length_check(self):
        """Mismatched lengths of outputs and grad outputs raise ValueError."""
        v = chainer.Variable(np.array(3, np.float32))
        out = chainer.functions.identity(v)
        g = chainer.Variable(np.array(7, np.float32))

        # Every length mismatch must be rejected.
        mismatched = [
            ([out], []),
            ([out], [g, g]),
            ([], [g]),
            ([out, out], [g]),
        ]
        for outputs, grad_outputs in mismatched:
            with self.assertRaises(ValueError):
                chainer.backward(outputs, grad_outputs)

        # Matching lengths succeed.
        chainer.backward([out], [g])
Example #7
0
    def first_order_grad(*inputs):
        """Forward ``func`` and return its first-order gradients.

        ``inputs`` packs the function inputs (first ``n_x`` items) followed
        by the upstream gradients.  Returns a tuple of input gradients
        (``None`` where no gradient was produced, skipped inputs omitted)
        followed by parameter gradients.

        Raises:
            RuntimeError: if any input flagged in ``first_order_no_gxs``
                unexpectedly received a gradient.
        """
        xs = inputs[:n_x]
        gys = inputs[n_x:]

        ys = _as_tuple(func(*xs))

        # `gys` (inputs to `first_order_grad` forward function) may have been
        # casted to float64 by `numerical_grad`. For certain functions demoting
        # the dtypes (e.g. `F.cast` that casts to float16), the dtypes of `ys`
        # (e.g. outputs of `F.cast`) and `gys` (e.g. given by `numerical_grad`)
        # may mismatch and we need to align those dtypes here.
        gys = [
            None if gy is None else chainer.functions.cast(gy, y.dtype)
            for y, gy in zip(ys, gys)
        ]

        _check_outputs_and_grad_outputs(ys, gys)

        # Double backprop must stay enabled so second-order gradients can be
        # taken through this function later.
        chainer.backward(ys, gys, enable_double_backprop=True)

        gxs = []
        errors = []
        for i, (no_gx, x) in enumerate(six.moves.zip(first_order_no_gxs, xs)):
            if no_gx:
                # Skipped inputs must not receive a gradient; collect all
                # violations before reporting so the message is complete.
                if x.grad is not None:
                    errors.append(
                        '[{}]: Gradient was calculated while expected to not.'.
                        format(i))
            else:
                if x.grad is None:
                    gxs.append(None)
                else:
                    gxs.append(x.grad_var)

        if len(errors) > 0:
            # Build one aggregate error message describing every offender.
            f = six.StringIO()
            f.write('There are errors retrieving first-order gradients:\n')
            f.write('Inputs: {}\n'.format(utils._format_array_props(xs)))
            f.write('Skip: {}\n'.format(', '.join(
                str(no_gx) for no_gx in first_order_no_gxs)))
            f.write('Errors:\n')
            for error in errors:
                f.write('{}\n'.format(error))
            raise RuntimeError(f.getvalue())

        return tuple(gxs + [p.grad_var for p in params])
Example #8
0
    def test_multiple_output_call_count(self):
        """backward() through a shared node calls its backward exactly once."""
        v = chainer.Variable(np.array([1, 2], np.float32))

        node = chainer.FunctionNode()
        node.forward = mock.MagicMock(
            side_effect=lambda xs: tuple(x * 2 for x in xs))
        node.backward = mock.MagicMock(
            side_effect=lambda _, gys: tuple(gy * 2 for gy in gys))

        shared, = node.apply((v,))
        out0 = shared * 3
        out1 = shared * 4
        out0.grad = np.array([1, 10], np.float32)
        out1.grad = np.array([100, 1000], np.float32)
        chainer.backward([out0, out1])
        testing.assert_allclose(v.grad, np.array([806, 8060], np.float32))
        # Gradients from both outputs are accumulated before the shared
        # node's backward runs, so it is invoked a single time.
        assert node.backward.call_count == 1
Example #9
0
    def test_type_check(self):
        """Non-list arguments to chainer.backward raise TypeError."""
        v = chainer.Variable(self._rand())
        out = v * v
        out.grad = self._rand()
        g = chainer.Variable(self._rand())

        # Both arguments must be lists, not bare variables.
        with self.assertRaises(TypeError):
            chainer.backward(out)
        with self.assertRaises(TypeError):
            chainer.backward([out], g)

        # Properly wrapped arguments succeed.
        chainer.backward([out])
        chainer.backward([out], [g])
Example #10
0
 def test_no_output(self):
     """backward() accepts empty output (and grad output) lists without error."""
     chainer.backward([])
     chainer.backward([], [])