Example no. 1
    def _backward_xp(self, x, W, b, gy, xp):
        dims = x.shape[2:]     # (n, c_I, d_1, d_2, ..., d_N)
        stride = self.stride
        pad = self.pad
        ndim = self.ndim

        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0,) + tuple(moves.range(2, ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0,) + tuple(moves.range(ndim + 2, ndim * 2 + 2))
        gW = xp.tensordot(gy, self.col, (out_axes, col_axes)).astype(
            W.dtype, copy=False)

        # Compute patch array gradient.
        gcol = xp.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, ndim + 1)

        # Compute input gradient.
        if xp is numpy:
            gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        else:
            gx = conv_nd.col2im_nd_gpu(gcol, stride, pad, dims)

        # Compute bias gradient if given and return gradients.
        if b is None:
            return gx, gW
        else:
            # (n, _, out_1, out_2, ..., out_N)
            axis = (0,) + tuple(moves.range(2, ndim + 2))
            gb = gy.sum(axis=axis)
            return gx, gW, gb
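A quick way to sanity-check the axis bookkeeping in the tensordot that produces gW: with hypothetical 2-D sizes (ndim = 2) and the layouts assumed above, gy is (n, C_O, out_1, out_2) and self.col is (n, C_I, k_1, k_2, out_1, out_2), so contracting the batch and output axes leaves a result with the same layout as the filter W, i.e. (C_O, C_I, k_1, k_2). A minimal NumPy sketch:

import numpy

n, c_i, c_o, ndim = 2, 3, 4, 2
ksize, outs = (3, 3), (5, 5)
gy = numpy.empty((n, c_o) + outs, dtype=numpy.float32)
col = numpy.empty((n, c_i) + ksize + outs, dtype=numpy.float32)

out_axes = (0,) + tuple(range(2, ndim + 2))             # n, out_1, out_2 of gy
col_axes = (0,) + tuple(range(ndim + 2, ndim * 2 + 2))  # n, out_1, out_2 of col
gW = numpy.tensordot(gy, col, (out_axes, col_axes))
assert gW.shape == (c_o, c_i) + ksize                   # same layout as W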
Example no. 2
    def _forward_xp(self, x, W, b, xp):
        ndim = self.ndim
        ksize = W.shape[2:]  # W: C_I, C_O, k_1, k_2, ..., k_N
        dims = x.shape[2:]  # x: n, C_I, d_1, d_2, ..., d_N
        stride = self.stride
        pad = self.pad

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, stride, pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        # y: n, C_O, d_1, d_2, ..., d_N
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
        else:
            y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
        if b is not None:
            b_shape = (1, -1) + (1, ) * ndim
            y += b.reshape(b_shape)

        return y,
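Here self.outs is derived lazily from conv.get_deconv_outsize. As a reference point, a minimal sketch assuming the standard cover_all=False relation out = s * (d - 1) + k - 2 * p (an assumption stated here, not computed in the snippet itself):

# Deconvolution output size per spatial axis, assuming out = s * (d - 1) + k - 2 * p.
def deconv_outsize(d, k, s, p):
    return s * (d - 1) + k - 2 * p

# e.g. d=4 positions, kernel k=3, stride s=2, pad p=1 -> out=7; a forward
# convolution with the same k, s, p maps 7 back to (7 + 2*1 - 3) // 2 + 1 == 4.
assert deconv_outsize(4, 3, 2, 1) == 7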
Example no. 3
    def _forward_xp(self, x, W, b, xp):
        ndim = self.ndim
        ksize = W.shape[2:]     # W: C_I, C_O, k_1, k_2, ..., k_N
        dims = x.shape[2:]      # x: n, C_I, d_1, d_2, ..., d_N
        stride = self.stride
        pad = self.pad

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        if self.outs is None:
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, stride, pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        # y: n, C_O, d_1, d_2, ..., d_N
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
        else:
            y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
        if b is not None:
            b_shape = (1, -1) + (1,) * ndim
            y += b.reshape(b_shape)

        return y,
Example no. 4
    def _backward_xp(self, x, W, b, gy, xp):
        dims = x.shape[2:]  # (n, c_I, d_1, d_2, ..., d_N)
        stride = self.stride
        pad = self.pad
        ndim = self.ndim

        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0, ) + tuple(moves.range(2, ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0, ) + tuple(moves.range(ndim + 2, ndim * 2 + 2))
        gW = xp.tensordot(gy, self.col, (out_axes, col_axes)).astype(W.dtype)

        # Compute patch array gradient.
        gcol = xp.tensordot(W, gy, (0, 1)).astype(x.dtype)
        gcol = xp.rollaxis(gcol, ndim + 1)

        # Compute input gradient.
        if xp is numpy:
            gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        else:
            gx = conv_nd.col2im_nd_gpu(gcol, stride, pad, dims)

        # Compute bias gradient if given and return gradients.
        if b is None:
            return gx, gW
        else:
            # (n, _, out_1, out_2, ..., out_N)
            axis = (0, ) + tuple(moves.range(2, ndim + 2))
            gb = gy.sum(axis=axis)
            return gx, gW, gb
Example no. 5
    def forward_cpu(self, gy):
        func = self.func

        if (func.ndim == 2 and intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(gy)):
            return self._forward_2d_ideep(gy)

        ndim = func.ndim
        ksize = func.ksize
        stride = func.stride
        pad = func.pad
        in_shape = func._in_shape
        in_dtype = func._in_dtype
        indexes = func.indexes

        n, c = gy[0].shape[:2]
        outs = gy[0].shape[2:]
        dims = in_shape[2:]
        prod_outs = functools.reduce(mul, outs)
        prod_ksize = functools.reduce(mul, ksize)

        gcol = numpy.zeros(n * c * prod_outs * prod_ksize, dtype=in_dtype)

        indexes = (indexes.flatten() +
                   numpy.arange(0, indexes.size * prod_ksize, prod_ksize))

        gcol[indexes] = gy[0].ravel()
        gcol_shape = (n, c) + outs + ksize
        gcol = gcol.reshape(gcol_shape)
        for i in six.moves.range(ndim):
            gcol = numpy.swapaxes(gcol, 2 + i, ndim + 2 + i)

        gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        return gx,
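The flat-index arithmetic above (indexes.flatten() plus an arange with step prod_ksize) routes each upstream gradient to the slot of the window element that won the max in the forward pass; the swapaxes loop then moves the kernel axes in front of the output axes so gcol matches the (n, c, k_1, ..., k_N, out_1, ..., out_N) layout expected by col2im_nd_cpu. A tiny sketch of the scatter with made-up numbers:

import numpy

prod_ksize = 3
indexes = numpy.array([2, 0, 1])      # argmax offset inside each window
gy = numpy.array([10., 20., 30.])     # one gradient per output position
gcol = numpy.zeros(indexes.size * prod_ksize)
flat = indexes + numpy.arange(0, indexes.size * prod_ksize, prod_ksize)
gcol[flat] = gy
assert gcol.tolist() == [0, 0, 10, 20, 0, 0, 0, 30, 0]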
Example no. 6
    def _forward_xp_core(self, x, W, b, xp):
        ndim = self.ndim
        stride = self.stride
        pad = self.pad
        dilate = self.dilate

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        # y: n, C_O, d_1, d_2, ..., d_N
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol,
                                      stride,
                                      pad,
                                      self.outs,
                                      dilate=dilate)
        else:
            y = conv_nd.col2im_nd_gpu(gcol,
                                      stride,
                                      pad,
                                      self.outs,
                                      dilate=dilate)
        if b is not None:
            b_shape = (1, -1) + (1, ) * ndim
            y += b.reshape(b_shape)

        return y,
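The only difference from the plain deconvolution forward above is that dilate is threaded through to col2im_nd_*. As a hedged note (an assumption, not something computed in this snippet), the output size then follows the usual relation with an effective kernel extent of dilate * (k - 1) + 1:

# Assumed output-size relation under dilation (effective kernel = dilate * (k - 1) + 1).
def dilated_deconv_outsize(d, k, s, p, dilate):
    k_eff = dilate * (k - 1) + 1
    return s * (d - 1) + k_eff - 2 * p

assert dilated_deconv_outsize(4, 3, 2, 1, 1) == 7   # dilate=1: plain case
assert dilated_deconv_outsize(4, 3, 2, 1, 2) == 9   # k_eff = 5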
Example no. 7
 def backward_cpu(self, x, gy):
     dims = self._in_shape[2:]
     outs = gy[0].shape[2:]
     colon = slice(None, None, None)
     gy_index = (colon, colon) + (None, ) * len(dims)
     gcol_reps = (1, 1) + self.ksize + (1, ) * len(outs)
     gcol = numpy.tile(gy[0][gy_index], gcol_reps)
     gx = conv_nd.col2im_nd_cpu(gcol, self.stride, self.pad, dims)
     gx /= functools.reduce(operator.mul, self.ksize)
     return gx,
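Tiling gy over the kernel axes and running col2im_nd_cpu spreads each output gradient uniformly over its pooling window, and the final division leaves gy / prod(ksize) in every covered cell. A small hypothetical 2-D check with non-overlapping 2x1 windows:

import numpy
from chainer.utils import conv_nd

gy = numpy.arange(1, 5, dtype=numpy.float32).reshape(1, 1, 2, 2)
ksize, stride, pad, dims = (2, 1), (2, 1), (0, 0), (4, 2)
gcol = numpy.tile(gy[:, :, None, None], (1, 1) + ksize + (1, 1))
gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
gx /= 2 * 1                                   # prod(ksize)
# gx[0, 0] == [[0.5, 1.0], [0.5, 1.0], [1.5, 2.0], [1.5, 2.0]]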
Example no. 8
 def backward_cpu(self, x, gy):
     dims = self._in_shape[2:]
     outs = gy[0].shape[2:]
     colon = slice(None, None, None)
     gy_index = (colon, colon) + (None,) * len(dims)
     gcol_reps = (1, 1) + self.ksize + (1,) * len(outs)
     gcol = numpy.tile(gy[0][gy_index], gcol_reps)
     gx = conv_nd.col2im_nd_cpu(gcol, self.stride, self.pad, dims)
     gx /= functools.reduce(operator.mul, self.ksize)
     return gx,
Example no. 9
 def forward_cpu(self, gys):
     gy, = gys
     idims = self._in_shape[2:]
     odims = gy.shape[2:]
     colon = slice(None, None, None)
     gy_index = (colon, colon) + (None,) * len(idims)
     gcol_reps = (1, 1) + self.ksize + (1,) * len(odims)
     gcol = numpy.tile(gy[gy_index], gcol_reps)
     gx = conv_nd.col2im_nd_cpu(gcol, self.stride, self.pad, idims)
     if self.pad_value is None:
         width = self._get_pooling_width(numpy, odims, gx.dtype)
         numpy.divide(gx, width, out=gx)
     else:
         gx /= functools.reduce(operator.mul, self.ksize)
     return gx,
Example no. 10
 def forward_cpu(self, gys):
     gy, = gys
     idims = self._in_shape[2:]
     odims = gy.shape[2:]
     colon = slice(None, None, None)
     gy_index = (colon, colon) + (None,) * len(idims)
     gcol_reps = (1, 1) + self.ksize + (1,) * len(odims)
     gcol = numpy.tile(gy[gy_index], gcol_reps)
     gx = conv_nd.col2im_nd_cpu(gcol, self.stride, self.pad, idims)
     if self.pad_value is None:
         width = self._get_pooling_width(numpy, odims, gx.dtype)
         numpy.divide(gx, width, out=gx)
     else:
         gx /= functools.reduce(operator.mul, self.ksize)
     return gx,
Example no. 11
 def forward_cpu(self, gys):
     gy, = gys
     idims = self._in_shape[2:]
     odims = gy.shape[2:]
     colon = slice(None, None, None)
     is_pad_value_none = self.pad_value is None
     if is_pad_value_none:
         width = self.apoolnd.width
         numpy.divide(gy, width, out=gy)
     gy_index = (colon, colon) + (None,) * len(idims)
     gcol_reps = (1, 1) + self.ksize + (1,) * len(odims)
     gcol = numpy.tile(gy[gy_index], gcol_reps)
     gx = conv_nd.col2im_nd_cpu(gcol, self.stride, self.pad, idims)
     if not is_pad_value_none:
         gx /= functools.reduce(operator.mul, self.ksize)
     return gx,
Example no. 12
 def forward_cpu(self, gys):
     gy, = gys
     idims = self._in_shape[2:]
     odims = gy.shape[2:]
     colon = slice(None, None, None)
     is_pad_value_none = self.pad_value is None
     if is_pad_value_none:
         width = self.apoolnd.width
         numpy.divide(gy, width, out=gy)
     gy_index = (colon, colon) + (None, ) * len(idims)
     gcol_reps = (1, 1) + self.ksize + (1, ) * len(odims)
     gcol = numpy.tile(gy[gy_index], gcol_reps)
     gx = conv_nd.col2im_nd_cpu(gcol, self.stride, self.pad, idims)
     if not is_pad_value_none:
         gx /= functools.reduce(operator.mul, self.ksize)
     return gx,
Example no. 13
    def test_col2im_nd_cpu_parameter_ranks(self):
        # Invalid ksize length.
        col_shape = (2, 3) + (2,) + self.outs
        col = numpy.random.uniform(-1, 1, col_shape).astype(numpy.float32)
        with self.assertRaises(AssertionError):
            conv_nd.col2im_nd_cpu(col, self.stride, self.pad, self.dims)

        # Invalid stride length.
        with self.assertRaises(AssertionError):
            conv_nd.col2im_nd_cpu(self.col, (1,), self.pad, self.dims)

        # Invalid pad length.
        with self.assertRaises(AssertionError):
            conv_nd.col2im_nd_cpu(self.col, self.stride, (0,), self.dims)

        # Invalid dims length.
        with self.assertRaises(AssertionError):
            conv_nd.col2im_nd_cpu(self.col, self.stride, self.pad, (4,))
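For contrast with the failing calls above, a call with hypothetical but mutually consistent 2-D sizes, where col is laid out as (n, c, k_1, k_2, out_1, out_2) and stride, pad and dims all have length 2:

import numpy
from chainer.utils import conv_nd

n, c, ksize, outs, dims = 2, 3, (2, 2), (2, 2), (4, 4)
col = numpy.zeros((n, c) + ksize + outs, dtype=numpy.float32)
im = conv_nd.col2im_nd_cpu(col, (2, 2), (0, 0), dims)
assert im.shape == (n, c) + dims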
Example no. 14
    def _forward_xp(self, x, W, b, xp):
        ndim = self.ndim
        stride = self.stride
        pad = self.pad

        # gcol: C_O, k_1, ..., k_N, n, d_1, ..., d_N
        gcol = xp.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # Roll n, which is batch size, before the first.
        gcol = xp.rollaxis(gcol, ndim + 1)

        # y: n, C_O, d_1, d_2, ..., d_N
        if xp is numpy:
            y = conv_nd.col2im_nd_cpu(gcol, stride, pad, self.outs)
        else:
            y = conv_nd.col2im_nd_gpu(gcol, stride, pad, self.outs)
        if b is not None:
            b_shape = (1, -1) + (1,) * ndim
            y += b.reshape(b_shape)

        return y,
Example no. 15
    def _backward_xp(self, x, W, b, gy, xp):
        dims = x.shape[2:]     # (n, c_I, d_1, d_2, ..., d_N)
        stride = self.stride
        pad = self.pad
        ndim = self.ndim

        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0,) + tuple(moves.range(2, ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0,) + tuple(moves.range(ndim + 2, ndim * 2 + 2))

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (xp is numpy and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gW = xp.tensordot(gy, self.col, (out_axes, col_axes)).astype(
            W.dtype, copy=False)

        # Compute patch array gradient.
        gcol = xp.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, ndim + 1)

        # Compute input gradient.
        if xp is numpy:
            gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        else:
            gx = conv_nd.col2im_nd_gpu(gcol, stride, pad, dims)

        # Compute bias gradient if given and return gradients.
        if b is None:
            return gx, gW
        else:
            # (n, _, out_1, out_2, ..., out_N)
            axis = (0,) + tuple(moves.range(2, ndim + 2))
            gb = gy.sum(axis=axis)
            return gx, gW, gb
Example no. 16
    def _backward_xp(self, x, W, b, gy, xp):
        dims = x.shape[2:]     # (n, c_I, d_1, d_2, ..., d_N)
        stride = self.stride
        pad = self.pad
        ndim = self.ndim

        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0,) + tuple(moves.range(2, ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0,) + tuple(moves.range(ndim + 2, ndim * 2 + 2))

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (xp is numpy and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gW = xp.tensordot(gy, self.col, (out_axes, col_axes)).astype(
            W.dtype, copy=False)

        # Compute patch array gradient.
        gcol = xp.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, ndim + 1)

        # Compute input gradient.
        if xp is numpy:
            gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, dims)
        else:
            gx = conv_nd.col2im_nd_gpu(gcol, stride, pad, dims)

        # Compute bias gradient if given and return gradients.
        if b is None:
            return gx, gW
        else:
            # (n, _, out_1, out_2, ..., out_N)
            axis = (0,) + tuple(moves.range(2, ndim + 2))
            gb = gy.sum(axis=axis)
            return gx, gW, gb
Example no. 17
    def backward_cpu(self, x, gy):
        ndim = self.ndim
        n, c = gy[0].shape[:2]
        outs = gy[0].shape[2:]
        dims = x[0].shape[2:]
        prod_outs = functools.reduce(mul, outs)
        prod_ksize = functools.reduce(mul, self.ksize)

        gcol = numpy.zeros(n * c * prod_outs * prod_ksize, dtype=x[0].dtype)

        indexes = self.indexes.ravel()
        indexes += numpy.arange(0, indexes.size * prod_ksize, prod_ksize)

        gcol[indexes] = gy[0].ravel()
        gcol_shape = (n, c) + outs + self.ksize
        gcol = gcol.reshape(gcol_shape)
        for i in six.moves.range(ndim):
            gcol = numpy.swapaxes(gcol, 2 + i, ndim + 2 + i)

        gx = conv_nd.col2im_nd_cpu(gcol, self.stride, self.pad, dims)
        return gx,
Example no. 18
    def backward_cpu(self, x, gy):
        ndim = self.ndim
        n, c = gy[0].shape[:2]
        outs = gy[0].shape[2:]
        dims = x[0].shape[2:]
        prod_outs = functools.reduce(mul, outs)
        prod_ksize = functools.reduce(mul, self.ksize)

        gcol = numpy.zeros(n * c * prod_outs * prod_ksize, dtype=x[0].dtype)

        indexes = self.indexes.flatten()
        indexes += numpy.arange(0, indexes.size * prod_ksize, prod_ksize)

        gcol[indexes] = gy[0].ravel()
        gcol_shape = (n, c) + outs + self.ksize
        gcol = gcol.reshape(gcol_shape)
        for i in six.moves.range(ndim):
            gcol = numpy.swapaxes(gcol, 2 + i, ndim + 2 + i)

        gx = conv_nd.col2im_nd_cpu(gcol, self.stride, self.pad, dims)
        return gx,
Example no. 19
    def forward_cpu(self, gys):
        func = self.func
        pad_value = func.pad_value
        ksize = func.ksize
        stride = func.stride
        pad = func.pad
        in_shape = func._in_shape

        gy, = gys
        idims = in_shape[2:]
        odims = gy.shape[2:]
        colon = slice(None, None, None)
        is_pad_value_none = pad_value is None
        if is_pad_value_none:
            numpy.divide(gy, func.width, out=gy)
        gy_index = (colon, colon) + (None, ) * len(idims)
        gcol_reps = (1, 1) + ksize + (1, ) * len(odims)
        gcol = numpy.tile(gy[gy_index], gcol_reps)
        gx = conv_nd.col2im_nd_cpu(gcol, stride, pad, idims)
        if not is_pad_value_none:
            gx /= functools.reduce(operator.mul, ksize)
        return gx,
Example no. 20
 def test_col2im_consistency(self):
     col = conv_nd.im2col_nd_cpu(self.x, self.ksize, self.stride, self.pad)
     im_cpu = conv_nd.col2im_nd_cpu(col, self.stride, self.pad, self.dims)
     im_gpu = conv_nd.col2im_nd_gpu(cuda.to_gpu(col), self.stride, self.pad,
                                    self.dims)
     testing.assert_allclose(im_cpu, im_gpu.get())
Example no. 21
 def test_col2im_consistency(self):
     col = conv_nd.im2col_nd_cpu(self.x, self.ksize, self.stride, self.pad)
     im_cpu = conv_nd.col2im_nd_cpu(col, self.stride, self.pad, self.dims)
     im_gpu = conv_nd.col2im_nd_gpu(
         cuda.to_gpu(col), self.stride, self.pad, self.dims)
     testing.assert_allclose(im_cpu, im_gpu.get())