Example #1
    def forward_gpu(self, x):
        if chainer.should_use_cudnn('>=auto') and 2 <= self.ndim <= 3:
            # With cuDNN (v3 or later, as required by should_use_cudnn), use
            # the cuDNN implementation for two or three spatial dimensions.
            return super(MaxPoolingND, self).forward_gpu(x)

        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        n, c = x[0].shape[:2]
        dims = x[0].shape[2:]
        ys = tuple(conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
                   for (d, k, s, p) in six.moves.zip(
                       dims, self.ksize, self.stride, self.pad))
        # (n, c, y_1, y_2, ..., y_N)
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty(y_shape, dtype=numpy.int32)

        in_params, out_params, operation, name = \
            max_pooling_nd_kernel.MaxPoolingNDKernelForward.generate(self.ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            x[0].reduced_view(),
            *(dims + ys + self.ksize + self.stride + self.pad +
              (y, self.indexes)))

        return y,
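For reference, `get_conv_outsize` computes the output length of one spatial axis. A minimal pure-Python sketch of the standard formula it evaluates (an illustration, not Chainer's source):

    def outsize(size, k, s, p, cover_all=False):
        # Output length of one axis for kernel k, stride s, padding p.
        if cover_all:
            # Round up so every input element falls inside some window.
            return (size + 2 * p - k + s - 1) // s + 1
        return (size + 2 * p - k) // s + 1

    assert outsize(6, 3, 2, 0) == 2                  # windows start at 0 and 2
    assert outsize(6, 3, 2, 0, cover_all=True) == 3  # extra window covers the tail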
Example #2
    def check_im2col_nd(self, ksize, stride, pad, gpu):
        dims = self.dims
        if gpu:
            im2col = conv_nd.im2col_nd_gpu
            img = cuda.to_gpu(self.img)
        else:
            im2col = conv_nd.im2col_nd_cpu
            img = self.img

        col = im2col(img, ksize, stride, pad)
        outs = tuple(conv_nd.get_conv_outsize(d, k, s, p)
                     for (d, k, s, p) in zip(dims, ksize, stride, pad))
        expected_shape = (2, 3) + ksize + outs
        self.assertEqual(col.shape, expected_shape)

        col = cuda.to_cpu(col)

        for n in moves.range(2):
            for c in moves.range(3):
                for xs in itertools.product(
                        *[moves.range(out) for out in outs]):
                    for dxs in itertools.product(
                            *[moves.range(k) for k in ksize]):
                        oxs = tuple(x * s - p + dx
                                    for (x, s, p, dx)
                                    in zip(xs, stride, pad, dxs))
                        if all(0 <= ox < d for (ox, d) in zip(oxs, dims)):
                            col_index = (n, c) + dxs + xs
                            img_index = (n, c) + oxs
                            self.assertEqual(
                                col[col_index], self.img[img_index])
                        else:
                            col_index = (n, c) + dxs + xs
                            self.assertEqual(col[col_index], 0)
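The invariant this test asserts — `col[(n, c) + dxs + xs]` equals `img[(n, c) + oxs]` with `oxs = xs * stride - pad + dxs`, and zero where `oxs` falls in the padding — can be reproduced in plain NumPy for a single spatial axis. A rough sketch, illustrative only, not the library routine:

    import numpy

    def im2col_1d(img, k, s, p):
        # Gather every k-sized window of a (n, c, d) array into a
        # (n, c, k, out) column array; out-of-range taps read as zero.
        n, c, d = img.shape
        out = (d + 2 * p - k) // s + 1
        col = numpy.zeros((n, c, k, out), dtype=img.dtype)
        for dx in range(k):
            for x in range(out):
                ox = x * s - p + dx  # source position in the image
                if 0 <= ox < d:
                    col[:, :, dx, x] = img[:, :, ox]
        return col

    img = numpy.arange(2 * 3 * 5, dtype=numpy.float32).reshape(2, 3, 5)
    col = im2col_1d(img, k=3, s=1, p=1)
    assert col.shape == (2, 3, 3, 5)
    assert col[0, 0, 1, 2] == img[0, 0, 2]  # center tap reads the image
    assert col[0, 0, 0, 0] == 0             # left tap falls in the padding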
Example #3
    def forward_gpu(self, inputs):
        if self._used_cudnn:
            x, = self.mpoolnd._cudnn_inputs
            return self._forward_gpu_compute_indexes_again((x, inputs[0]))
        x, = inputs
        self._in_shape = x.shape
        self._in_dtype = x.dtype

        n, c = x.shape[:2]
        dims = x.shape[2:]

        ys = tuple(conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
                   for (d, k, s, p) in six.moves.zip(
                       dims, self.ksize, self.stride, self.pad))
        # (n, c, y_1, y_2, ..., y_N)
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x.dtype)

        cls = max_pooling_nd_kernel.MaxPoolingNDKernelForwardWithIndexes
        in_params, out_params, operation, name = cls.generate(self.ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            x.reduced_view(),
            *(dims + ys + self.ksize + self.stride + self.pad +
              (self.indexes.reduced_view(), y)))
        return y,
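Unlike Example #1, `self.indexes` is an input here: the kernel re-reads each window's maximum at a previously recorded argmax offset instead of recomputing it. A rough 1-D NumPy sketch of the idea (illustrative only; the real CUDA kernel decomposes one flattened in-window offset across all N spatial axes, and the stored offsets always point inside the array because padded positions never win the max):

    import numpy

    def gather_max_1d(x, indexes, s, p):
        # y[n, c, i] = x[n, c, i*s - p + indexes[n, c, i]]: re-read each
        # pooling window's maximum at its stored in-window argmax offset.
        n, c, d = x.shape
        out = indexes.shape[2]
        y = numpy.empty((n, c, out), dtype=x.dtype)
        for nn in range(n):
            for cc in range(c):
                for i in range(out):
                    y[nn, cc, i] = x[nn, cc, i * s - p + indexes[nn, cc, i]]
        return y

    x = numpy.array([[[1., 5., 2., 4., 3.]]])  # (n, c, d) = (1, 1, 5)
    idx = numpy.array([[[1, 1]]])              # argmax offset in each window
    assert tuple(gather_max_1d(x, idx, s=2, p=0)[0, 0]) == (5., 4.)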
Example #4
    def check_col2im_nd(self, ksize, stride, pad, gpu):
        dims = self.dims
        outs = tuple(conv_nd.get_conv_outsize(d, k, s, p)
                     for (d, k, s, p) in zip(dims, ksize, stride, pad))
        col_shape = (2, 3) + ksize + outs
        col = numpy.random.uniform(-1, 1, col_shape).astype(numpy.float32)

        if gpu:
            col_data = cuda.to_gpu(col)
        else:
            col_data = col

        img = conv_nd.col2im_nd(col_data, stride, pad, dims)
        img = cuda.to_cpu(img)
        img_shape = (2, 3) + dims
        self.assertEqual(img.shape, img_shape)
        for n in moves.range(2):
            for c in moves.range(3):
                for xs in itertools.product(
                        *[moves.range(d) for d in dims]):
                    v = numpy.float32(0.0)
                    for dxs in itertools.product(
                            *[moves.range(k) for k in ksize]):
                        oxs = tuple((x + p - dx) // s
                                    for (x, p, dx, s)
                                    in zip(xs, pad, dxs, stride))
                        if all((x + p - dx) % s == 0
                               for (x, p, dx, s)
                               in zip(xs, pad, dxs, stride)) and \
                            all(0 <= ox < out
                                for (ox, out) in zip(oxs, outs)):
                            col_index = (n, c) + dxs + oxs
                            v += col[col_index]
                    img_index = (n, c) + xs
                    self.assertAlmostEqual(img[img_index], v)
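`col2im_nd` is the adjoint of `im2col_nd`: every column entry is accumulated back into the image position it was gathered from, which is exactly the sum `v` that the loop above rebuilds by hand. A 1-D sketch in the same illustrative style as `im2col_1d` after Example #2:

    import numpy

    def col2im_1d(col, s, p, d):
        # Scatter a (n, c, k, out) column array back into a (n, c, d) image,
        # summing the contributions of overlapping windows.
        n, c, k, out = col.shape
        img = numpy.zeros((n, c, d), dtype=col.dtype)
        for dx in range(k):
            for x in range(out):
                ox = x * s - p + dx
                if 0 <= ox < d:
                    img[:, :, ox] += col[:, :, dx, x]
        return img

A round trip through both sketches counts each input element once per window covering it, matching the test's accumulation.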
Example #5
    def forward_gpu(self, x):
        if (chainer.should_use_cudnn('>=auto') and
                pooling_nd._check_cudnn_acceptable_type(x[0].dtype)):
            # With cuDNN v3 or greater, use cuDNN implementation for inputs
            # with spatial dimensions of two or more.
            if _cudnn_version >= 3000 and self.ndim >= 2:
                return super(AveragePoolingND, self).forward_gpu(x)
            # With cuDNN v2, use cuDNN implementation only for inputs with
            # spatial dimensions of two.
            elif self.ndim == 2:
                return super(AveragePoolingND, self).forward_gpu(x)

        self.retain_inputs(())
        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype

        n, c = x[0].shape[:2]
        dims = x[0].shape[2:]
        ys = tuple(conv_nd.get_conv_outsize(d, k, s, p,
                                            cover_all=self.cover_all)
                   for (d, k, s, p) in six.moves.zip(
                       dims, self.ksize, self.stride, self.pad))
        # (n, c, y_1, y_2, ..., y_N)
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x[0].dtype)
        coeff = 1. / functools.reduce(operator.mul, self.ksize)

        in_params, out_params, operation, name = \
            average_pooling_nd_kernel.AveragePoolingNDKernelForward.generate(
                self.ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            x[0].reduced_view(),
            *(dims + ys + self.ksize + self.stride + self.pad + (coeff, y)))

        return y,
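The only numeric difference from max pooling is `coeff`, the reciprocal of the window volume, which turns each window's sum into a mean. A quick illustration of the line above:

    import functools
    import operator

    ksize = (2, 3, 2)
    coeff = 1. / functools.reduce(operator.mul, ksize)
    assert coeff == 1. / 12  # each window averages 2 * 3 * 2 = 12 elements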
Example #6
    def forward_gpu(self, x):
        if (cuda.cudnn_enabled and self.use_cudnn and
                pooling_nd._check_cudnn_acceptable_type(x[0].dtype)):
            # With cuDNN v3 or greater, use cuDNN implementation for inputs
            # with spatial dimensions of two or more.
            if _cudnn_version >= 3000 and self.ndim >= 2:
                return super(MaxPoolingND, self).forward_gpu(x)
            # With cuDNN v2, use cuDNN implementation only for inputs with
            # spatial dimensions of two.
            elif self.ndim == 2:
                return super(MaxPoolingND, self).forward_gpu(x)

        n, c = x[0].shape[:2]
        dims = x[0].shape[2:]
        ys = tuple(conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
                   for (d, k, s, p) in six.moves.zip(
                       dims, self.ksize, self.stride, self.pad))
        # (n, c, y_1, y_2, ..., y_N)
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x[0].dtype)
        self.indexes = cuda.cupy.empty(y_shape, dtype=numpy.int32)

        in_params, out_params, operation, name = \
            max_pooling_nd_kernel.MaxPoolingNDKernelForward.generate(self.ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            x[0].reduced_view(),
            *(dims + ys + self.ksize + self.stride + self.pad +
              (y, self.indexes)))

        return y,
Example #7
    def setUp(self):
        self.dims = (4, 3)
        self.ksize = (2, 2)
        self.stride = (1, 1)
        self.pad = (0, 0)
        self.outs = tuple(conv_nd.get_conv_outsize(d, k, s, p)
                          for (d, k, s, p) in zip(
                              self.dims, self.ksize, self.stride, self.pad))
        col_shape = (2, 3) + self.ksize + self.outs
        self.col = numpy.random.uniform(-1, 1, col_shape).astype(numpy.float32)
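With these fixture values the sizes work out concretely; a quick check of the arithmetic using the standard output-size formula (plain Python, for illustration):

    dims, ksize, stride, pad = (4, 3), (2, 2), (1, 1), (0, 0)
    outs = tuple((d + 2 * p - k) // s + 1
                 for d, k, s, p in zip(dims, ksize, stride, pad))
    assert outs == (3, 2)                               # (4-2)//1+1, (3-2)//1+1
    assert (2, 3) + ksize + outs == (2, 3, 2, 2, 3, 2)  # col_shape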
Example #8
    def _forward_gpu_compute_indexes_again(self, inputs):
        x, ggx = inputs
        self._in_shape = x.shape
        self._in_dtype = x.dtype

        n, c = x.shape[:2]
        dims = x.shape[2:]

        ys = tuple(conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
                   for (d, k, s, p) in six.moves.zip(
                       dims, self.ksize, self.stride, self.pad))
        # (n, c, y_1, y_2, ..., y_N)
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x.dtype)

        cls = max_pooling_nd_kernel.MaxPoolingNDKernelForwardWithIndexes1
        in_params, out_params, operation, name = cls.generate(self.ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            x.reduced_view(),
            *(dims + ys + self.ksize + self.stride + self.pad +
              (ggx.reduced_view(), y)))
        return y,
Example #9
    def check_col2im_nd(self, ksize, stride, pad, gpu):
        dims = self.dims
        outs = tuple(conv_nd.get_conv_outsize(d, k, s, p)
                     for (d, k, s, p) in zip(dims, ksize, stride, pad))
        col_shape = (2, 3) + ksize + outs
        col = numpy.random.uniform(-1, 1, col_shape).astype(numpy.float32)

        if gpu:
            col2im = conv_nd.col2im_nd_gpu
            col_data = cuda.to_gpu(col)
        else:
            col2im = conv_nd.col2im_nd_cpu
            col_data = col

        img = col2im(col_data, stride, pad, dims)
        img = cuda.to_cpu(img)
        img_shape = (2, 3) + dims
        self.assertEqual(img.shape, img_shape)
        for n in moves.range(2):
            for c in moves.range(3):
                for xs in itertools.product(
                        *[moves.range(d) for d in dims]):
                    v = numpy.float32(0.0)
                    for dxs in itertools.product(
                            *[moves.range(k) for k in ksize]):
                        oxs = tuple((x + p - dx) // s
                                    for (x, p, dx, s)
                                    in zip(xs, pad, dxs, stride))
                        if all((x + p - dx) % s == 0
                               for (x, p, dx, s)
                               in zip(xs, pad, dxs, stride)) and \
                            all(0 <= ox < out
                                for (ox, out) in zip(oxs, outs)):
                            col_index = (n, c) + dxs + oxs
                            v += col[col_index]
                    img_index = (n, c) + xs
                    self.assertAlmostEqual(img[img_index], v)
Example #10
    def forward_gpu(self, inputs):
        func = self.func

        if func.is_cudnn_used:
            x = func.get_retained_inputs()[0].array
            return self._forward_gpu_compute_indexes_again((x, inputs[0]))

        ndim = func.ndim
        ksize = func.ksize
        stride = func.stride
        pad = func.pad
        cover_all = func.cover_all
        indexes = backend.from_chx(func.indexes)

        x, = inputs
        in_shape = x.shape
        in_dtype = x.dtype

        n, c = in_shape[:2]
        dims = in_shape[2:]

        ys = tuple(
            conv_nd.get_conv_outsize(d, k, s, p, cover_all)
            for (d, k, s, p) in six.moves.zip(dims, ksize, stride, pad))
        # (n, c, y_1, y_2, ..., y_N)
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x.dtype)

        cls = max_pooling_nd_kernel.MaxPoolingNDKernelForwardWithIndexes
        in_params, out_params, operation, name = cls.generate(ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            x.reduced_view(),
            *(dims + ys + ksize + stride + pad + (indexes.reduced_view(), y)))

        self._in_shape = in_shape
        self._in_dtype = in_dtype
        return y,