def forward_gpu(self, x):
    """Run N-dimensional max pooling forward on the GPU.

    Args:
        x: Sequence whose first element is the input array. Its first two
            axes are unpacked as ``(n, c)`` and the remaining axes are
            treated as the spatial dimensions.

    Returns:
        A one-element tuple holding the pooled output array.

    Side effects:
        On the non-cuDNN path, stores ``self._in_shape``,
        ``self._in_dtype`` and ``self.indexes`` (an ``int32`` array with
        the same shape as the output).
    """
    if chainer.should_use_cudnn('>=auto') and 2 <= self.ndim <= 3:
        # With two or three spatial dimensions the parent class's
        # implementation (the cuDNN path) is used instead.
        return super(MaxPoolingND, self).forward_gpu(x)

    data = x[0]
    self._in_shape = data.shape
    self._in_dtype = data.dtype

    batch, channels = data.shape[:2]
    spatial = data.shape[2:]
    out_spatial = tuple(
        conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
        for d, k, s, p in six.moves.zip(
            spatial, self.ksize, self.stride, self.pad))

    # Output layout: (n, c, y_1, y_2, ..., y_N)
    out_shape = (batch, channels) + out_spatial
    y = cuda.cupy.empty(out_shape, dtype=data.dtype)
    self.indexes = cuda.cupy.empty(out_shape, dtype=numpy.int32)

    kernel_cls = max_pooling_nd_kernel.MaxPoolingNDKernelForward
    in_params, out_params, operation, name = kernel_cls.generate(self.ndim)
    kernel = cuda.elementwise(in_params, out_params, operation, name)

    # Kernel arguments after the input view: input dims, output dims,
    # kernel sizes, strides, pads, then the two output arrays.
    kernel(
        data.reduced_view(),
        *(spatial + out_spatial + self.ksize + self.stride + self.pad
          + (y, self.indexes)))
    return y,
def check_im2col_nd(self, ksize, stride, pad, gpu):
    """Check ``im2col_nd`` output against a direct element-wise lookup.

    Args:
        ksize: Tuple of kernel sizes, one per spatial dimension.
        stride: Tuple of strides, one per spatial dimension.
        pad: Tuple of paddings, one per spatial dimension.
        gpu: If true, exercise ``conv_nd.im2col_nd_gpu`` on a GPU copy of
            ``self.img``; otherwise use ``conv_nd.im2col_nd_cpu``.

    The batch and channel extents are hard-coded to 2 and 3, matching the
    expected column shape ``(2, 3) + ksize + outs``.
    """
    dims = self.dims
    if gpu:
        im2col = conv_nd.im2col_nd_gpu
        img = cuda.to_gpu(self.img)
    else:
        im2col = conv_nd.im2col_nd_cpu
        img = self.img

    col = im2col(img, ksize, stride, pad)
    outs = tuple(
        conv_nd.get_conv_outsize(d, k, s, p)
        for d, k, s, p in zip(dims, ksize, stride, pad))
    self.assertEqual(col.shape, (2, 3) + ksize + outs)
    col = cuda.to_cpu(col)

    out_ranges = [moves.range(out) for out in outs]
    kernel_ranges = [moves.range(k) for k in ksize]
    for n in moves.range(2):
        for c in moves.range(3):
            prefix = (n, c)
            for xs in itertools.product(*out_ranges):
                for dxs in itertools.product(*kernel_ranges):
                    # Position in the source image that this column
                    # entry refers to (may fall into the padding).
                    oxs = tuple(
                        x * s - p + dx
                        for x, s, p, dx in zip(xs, stride, pad, dxs))
                    inside = all(
                        0 <= ox < d for ox, d in zip(oxs, dims))
                    col_value = col[prefix + dxs + xs]
                    if inside:
                        self.assertEqual(
                            col_value, self.img[prefix + oxs])
                    else:
                        # Entries that map outside the image must be 0.
                        self.assertEqual(col_value, 0)
def forward_gpu(self, inputs):
    """Run the indexes-based max pooling forward kernel on the GPU.

    Args:
        inputs: One-element sequence holding the input array.

    Returns:
        A one-element tuple holding the output array.

    When ``self._used_cudnn`` is true the work is delegated to
    ``_forward_gpu_compute_indexes_again`` with the array taken from
    ``self.mpoolnd._cudnn_inputs`` and ``inputs[0]``. Otherwise
    ``self._in_shape`` and ``self._in_dtype`` are recorded and the kernel
    is run with ``self.indexes``.
    """
    if self._used_cudnn:
        x, = self.mpoolnd._cudnn_inputs
        return self._forward_gpu_compute_indexes_again((x, inputs[0]))

    x, = inputs
    self._in_shape = x.shape
    self._in_dtype = x.dtype

    batch, channels = x.shape[:2]
    spatial = x.shape[2:]
    out_spatial = tuple(
        conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
        for d, k, s, p in six.moves.zip(
            spatial, self.ksize, self.stride, self.pad))

    # Output layout: (n, c, y_1, y_2, ..., y_N)
    y = cuda.cupy.empty((batch, channels) + out_spatial, dtype=x.dtype)

    kernel_cls = max_pooling_nd_kernel.MaxPoolingNDKernelForwardWithIndexes
    in_params, out_params, operation, name = kernel_cls.generate(self.ndim)
    kernel = cuda.elementwise(in_params, out_params, operation, name)

    # Kernel arguments after the input view: input dims, output dims,
    # kernel sizes, strides, pads, the stored indexes, then the output.
    kernel(
        x.reduced_view(),
        *(spatial + out_spatial + self.ksize + self.stride + self.pad
          + (self.indexes.reduced_view(), y)))
    return y,
def check_col2im_nd(self, ksize, stride, pad, gpu):
    """Check ``conv_nd.col2im_nd`` against a brute-force accumulation.

    Args:
        ksize: Tuple of kernel sizes, one per spatial dimension.
        stride: Tuple of strides, one per spatial dimension.
        pad: Tuple of paddings, one per spatial dimension.
        gpu: If true, the random column array is moved to the GPU before
            calling ``col2im_nd``.

    The batch and channel extents are hard-coded to 2 and 3.
    """
    dims = self.dims
    outs = tuple(
        conv_nd.get_conv_outsize(d, k, s, p)
        for d, k, s, p in zip(dims, ksize, stride, pad))
    col = numpy.random.uniform(
        -1, 1, (2, 3) + ksize + outs).astype(numpy.float32)
    col_data = cuda.to_gpu(col) if gpu else col

    img = cuda.to_cpu(conv_nd.col2im_nd(col_data, stride, pad, dims))
    self.assertEqual(img.shape, (2, 3) + dims)

    pixel_ranges = [moves.range(d) for d in dims]
    kernel_ranges = [moves.range(k) for k in ksize]
    for n in moves.range(2):
        for c in moves.range(3):
            prefix = (n, c)
            for xs in itertools.product(*pixel_ranges):
                expected = numpy.float32(0.0)
                for dxs in itertools.product(*kernel_ranges):
                    # Offset of this pixel from the window origin, per
                    # dimension, before dividing by the stride.
                    shifted = tuple(
                        x + p - dx for x, p, dx in zip(xs, pad, dxs))
                    oxs = tuple(
                        off // s for off, s in zip(shifted, stride))
                    on_grid = all(
                        off % s == 0 for off, s in zip(shifted, stride))
                    if not on_grid:
                        continue
                    if not all(
                            0 <= ox < out for ox, out in zip(oxs, outs)):
                        continue
                    expected += col[prefix + dxs + oxs]
                self.assertAlmostEqual(img[prefix + xs], expected)
def forward_gpu(self, x):
    """Run N-dimensional average pooling forward on the GPU.

    Args:
        x: Sequence whose first element is the input array. Its first two
            axes are unpacked as ``(n, c)`` and the remaining axes are
            treated as the spatial dimensions.

    Returns:
        A one-element tuple holding the pooled output array.

    Side effects:
        On the non-cuDNN path, calls ``self.retain_inputs(())`` and stores
        ``self._in_shape`` and ``self._in_dtype``.
    """
    cudnn_usable = (
        chainer.should_use_cudnn('>=auto')
        and pooling_nd._check_cudnn_acceptable_type(x[0].dtype))
    if cudnn_usable:
        # With cuDNN v3 or greater the cuDNN implementation handles two
        # or more spatial dimensions; with cuDNN v2 only exactly two.
        if (_cudnn_version >= 3000 and self.ndim >= 2) or self.ndim == 2:
            return super(AveragePoolingND, self).forward_gpu(x)

    self.retain_inputs(())
    data = x[0]
    self._in_shape = data.shape
    self._in_dtype = data.dtype

    batch, channels = data.shape[:2]
    spatial = data.shape[2:]
    out_spatial = tuple(
        conv_nd.get_conv_outsize(d, k, s, p, cover_all=self.cover_all)
        for d, k, s, p in six.moves.zip(
            spatial, self.ksize, self.stride, self.pad))

    # Output layout: (n, c, y_1, y_2, ..., y_N)
    y = cuda.cupy.empty((batch, channels) + out_spatial, dtype=data.dtype)

    # Reciprocal of the number of elements in one pooling window.
    coeff = 1. / functools.reduce(operator.mul, self.ksize)

    kernel_cls = average_pooling_nd_kernel.AveragePoolingNDKernelForward
    in_params, out_params, operation, name = kernel_cls.generate(self.ndim)
    kernel = cuda.elementwise(in_params, out_params, operation, name)

    # Kernel arguments after the input view: input dims, output dims,
    # kernel sizes, strides, pads, the coefficient, then the output.
    kernel(
        data.reduced_view(),
        *(spatial + out_spatial + self.ksize + self.stride + self.pad
          + (coeff, y)))
    return y,
def forward_gpu(self, x):
    """Run N-dimensional max pooling forward on the GPU.

    Args:
        x: Sequence whose first element is the input array. Its first two
            axes are unpacked as ``(n, c)`` and the remaining axes are
            treated as the spatial dimensions.

    Returns:
        A one-element tuple holding the pooled output array.

    Side effects:
        On the non-cuDNN path, stores ``self.indexes`` (an ``int32`` array
        with the same shape as the output).
    """
    cudnn_usable = (
        cuda.cudnn_enabled
        and self.use_cudnn
        and pooling_nd._check_cudnn_acceptable_type(x[0].dtype))
    if cudnn_usable:
        # With cuDNN v3 or greater the cuDNN implementation handles two
        # or more spatial dimensions; with cuDNN v2 only exactly two.
        if (_cudnn_version >= 3000 and self.ndim >= 2) or self.ndim == 2:
            return super(MaxPoolingND, self).forward_gpu(x)

    data = x[0]
    batch, channels = data.shape[:2]
    spatial = data.shape[2:]
    out_spatial = tuple(
        conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
        for d, k, s, p in six.moves.zip(
            spatial, self.ksize, self.stride, self.pad))

    # Output layout: (n, c, y_1, y_2, ..., y_N)
    out_shape = (batch, channels) + out_spatial
    y = cuda.cupy.empty(out_shape, dtype=data.dtype)
    self.indexes = cuda.cupy.empty(out_shape, dtype=numpy.int32)

    kernel_cls = max_pooling_nd_kernel.MaxPoolingNDKernelForward
    in_params, out_params, operation, name = kernel_cls.generate(self.ndim)
    kernel = cuda.elementwise(in_params, out_params, operation, name)

    # Kernel arguments after the input view: input dims, output dims,
    # kernel sizes, strides, pads, then the two output arrays.
    kernel(
        data.reduced_view(),
        *(spatial + out_spatial + self.ksize + self.stride + self.pad
          + (y, self.indexes)))
    return y,
def setUp(self):
    """Prepare a fixed 2-D geometry and a random ``float32`` column array.

    Sets ``dims``, ``ksize``, ``stride``, ``pad``, the derived ``outs``
    and ``col`` (shape ``(2, 3) + ksize + outs``, values drawn uniformly
    from ``[-1, 1)``).
    """
    self.dims = (4, 3)
    self.ksize = (2, 2)
    self.stride = (1, 1)
    self.pad = (0, 0)

    geometry = zip(self.dims, self.ksize, self.stride, self.pad)
    self.outs = tuple(
        conv_nd.get_conv_outsize(d, k, s, p) for d, k, s, p in geometry)

    samples = numpy.random.uniform(
        -1, 1, (2, 3) + self.ksize + self.outs)
    self.col = samples.astype(numpy.float32)
def _forward_gpu_compute_indexes_again(self, inputs):
    """Run the ``MaxPoolingNDKernelForwardWithIndexes1`` kernel.

    Args:
        inputs: Pair ``(x, ggx)`` of arrays. ``x`` supplies the shape and
            dtype of the computation; ``ggx`` is passed to the kernel as
            its last input argument.
            NOTE(review): judging by the method name, the kernel
            presumably recomputes the max indexes from ``x`` rather than
            reading stored ones - confirm against the kernel source.

    Returns:
        A one-element tuple holding the output array.

    Side effects:
        Stores ``self._in_shape`` and ``self._in_dtype`` from ``x``.
    """
    x, ggx = inputs
    self._in_shape = x.shape
    self._in_dtype = x.dtype

    batch, channels = x.shape[:2]
    spatial = x.shape[2:]
    out_spatial = tuple(
        conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
        for d, k, s, p in six.moves.zip(
            spatial, self.ksize, self.stride, self.pad))

    # Output layout: (n, c, y_1, y_2, ..., y_N)
    y = cuda.cupy.empty((batch, channels) + out_spatial, dtype=x.dtype)

    kernel_cls = max_pooling_nd_kernel.MaxPoolingNDKernelForwardWithIndexes1
    in_params, out_params, operation, name = kernel_cls.generate(self.ndim)
    kernel = cuda.elementwise(in_params, out_params, operation, name)

    # Kernel arguments after the input view: input dims, output dims,
    # kernel sizes, strides, pads, the ggx view, then the output.
    kernel(
        x.reduced_view(),
        *(spatial + out_spatial + self.ksize + self.stride + self.pad
          + (ggx.reduced_view(), y)))
    return y,
def check_col2im_nd(self, ksize, stride, pad, gpu):
    """Check ``col2im_nd_{cpu,gpu}`` against a brute-force accumulation.

    Args:
        ksize: Tuple of kernel sizes, one per spatial dimension.
        stride: Tuple of strides, one per spatial dimension.
        pad: Tuple of paddings, one per spatial dimension.
        gpu: If true, exercise ``conv_nd.col2im_nd_gpu`` on a GPU copy of
            the random column array; otherwise ``conv_nd.col2im_nd_cpu``.

    The batch and channel extents are hard-coded to 2 and 3.
    """
    dims = self.dims
    outs = tuple(
        conv_nd.get_conv_outsize(d, k, s, p)
        for d, k, s, p in zip(dims, ksize, stride, pad))
    col = numpy.random.uniform(
        -1, 1, (2, 3) + ksize + outs).astype(numpy.float32)

    if gpu:
        col2im = conv_nd.col2im_nd_gpu
        col_data = cuda.to_gpu(col)
    else:
        col2im = conv_nd.col2im_nd_cpu
        col_data = col

    img = cuda.to_cpu(col2im(col_data, stride, pad, dims))
    self.assertEqual(img.shape, (2, 3) + dims)

    pixel_ranges = [moves.range(d) for d in dims]
    kernel_ranges = [moves.range(k) for k in ksize]
    for n in moves.range(2):
        for c in moves.range(3):
            prefix = (n, c)
            for xs in itertools.product(*pixel_ranges):
                expected = numpy.float32(0.0)
                for dxs in itertools.product(*kernel_ranges):
                    # Offset of this pixel from the window origin, per
                    # dimension, before dividing by the stride.
                    shifted = tuple(
                        x + p - dx for x, p, dx in zip(xs, pad, dxs))
                    oxs = tuple(
                        off // s for off, s in zip(shifted, stride))
                    on_grid = all(
                        off % s == 0 for off, s in zip(shifted, stride))
                    if not on_grid:
                        continue
                    if not all(
                            0 <= ox < out for ox, out in zip(oxs, outs)):
                        continue
                    expected += col[prefix + dxs + oxs]
                self.assertAlmostEqual(img[prefix + xs], expected)
def forward_gpu(self, inputs):
    """Run the indexes-based max pooling forward kernel on the GPU.

    Pooling parameters (``ndim``, ``ksize``, ``stride``, ``pad``,
    ``cover_all``, ``indexes``) are read from ``self.func``.

    Args:
        inputs: One-element sequence holding the input array.

    Returns:
        A one-element tuple holding the output array.

    When ``self.func.is_cudnn_used`` is true the work is delegated to
    ``_forward_gpu_compute_indexes_again`` with the first retained input
    of ``self.func`` and ``inputs[0]``. Otherwise ``self._in_shape`` and
    ``self._in_dtype`` are recorded after the kernel has been launched.
    """
    pool = self.func

    if pool.is_cudnn_used:
        retained = pool.get_retained_inputs()[0].array
        return self._forward_gpu_compute_indexes_again(
            (retained, inputs[0]))

    ndim = pool.ndim
    ksize = pool.ksize
    stride = pool.stride
    pad = pool.pad
    cover_all = pool.cover_all
    indexes = backend.from_chx(pool.indexes)

    x, = inputs
    in_shape = x.shape
    in_dtype = x.dtype

    batch, channels = in_shape[:2]
    spatial = in_shape[2:]
    out_spatial = tuple(
        conv_nd.get_conv_outsize(d, k, s, p, cover_all)
        for d, k, s, p in six.moves.zip(spatial, ksize, stride, pad))

    # Output layout: (n, c, y_1, y_2, ..., y_N)
    y = cuda.cupy.empty((batch, channels) + out_spatial, dtype=x.dtype)

    kernel_cls = max_pooling_nd_kernel.MaxPoolingNDKernelForwardWithIndexes
    in_params, out_params, operation, name = kernel_cls.generate(ndim)
    kernel = cuda.elementwise(in_params, out_params, operation, name)

    # Kernel arguments after the input view: input dims, output dims,
    # kernel sizes, strides, pads, the stored indexes, then the output.
    kernel(
        x.reduced_view(),
        *(spatial + out_spatial + ksize + stride + pad
          + (indexes.reduced_view(), y)))

    self._in_shape = in_shape
    self._in_dtype = in_dtype
    return y,