def check_im2col(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu): if gpu: im2col = conv.im2col_gpu img = cuda.to_gpu(self.img) else: im2col = conv.im2col_cpu img = self.img col = im2col(img, kh, kw, sy, sx, ph, pw, dy=dy, dx=dx) col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy) col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx) self.assertEqual(col.shape, (2, 3, kh, kw, col_h, col_w)) col = cuda.to_cpu(col) for n in moves.range(2): for c in moves.range(3): for y in moves.range(col_h): for x in moves.range(col_w): for ky in moves.range(kh): for kx in moves.range(kw): oy = y * sy - ph + ky * dy ox = x * sx - pw + kx * dx if 0 <= oy < self.h and 0 <= ox < self.w: self.assertEqual( col[n, c, ky, kx, y, x], self.img[n, c, oy, ox]) else: self.assertEqual(col[n, c, ky, kx, y, x], 0)
def forward_gpu(self, x): self._used_cudnn = True # Implementation using cudnn x = cuda.cupy.ascontiguousarray(x[0]) n, c, h, w = x.shape y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph, self.cover_all) assert y_h > 0, 'Height in the output should be positive.' y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw, self.cover_all) assert y_w > 0, 'Width in the output should be positive.' y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x.dtype) handle = cudnn.get_handle() pool_desc = self.create_pool_desc() x_desc = cudnn.create_tensor_descriptor(x) y_desc = cudnn.create_tensor_descriptor(y) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = numpy.array(1, dtype=oz_dtype).ctypes zero = numpy.array(0, dtype=oz_dtype).ctypes libcudnn.poolingForward(handle, pool_desc.value, one.data, x_desc.value, x.data.ptr, zero.data, y_desc.value, y.data.ptr) self.retain_outputs((0, )) return y,
def check_type_forward(self, in_types): n_in = in_types.size() type_check.expect(2 <= n_in, n_in <= 3) x_type, w_type = in_types[:2] type_check.expect(x_type.dtype.kind == 'f', w_type.dtype.kind == 'f', x_type.ndim == 4, w_type.ndim == 4, x_type.shape[1] == w_type.shape[0]) if self.outh is not None: lower_bound = conv.get_conv_outsize(self.outh, w_type.shape[2], self.sy, self.ph) upper_bound = conv.get_conv_outsize(self.outh, w_type.shape[2], self.sy, self.ph, cover_all=True) type_check.expect(lower_bound <= x_type.shape[2], x_type.shape[2] <= upper_bound) if self.outw is not None: lower_bound = conv.get_conv_outsize(self.outw, w_type.shape[3], self.sx, self.pw) upper_bound = conv.get_conv_outsize(self.outw, w_type.shape[3], self.sx, self.pw, cover_all=True) type_check.expect(lower_bound <= x_type.shape[3], x_type.shape[3] <= upper_bound) if type_check.eval(n_in) == 3: b_type = in_types[2] type_check.expect(b_type.dtype == x_type.dtype, b_type.ndim == 1, b_type.shape[0] == w_type.shape[1])
def check_im2col(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu): if gpu: im2col = conv.im2col_gpu img = cuda.to_gpu(self.img) else: im2col = conv.im2col_cpu img = self.img col = im2col(img, kh, kw, sy, sx, ph, pw, dy=dy, dx=dx) col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy) col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx) self.assertEqual(col.shape, (2, 3, kh, kw, col_h, col_w)) col = cuda.to_cpu(col) for y in moves.range(col_h): for x in moves.range(col_w): for ky in moves.range(kh): for kx in moves.range(kw): oy = y * sy - ph + ky * dy ox = x * sx - pw + kx * dx if 0 <= oy < self.h and 0 <= ox < self.w: testing.assert_allclose(col[:, :, ky, kx, y, x], self.img[:, :, oy, ox]) else: testing.assert_allclose( col[:, :, ky, kx, y, x], numpy.zeros((2, 3), self.dtype))
def forward_gpu(self, x): if (cuda.cudnn_enabled and self.use_cudnn and pooling_2d._check_cudnn_acceptable_type(x[0].dtype)): return super(AveragePooling2D, self).forward_gpu(x) n, c, h, w = x[0].shape y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph) y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw) y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype) coeff = 1. / (self.kh * self.kw) kern = cuda.elementwise( 'raw T in, int32 h, int32 w,' 'int32 out_h, int32 out_w, int32 kh, int32 kw,' 'int32 sy, int32 sx, int32 ph, int32 pw, T coeff', 'T out', ''' int c0 = i / (out_h * out_w); int out_y = i / out_w % out_h; int out_x = i % out_w; int in_y_0 = max(0, out_y * sy - ph); int in_y_1 = min(h, out_y * sy + kh - ph); int in_x_0 = max(0, out_x * sx - pw); int in_x_1 = min(w, out_x * sx + kw - pw); T val = 0; for (int y = in_y_0; y < in_y_1; ++y) { int offset_y = w * (y + h * c0); for (int x = in_x_0; x < in_x_1; ++x) { val = val + in[x + offset_y]; } } out = val * coeff; ''', 'avg_pool_fwd') kern(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, coeff, y) return y,
def forward_gpu(self, x): self.retain_inputs((0,)) self._used_cudnn = True # Implementation using cudnn x = cuda.cupy.ascontiguousarray(x[0]) n, c, h, w = x.shape y_h = conv.get_conv_outsize( h, self.kh, self.sy, self.ph, self.cover_all) assert y_h > 0, 'Height in the output should be positive.' y_w = conv.get_conv_outsize( w, self.kw, self.sx, self.pw, self.cover_all) assert y_w > 0, 'Width in the output should be positive.' y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x.dtype) handle = cudnn.get_handle() pool_desc = self.create_pool_desc() x_desc = cudnn.create_tensor_descriptor(x) y_desc = cudnn.create_tensor_descriptor(y) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = numpy.array(1, dtype=oz_dtype).ctypes zero = numpy.array(0, dtype=oz_dtype).ctypes libcudnn.poolingForward( handle, pool_desc.value, one.data, x_desc.value, x.data.ptr, zero.data, y_desc.value, y.data.ptr) self.retain_outputs((0,)) return y,
def check_type_forward(self, in_types): n_in = in_types.size() type_check.expect(3 <= n_in, n_in <= 4) x_type = in_types[0] v_type = in_types[1] g_type = in_types[2] type_check.expect( x_type.dtype.kind == "f", v_type.dtype.kind == "f", g_type.dtype.kind == "f", x_type.ndim == 4, v_type.ndim == 4, g_type.ndim == 4, x_type.shape[1] == v_type.shape[0] ) if self.outh is not None: type_check.expect( x_type.shape[2] == conv.get_conv_outsize(self.outh, v_type.shape[2],self.sy, self.ph), ) if self.outw is not None: type_check.expect( x_type.shape[3] == conv.get_conv_outsize(self.outw, v_type.shape[3], self.sx, self.pw), ) if type_check.eval(n_in) == 4: b_type = in_types[3] type_check.expect( b_type.dtype == x_type.dtype, b_type.ndim == 1, b_type.shape[0] == v_type.shape[1] )
def check_im2col(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu): if gpu: im2col = conv.im2col_gpu img = cuda.to_gpu(self.img) else: im2col = conv.im2col_cpu img = self.img col = im2col(img, kh, kw, sy, sx, ph, pw, dy=dy, dx=dx) col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy) col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx) self.assertEqual(col.shape, (2, 3, kh, kw, col_h, col_w)) col = cuda.to_cpu(col) for y in moves.range(col_h): for x in moves.range(col_w): for ky in moves.range(kh): for kx in moves.range(kw): oy = y * sy - ph + ky * dy ox = x * sx - pw + kx * dx if 0 <= oy < self.h and 0 <= ox < self.w: testing.assert_allclose( col[:, :, ky, kx, y, x], self.img[:, :, oy, ox]) else: testing.assert_allclose( col[:, :, ky, kx, y, x], numpy.zeros((2, 3), self.dtype))
def check_col2im(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu): col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy) col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx) shape = (2, 3, kh, kw, col_h, col_w) col = numpy.random.uniform(-1, 1, shape).astype(numpy.float32) if gpu: col2im = conv.col2im_gpu col_data = cuda.to_gpu(col) else: col2im = conv.col2im_cpu col_data = col img = col2im(col_data, sy, sx, ph, pw, self.h, self.w, dy=dy, dx=dx) img = cuda.to_cpu(img) self.assertEqual(img.shape, (2, 3, self.h, self.w)) for n in moves.range(2): for c in moves.range(3): for y in moves.range(self.h): for x in moves.range(self.w): v = numpy.float32(0.0) for ky in moves.range(kh): for kx in moves.range(kw): oy = (y + ph - ky * dy) // sy ox = (x + pw - kx * dx) // sx if (y + ph - ky * dy) % sy == 0 and \ (x + pw - kx * dx) % sx == 0 and \ 0 <= oy < col_h and \ 0 <= ox < col_w: v += col[n, c, ky, kx, oy, ox] self.assertAlmostEqual(img[n, c, y, x], v)
def calc_max_pooling2d(func, in_data, **kwargs): """[MaxPooling2D](https://docs.chainer.org/en/v4.3.0/reference/generated/chainer.functions.max_pooling_2d.html) Each output pixel is calculated by taking max of $k * k$ elements from the input ($k*k - 1$ FLOPs). Output size is calculated by [chainer.utils.get_conv_outsize](https://docs.chainer.org/en/v4.3.0/reference/util/generated/chainer.utils.get_conv_outsize.html). | Item | Value | |:-------|:------| | FLOPs | $$ \| y \| (k_{\mathrm{w}} k_{\mathrm{h}} - 1) $$ | | mread | $$\| x \|$$ | | mwrite | $$\| y \|$$ | | params | AvgPooling parameter `k`, `s` and `p` | """ x, = in_data kh, kw = int(func.kh), int(func.kw) sy, sx = int(func.sy), int(func.sx) ph, pw = int(func.ph), int(func.pw) batch_size, in_c, in_h, in_w = x.shape out_h = get_conv_outsize(in_h, kh, sy, ph, cover_all=func.cover_all) out_w = get_conv_outsize(in_w, kw, sx, pw, cover_all=func.cover_all) out_size = batch_size * in_c * out_h * out_w flops = out_size * int(kw * kh - 1) params = { 'k': kw if kw == kh else (kh, kw), 's': sx if sx == sy else (sy, sx), 'p': pw if pw == ph else (ph, pw) } return (flops, x.size, out_size, params)
def check_forward(self, x, kh, kw, sy, sx, ph, pw, dy, dx, gpu): x = x.copy() n, c, h, w = x.shape col = functions.im2col( x, (kh, kw), (sy, sx), (ph, pw), dilate=(dy, dx)).data col_h = get_conv_outsize(h, kh, sy, ph, d=dy) col_w = get_conv_outsize(w, kw, sx, pw, d=dx) self.assertEqual(col.shape, (n, c * kh * kw, col_h, col_w)) col = col.reshape(n, c, kh, kw, col_h, col_w) col = cuda.to_cpu(col) for y in moves.range(col_h): for x in moves.range(col_w): for ky in moves.range(kh): for kx in moves.range(kw): oy = y * sy - ph + ky * dy ox = x * sx - pw + kx * dx if 0 <= oy < h and 0 <= ox < w: testing.assert_allclose( col[:, :, ky, kx, y, x], self.x[:, :, oy, ox]) else: testing.assert_allclose( col[:, :, ky, kx, y, x], numpy.zeros((2, 3), self.dtype))
def check_type_forward(self, in_types): n_in = in_types.size() type_check.expect(2 <= n_in, n_in <= 3) x_type, w_type = in_types[:2] type_check.expect( x_type.dtype.kind == 'f', w_type.dtype.kind == 'f', x_type.ndim == 4, w_type.ndim == 4, x_type.shape[1] == w_type.shape[0] ) if self.outh is not None: type_check.expect( x_type.shape[2] == conv.get_conv_outsize(self.outh, w_type.shape[2], self.sy, self.ph), ) if self.outw is not None: type_check.expect( x_type.shape[3] == conv.get_conv_outsize(self.outw, w_type.shape[3], self.sx, self.pw), ) if type_check.eval(n_in) == 3: b_type = in_types[2] type_check.expect( b_type.dtype == x_type.dtype, b_type.ndim == 1, b_type.shape[0] == w_type.shape[1] )
def check_type_forward(self, in_types): n_in = in_types.size() type_check.expect(2 <= n_in, n_in <= 3) x_type, w_type = in_types[:2] type_check.expect(x_type.dtype.kind == 'f', w_type.dtype.kind == 'f', x_type.ndim == self.ndim + 2, w_type.ndim == self.ndim + 2, x_type.shape[1] == w_type.shape[0]) if self.outs is not None: for i, (out, s, p, di) in enumerate( zip(self.outs, self.stride, self.pad, self.dilate)): lower_bound = conv.get_conv_outsize(out, w_type.shape[i + 2], s, p, d=di) upper_bound = conv.get_conv_outsize(out, w_type.shape[i + 2], s, p, cover_all=True, d=di) type_check.expect(lower_bound <= x_type.shape[i + 2], x_type.shape[i + 2] <= upper_bound) if type_check.eval(n_in) == 3: b_type = in_types[2] type_check.expect( b_type.dtype == x_type.dtype, b_type.ndim == 1, # Need to consider the case that group count > 1. # b_type.shape[0] == w_type.shape[1] )
def check_col2im(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu): col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy) col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx) shape = (2, 3, kh, kw, col_h, col_w) col = numpy.random.uniform(-1, 1, shape).astype(self.dtype) if gpu: col2im = conv.col2im_gpu col_data = cuda.to_gpu(col) else: col2im = conv.col2im_cpu col_data = col img = col2im(col_data, sy, sx, ph, pw, self.h, self.w, dy=dy, dx=dx) img = cuda.to_cpu(img) self.assertEqual(img.shape, (2, 3, self.h, self.w)) for y in moves.range(self.h): for x in moves.range(self.w): v = numpy.zeros((2, 3), self.dtype) for ky in moves.range(kh): for kx in moves.range(kw): oy = (y + ph - ky * dy) // sy ox = (x + pw - kx * dx) // sx if ((y + ph - ky * dy) % sy == 0 and (x + pw - kx * dx) % sx == 0 and 0 <= oy < col_h and 0 <= ox < col_w): v += col[:, :, ky, kx, oy, ox] testing.assert_allclose(img[:, :, y, x], v)
def check_type_forward(self, in_types): n_in = in_types.size() type_check.expect(n_in == 1) x_type = in_types[0] type_check.expect( x_type.dtype.kind == 'f', x_type.ndim == 4, x_type.shape == self.indexes.shape, ) if self.outh is not None: expected_h = conv.get_conv_outsize(self.outh, self.kh, self.sy, self.ph, cover_all=self.cover_all) type_check.expect(x_type.shape[2] == expected_h) if self.outw is not None: expected_w = conv.get_conv_outsize(self.outw, self.kw, self.sx, self.pw, cover_all=self.cover_all) type_check.expect(x_type.shape[3] == expected_w)
def _process_conv2d(self, function, inputs): x, W = inputs[:2] b = inputs[2] if len(inputs) == 3 else None batch_size, in_c, in_h, in_w = x.shape out_c, _, kh, kw = W.shape out_h = conv.get_conv_outsize(in_h, kh, function.sy, function.ph, cover_all=function.cover_all) out_w = conv.get_conv_outsize(in_w, kw, function.sx, function.pw, cover_all=function.cover_all) ops = 2 * batch_size * in_c * out_c * kw * kh * out_w * out_h # twice because of multiply-and-add if b is not None: ops += batch_size * out_c * out_w * out_h # bias self._print( '%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%f' % (function.label, batch_size, in_w, in_h, in_c, out_w, out_h, out_c, kw, kh, function.pw, function.sx, ops / 1e9)) self.total_ops += ops
def check_im2col(self, kh, kw, sy, sx, ph, pw, gpu): if gpu: im2col = conv.im2col_gpu img = cuda.to_gpu(self.img) else: im2col = conv.im2col_cpu img = self.img col = im2col(img, kh, kw, sy, sx, ph, pw) col_h = conv.get_conv_outsize(self.h, kh, sy, ph) col_w = conv.get_conv_outsize(self.w, kw, sx, pw) self.assertEqual(col.shape, (2, 3, kh, kw, col_h, col_w)) col = cuda.to_cpu(col) for n in moves.range(2): for c in moves.range(3): for y in moves.range(col_h): for x in moves.range(col_w): for dy in moves.range(kh): for dx in moves.range(kw): oy = y * sy - ph + dy ox = x * sx - pw + dx if 0 <= oy < self.h and 0 <= ox < self.w: self.assertEqual(col[n, c, dy, dx, y, x], self.img[n, c, oy, ox]) else: self.assertEqual(col[n, c, dy, dx, y, x], 0)
def check_col2im(self, kh, kw, sy, sx, ph, pw, gpu): col_h = conv.get_conv_outsize(self.h, kh, sy, ph) col_w = conv.get_conv_outsize(self.w, kw, sx, pw) shape = (2, 3, kh, kw, col_h, col_w) col = numpy.random.uniform(-1, 1, shape).astype(numpy.float32) if gpu: col2im = conv.col2im_gpu col_data = cuda.to_gpu(col) else: col2im = conv.col2im_cpu col_data = col img = col2im(col_data, sy, sx, ph, pw, self.h, self.w) img = cuda.to_cpu(img) self.assertEqual(img.shape, (2, 3, self.h, self.w)) for n in moves.range(2): for c in moves.range(3): for y in moves.range(self.h): for x in moves.range(self.w): v = numpy.float32(0.0) for dy in moves.range(kh): for dx in moves.range(kw): oy = (y + ph - dy) // sy ox = (x + pw - dx) // sx if (y + ph - dy) % sy == 0 and \ (x + pw - dx) % sx == 0 and \ 0 <= oy < col_h and \ 0 <= ox < col_w: v += col[n, c, dy, dx, oy, ox] self.assertAlmostEqual(img[n, c, y, x], v)
def check_type_forward(self, in_types): n_in = in_types.size() type_check.expect(2 <= n_in, n_in <= 3) x_type, w_type = in_types[:2] type_check.expect( x_type.dtype == numpy.float32, w_type.dtype == numpy.float32, x_type.ndim == 4, w_type.ndim == 4, x_type.shape[1] == w_type.shape[0] ) if self.outh is not None: type_check.expect( x_type.shape[2] == conv.get_conv_outsize(self.outh, w_type.shape[2], self.sy, self.ph), ) if self.outw is not None: type_check.expect( x_type.shape[3] == conv.get_conv_outsize(self.outw, w_type.shape[3], self.sx, self.pw), ) if n_in.eval() == 3: b_type = in_types[2] type_check.expect( b_type.dtype == numpy.float32, b_type.ndim == 1, b_type.shape[0] == w_type.shape[1] )
def _forward_ideep(self, x): self._in_shape = x[0].shape self._in_dtype = x[0].dtype self.retain_inputs((0,)) n, c, h, w = x[0].shape y_h = conv.get_conv_outsize( h, self.kh, self.sy, self.ph, self.cover_all) assert y_h > 0, 'Height in the output should be positive.' y_w = conv.get_conv_outsize( w, self.kw, self.sx, self.pw, self.cover_all) assert y_w > 0, 'Width in the output should be positive.' self.pd = self.sy * (y_h - 1) + self.kh - h - self.ph self.pr = self.sx * (y_w - 1) + self.kw - w - self.pw pp = intel64.ideep.pooling2DParam( (n, c, y_h, y_w), self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, self.pd, self.pr, intel64.ideep.pooling2DParam.pooling_max) y, self.indexes = intel64.ideep.pooling2D.Forward( intel64.ideep.array(x[0]), pp) return y,
def forward_gpu(self, inputs): if self._used_cudnn: x, = self.mpool2d.get_retained_inputs() return self._forward_gpu_compute_indexes_again((x.data, inputs[0])) else: x, = inputs n, c, h, w = x.shape y_h = conv.get_conv_outsize( h, self.kh, self.sy, self.ph, self.cover_all) assert y_h > 0, 'Height in the output should be positive.' y_w = conv.get_conv_outsize( w, self.kw, self.sx, self.pw, self.cover_all) assert y_w > 0, 'Width in the output should be positive.' y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x.dtype) cuda.elementwise( 'raw T in, raw S indexes, int32 h, int32 w, int32 out_h,' 'int32 out_w, int32 kh, int32 kw, int32 sy, int32 sx,' 'int32 ph, int32 pw', 'T out', ''' int c0 = i / (out_h * out_w); int out_y = i / out_w % out_h; int out_x = i % out_w; int index = indexes[i]; int max_y = max(0, out_y * sy - ph + index / kw); int max_x = max(0, out_x * sx - pw + index % kw); out = in[max_x + w * (max_y + h * c0)]; ''', 'max_pool_grad_fwd')( x.reduced_view(), self.indexes.reduced_view(), h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, y) return y,
def forward_gpu(self, x): if cudnn.enabled and self.use_cudnn: return super(AveragePooling2D, self).forward_gpu(x) n, c, h, w = x[0].shape y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph) y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw) y = cuda.empty((n, c, y_h, y_w), dtype=numpy.float32) coeff = 1. / (self.kh * self.kw) cuda.elementwise( ''' float* out, const float* in, int h, int w, int out_h, int out_w, int kh, int kw, int sy, int sx, int ph, int pw, float coeff ''', ''' int c0 = i / (out_h * out_w); int out_y = i / out_w % out_h; int out_x = i % out_w; int in_y_0 = max(0, out_y * sy - ph); int in_y_1 = min(h, out_y * sy + kh - ph); int in_x_0 = max(0, out_x * sx - pw); int in_x_1 = min(w, out_x * sx + kw - pw); float val = 0; for (int y = in_y_0; y < in_y_1; ++y) { int offset_y = w * (y + h * c0); for (int x = in_x_0; x < in_x_1; ++x) { val += in[x + offset_y]; } } out[i] = val * coeff; ''', 'avg_pool_fwd')(y, x[0], h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, coeff) return y,
def setUp(self): self.x = numpy.random.uniform( size=self.in_shape).astype(self.dtype) kh, kw = _pair(self.ksize) sy, sx = _pair(self.stride) ph, pw = _pair(self.pad) dy, dx = _pair(self.dilate) N, C, H, W = self.in_shape o_H = get_conv_outsize(H, kh, sy, ph, cover_all=self.cover_all, d=dy) o_W = get_conv_outsize(W, kw, sx, pw, cover_all=self.cover_all, d=dx) self.gy = numpy.random.uniform( size=(N, C * kh * kw, o_H, o_W)).astype(self.dtype) self.ggx = numpy.random.uniform( size=self.in_shape).astype(self.dtype) self.check_backward_options = {'atol': 5e-4, 'rtol': 5e-3} if self.dtype is numpy.float16: self.check_backward_options.update({'atol': 1e-3, 'rtol': 1e-2}) self.check_double_backward_options = {'atol': 5e-4, 'rtol': 5e-3} if self.dtype is numpy.float16: self.check_double_backward_options.update( {'atol': 1e-3, 'rtol': 1e-2})
def _set_cover_all(self, x, W): in_h, in_w = x.shape[2:] kh, kw = W.shape[2:] self.cover_all = (in_h != conv.get_conv_outsize( self.outh, kh, self.sy, self.ph, d=self.dy) or in_w != conv.get_conv_outsize( self.outw, kw, self.sx, self.pw, d=self.dx))
def check_type_forward(self, in_types): n_in = in_types.size() type_check.expect(2 <= n_in, n_in <= 3) x_type, w_type = in_types[:2] type_check.expect( x_type.dtype.kind == 'f', w_type.dtype.kind == 'f', x_type.ndim == self.ndim + 2, w_type.ndim == self.ndim + 2, x_type.shape[1] == w_type.shape[0] ) if self.outs is not None: for i, (out, s, p, di) in enumerate(zip( self.outs, self.stride, self.pad, self.dilate)): lower_bound = conv.get_conv_outsize( out, w_type.shape[i + 2], s, p, d=di) upper_bound = conv.get_conv_outsize( out, w_type.shape[i + 2], s, p, cover_all=True, d=di) type_check.expect( lower_bound <= x_type.shape[i + 2], x_type.shape[i + 2] <= upper_bound) if type_check.eval(n_in) == 3: b_type = in_types[2] type_check.expect( b_type.dtype == x_type.dtype, b_type.ndim == 1, # Need to consider the case that group count > 1. # b_type.shape[0] == w_type.shape[1] )
def forward_gpu(self, x): n, c, h, w = x[0].shape out_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph) out_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw) out_c = self.W.shape[0] y = cuda.empty((n, out_c, out_h, out_w), dtype=self.dtype) if cuda.cudnn_enabled and self.use_cudnn: handle = cudnn.get_handle() x_desc = cudnn.create_tensor_descriptor(x[0]) y_desc = cudnn.create_tensor_descriptor(y) self.filter_desc = cudnn.create_filter_descriptor(self.W) self.conv_desc = cudnn.create_convolution_descriptor( (self.ph, self.pw), (self.sy, self.sx)) if self.b is not None: self.bias_desc = cudnn.create_tensor_descriptor( self.b[None, :, None, None]) algo = libcudnn.getConvolutionForwardAlgorithm( handle, x_desc.value, self.filter_desc.value, self.conv_desc.value, y_desc.value, _fwd_pref, self.max_workspace_size) workspace_size = libcudnn.getConvolutionForwardWorkspaceSize( handle, x_desc.value, self.filter_desc.value, self.conv_desc.value, y_desc.value, algo) workspace = cuda.empty( (max(workspace_size // 4, 1),), dtype=self.dtype) one = ctypes.c_float(1) zero = ctypes.c_float(0) libcudnn.convolutionForward( handle, one, x_desc.value, x[0].data.ptr, self.filter_desc.value, self.W.data.ptr, self.conv_desc.value, algo, workspace.data.ptr, workspace_size, zero, y_desc.value, y.data.ptr) # TODO(beam2d): Support unshared bias if self.b is not None: libcudnn.addTensor( handle, libcudnn.CUDNN_ADD_SAME_C, one, self.bias_desc.value, self.b.data.ptr, one, y_desc.value, y.data.ptr) else: # Implementation using im2col self.col = conv.im2col_gpu( x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw) # TODO(beam2d): Use streams W_mat = self.W.reshape(out_c, c * self.kh * self.kw) col_mats = self.col.reshape( n, c * self.kh * self.kw, out_h * out_w) y_mats = y.reshape(n, out_c, out_h * out_w) for i in moves.range(n): y_mats[i] = W_mat.dot(col_mats[i]) # TODO(beam2d): Support unshared bias if self.b is not None: y += self.b.reshape((1, out_c, 1, 1)) return y,
def _set_cover_all(self, x_shape, w_shape): _, _, kh, kw = w_shape _, _, in_h, in_w = x_shape self.cover_all = (in_h != conv.get_conv_outsize( self.outh, kh, self.sy, self.ph, d=self.dy) or in_w != conv.get_conv_outsize( self.outw, kw, self.sx, self.pw, d=self.dx))
def _set_cover_all(self, x, W): in_h, in_w = x.shape[2:] kh, kw = W.shape[2:] self.cover_all = ( in_h != conv.get_conv_outsize(self.outh, kh, self.sy, self.ph, d=self.dy) or in_w != conv.get_conv_outsize(self.outw, kw, self.sx, self.pw, d=self.dx))
def _create_cc(self, x, gy, hint, y, ws, ksize, stride, pad, cover_all, e): self.ksize = ksize self.stride = stride self.pad = pad self.cover_all = cover_all self.x = array(x, m.memory.nchw, e) gy = array(gy, m.memory.nchw, e) if self.alg_kind is pooling_max: gy_md = y.memory.get_primitive_desc().desc() else: gy_md = gy.memory.get_primitive_desc().desc() gx_md = m.desc(x.shape, m.memory.f32, m.memory.any) # x_md = self.x.memory.get_primitive_desc().desc() n, c, h, w = x.shape sy, sx = _pair(stride) kh, kw = _pair(ksize) p_upper, p_left = _pair(pad) yh = conv.get_conv_outsize(h, kh, sy, p_upper, cover_all=cover_all) assert yh > 0, 'Height in the output should be positive.' yw = conv.get_conv_outsize(w, kw, sx, p_left, cover_all=cover_all) assert yw > 0, 'Width in the output should be positive.' p_down = sy * (yh - 1) + kh - h - p_upper p_right = sx * (yw - 1) + kw - w - p_left cc_d = pooling_backward.desc(self.alg_kind, gx_md, gy_md, stride, ksize, (p_upper, p_left), (p_down, p_right), zero) cc_pd = pooling_backward.primitive_desc(cc_d, e, hint) gx = mdarray(cc_pd.diff_src_primitive_desc()) if self.alg_kind is pooling_max: # For max pooling reorder y if needed outputs = reorder_if_must(gy, y.memory.get_primitive_desc(), e, self.dag_) if len(outputs) == 2: self.reordered_gy, self.itm_arr = outputs[:2] else: self.reordered_gy = outputs[0] self.dag_.push_back( pooling_backward.pooling_backward( cc_pd, at(self.reordered_gy.memory), at(ws.memory), gx.memory)) else: # There is no workspace for average pooling self.dag_.push_back( pooling_backward.pooling_backward(cc_pd, at(gy.memory), gx.memory)) self._hint = hint self.gy = gy self.outputs = gx,
def forward_gpu(self, x): if (chainer.should_use_cudnn('>=auto') and pooling_2d._check_cudnn_acceptable_type(x[0].dtype)): return super(MaxPooling2D, self).forward_gpu(x) self.retain_inputs(()) self._in_shape = x[0].shape self._in_dtype = x[0].dtype n, c, h, w = x[0].shape y_h = conv.get_conv_outsize( h, self.kh, self.sy, self.ph, self.cover_all) assert y_h > 0, 'Height in the output should be positive.' y_w = conv.get_conv_outsize( w, self.kw, self.sx, self.pw, self.cover_all) assert y_w > 0, 'Width in the output should be positive.' y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype) self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32) cuda.elementwise( 'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,' 'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw', 'T out, S indexes', ''' int c0 = i / (out_h * out_w); int out_y = i / out_w % out_h; int out_x = i % out_w; int in_y_0 = max(0, out_y * sy - ph); int in_y_1 = min(h, out_y * sy + kh - ph); int in_x_0 = max(0, out_x * sx - pw); int in_x_1 = min(w, out_x * sx + kw - pw); T maxval = in[in_x_0 + w * (in_y_0 + h * c0)]; int argmax_y = in_y_0; int argmax_x = in_x_0; for (int y = in_y_0; y < in_y_1; ++y) { int offset_y = w * (y + h * c0); for (int x = in_x_0; x < in_x_1; ++x) { float v = in[x + offset_y]; if (maxval < v) { maxval = v; argmax_y = y; argmax_x = x; } } } out = maxval; int argmax_ky = argmax_y + ph - out_y * sy; int argmax_kx = argmax_x + pw - out_x * sx; indexes = argmax_kx + kw * argmax_ky; ''', 'max_pool_fwd')(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, y, self.indexes) return y,
def forward_gpu(self, x): if chainer.should_use_cudnn('>=auto'): self.retain_inputs((0,)) return super(MaxPooling2D, self).forward_gpu(x) self._in_shape = x[0].shape self._in_dtype = x[0].dtype n, c, h, w = x[0].shape y_h = conv.get_conv_outsize( h, self.kh, self.sy, self.ph, self.cover_all) assert y_h > 0, 'Height in the output should be positive.' y_w = conv.get_conv_outsize( w, self.kw, self.sx, self.pw, self.cover_all) assert y_w > 0, 'Width in the output should be positive.' y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype) self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32) cuda.elementwise( 'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,' 'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw', 'T out, S indexes', ''' int c0 = i / (out_h * out_w); int out_y = i / out_w % out_h; int out_x = i % out_w; int in_y_0 = max(0, out_y * sy - ph); int in_y_1 = min(h, out_y * sy + kh - ph); int in_x_0 = max(0, out_x * sx - pw); int in_x_1 = min(w, out_x * sx + kw - pw); T maxval = in[in_x_0 + w * (in_y_0 + h * c0)]; int argmax_y = in_y_0; int argmax_x = in_x_0; for (int y = in_y_0; y < in_y_1; ++y) { int offset_y = w * (y + h * c0); for (int x = in_x_0; x < in_x_1; ++x) { float v = in[x + offset_y]; if (maxval < v) { maxval = v; argmax_y = y; argmax_x = x; } } } out = maxval; int argmax_ky = argmax_y + ph - out_y * sy; int argmax_kx = argmax_x + pw - out_x * sx; indexes = argmax_kx + kw * argmax_ky; ''', 'max_pool_fwd')(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, y, self.indexes) return y,
def infer_return(self, conv, x_type): ksize = make_pair(conv.ksize) stride = make_pair(conv.stride) pad = make_pair(conv.pad) dilate = make_pair(conv.dilate) shape_2 = get_conv_outsize( x_type.shape[2], ksize[0], stride[0], pad[0], d=dilate[0]) shape_3 = get_conv_outsize( x_type.shape[3], ksize[1], stride[1], pad[1], d=dilate[1]) ret_shape = (x_type.shape[0], conv.out_channels, shape_2, shape_3) return TyChainerVariable(x_type.dtype, shape=ret_shape)
def __call__(self, x): xp = cuda.get_array_module(x.data) l = conv.get_conv_outsize(x.data.shape[2], self.ksize[0], self.stride[0], self.pad[0]) h = conv.get_conv_outsize(x.data.shape[3], self.ksize[1], self.stride[1], self.pad[1]) w = conv.get_conv_outsize(x.data.shape[4], self.ksize[2], self.stride[2], self.pad[2]) shape = (len(x.data), self.out_channels, l, h, w) return variable.Variable(xp.random.uniform(-1, 1, shape).astype(x.data.dtype), volatile=x.volatile)
def forward_gpu(self, x): n, c, h, w = x[0].shape y_h = conv.get_conv_outsize( h, self.kh, self.sy, self.ph, self.cover_all) y_w = conv.get_conv_outsize( w, self.kw, self.sx, self.pw, self.cover_all) y = cuda.cupy.zeros((n, c, y_h, y_w), dtype=x[0].dtype) self.indexes = cuda.cupy.zeros((n, c, y_h, y_w), dtype=numpy.int32) indices_unpooling = cuda.cupy.zeros((n, c, y_h, y_w), dtype=numpy.int32) cuda.elementwise( 'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,' 'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw', 'T out, S indexes, S indices_unpooling', ''' int c0 = (int)floor(double(i) / double(out_h * out_w)); int out_y = int(floor(double(i) / double(out_w))) % out_h; int out_x = i % out_w; int in_y_0 = max(0, out_y * sy - ph); int in_y_1 = min(h, out_y * sy + kh - ph); int in_x_0 = max(0, out_x * sx - pw); int in_x_1 = min(w, out_x * sx + kw - pw); T maxval = in[in_x_0 + w * (in_y_0 + h * c0)]; int argmax_y = in_y_0; int argmax_x = in_x_0; int max_ind = in_x_0 + w * (in_y_0 + h * c0); for (int y = in_y_0; y < in_y_1; ++y) { int offset_y = w * (y + h * c0); for (int x = in_x_0; x < in_x_1; ++x) { float v = in[x + offset_y]; if (maxval < v) { maxval = v; argmax_y = y; argmax_x = x; max_ind = x + offset_y; } } } out = maxval; int argmax_ky = argmax_y + ph - out_y * sy; int argmax_kx = argmax_x + pw - out_x * sx; indexes = argmax_kx + kw * argmax_ky; indices_unpooling = max_ind; ''', 'max_pool_fwd')(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, y, self.indexes, indices_unpooling) return (y, indices_unpooling, cuda.cupy.array([n, c, h, w]))
def forward_gpu(self, x): """ Commented away since we need the indexes for the unpooling process. if cuda.cudnn_enabled and self.use_cudnn: return super(MaxPooling2D, self).forward_gpu(x), self.indexes """ n, c, h, w = x[0].shape y_h = conv.get_conv_outsize( h, self.kh, self.sy, self.ph, self.cover_all) y_w = conv.get_conv_outsize( w, self.kw, self.sx, self.pw, self.cover_all) y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype) self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32) cuda.elementwise( 'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,' 'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw', 'T out, S indexes', ''' int c0 = i / (out_h * out_w); int out_y = i / out_w % out_h; int out_x = i % out_w; int in_y_0 = max(0, out_y * sy - ph); int in_y_1 = min(h, out_y * sy + kh - ph); int in_x_0 = max(0, out_x * sx - pw); int in_x_1 = min(w, out_x * sx + kw - pw); T maxval = in[in_x_0 + w * (in_y_0 + h * c0)]; int argmax_y = in_y_0; int argmax_x = in_x_0; for (int y = in_y_0; y < in_y_1; ++y) { int offset_y = w * (y + h * c0); for (int x = in_x_0; x < in_x_1; ++x) { float v = in[x + offset_y]; if (maxval < v) { maxval = v; argmax_y = y; argmax_x = x; } } } out = maxval; int argmax_ky = argmax_y + ph - out_y * sy; int argmax_kx = argmax_x + pw - out_x * sx; indexes = argmax_kx + kw * argmax_ky; ''', 'max_pool_fwd')(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, y, self.indexes) return y, self.indexes
def check_type_forward(self, in_types): n_in = in_types.size() type_check.expect(n_in == 1) x_type = in_types[0] type_check.expect(x_type.dtype.kind == "f", x_type.ndim == 4) if self.outh is not None: expected_h = conv.get_conv_outsize(self.outh, self.kh, self.sy, self.ph, cover_all=self.cover_all) type_check.expect(x_type.shape[2] == expected_h) if self.outw is not None: expected_w = conv.get_conv_outsize(self.outw, self.kw, self.sx, self.pw, cover_all=self.cover_all) type_check.expect(x_type.shape[3] == expected_w)
def _get_out_size(self, inputs): x, W = inputs[:2] _, _, kh, kw = W.shape _, _, h, w = x.shape out_h = conv.get_conv_outsize( h, kh, self.sy, self.ph, cover_all=self.cover_all, d=self.dy) if out_h <= 0: raise RuntimeError('Height in the output should be positive.') out_w = conv.get_conv_outsize( w, kw, self.sx, self.pw, cover_all=self.cover_all, d=self.dx) if out_w <= 0: raise RuntimeError('Width in the output should be positive.') return out_h, out_w
def max_pooling_3d(x, ksize, stride=None, pad=0, use_cudnn=True): xp = cuda.get_array_module(x.data) if stride is None: stride = ksize ksize = _triplet(ksize) stride = _triplet(stride) pad = _triplet(pad) l = conv.get_conv_outsize(x.data.shape[2], ksize[0], stride[0], pad[0]) h = conv.get_conv_outsize(x.data.shape[3], ksize[1], stride[1], pad[1]) w = conv.get_conv_outsize(x.data.shape[4], ksize[2], stride[2], pad[2]) shape = (len(x.data), x.data.shape[1], l, h, w) return variable.Variable(xp.random.uniform(-1, 1, shape).astype(x.data.dtype), volatile=x.volatile)
def forward_gpu(self, x): if cudnn.enabled and self.use_cudnn: return super(MaxPooling2D, self).forward_gpu(x) n, c, h, w = x[0].shape y_h = conv.get_conv_outsize( h, self.kh, self.sy, self.ph, self.cover_all) y_w = conv.get_conv_outsize( w, self.kw, self.sx, self.pw, self.cover_all) y = cuda.empty((n, c, y_h, y_w), dtype=numpy.float32) self.indexes = cuda.empty((n, c, y_h, y_w), dtype=numpy.int32) cuda.elementwise( ''' float* out, int* indexes, const float* in, int h, int w, int out_h, int out_w, int kh, int kw, int sy, int sx, int ph, int pw ''', ''' int c0 = i / (out_h * out_w); int out_y = i / out_w % out_h; int out_x = i % out_w; int in_y_0 = max(0, out_y * sy - ph); int in_y_1 = min(h, out_y * sy + kh - ph); int in_x_0 = max(0, out_x * sx - pw); int in_x_1 = min(w, out_x * sx + kw - pw); float maxval = in[in_x_0 + w * (in_y_0 + h * c0)]; int argmax_y = in_y_0; int argmax_x = in_x_0; for (int y = in_y_0; y < in_y_1; ++y) { int offset_y = w * (y + h * c0); for (int x = in_x_0; x < in_x_1; ++x) { float v = in[x + offset_y]; if (maxval < v) { maxval = v; argmax_y = y; argmax_x = x; } } } out[i] = maxval; int argmax_ky = argmax_y + ph - out_y * sy; int argmax_kx = argmax_x + pw - out_x * sx; indexes[i] = argmax_kx + kw * argmax_ky; ''', 'max_pool_fwd')(y, self.indexes, x[0], h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw) return y,
def forward_gpu(self, x): if cudnn.enabled and self.use_cudnn: return super(MaxPooling2D, self).forward_gpu(x) n, c, h, w = x[0].shape y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph, self.cover_all) y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw, self.cover_all) y = cuda.empty((n, c, y_h, y_w), dtype=numpy.float32) self.indexes = cuda.empty((n, c, y_h, y_w), dtype=numpy.int32) cuda.elementwise( ''' float* out, int* indexes, const float* in, int h, int w, int out_h, int out_w, int kh, int kw, int sy, int sx, int ph, int pw ''', ''' int c0 = i / (out_h * out_w); int out_y = i / out_w % out_h; int out_x = i % out_w; int in_y_0 = max(0, out_y * sy - ph); int in_y_1 = min(h, out_y * sy + kh - ph); int in_x_0 = max(0, out_x * sx - pw); int in_x_1 = min(w, out_x * sx + kw - pw); float maxval = in[in_x_0 + w * (in_y_0 + h * c0)]; int argmax_y = in_y_0; int argmax_x = in_x_0; for (int y = in_y_0; y < in_y_1; ++y) { int offset_y = w * (y + h * c0); for (int x = in_x_0; x < in_x_1; ++x) { float v = in[x + offset_y]; if (maxval < v) { maxval = v; argmax_y = y; argmax_x = x; } } } out[i] = maxval; int argmax_ky = argmax_y + ph - out_y * sy; int argmax_kx = argmax_x + pw - out_x * sx; indexes[i] = argmax_kx + kw * argmax_ky; ''', 'max_pool_fwd')(y, self.indexes, x[0], h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw) return y,
def forward_gpu(self, x): if cuda.cudnn_enabled and self.use_cudnn: return super(MaxPooling2D, self).forward_gpu(x) n, c, h, w = x[0].shape y_h = conv.get_conv_outsize( h, self.kh, self.sy, self.ph, self.cover_all) y_w = conv.get_conv_outsize( w, self.kw, self.sx, self.pw, self.cover_all) y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype) self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32) cuda.elementwise( 'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,' 'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw', 'T out, S indexes', ''' int c0 = i / (out_h * out_w); int out_y = i / out_w % out_h; int out_x = i % out_w; int in_y_0 = max(0, out_y * sy - ph); int in_y_1 = min(h, out_y * sy + kh - ph); int in_x_0 = max(0, out_x * sx - pw); int in_x_1 = min(w, out_x * sx + kw - pw); T maxval = in[in_x_0 + w * (in_y_0 + h * c0)]; int argmax_y = in_y_0; int argmax_x = in_x_0; for (int y = in_y_0; y < in_y_1; ++y) { int offset_y = w * (y + h * c0); for (int x = in_x_0; x < in_x_1; ++x) { float v = in[x + offset_y]; if (maxval < v) { maxval = v; argmax_y = y; argmax_x = x; } } } out = maxval; int argmax_ky = argmax_y + ph - out_y * sy; int argmax_kx = argmax_x + pw - out_x * sx; indexes = argmax_kx + kw * argmax_ky; ''', 'max_pool_fwd')(x[0].reduced_view(), h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, y, self.indexes) return y,
def setUp(self): self.x = numpy.random.uniform(size=self.in_shape).astype(numpy.float32) kh, kw = _pair(self.ksize) sy, sx = _pair(self.stride) ph, pw = _pair(self.pad) dy, dx = _pair(self.dilate) N, C, H, W = self.in_shape o_H = get_conv_outsize(H, kh, sy, ph, cover_all=self.cover_all, d=dy) o_W = get_conv_outsize(W, kw, sx, pw, cover_all=self.cover_all, d=dx) self.gy = numpy.random.uniform(size=(N, C * kh * kw, o_H, o_W)).astype(numpy.float32)
def setUp(self): self.ndim = len(self.dims) self.ksize = (3, ) * self.ndim self.stride = (2, ) * self.ndim self.pad = (1, ) * self.ndim # Avoid unstability of numerical gradient x_shape = (2, 3) + self.dims self.x = numpy.arange(functools.reduce(mul, x_shape), dtype=self.dtype).reshape(x_shape) self.x = 2 * self.x / self.x.size - 1 outs = tuple( conv.get_conv_outsize(d, k, s, p, self.cover_all) for (d, k, s, p) in six.moves.zip(self.dims, self.ksize, self.stride, self.pad)) gy_shape = (2, 3) + outs self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype) self.check_backward_options = {'eps': 2.0**-8} if self.dtype == numpy.float16: self.check_backward_options = { 'eps': 2.0**-8, 'atol': 1e-03, 'rtol': 1e-03 }
def _set_cover_all(self, x, W): x_shape = x.shape[2:] k_shape = W.shape[2:] self.cover_all = any( ix != conv.get_conv_outsize(oy, k, s, p) for (ix, oy, k, s, p) in zip(x_shape, self.outs, k_shape, self.stride, self.pad))
def setUp(self): self.ndim = len(self.dims) self.ksize = (3,) * self.ndim self.stride = (2,) * self.ndim self.pad = (1,) * self.ndim # Avoid unstability of numerical gradient x_shape = (2, 3) + self.dims self.x = numpy.arange( functools.reduce(mul, x_shape), dtype=self.dtype).reshape(x_shape) self.x = 2 * self.x / self.x.size - 1 outs = tuple(conv.get_conv_outsize(d, k, s, p, self.cover_all) for (d, k, s, p) in six.moves.zip( self.dims, self.ksize, self.stride, self.pad)) gy_shape = (2, 3) + outs self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype) self.ggx = numpy.random.uniform( -1, 1, x_shape).astype(self.dtype) self.check_backward_options = {} if self.dtype == numpy.float16: self.check_backward_options = { 'atol': 1e-3, 'rtol': 1e-2} self.check_double_backward_options = { 'atol': 1e-3, 'rtol': 1e-2} else: self.check_backward_options = { 'atol': 1e-4, 'rtol': 1e-3} self.check_double_backward_options = { 'atol': 1e-4, 'rtol': 1e-3}
def check_type_forward(self, in_types): n_in = in_types.size() type_check.expect(2 <= n_in, n_in <= 3) x_type, w_type = in_types[:2] type_check.expect( x_type.dtype.kind == 'f', w_type.dtype.kind == 'f', x_type.ndim == self.ndim + 2, w_type.ndim == self.ndim + 2, x_type.shape[1] == w_type.shape[0] ) if self.outs is not None: for i, (out, s, p) in enumerate(zip( self.outs, self.stride, self.pad)): type_check.expect( x_type.shape[i + 2] == conv.get_conv_outsize(out, w_type.shape[i + 2], s, p) ) if type_check.eval(n_in) == 3: b_type = in_types[2] type_check.expect( b_type.dtype == x_type.dtype, b_type.ndim == 1, b_type.shape[0] == w_type.shape[1] )
def setUp(self): in_channels = 3 out_channels = 2 ndim = len(self.dims) ksize = (3,) * ndim self.stride = (2,) * ndim self.pad = (1,) * ndim W_scale = numpy.sqrt(1. / functools.reduce(mul, ksize, in_channels)) W_shape = (out_channels, in_channels) + ksize self.W = numpy.random.normal(0, W_scale, W_shape).astype(self.W_dtype) self.b = numpy.random.uniform(-1, 1, out_channels).astype(self.x_dtype) x_shape = (2, 3) + self.dims self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.x_dtype) gy_shape = (2, 2) + tuple( conv.get_conv_outsize(d, k, s, p, cover_all=self.cover_all) for (d, k, s, p) in zip(self.dims, ksize, self.stride, self.pad)) self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.x_dtype) self.check_forward_options = {} self.check_backward_options = {'dtype': numpy.float64} if self.x_dtype == numpy.float16 or self.W_dtype == numpy.float16: self.check_forward_options = {'atol': 5e-4, 'rtol': 5e-3} self.check_backward_options = { 'dtype': numpy.float64, 'atol': 2 ** -4, 'rtol': 2 ** -4}
def test_valid_insize(self): N = self.N c = self.c ksize = self.ksize stride = self.stride pad = self.pad outs = self.outsize cover_all = self.cover_all # Make input. dims = tuple( conv.get_conv_outsize(out, k, s, p, cover_all=cover_all) for (out, k, s, p) in zip(outs, ksize, stride, pad)) x_shape = (N, c) + dims x_data = numpy.random.uniform(-1, 1, x_shape).astype(numpy.float32) x = chainer.Variable(x_data) # Compute unpooling. y = functions.unpooling_nd(x, ksize, stride, pad, outsize=outs, cover_all=cover_all) # Test output's value. y_expected = expected_unpooling_nd(x_data, outs, ksize, stride, pad) testing.assert_allclose(y_expected, y.data)
def im2col_nd_cpu(img, ksize, stride, pad, pval=0, cover_all=False): n, c = img.shape[0:2] # (n, c, d_1, d_2, ..., d_N) dims = img.shape[2:] ndim = len(dims) assert ndim == len(ksize) == len(stride) == len(pad) outs = tuple(get_conv_outsize(d, k, s, p, cover_all) for (d, k, s, p) in zip(dims, ksize, stride, pad)) assert all(out > 0 for out in outs), 'Output sizes should be positive.' # Pad around image. pad_width = ((0, 0), (0, 0)) + tuple( (p, p + s - 1) for (s, p) in zip(stride, pad)) img = numpy.pad(img, pad_width, mode='constant', constant_values=(pval,)) # Make patch array with which we will compute correlation with filter. # shape: (n, c, k_1, k_2, ..., k_N, out_1, out_2, ..., out_N) shape = (n, c) + ksize + outs col = numpy.ndarray(shape, dtype=img.dtype) # Fill the patch array. colon = slice(None) for kxs in itertools.product(*[six.moves.range(k) for k in ksize]): # col[:, :, kx_1, kx_2, ..., kx_N, :, :, ..., :] col_index = (colon, colon) + kxs + (colon,) * ndim # img[:, :, kx_1:kx_lim_1:s_1, ..., kx_N:kx_lim_N:s_N] kx_lims = tuple(kx + s * out for (kx, s, out) in zip(kxs, stride, outs)) img_index = (colon, colon) + tuple( slice(kx, kx_lim, s) for (kx, kx_lim, s) in zip(kxs, kx_lims, stride)) col[col_index] = img[img_index] return col
def check_conv_outsize(self, size, k, s, p, d): # When cover_all == False, `outsize` is the maximum integer that # satisfies "(outsize - 1) * s + k <= w" w = size + p * 2 dk = k + (k - 1) * (d - 1) outsize = conv.get_conv_outsize(size, k, s, p, cover_all=False, d=d) self.assertTrue((outsize - 1) * s + dk <= w < outsize * s + dk)
def check_conv_outsize_cover_all(self, size, k, s, p, d): # When cover_all == True, `outsize` is the minimum integer that # satisfies "w <= (outsize - 1) * s + k" w = size + p * 2 dk = k + (k - 1) * (d - 1) outsize = conv.get_conv_outsize(size, k, s, p, cover_all=True, d=d) self.assertTrue((outsize - 2) * s + dk < w <= (outsize - 1) * s + dk)
def forward_gpu(self, x): self._used_cudnn = True # Implementation using cuDNN. x = cuda.cupy.ascontiguousarray(x[0]) n, c = x.shape[:2] dims = x.shape[2:] ys = tuple( conv.get_conv_outsize(d, k, s, p, self.cover_all) for d, k, s, p in six.moves.zip(dims, self.ksize, self.stride, self.pad)) y_shape = (n, c) + ys y = cuda.cupy.empty(y_shape, dtype=x.dtype) handle = cudnn.get_handle() pool_desc = self.create_pool_desc() x_desc = cudnn.create_tensor_descriptor(x) y_desc = cudnn.create_tensor_descriptor(y) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = numpy.array(1, dtype=oz_dtype).ctypes zero = numpy.array(0, dtype=oz_dtype).ctypes libcudnn.poolingForward(handle, pool_desc.value, one.data, x_desc.value, x.data.ptr, zero.data, y_desc.value, y.data.ptr) self.retain_outputs((0, )) return y,
def setUp(self): self.ndim = len(self.dims) self.ksize = (3, ) * self.ndim self.stride = (2, ) * self.ndim self.pad = (1, ) * self.ndim x_shape = (2, 3) + self.dims self.x = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype) outs = tuple( conv.get_conv_outsize(d, k, s, p, False) for (d, k, s, p) in six.moves.zip(self.dims, self.ksize, self.stride, self.pad)) gy_shape = (2, 3) + outs self.gy = numpy.random.uniform(-1, 1, gy_shape).astype(self.dtype) self.ggx = numpy.random.uniform(-1, 1, x_shape).astype(self.dtype) self.check_forward_options = {} self.check_backward_options = {'eps': 1e-2} if self.dtype == numpy.float16: self.check_forward_options = {'atol': 5e-4, 'rtol': 5e-3} self.check_backward_options = { 'eps': 1e-2, 'atol': 5e-3, 'rtol': 5e-2 }
def forward_gpu(self, x): self._used_cudnn = True # Implementation using cuDNN. x = cuda.cupy.ascontiguousarray(x[0]) n, c = x.shape[:2] dims = x.shape[2:] ys = tuple(conv.get_conv_outsize(d, k, s, p, self.cover_all) for d, k, s, p in six.moves.zip( dims, self.ksize, self.stride, self.pad)) y_shape = (n, c) + ys y = cuda.cupy.empty(y_shape, dtype=x.dtype) handle = cudnn.get_handle() pool_desc = self.create_pool_desc() x_desc = cudnn.create_tensor_descriptor(x) y_desc = cudnn.create_tensor_descriptor(y) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = numpy.array(1, dtype=oz_dtype).ctypes zero = numpy.array(0, dtype=oz_dtype).ctypes libcudnn.poolingForward( handle, pool_desc.value, one.data, x_desc.value, x.data.ptr, zero.data, y_desc.value, y.data.ptr) self.retain_outputs((0,)) return y,