def check_forward(self, x_data, t_data, w_data, samples_data):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    w = chainer.Variable(w_data)
    samples = chainer.Variable(samples_data)
    y = functions.black_out(x, t, w, samples, self.reduce)

    expect_y = numpy.empty((self.batch_size), dtype=numpy.float32)
    for b in range(self.batch_size):
        z = 0
        for i in range(self.n_samples):
            w = self.samples[b, i]
            z += numpy.exp(self.W[w].dot(self.x[b]))
        y0 = self.W[self.t[b]].dot(self.x[b])
        z += numpy.exp(y0)
        l = y0 - numpy.log(z)
        for i in range(self.n_samples):
            w = self.samples[b, i]
            l += numpy.log(1 - numpy.exp(self.W[w].dot(self.x[b])) / z)
        expect_y[b] = l

    if self.reduce == 'mean':
        loss = -numpy.sum(expect_y) / self.batch_size
    else:
        loss = -expect_y
    testing.assert_allclose(y.data, loss, atol=1.e-4)

def check_forward(self, x1_data, x2_data, y_expected):
    x1 = chainer.Variable(x1_data)
    x2 = chainer.Variable(x2_data)
    y = functions.minimum(x1, x2)
    self.assertEqual(y.data.dtype, self.dtype)
    testing.assert_allclose(
        y_expected, y.data, **self.check_forward_options)

def check_rtol(self, x, y):
    x_cpu = cuda.to_cpu(x)
    y_cpu = cuda.to_cpu(y)
    max_ratio = numpy.max(numpy.abs(x_cpu - y_cpu) / y_cpu)
    with self.assertRaises(AssertionError):
        testing.assert_allclose(x, y, atol=0, rtol=max_ratio - 1)
    testing.assert_allclose(x, y, atol=0, rtol=max_ratio + 1)

def check_forward(self, x_data, axis=None):
    x = chainer.Variable(x_data)
    y = functions.logsumexp(x, axis=axis)
    self.assertEqual(y.data.dtype, self.dtype)
    y_expect = numpy.log(numpy.exp(self.x).sum(axis=axis))
    testing.assert_allclose(
        y_expect, y.data, **self.check_forward_option)

def check_compare_naive(self, args, stats, y_grad):
    def compute(f):
        x, gamma, beta = [chainer.Variable(v.copy()) for v in args]
        running_mean, running_var = [v.copy() for v in stats]
        y = f(x, gamma, beta, running_mean, running_var)
        y.grad = y_grad.copy()
        y.backward()
        return y.array, x.grad, gamma.grad, beta.grad

    def f_tested(x, gamma, beta, running_mean, running_var):
        return batch_renormalization.batch_renormalization(
            x, gamma, beta, self.rmax, self.dmax, eps=self.eps,
            running_mean=running_mean, running_var=running_var)

    def f_expected(x, gamma, beta, running_mean, running_var):
        return _naive_batch_renormalization(
            x, gamma, beta, self.rmax, self.dmax, self.eps,
            avg_mean=running_mean,
            avg_std=(self.eps + running_var) ** 0.5,
            axis=self.aggr_axes)

    tested = compute(f_tested)
    expected = compute(f_expected)

    # test forward
    testing.assert_allclose(
        tested[0], expected[0], **self.check_forward_options)

    # test backward
    for g, g_expected in zip(tested[1:], expected[1:]):
        testing.assert_allclose(
            g, g_expected, **self.check_backward_options)

def check_reference(self, x):
    # A returned value and an input refer to the same memory.
    # See issue #488
    def func():
        return x,

    gx, = gradient_check.numerical_grad(func, (x,), (1,))
    testing.assert_allclose(cuda.to_cpu(gx), 1)

def check_atol(self, x, y):
    x_cpu = cuda.to_cpu(x)
    y_cpu = cuda.to_cpu(y)
    max_abs_diff = numpy.max(numpy.abs(x_cpu - y_cpu))
    with self.assertRaises(AssertionError):
        testing.assert_allclose(x, y, atol=max_abs_diff - 1, rtol=0)
    testing.assert_allclose(x, y, atol=max_abs_diff + 1, rtol=0)

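# Aside (illustration only, not part of the test suite): the atol/rtol boundary
# checks above rely on the usual NumPy closeness criterion, which chainer's
# testing.assert_allclose is assumed to follow:
#     |actual - desired| <= atol + rtol * |desired|
# A minimal standalone sketch of the same "fails just below, passes just above"
# pattern, using plain numpy.testing:
import numpy

actual = numpy.array([1.0, 2.0, 3.0], dtype=numpy.float32)
desired = numpy.array([1.1, 2.0, 2.9], dtype=numpy.float32)

max_abs_diff = numpy.max(numpy.abs(actual - desired))
max_ratio = numpy.max(numpy.abs(actual - desired) / numpy.abs(desired))

# Slightly below the measured bound the comparison must fail ...
try:
    numpy.testing.assert_allclose(actual, desired,
                                  atol=max_abs_diff * 0.99, rtol=0)
except AssertionError:
    pass
# ... and slightly above it the comparison must pass.
numpy.testing.assert_allclose(actual, desired, atol=max_abs_diff * 1.01, rtol=0)
numpy.testing.assert_allclose(actual, desired, atol=0, rtol=max_ratio * 1.01)
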
def check_forward(self, h_data, xs_data, ws_data, bs_data):
    h = _wrap_variable(h_data)
    xs = _wrap_variable(xs_data)
    ws = _wrap_variable(ws_data)
    bs = _wrap_variable(bs_data)
    hy, ys = functions.n_step_gru(
        self.n_layers, self.dropout, h, ws, bs, xs)

    e_hy = self.hx.copy()
    for ind in range(self.length):
        x = self.xs[ind]
        batch = x.shape[0]
        for layer in range(self.n_layers):
            w = self.ws[layer]
            b = self.bs[layer]
            h_prev = e_hy[layer, :batch]

            # GRU
            z = sigmoid(x.dot(w[1].T) + h_prev.dot(w[4].T) + b[1] + b[4])
            r = sigmoid(x.dot(w[0].T) + h_prev.dot(w[3].T) + b[0] + b[3])
            h_bar = numpy.tanh(x.dot(w[2].T) +
                               r * ((h_prev).dot(w[5].T) + b[5]) + b[2])
            e_h = (1 - z) * h_bar + z * h_prev

            e_hy[layer, :batch] = e_h
            x = e_h

        testing.assert_allclose(
            ys[ind].data, x, rtol=1e-4, atol=1e-4)

    testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)

def check_backward_consistency_regression(self, x_data, gy_data,
                                           use_cudnn=True):
    # Regression test against the two-dimensional max pooling layer.
    if len(self.dims) != 2:
        return

    ksize = self.ksize
    stride = self.stride
    pad = self.pad
    xp = cuda.get_array_module(x_data)

    # Backward computation for N-dimensional max pooling layer.
    x_nd = chainer.Variable(xp.array(x_data))
    func_nd = functions.MaxPoolingND(self.ndim, ksize, stride=stride,
                                     pad=pad, use_cudnn=use_cudnn,
                                     cover_all=self.cover_all)
    y_nd = func_nd(x_nd)
    y_nd.grad = gy_data
    y_nd.backward()

    # Backward computation for two-dimensional max pooling layer.
    x_2d = chainer.Variable(xp.array(x_data))
    func_2d = functions.MaxPooling2D(ksize, stride=stride, pad=pad,
                                     use_cudnn=use_cudnn,
                                     cover_all=self.cover_all)
    y_2d = func_2d(x_2d)
    y_2d.grad = gy_data
    y_2d.backward()

    # Test that the two result gradients are close enough.
    testing.assert_allclose(x_nd.grad, x_2d.grad)

def check_forward(self, x_data, mask):
    x = chainer.Variable(x_data)
    y = self.link(x, train=True, mask=mask,
                  use_batchwise_mask=self.use_batchwise_mask)
    self.assertEqual(y.data.dtype, self.x_dtype)
    testing.assert_allclose(self.y_expect, y.data,
                            **self.check_forward_options)

def check_forward(self, h_data, xs_data, ws_data, bs_data):
    h = chainer.Variable(h_data)
    xs = [chainer.Variable(x) for x in xs_data]
    ws = [[chainer.Variable(w) for w in ws] for ws in ws_data]
    bs = [[chainer.Variable(b) for b in bs] for bs in bs_data]
    hy, ys = functions.n_step_bigru(
        self.n_layers, self.dropout, h, ws, bs, xs)

    xs_next = self.xs
    e_hy = self.hx.copy()
    for layer in range(self.n_layers):
        # forward
        di = 0
        xf = []
        layer_idx = layer * 2 + di
        w = self.ws[layer_idx]
        b = self.bs[layer_idx]
        for ind in range(self.length):
            x = xs_next[ind]
            batch = x.shape[0]
            h_prev = e_hy[layer_idx, :batch]

            # GRU
            z = sigmoid(x.dot(w[1].T) + h_prev.dot(w[4].T) + b[1] + b[4])
            r = sigmoid(x.dot(w[0].T) + h_prev.dot(w[3].T) + b[0] + b[3])
            h_bar = numpy.tanh(x.dot(w[2].T) +
                               r * ((h_prev).dot(w[5].T) + b[5]) + b[2])
            e_h = (1 - z) * h_bar + z * h_prev

            e_hy[layer_idx, :batch] = e_h
            xf.append(e_h)

        # backward
        di = 1
        xb = []
        layer_idx = layer * 2 + di
        w = self.ws[layer_idx]
        b = self.bs[layer_idx]
        for ind in reversed(range(self.length)):
            x = xs_next[ind]
            batch = x.shape[0]
            h_prev = e_hy[layer_idx, :batch]

            # GRU
            z = sigmoid(x.dot(w[1].T) + h_prev.dot(w[4].T) + b[1] + b[4])
            r = sigmoid(x.dot(w[0].T) + h_prev.dot(w[3].T) + b[0] + b[3])
            h_bar = numpy.tanh(x.dot(w[2].T) +
                               r * ((h_prev).dot(w[5].T) + b[5]) + b[2])
            e_h = (1 - z) * h_bar + z * h_prev

            e_hy[layer_idx, :batch] = e_h
            xb.append(e_h)

        xb.reverse()
        xs_next = [numpy.concatenate([hfi, hbi], axis=1)
                   for (hfi, hbi) in zip(xf, xb)]

    for k, (ysi, xsi) in enumerate(zip(ys, xs_next)):
        testing.assert_allclose(ysi.data, xsi, rtol=1e-4, atol=1e-4)

    testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)

def check_argmax(self, cost_data, xs_data):
    cost = chainer.Variable(cost_data)
    xs = [chainer.Variable(x) for x in xs_data]
    s, path = functions.loss.crf1d.argmax_crf1d(cost, xs)

    self.assertIsInstance(s, chainer.Variable)
    self.assertIsInstance(path, list)
    self.assertEqual(s.shape, (self.batches[0],))
    self.assertEqual(len(path), len(self.batches))
    for b, p in zip(self.batches, path):
        self.assertEqual(p.shape, (b,))

    best_paths = [numpy.empty((length,), numpy.int32)
                  for length in self.batches]
    for b, length in enumerate(self.lengths):
        best_path = None
        best_score = 0
        for ys in itertools.product(range(self.n_label), repeat=length):
            score = self._calc_score(b, ys)
            if best_path is None or best_score < score:
                best_path = ys
                best_score = score

        for i, p in enumerate(best_path):
            best_paths[i][b] = p

        testing.assert_allclose(s.data[b], best_score)

    for t in range(len(self.batches)):
        numpy.testing.assert_array_equal(
            cuda.to_cpu(path[t]), best_paths[t])

def check_forward(self, x_data, t_data, w_data, sampler):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    w = chainer.Variable(w_data)
    y = functions.negative_sampling(
        x, t, w, sampler, self.sample_size, reduce=self.reduce)
    self.assertEqual(y.shape, self.gy.shape)

    samples = cuda.to_cpu(y.creator.samples)

    loss = numpy.empty((len(self.x),), numpy.float32)
    for i in six.moves.range(len(self.x)):
        ix = self.x[i]
        it = self.t[i]
        if it == -1:
            loss[i] = 0
        else:
            iw = self.w[samples[i]]
            f = iw.dot(ix)
            # first one is positive example
            f[0] *= -1
            loss[i] = numpy.logaddexp(f, 0).sum()

    if self.reduce == 'sum':
        loss = loss.sum()

    testing.assert_allclose(y.data, loss)

def check_forward(self, c_prev1_data, c_prev2_data, x1_data, x2_data):
    c_prev1 = chainer.Variable(c_prev1_data)
    c_prev2 = chainer.Variable(c_prev2_data)
    x1 = chainer.Variable(x1_data)
    x2 = chainer.Variable(x2_data)
    c, h = functions.slstm(c_prev1, c_prev2, x1, x2)
    self.assertEqual(c.data.dtype, self.dtype)
    self.assertEqual(h.data.dtype, self.dtype)

    # Compute expected output
    a1_in = self.x1[:, [0, 4]]
    i1_in = self.x1[:, [1, 5]]
    f1_in = self.x1[:, [2, 6]]
    o1_in = self.x1[:, [3, 7]]
    a2_in = self.x2[:, [0, 4]]
    i2_in = self.x2[:, [1, 5]]
    f2_in = self.x2[:, [2, 6]]
    o2_in = self.x2[:, [3, 7]]

    c_expect = _sigmoid(i1_in) * numpy.tanh(a1_in) + \
        _sigmoid(i2_in) * numpy.tanh(a2_in) + \
        _sigmoid(f1_in) * self.c_prev1 + \
        _sigmoid(f2_in) * self.c_prev2
    h_expect = _sigmoid(o1_in + o2_in) * numpy.tanh(c_expect)

    testing.assert_allclose(
        c_expect, c.data, **self.check_forward_options)
    testing.assert_allclose(
        h_expect, h.data, **self.check_forward_options)

def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.unpooling_2d(x, self.ksize, outsize=self.outsize,
                               cover_all=self.cover_all)
    self.assertEqual(y.data.dtype, self.dtype)
    y_data = cuda.to_cpu(y.data)

    self.assertEqual(self.gy.shape, y_data.shape)
    for i in six.moves.range(self.N):
        for c in six.moves.range(self.n_channels):
            outsize = self.outsize or self.expected_outsize
            assert y_data.shape[2:] == outsize
            if outsize == (5, 2):
                expect = numpy.zeros(outsize, dtype=self.dtype)
                expect[:2, :] = self.x[i, c, 0, 0]
                expect[2:4, :] = self.x[i, c, 1, 0]
            elif outsize == (4, 2):
                expect = numpy.array([
                    [self.x[i, c, 0, 0], self.x[i, c, 0, 0]],
                    [self.x[i, c, 0, 0], self.x[i, c, 0, 0]],
                    [self.x[i, c, 1, 0], self.x[i, c, 1, 0]],
                    [self.x[i, c, 1, 0], self.x[i, c, 1, 0]],
                ])
            elif outsize == (3, 1):
                expect = numpy.array([
                    [self.x[i, c, 0, 0]],
                    [self.x[i, c, 0, 0]],
                    [self.x[i, c, 1, 0]],
                ])
            else:
                raise ValueError('Unsupported outsize: {}'.format(outsize))
            testing.assert_allclose(expect, y_data[i, c])

def check_forward(self, x_data, use_cudnn='always'):
    x = chainer.Variable(x_data)
    with chainer.using_config('use_cudnn', use_cudnn):
        y = functions.tanh(x)
    self.assertEqual(y.data.dtype, self.dtype)
    y_expect = functions.tanh(chainer.Variable(self.x))
    testing.assert_allclose(y_expect.data, y.data)

def check_backward_consistency_regression(self, x_data, gy_data):
    # Regression test against the two-dimensional unpooling layer.
    ndim = len(self.dims)
    if ndim != 2:
        return

    ksize = self.ksize
    stride = self.stride
    pad = self.pad
    xp = backend.get_array_module(x_data)

    # Backward computation for N-dimensional unpooling layer.
    x_nd = chainer.Variable(xp.array(x_data))
    y_nd = functions.unpooling_nd(
        x_nd, ksize, stride=stride, pad=pad, cover_all=self.cover_all)
    y_nd.grad = gy_data
    y_nd.backward()

    # Backward computation for two-dimensional unpooling layer.
    x_2d = chainer.Variable(xp.array(x_data))
    y_2d = functions.unpooling_2d(
        x_2d, ksize, stride=stride, pad=pad, cover_all=self.cover_all)
    y_2d.grad = gy_data
    y_2d.backward()

    # Test that the two result gradients are close enough.
    opt = self.check_backward_options
    testing.assert_allclose(
        x_nd.grad, x_2d.grad, atol=opt['atol'], rtol=opt['rtol'])

def check_forward(self, x_data, t_data, use_cudnn=True):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    loss = functions.softmax_cross_entropy(
        x, t, use_cudnn=use_cudnn, normalize=self.normalize,
        cache_score=self.cache_score)
    self.assertEqual(loss.data.shape, ())
    self.assertEqual(loss.data.dtype, self.dtype)
    self.assertEqual(hasattr(loss.creator, 'y'), self.cache_score)
    loss_value = float(cuda.to_cpu(loss.data))

    # Compute expected value
    loss_expect = 0.0
    count = 0
    x = numpy.rollaxis(self.x, 1, self.x.ndim).reshape(
        (self.t.size, self.x.shape[1]))
    t = self.t.ravel()
    for xi, ti in six.moves.zip(x, t):
        if ti == -1:
            continue
        log_z = numpy.ufunc.reduce(numpy.logaddexp, xi)
        loss_expect -= (xi - log_z)[ti]
        count += 1

    if self.normalize:
        if count == 0:
            loss_expect = 0.0
        else:
            loss_expect /= count
    else:
        loss_expect /= len(t_data)

    testing.assert_allclose(
        loss_expect, loss_value, **self.check_forward_options)

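# Aside (illustration only, not part of the test suite): the reference loop
# above uses the log-sum-exp identity
#     -log softmax(x)[t] = log(sum_j exp(x_j)) - x[t],
# which is exactly what the log_z and (xi - log_z)[ti] lines compute.
# A tiny numpy check of that identity:
import numpy

xi = numpy.array([2.0, 0.5, -1.0])
ti = 0
log_z = numpy.logaddexp.reduce(xi)                            # log(sum_j exp(x_j))
loss_direct = -numpy.log(numpy.exp(xi[ti]) / numpy.exp(xi).sum())
assert numpy.allclose(log_z - xi[ti], loss_direct)
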
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.flatten(x)

    self.assertEqual(y.shape, self.g_shape)
    self.assertEqual(y.dtype, self.dtype)
    testing.assert_allclose(self.x.flatten(), y.data)

def check_forward(self, x_data, t_data):
    x_val = chainer.Variable(x_data)
    t_val = chainer.Variable(t_data)
    loss = functions.hinge(x_val, t_val, self.norm, self.reduce)
    if self.reduce == 'mean':
        self.assertEqual(loss.data.shape, ())
    else:
        self.assertEqual(loss.data.shape, self.x.shape)
    self.assertEqual(loss.data.dtype, numpy.float32)
    loss_value = cuda.to_cpu(loss.data)

    # Compute expected value
    for i in six.moves.range(self.x.shape[0]):
        self.x[i, self.t[i]] *= -1
    for i in six.moves.range(self.x.shape[0]):
        for j in six.moves.range(self.x.shape[1]):
            self.x[i, j] = max(0, 1.0 + self.x[i, j])
    if self.norm == 'L1':
        loss_expect = self.x
    elif self.norm == 'L2':
        loss_expect = self.x ** 2
    if self.reduce == 'mean':
        loss_expect = numpy.sum(loss_expect) / self.x.shape[0]

    testing.assert_allclose(loss_expect, loss_value)

def check_col2im(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
    col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
    col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
    shape = (2, 3, kh, kw, col_h, col_w)
    col = numpy.random.uniform(-1, 1, shape).astype(self.dtype)

    if gpu:
        col2im = conv.col2im_gpu
        col_data = cuda.to_gpu(col)
    else:
        col2im = conv.col2im_cpu
        col_data = col

    img = col2im(col_data, sy, sx, ph, pw, self.h, self.w, dy=dy, dx=dx)
    img = cuda.to_cpu(img)
    self.assertEqual(img.shape, (2, 3, self.h, self.w))
    for y in moves.range(self.h):
        for x in moves.range(self.w):
            v = numpy.zeros((2, 3), self.dtype)
            for ky in moves.range(kh):
                for kx in moves.range(kw):
                    oy = (y + ph - ky * dy) // sy
                    ox = (x + pw - kx * dx) // sx
                    if ((y + ph - ky * dy) % sy == 0 and
                            (x + pw - kx * dx) % sx == 0 and
                            0 <= oy < col_h and 0 <= ox < col_w):
                        v += col[:, :, ky, kx, oy, ox]
            testing.assert_allclose(img[:, :, y, x], v)

def check_backward_consistency_regression(self, x_data, gy_data,
                                           use_cudnn='always'):
    # Regression test against the two-dimensional average pooling layer.
    if len(self.dims) != 2:
        return

    ksize = self.ksize
    stride = self.stride
    pad = self.pad
    xp = cuda.get_array_module(x_data)

    # Backward computation for N-dimensional average pooling layer.
    x_nd = chainer.Variable(xp.array(x_data))
    with chainer.using_config('use_cudnn', use_cudnn):
        func_nd = functions.AveragePoolingND(self.ndim, ksize,
                                             stride=stride, pad=pad)
        y_nd = func_nd.apply((x_nd,))[0]
    y_nd.grad = gy_data
    y_nd.backward()

    # Backward computation for two-dimensional average pooling layer.
    x_2d = chainer.Variable(xp.array(x_data))
    with chainer.using_config('use_cudnn', use_cudnn):
        func_2d = functions.AveragePooling2D(ksize, stride=stride,
                                             pad=pad, cover_all=False)
        y_2d = func_2d.apply((x_2d,))[0]
    y_2d.grad = gy_data
    y_2d.backward()

    # Test that the two result gradients are close enough.
    testing.assert_allclose(x_nd.grad, x_2d.grad)

def check_forward(self, op, op_xp, x_data):
    x = chainer.Variable(x_data)
    y = op(x)
    self.assertEqual(x.data.dtype, y.data.dtype)
    v = op_xp(x_data)
    testing.assert_allclose(
        v, y.data, atol=1e-7, rtol=1e-7)

def check_forward(self, x_data, use_cudnn='always'):
    dims = self.dims
    ksize = self.ksize
    stride = self.stride
    pad = self.pad
    x = chainer.Variable(x_data)
    with chainer.using_config('use_cudnn', use_cudnn):
        y = functions.average_pooling_nd(
            x, ksize, stride, pad, self.pad_value)
    self.assertEqual(y.data.dtype, self.dtype)
    y_data = cuda.to_cpu(y.data)

    def denom(idx):
        if self.pad_value is None:
            s = 1
            for slic in idx:
                s *= slic.stop - slic.start
            return s
        else:
            return functools.reduce(operator.mul, ksize)

    self.assertEqual(self.gy.shape, y_data.shape)
    patches = pooling_nd_helper.pooling_patches(
        dims, ksize, stride, pad, False)
    for k in six.moves.range(2):
        for c in six.moves.range(3):
            x = self.x[k, c]
            expect = numpy.array(
                [x[idx].sum() / denom(idx) for idx in patches])
            expect = expect.reshape(y_data.shape[2:])
            testing.assert_allclose(
                expect, y_data[k, c], **self.check_forward_options)

def check_forward(self, x_data, t_data):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    self.link.sample_data = self.link.sampler.sample(
        (self.batch_size, self.n_samples))
    y = self.link(x, t)

    expect_y = numpy.empty((self.batch_size), dtype=numpy.float32)
    samples = cuda.to_cpu(self.link.sample_data)
    for b in range(self.batch_size):
        z = 0
        for i in range(self.n_samples):
            w = samples[b, i]
            z += numpy.exp(self.w[w].dot(self.x[b]))
        y0 = self.w[self.t[b]].dot(self.x[b])
        z += numpy.exp(y0)
        l = y0 - numpy.log(z)
        for i in range(self.n_samples):
            w = samples[b, i]
            l += numpy.log(1 - numpy.exp(self.w[w].dot(self.x[b])) / z)
        expect_y[b] = l

    loss = -numpy.sum(expect_y) / self.batch_size
    testing.assert_allclose(y.data, loss, atol=1.0e-4)

def check_forward(self, h_data, xs_data, ws_data, bs_data):
    h = _wrap_variable(h_data)
    xs = _wrap_variable(xs_data)
    ws = _wrap_variable(ws_data)
    bs = _wrap_variable(bs_data)
    hy, ys = functions.n_step_rnn(
        self.n_layers, self.dropout, h, ws, bs, xs,
        activation=self.activation)

    e_hy = self.hx.copy()
    for ind in range(self.length):
        x = self.xs[ind]
        batch = x.shape[0]
        for layer in range(self.n_layers):
            w = self.ws[layer]
            b = self.bs[layer]
            h_prev = e_hy[layer, :batch]
            if self.activation == 'tanh':
                e_h = numpy.tanh(x.dot(w[0].T) +
                                 h_prev.dot(w[1].T) + b[0] + b[1])
            elif self.activation == 'relu':
                e_h = _relu(x.dot(w[0].T) +
                            h_prev.dot(w[1].T) + b[0] + b[1])

            e_hy[layer, :batch] = e_h
            x = e_h

        testing.assert_allclose(
            ys[ind].data, x, rtol=1e-4, atol=1e-4)

    testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)

def check_forward(self, x_data, xp):
    x = chainer.Variable(x_data)
    y = F.sign(x)
    v = xp.sign(x_data)
    assert x.data.dtype == y.data.dtype
    testing.assert_allclose(v, y.data, atol=1e-7, rtol=1e-7)

def check_forward(self, x_data, axis, weights):
    x = chainer.Variable(x_data)
    if self.use_weights:
        w = chainer.Variable(weights)
        w_data = self.w
    else:
        w = None
        w_data = None
    y = functions.average(x, axis=axis, weights=w, keepdims=self.keepdims)
    self.assertEqual(y.data.dtype, self.dtype)
    y_expect = numpy.average(self.x, axis=axis, weights=w_data)
    if self.keepdims:
        # numpy.average does not support keepdims
        if axis is None:
            axis = list(six.moves.range(x_data.ndim))
        elif isinstance(axis, int):
            axis = axis,
        shape = list(x_data.shape)
        for i in six.moves.range(len(shape)):
            if i in axis or i - len(shape) in axis:
                shape[i] = 1
        y_expect = y_expect.reshape(shape)

    if self.dtype == numpy.float16:
        options = {'atol': 1e-3, 'rtol': 1e-3}
    else:
        options = {}

    self.assertEqual(y_expect.shape, y.shape)
    testing.assert_allclose(y_expect, y.data, **options)

def check_im2col(self, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
    if gpu:
        im2col = conv.im2col_gpu
        img = cuda.to_gpu(self.img)
    else:
        im2col = conv.im2col_cpu
        img = self.img

    col = im2col(img, kh, kw, sy, sx, ph, pw, dy=dy, dx=dx)
    col_h = conv.get_conv_outsize(self.h, kh, sy, ph, d=dy)
    col_w = conv.get_conv_outsize(self.w, kw, sx, pw, d=dx)
    self.assertEqual(col.shape, (2, 3, kh, kw, col_h, col_w))

    col = cuda.to_cpu(col)

    for y in moves.range(col_h):
        for x in moves.range(col_w):
            for ky in moves.range(kh):
                for kx in moves.range(kw):
                    oy = y * sy - ph + ky * dy
                    ox = x * sx - pw + kx * dx
                    if 0 <= oy < self.h and 0 <= ox < self.w:
                        testing.assert_allclose(
                            col[:, :, ky, kx, y, x],
                            self.img[:, :, oy, ox])
                    else:
                        testing.assert_allclose(
                            col[:, :, ky, kx, y, x],
                            numpy.zeros((2, 3), self.dtype))

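# Aside (hedged sketch, not Chainer code): the column counts used by the
# im2col/col2im checks above come from conv.get_conv_outsize; assuming
# cover_all=False and the standard dilated-convolution arithmetic, the output
# size per axis is expected to be
#     out = (size + 2 * pad - dilation * (k - 1) - 1) // stride + 1
def _outsize_sketch(size, k, s, p, d=1):
    # Hypothetical helper mirroring the assumed formula; illustration only.
    return (size + 2 * p - d * (k - 1) - 1) // s + 1

# e.g. a 7-pixel axis, 3-wide kernel, stride 2, pad 1, no dilation -> 4 columns
assert _outsize_sketch(7, 3, 2, 1) == 4
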
def check_forward(self, h_data, xs_data, ws_data, bs_data):
    h = _wrap_variable(h_data)
    xs = _wrap_variable(xs_data)
    ws = _wrap_variable(ws_data)
    bs = _wrap_variable(bs_data)
    hy, ys = functions.n_step_birnn(
        self.n_layers, self.dropout, h, ws, bs, xs,
        activation=self.activation)

    xs_next = self.xs
    e_hy = self.hx.copy()
    for layer in range(self.n_layers):
        # forward
        di = 0
        xf = []
        layer_idx = layer * 2 + di
        w = self.ws[layer_idx]
        b = self.bs[layer_idx]
        for ind in range(self.length):
            x = xs_next[ind]
            batch = x.shape[0]
            h_prev = e_hy[layer_idx, :batch]
            if self.activation == 'tanh':
                e_h = numpy.tanh(x.dot(w[0].T) +
                                 h_prev.dot(w[1].T) + b[0] + b[1])
            elif self.activation == 'relu':
                e_h = _relu(x.dot(w[0].T) +
                            h_prev.dot(w[1].T) + b[0] + b[1])

            e_hy[layer_idx, :batch] = e_h
            xf.append(e_h)

        # backward
        di = 1
        xb = []
        layer_idx = layer * 2 + di
        w = self.ws[layer_idx]
        b = self.bs[layer_idx]
        for ind in reversed(range(self.length)):
            x = xs_next[ind]
            batch = x.shape[0]
            h_prev = e_hy[layer_idx, :batch]
            if self.activation == 'tanh':
                e_h = numpy.tanh(x.dot(w[0].T) +
                                 h_prev.dot(w[1].T) + b[0] + b[1])
            elif self.activation == 'relu':
                e_h = _relu(x.dot(w[0].T) +
                            h_prev.dot(w[1].T) + b[0] + b[1])

            e_hy[layer_idx, :batch] = e_h
            xb.append(e_h)

        xb.reverse()
        xs_next = [numpy.concatenate([hfi, hbi], axis=1)
                   for (hfi, hbi) in zip(xf, xb)]

    for k, (ysi, xsi) in enumerate(zip(ys, xs_next)):
        testing.assert_allclose(ysi.data, xsi, rtol=1e-4, atol=1e-4)

    testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)

def test_max_pooling_3d(self):
    (x, ksize) = self._get_data(3)
    testing.assert_allclose(
        functions.max_pooling_nd(x, ksize).data,
        functions.max_pooling_3d(x, ksize).data)

def test_initialize_cpu(self):
    testing.assert_allclose(self.initial_gamma, self.link.gamma.data)
    testing.assert_allclose(self.initial_beta, self.link.beta.data)
    testing.assert_allclose(self.initial_avg_mean, self.link.avg_mean)
    testing.assert_allclose(self.initial_avg_var, self.link.avg_var)

def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    with chainer.using_config('train', not self.test):
        y = self.link(x)
    testing.assert_allclose(self.y_expected, y.data)

def check_forward(self, c_data, h_data, x_data):
    x = chainer.Variable(x_data)
    h1 = self.link(x)
    c1_expect, h1_expect = _peephole(self.link, c_data, h_data, x_data)
    testing.assert_allclose(h1.data, h1_expect)
    testing.assert_allclose(self.link.c.data, c1_expect)
    testing.assert_allclose(self.link.h.data, h1_expect)

    h2 = self.link(x)
    c2_expect, h2_expect = _peephole(self.link, c1_expect, h1_expect,
                                     x_data)
    testing.assert_allclose(h2.data, h2_expect)
    testing.assert_allclose(self.link.c.data, c2_expect)
    testing.assert_allclose(self.link.h.data, h2_expect)

def check_forward(self, x_data):
    y = functions.squeeze(x_data, axis=self.axis)
    expected = numpy.squeeze(self.x, axis=self.axis)
    testing.assert_allclose(y.data, expected, **self.check_forward_options)

def check_forward(self, x_data):
    x = variable.Variable(x_data)
    y = func(x)
    self.assertEqual(y.data.dtype, x_data.dtype)
    y_expected = func_expected(cuda.to_cpu(x_data), dtype=x_data.dtype)
    testing.assert_allclose(y_expected, y.data, **self.forward_options)

def test_initialize_cpu(self):
    self.link(numpy.zeros(self.shape, dtype='f'))
    testing.assert_allclose(self.initial_gamma, self.link.gamma.data)
    testing.assert_allclose(self.initial_beta, self.link.beta.data)

def check_identical(self, x):
    testing.assert_allclose(x, x, atol=0, rtol=0)

def test_conv3d(self):
    (x, W, b) = self._get_data(3)
    testing.assert_allclose(
        F.convolution_nd(x, W, b).data, F.convolution_3d(x, W, b).data)

def test_initialize_gpu(self):
    self.link.to_gpu()
    self.link(cuda.cupy.zeros(self.shape, dtype='f'))
    testing.assert_allclose(numpy.ones(self.size), self.link.gamma.data)
    testing.assert_allclose(
        numpy.zeros(self.size), self.link.beta.data)

def check_forward(self, x_data, axis=None):
    x = chainer.Variable(x_data)
    y = functions.logsumexp(x, axis=axis)
    self.assertEqual(y.data.dtype, self.dtype)
    y_expect = numpy.log(numpy.exp(self.x).sum(axis=axis))
    testing.assert_allclose(y_expect, y.data, **self.check_forward_option)

def check_forward(self, op, op_xp, x_data):
    x = chainer.Variable(x_data)
    y = op(x)
    self.assertEqual(x.data.dtype, y.data.dtype)
    testing.assert_allclose(op_xp(x_data), y.data, atol=1e-7, rtol=1e-7)

def check_forward(self, op, op_np, x_data):
    x = chainer.Variable(x_data)
    y = op(x)
    testing.assert_allclose(op_np(self.x), y.data, atol=1e-7, rtol=1e-7)

def test_average_pooling_1d(self):
    (x, ksize) = self._get_data(1)
    testing.assert_allclose(
        functions.average_pooling_nd(x, ksize).array,
        functions.average_pooling_1d(x, ksize).array)

def test_col2im_consistency(self):
    col = conv.im2col_cpu(self.x, 3, 3, 2, 2, 1, 1)
    h, w = self.x.shape[2:]
    im_cpu = conv.col2im_cpu(col, 2, 2, 1, 1, h, w)
    im_gpu = conv.col2im_gpu(cuda.to_gpu(col), 2, 2, 1, 1, h, w)
    testing.assert_allclose(im_cpu, im_gpu.get())

def _run_trainer(self, extension, expect, optimizer=None):
    if optimizer is None:
        optimizer = self.optimizer
    extension.initialize(self.trainer)
    actual = []
    for _ in expect:
        self.trainer.updater.update()
        actual.append(optimizer.x)
        if self.trigger(self.trainer):
            extension(self.trainer)

    for a, e in zip(actual, expect):
        testing.assert_allclose(a, e)

def check_forward(self, x1_data, x2_data, x3_data):
    xp = self.link.xp
    x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
    h1 = self.link(x1)
    with cuda.get_device_from_array(x1_data):
        c0 = chainer.Variable(xp.zeros((len(self.x1), self.out_size),
                                       dtype=self.x1.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(self.link.h.data, h1_expect.data)
    testing.assert_allclose(self.link.c.data, c1_expect.data)

    batch = len(x2_data)
    x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
    h1_in, h1_rest = functions.split_axis(
        self.link.h.data, [batch], axis=0)
    y2 = self.link(x2)
    with cuda.get_device_from_array(x1):
        c2_expect, y2_expect = \
            functions.lstm(c1_expect,
                           self.link.upward(x2) + self.link.lateral(h1_in))
    testing.assert_allclose(y2.data, y2_expect.data)
    testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
    testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

    x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
    h2_rest = self.link.h
    y3 = self.link(x3)
    c3_expect, y3_expect = \
        functions.lstm(c2_expect, self.link.upward(x3))
    testing.assert_allclose(y3.data, y3_expect.data)
    testing.assert_allclose(self.link.h.data, h2_rest.data)

def check_forward(self, op, x1_data, x2_data):
    x1 = chainer.Variable(x1_data)
    x2 = chainer.Variable(x2_data)
    y = op(x1, x2)
    testing.assert_allclose(op(self.x1, self.x2), y.data)

def check_forward(self, h_data, c_data, xs_data, ws_data, bs_data,
                  volatile):
    h = chainer.Variable(h_data, volatile=volatile)
    c = chainer.Variable(c_data, volatile=volatile)
    xs = [chainer.Variable(x, volatile=volatile) for x in xs_data]
    ws = [[chainer.Variable(w, volatile=volatile) for w in ws]
          for ws in ws_data]
    bs = [[chainer.Variable(b, volatile=volatile) for b in bs]
          for bs in bs_data]
    hy, cy, ys = functions.n_step_bilstm(
        self.n_layers, self.dropout, h, c, ws, bs, xs,
        use_cudnn=self.use_cudnn)

    xs_next = self.xs
    e_hy = self.hx.copy()
    e_cy = self.cx.copy()
    for layer in range(self.n_layers):
        # forward
        di = 0
        xf = []
        layer_idx = layer * 2 + di
        w = self.ws[layer_idx]
        b = self.bs[layer_idx]
        for ind in range(self.length):
            x = xs_next[ind]
            batch = x.shape[0]
            h_prev = e_hy[layer_idx, :batch]
            c_prev = e_cy[layer_idx, :batch]
            i = sigmoid(x.dot(w[0].T) + h_prev.dot(w[4].T) + b[0] + b[4])
            f = sigmoid(x.dot(w[1].T) + h_prev.dot(w[5].T) + b[1] + b[5])
            c_bar = numpy.tanh(
                x.dot(w[2].T) + h_prev.dot(w[6].T) + b[2] + b[6])
            o = sigmoid(x.dot(w[3].T) + h_prev.dot(w[7].T) + b[3] + b[7])
            e_c = (f * c_prev + i * c_bar)
            e_h = o * numpy.tanh(e_c)
            e_hy[layer_idx, :batch] = e_h
            e_cy[layer_idx, :batch] = e_c

            xf.append(e_h)

        # backward
        di = 1
        xb = []
        layer_idx = layer * 2 + di
        w = self.ws[layer_idx]
        b = self.bs[layer_idx]
        for ind in reversed(range(self.length)):
            x = xs_next[ind]
            batch = x.shape[0]
            h_prev = e_hy[layer_idx, :batch]
            c_prev = e_cy[layer_idx, :batch]
            i = sigmoid(x.dot(w[0].T) + h_prev.dot(w[4].T) + b[0] + b[4])
            f = sigmoid(x.dot(w[1].T) + h_prev.dot(w[5].T) + b[1] + b[5])
            c_bar = numpy.tanh(
                x.dot(w[2].T) + h_prev.dot(w[6].T) + b[2] + b[6])
            o = sigmoid(x.dot(w[3].T) + h_prev.dot(w[7].T) + b[3] + b[7])
            e_c = (f * c_prev + i * c_bar)
            e_h = o * numpy.tanh(e_c)
            e_hy[layer_idx, :batch] = e_h
            e_cy[layer_idx, :batch] = e_c

            xb.append(e_h)

        xb.reverse()
        xs_next = [
            numpy.concatenate([hfi, hbi], axis=1)
            for (hfi, hbi) in zip(xf, xb)
        ]

    for k, (ysi, xsi) in enumerate(zip(ys, xs_next)):
        testing.assert_allclose(ysi.data, xsi, rtol=1e-4, atol=1e-4)

    testing.assert_allclose(hy.data, e_hy, rtol=1e-4, atol=1e-4)
    testing.assert_allclose(cy.data, e_cy, rtol=1e-4, atol=1e-4)

def test_im2col_consistency(self):
    col_cpu = conv.im2col_cpu(self.x, 3, 3, 2, 2, 1, 1)
    col_gpu = conv.im2col_gpu(cuda.to_gpu(self.x), 3, 3, 2, 2, 1, 1)
    testing.assert_allclose(col_cpu, col_gpu.get(), atol=0, rtol=0)

def check_forward(self, h_data, c_data, xs_data):
    if self.hidden_none:
        h = c = None
    else:
        h = chainer.Variable(h_data)
        c = chainer.Variable(c_data)
    xs = [chainer.Variable(x) for x in xs_data]
    hy, cy, ys = self.rnn(h, c, xs)

    self.assertEqual(hy.data.shape, h_data.shape)
    self.assertEqual(cy.data.shape, c_data.shape)
    self.assertEqual(len(xs), len(ys))
    for x, y in zip(xs, ys):
        self.assertEqual(len(x.data), len(y.data))
        self.assertEqual(y.data.shape[1], self.out_size * 2)

    self.rnn.to_cpu()

    for batch, seq in enumerate(self.xs):
        for layer in range(self.n_layer):
            # forward
            di = 0
            layer_idx = layer * 2 + di
            p = self.rnn[layer_idx]
            h_prev = self.h[layer_idx, batch]
            c_prev = self.c[layer_idx, batch]
            hs_f = []
            for x in seq:
                i = sigmoid(
                    x.dot(p.w0.data.T) + h_prev.dot(p.w4.data.T) +
                    p.b0.data + p.b4.data)
                f = sigmoid(
                    x.dot(p.w1.data.T) + h_prev.dot(p.w5.data.T) +
                    p.b1.data + p.b5.data)
                c_bar = numpy.tanh(
                    x.dot(p.w2.data.T) + h_prev.dot(p.w6.data.T) +
                    p.b2.data + p.b6.data)
                o = sigmoid(
                    x.dot(p.w3.data.T) + h_prev.dot(p.w7.data.T) +
                    p.b3.data + p.b7.data)
                e_c = (f * c_prev + i * c_bar)
                e_h = o * numpy.tanh(e_c)

                h_prev = e_h
                c_prev = e_c
                hs_f.append(e_h)

            testing.assert_allclose(hy.data[layer_idx, batch], h_prev)
            testing.assert_allclose(cy.data[layer_idx, batch], c_prev)

            # backward
            di = 1
            layer_idx = layer * 2 + di
            p = self.rnn[layer_idx]
            h_prev = self.h[layer_idx, batch]
            c_prev = self.c[layer_idx, batch]
            hs_b = []
            for x in reversed(seq):
                i = sigmoid(
                    x.dot(p.w0.data.T) + h_prev.dot(p.w4.data.T) +
                    p.b0.data + p.b4.data)
                f = sigmoid(
                    x.dot(p.w1.data.T) + h_prev.dot(p.w5.data.T) +
                    p.b1.data + p.b5.data)
                c_bar = numpy.tanh(
                    x.dot(p.w2.data.T) + h_prev.dot(p.w6.data.T) +
                    p.b2.data + p.b6.data)
                o = sigmoid(
                    x.dot(p.w3.data.T) + h_prev.dot(p.w7.data.T) +
                    p.b3.data + p.b7.data)
                e_c = (f * c_prev + i * c_bar)
                e_h = o * numpy.tanh(e_c)

                h_prev = e_h
                c_prev = e_c
                hs_b.append(e_h)

            testing.assert_allclose(hy.data[layer_idx, batch], h_prev)
            testing.assert_allclose(cy.data[layer_idx, batch], c_prev)

            hs_b.reverse()
            seq = [
                numpy.concatenate([hfi, hbi], axis=0)
                for (hfi, hbi) in zip(hs_f, hs_b)
            ]

        for y, ey in zip(ys[batch].data, seq):
            testing.assert_allclose(y, ey)

def check_set_state(self, c, h):
    self.link.set_state(c, h)
    self.assertIsInstance(self.link.c.data, self.link.xp.ndarray)
    testing.assert_allclose(c.data, self.link.c.data)
    self.assertIsInstance(self.link.h.data, self.link.xp.ndarray)
    testing.assert_allclose(h.data, self.link.h.data)

def test_identity_gpu(self):
    eye = cuda.to_gpu(_make_eye(self.x.shape))
    x = chainer.Variable(cuda.to_gpu(self.x))
    y = functions.matmul(x, functions.inv(x))
    testing.assert_allclose(y.data, eye, **self.check_forward_options)

def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.fliplr(x)
    testing.assert_allclose(y.data, numpy.fliplr(self.x))

def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.inv(x)
    testing.assert_allclose(
        _inv(self.x), y.data, **self.check_forward_options)

def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.rollaxis(x, self.axis, self.start)

    expect = numpy.rollaxis(self.x, self.axis, self.start)
    testing.assert_allclose(y.data, expect)