def check_double_backward(
        self, inputs, grad_outputs, grad_grad_inputs, use_cudnn='always'):
    if not self.c_contiguous:
        inputs = array._as_noncontiguous_array(inputs)
        grad_outputs = array._as_noncontiguous_array(grad_outputs)
        grad_grad_inputs = array._as_noncontiguous_array(grad_grad_inputs)
    x_data, W_data, b_data = inputs
    y_grad, = grad_outputs
    x_grad_grad, W_grad_grad, b_grad_grad = grad_grad_inputs
    args = (x_data, W_data)
    grad_grads = (x_grad_grad, W_grad_grad)
    if b_data is not None:
        args += (b_data,)
        grad_grads += (b_grad_grad,)

    def f(*args):
        return F.deconvolution_nd(
            *args, stride=self.stride, pad=self.pad, outsize=self.outsize,
            dilate=self.dilate, groups=self.groups)

    with chainer.using_config('use_cudnn', use_cudnn):
        with chainer.using_config('autotune', self.autotune):
            gradient_check.check_double_backward(
                f, args, y_grad, grad_grads,
                **self.check_double_backward_options)
def check_double_backward(self, x_data, g_data, gg_data):
    def f(x):
        return functions.swapaxes(x, self.axis1, self.axis2)

    gradient_check.check_double_backward(
        f, x_data, g_data, gg_data, dtype=numpy.float64,
        atol=5e-2, rtol=5e-3)


def check_double_backward(self, x_data, y_grad, ggx_data):
    def f(x):
        y = functions.get_item(x, self.slices)
        return y * y

    gradient_check.check_double_backward(
        f, (x_data,), y_grad, ggx_data, dtype='d')


def check_double_backward(self, x_data, y_grad, x_grad_grad):
    def f(x):
        return functions.space2depth(x, self.r)

    gradient_check.check_double_backward(
        f, x_data, y_grad, x_grad_grad, dtype=numpy.float64,
        **self.check_double_backward_options)


def test_full_double_backward_cpu(self):
    gradient_check.check_double_backward(
        functions.bilinear,
        (self.e1, self.e2, self.W, self.V1, self.V2, self.b), self.gy,
        (self.gge1, self.gge2, self.ggW, self.ggV1, self.ggV2, self.ggb),
        **self.check_double_backward_options)


def check_double_backward(self, x_data, y_grad, x_grad_grad):
    options = {}
    if self.dtype == numpy.float16:
        options = {'atol': 5e-3, 'rtol': 5e-2}
    gradient_check.check_double_backward(
        lambda x: x ** 2, x_data, y_grad, x_grad_grad,
        dtype=numpy.float64, **options)
def check_double_backward(self, x0_data, x1_data, gy_data,
                          ggx0_data, ggx1_data):
    gradient_check.check_double_backward(
        functions.mean_squared_error, (x0_data, x1_data), gy_data,
        (ggx0_data, ggx1_data), eps=1e-2)


def check_double_backward(self, x_data, axis, y_grad, x_grad_grad):
    def f(x):
        return functions.cumsum(x, axis)

    gradient_check.check_double_backward(
        f, x_data, y_grad, x_grad_grad, dtype=numpy.float64,
        **self.check_double_backward_options)


def check_double_backward(self, x1_data, x2_data, y_grad,
                          x1_grad_grad, x2_grad_grad):
    gradient_check.check_double_backward(
        F.arctan2, (x1_data, x2_data), y_grad,
        (x1_grad_grad, x2_grad_grad), dtype='d',
        **self.double_backward_options)


def check_double_backward(self, x_data, y_grad, x_grad_grad):
    gradient_check.check_double_backward(
        self.func, x_data, y_grad, x_grad_grad, dtype='d',
        **self.double_backward_options)


def check_double_backward(self, x_data, axis, y_grad, x_grad_grad):
    def f(x):
        return functions.flip(x, axis)

    gradient_check.check_double_backward(
        f, x_data, y_grad, x_grad_grad,
        **self.check_double_backward_options)
def check_double_backward(self, x_data, W_data, b_data, y_grad,
                          x_grad_grad, W_grad_grad, b_grad_grad):
    args = x_data, W_data
    grads = x_grad_grad, W_grad_grad
    if b_data is not None:
        args += b_data,
        grads += b_grad_grad,

    if self.use_batchwise_mask:
        mask_shape = (x_data.shape[0],) + W_data.shape
    else:
        mask_shape = W_data.shape
    xp = cuda.get_array_module(x_data)
    mask = xp.random.rand(*mask_shape) >= self.ratio

    def f(x, W, b=None):
        return functions.simplified_dropconnect(
            x, W, b, self.ratio, self.train, mask,
            self.use_batchwise_mask)

    gradient_check.check_double_backward(
        f, args, y_grad, grads, eps=1e-2,
        **self.check_double_backward_options)


def check_double_backward(self, x_data, gy_data, ggx_data):
    def f(x):
        y = functions.pad(x, pad_width=self.pad_width, mode=self.mode)
        return y * y

    gradient_check.check_double_backward(
        f, x_data, gy_data, ggx_data, **self.check_backward_options)
def do_check():
    inputs = self._generate_inputs()
    outputs = self._forward_expected(inputs)
    grad_outputs = self._generate_grad_outputs(outputs)
    grad_grad_inputs = self._generate_grad_grad_inputs(inputs)

    # Drop ggx corresponding to non-differentiable inputs.
    grad_grad_inputs = [
        ggx for ggx in grad_grad_inputs if ggx.dtype.kind == 'f']

    inputs = backend_config.get_array(inputs)
    grad_outputs = backend_config.get_array(grad_outputs)
    grad_grad_inputs = backend_config.get_array(grad_grad_inputs)
    inputs = self._to_noncontiguous_as_needed(inputs)
    grad_outputs = self._to_noncontiguous_as_needed(grad_outputs)
    grad_grad_inputs = (
        self._to_noncontiguous_as_needed(grad_grad_inputs))

    with backend_config:
        with FunctionTestError.raise_if_fail(
                'double backward is not implemented correctly'):
            gradient_check.check_double_backward(
                f, inputs, grad_outputs, grad_grad_inputs,
                dtype=numpy.float64,
                detect_nondifferentiable=self.dodge_nondifferentiable,
                **self.check_double_backward_options)
def check_double_backward(self, x_data, axis, y_grad, x_grad_grad):
    def f(x):
        return functions.normalize(x, eps=self.eps, axis=axis)

    gradient_check.check_double_backward(
        f, x_data, y_grad, x_grad_grad,
        **self.check_double_backward_options)


def check_double_backward(self, x_data, W_data, gy_data, ggW_data):
    def f(W):
        return chainer.functions.embed_id(x_data, W, self.ignore_label)

    gradient_check.check_double_backward(
        f, W_data, gy_data, ggW_data,
        **self.check_double_backward_options)


def check_double_backward(self, x_data, y_grad, x_grad_grad):
    def f(x):
        return distributions.multivariate_normal._triangular_inv(
            x, lower=self.lower)

    gradient_check.check_double_backward(
        f, x_data, y_grad, x_grad_grad, **self.double_backward_options)
def check_double_backward(
        self, inputs, grad_outputs, grad_grad_inputs, backend_config):
    inputs = backend_config.get_array(inputs)
    grad_outputs = backend_config.get_array(grad_outputs)
    grad_grad_inputs = backend_config.get_array(grad_grad_inputs)
    if not self.c_contiguous:
        inputs = array._as_noncontiguous_array(inputs)
        grad_outputs = array._as_noncontiguous_array(grad_outputs)
        grad_grad_inputs = array._as_noncontiguous_array(grad_grad_inputs)
    x_data, W_data, b_data = inputs
    y_grad, = grad_outputs
    x_grad_grad, W_grad_grad, b_grad_grad = grad_grad_inputs
    args = (x_data, W_data)
    grad_grads = (x_grad_grad, W_grad_grad)
    if b_data is not None:
        args = args + (b_data,)
        grad_grads = grad_grads + (b_grad_grad,)

    def f(*args):
        return F.deconvolution_2d(
            *args, stride=self.stride, pad=self.pad, outsize=self.outsize,
            dilate=self.dilate, groups=self.groups)

    with backend_config:
        gradient_check.check_double_backward(
            f, args, y_grad, grad_grads,
            **self.check_double_backward_options)
def test_double_backward(self, backend_config):
    # TODO(niboshi): Support it
    if backend_config.use_chainerx and self.dtype == numpy.float16:
        raise unittest.SkipTest('ChainerX does not support float16')

    inputs = self.inputs
    grad_outputs = self.grad_outputs
    grad_grad_inputs = self.grad_grad_inputs

    inputs = backend_config.get_array(inputs)
    grad_outputs = backend_config.get_array(grad_outputs)
    grad_grad_inputs = backend_config.get_array(grad_grad_inputs)

    if not self.c_contiguous:
        inputs = testing.array._as_noncontiguous_array(inputs)
        grad_outputs = testing.array._as_noncontiguous_array(grad_outputs)
        grad_grad_inputs = (
            testing.array._as_noncontiguous_array(grad_grad_inputs))

    x, = inputs
    gy, = grad_outputs
    ggx, = grad_grad_inputs

    with backend_config:
        gradient_check.check_double_backward(
            functions.relu, x, gy, ggx, dtype=numpy.float64,
            **self.check_double_backward_options)
def check_double_backward(self, x_data, y_grad, x_grad_grad):
    def f(x):
        return functions.diagonal(x, *self.args)

    gradient_check.check_double_backward(
        f, x_data, y_grad, x_grad_grad, atol=1e-3, rtol=1e-2,
        dtype=numpy.float64)


def check_double_backward(self, x1, x2, g_data, ggx1, ggx2):
    x_data = (x1, x2)
    ggx_data = (ggx1, ggx2)
    gradient_check.check_double_backward(
        functions.squared_difference, x_data, g_data, ggx_data,
        dtype=numpy.float64, atol=1e-2, rtol=2e-2)


def check_double_backward(self, logit_data, x_data, y_grad, x_grad_grad):
    def f(logit):
        return distributions.bernoulli._bernoulli_log_prob(logit, x_data)

    gradient_check.check_double_backward(
        f, logit_data, y_grad, x_grad_grad, dtype=numpy.float64,
        **self.backward_options)
def check_double_backward(self, inputs, grad_outputs, grad_grad_inputs,
                          backend_config):
    if backend_config.use_cuda:
        inputs = cuda.to_gpu(inputs)
        grad_outputs = cuda.to_gpu(grad_outputs)
        grad_grad_inputs = cuda.to_gpu(grad_grad_inputs)

    if not self.c_contiguous:
        inputs = _to_noncontiguous(inputs)
        grad_outputs = _to_noncontiguous(grad_outputs)
        grad_grad_inputs = _to_noncontiguous(grad_grad_inputs)

    def f(x):
        x = functions.relu(x)
        return x * x

    x, = inputs
    gy, = grad_outputs
    ggx, = grad_grad_inputs

    with backend_config:
        gradient_check.check_double_backward(
            f, x, gy, ggx, dtype=numpy.float64,
            **self.check_double_backward_options)
def check_double_backward(self, x_data, W_data, b_data, y_grad,
                          x_grad_grad, W_grad_grad, b_grad_grad,
                          use_cudnn='always'):
    if not self.c_contiguous:
        (x_data, W_data, b_data, y_grad, x_grad_grad, W_grad_grad,
         b_grad_grad) = testing.array._as_noncontiguous_array(
             (x_data, W_data, b_data, y_grad, x_grad_grad, W_grad_grad,
              b_grad_grad))
    args = (x_data, W_data)
    grad_grads = (x_grad_grad, W_grad_grad)
    if b_data is not None:
        args += (b_data,)
        grad_grads += (b_grad_grad,)

    def f(*args):
        return F.convolution_nd(
            *args, stride=self.stride, pad=self.pad,
            cover_all=self.cover_all, dilate=self.dilate,
            groups=self.groups)

    with chainer.using_config('use_cudnn', use_cudnn):
        with chainer.using_config('autotune', self.autotune):
            gradient_check.check_double_backward(
                f, args, y_grad, grad_grads, dtype='d',
                atol=5e-3, rtol=5e-2)
def test_double_backward(self, backend_config):
    # TODO(niboshi): Support it
    if (backend_config.use_chainerx
            and numpy.float16 in (self.x_dtype, self.W_dtype)):
        raise unittest.SkipTest('ChainerX does not support float16')

    inputs = self.inputs
    grad_outputs = self.grad_outputs
    grad_grad_inputs = self.grad_grad_inputs
    if self.nobias:
        inputs = inputs[:-1]
        grad_grad_inputs = grad_grad_inputs[:-1]

    inputs = backend_config.get_array(inputs)
    grad_outputs = backend_config.get_array(grad_outputs)
    grad_grad_inputs = backend_config.get_array(grad_grad_inputs)

    if not self.c_contiguous:
        inputs = _to_noncontiguous(inputs)
        grad_outputs = _to_noncontiguous(grad_outputs)
        grad_grad_inputs = _to_noncontiguous(grad_grad_inputs)

    with backend_config:
        gradient_check.check_double_backward(
            self.forward, inputs, grad_outputs, grad_grad_inputs,
            **self.check_double_backward_options)
def check_double_backward(self, op, x_data, y_grad, y_grad_grad):
    def f(x):
        x = op(x)
        return x * x

    gradient_check.check_double_backward(
        f, x_data, y_grad, y_grad_grad, dtype=numpy.float64,
        atol=1e-7, rtol=1e-7)


def test_partial_double_backward_gpu(self):
    gradient_check.check_double_backward(
        functions.bilinear,
        (cuda.to_gpu(self.e1), cuda.to_gpu(self.e2), cuda.to_gpu(self.W)),
        cuda.to_gpu(self.gy),
        (cuda.to_gpu(self.gge1), cuda.to_gpu(self.gge2),
         cuda.to_gpu(self.ggW)),
        **self.check_backward_options)


def check_double_backward(self, x_data, y_grad, x_grad_grad):
    def f(x):
        return functions.leaky_relu(x, self.slope)

    gradient_check.check_double_backward(
        f, x_data, y_grad, x_grad_grad, dtype=numpy.float64,
        **self.check_double_backward_options)


def check_double_backward(self, x_data, W_data, gy_data, ggW_data):
    def f(W):
        y = chainer.functions.embed_id(x_data, W, self.ignore_label)
        return y * y

    gradient_check.check_double_backward(
        f, W_data, gy_data, ggW_data,
        **self.check_double_backward_options)
def check_double_backward(self, x0_data, x1_data, y_grad,
                          gx0_grad, gx1_grad):
    gradient_check.check_double_backward(
        functions.absolute_error, (x0_data, x1_data), y_grad,
        (gx0_grad, gx1_grad), eps=1e-2, **self.double_backward_options)


def check_double_backward(self, x_data, g_data, gg_data):
    def f(x):
        y = functions.rollaxis(x, self.axis, self.start)
        return y * y

    gradient_check.check_double_backward(
        f, x_data, g_data, gg_data, **self.check_double_backward_options)


def test_batch_double_backward_gpu(self):
    x_data = cuda.to_gpu(self.x)
    y_grad = cuda.to_gpu(self.gy)
    x_grad_grad = cuda.to_gpu(self.ggx)
    gradient_check.check_double_backward(
        self.det, x_data, y_grad, x_grad_grad,
        **self.check_double_backward_options)
def check_double_backward(self, data, grads, gg):
    if len(data) == 1:
        return

    gradient_check.check_double_backward(
        functions.broadcast, data, grads, gg, dtype=numpy.float64,
        **self.check_double_backward_options)
def check_double_backward(
        self, inputs, grad_outputs, grad_grad_inputs, backend_config):
    # TODO(sonots): Support it
    if backend_config.use_chainerx and self.dtype == numpy.float16:
        raise unittest.SkipTest('ChainerX does not support float16')

    # TODO(sonots): Cleanup to use testing.backend.get_array after
    # chainerx.asfortranarray is implemented.
    if (backend_config.use_cuda
            or (backend_config.use_chainerx
                and backend_config.chainerx_device.startswith('cuda:'))):
        inputs = cuda.to_gpu(inputs)
        grad_outputs = cuda.to_gpu(grad_outputs)
        grad_grad_inputs = cuda.to_gpu(grad_grad_inputs)

    if not self.c_contiguous:
        inputs = _to_fcontiguous(inputs)
        grad_outputs = _to_fcontiguous(grad_outputs)
        grad_grad_inputs = _to_fcontiguous(grad_grad_inputs)

    if backend_config.use_chainerx:
        inputs = chainer.backend.to_chainerx(inputs)
        grad_outputs = chainer.backend.to_chainerx(grad_outputs)
        grad_grad_inputs = chainer.backend.to_chainerx(grad_grad_inputs)

    def f(x):
        return functions.max_pooling_2d(
            x, 3, stride=2, pad=1, cover_all=self.cover_all)

    with backend_config:
        gradient_check.check_double_backward(
            f, inputs, grad_outputs, grad_grad_inputs, dtype='d',
            **self.check_double_backward_options)
def check_double_backward(self, x_data, y_grad, ggx_data):
    def f(x):
        return functions.get_item(x, self.slices)

    gradient_check.check_double_backward(
        f, (x_data,), y_grad, ggx_data, dtype='d',
        **self.check_double_backward_options)


def check_double_backward(self, inputs, grad_outputs, grad_grad_inputs,
                          backend_config):
    if backend_config.use_cuda:
        inputs = cuda.to_gpu(inputs)
        grad_outputs = cuda.to_gpu(grad_outputs)
        grad_grad_inputs = cuda.to_gpu(grad_grad_inputs)

    if not self.c_contiguous:
        inputs = _to_fcontiguous(inputs)
        grad_outputs = _to_fcontiguous(grad_outputs)
        grad_grad_inputs = _to_fcontiguous(grad_grad_inputs)

    def f(x):
        return functions.max_pooling_2d(
            x, 3, stride=2, pad=1, cover_all=self.cover_all)

    with backend_config:
        gradient_check.check_double_backward(
            f, inputs, grad_outputs, grad_grad_inputs, dtype='d',
            **self.check_double_backward_options)


def check_double_backward(self, x_data, y_grad, gx_grad):
    def f(x):
        x_min, x_max = self.x_min_max
        return functions.clip(x, x_min, x_max)

    gradient_check.check_double_backward(
        f, x_data, y_grad, gx_grad, dtype=numpy.float64, atol=1e-3)


def check_double_backward(self, x_data, g_data, gg_data):
    def f(x):
        return functions.rollaxis(x, self.axis, self.start)

    gradient_check.check_double_backward(
        f, x_data, g_data, gg_data, dtype='d',
        **self.check_double_backward_options)
def check_double_backward(self, x_data, y_data, z_grad,
                          x_grad_grad, y_grad_grad):
    if self.right_const:
        def op(x):
            return operator.matmul(x, y_data.astype(x.dtype))

        data = x_data,
        grad_grad = x_grad_grad,
    elif self.left_const:
        def op(y):
            return operator.matmul(x_data.astype(y.dtype), y)

        data = y_data,
        grad_grad = y_grad_grad,
    else:
        op = operator.matmul
        data = x_data, y_data
        grad_grad = x_grad_grad, y_grad_grad

    if self.dtype == numpy.float16:
        options = {'atol': 1e-3, 'rtol': 1e-2}
    else:
        options = {'atol': 1e-4, 'rtol': 1e-4}
    gradient_check.check_double_backward(
        op, data, z_grad, grad_grad, dtype=numpy.float64, **options)
def check_double_backward(
        self, x_data, y_data, z_grad, x_grad_grad, y_grad_grad):
    if self.right_const:
        def op(x):
            z = operator.matmul(x, y_data)
            return z * z

        data = x_data,
        grad_grad = x_grad_grad,
    elif self.left_const:
        def op(y):
            z = operator.matmul(x_data, y)
            return z * z

        data = y_data,
        grad_grad = y_grad_grad,
    else:
        def op(x, y):
            z = operator.matmul(x, y)
            return z * z

        data = x_data, y_data
        grad_grad = x_grad_grad, y_grad_grad

    if self.dtype == numpy.float16:
        options = {'atol': 1e-3, 'rtol': 1e-2}
    else:
        options = {'atol': 1e-4, 'rtol': 1e-4}
    gradient_check.check_double_backward(
        op, data, z_grad, grad_grad, dtype=numpy.float64, **options)
def check_double_backward(self, x_data, y_grad, x_grad_grad):
    def f(x):
        y = functions.tile(x, self.reps)
        return y * y

    gradient_check.check_double_backward(
        f, x_data, y_grad, x_grad_grad, **self.check_backward_options)
def check_double_backward(self, inputs, grad_outputs, grad_grad_inputs,
                          backend_config):
    if self.nobias:
        inputs = inputs[:-1]
        grad_grad_inputs = grad_grad_inputs[:-1]

    inputs = backend_config.get_array(inputs)
    grad_outputs = backend_config.get_array(grad_outputs)
    grad_grad_inputs = backend_config.get_array(grad_grad_inputs)

    if not self.c_contiguous:
        inputs = testing.array._as_noncontiguous_array(inputs)
        grad_outputs = testing.array._as_noncontiguous_array(grad_outputs)
        grad_grad_inputs = testing.array._as_noncontiguous_array(
            grad_grad_inputs)

    def f(*args):
        return F.convolution_2d(
            *args, stride=self.stride, pad=self.pad,
            cover_all=self.cover_all, dilate=self.dilate,
            groups=self.groups)

    with backend_config:
        gradient_check.check_double_backward(
            f, inputs, grad_outputs, grad_grad_inputs, dtype='d',
            atol=5e-3, rtol=5e-2)
def do_check():
    inputs = self._generate_inputs()
    outputs = self._forward_expected(inputs)
    grad_outputs = self._generate_grad_outputs(outputs)
    grad_grad_inputs = self._generate_grad_grad_inputs(inputs)

    # Drop ggx corresponding to non-differentiable inputs. The generated
    # `grad_grad_inputs` (the upstream gradients for the double backward
    # test) may contain `None` for omitted gradients; those entries must
    # be passed through to the gradient check unchanged.
    grad_grad_inputs = [
        ggx for ggx in grad_grad_inputs
        if ggx is None or ggx.dtype.kind == 'f']

    inputs = backend_config.get_array(inputs)
    grad_outputs = backend_config.get_array(grad_outputs)
    grad_grad_inputs = backend_config.get_array(grad_grad_inputs)
    inputs = self._to_noncontiguous_as_needed(inputs)
    grad_outputs = self._to_noncontiguous_as_needed(grad_outputs)
    grad_grad_inputs = (
        self._to_noncontiguous_as_needed(grad_grad_inputs))

    with backend_config:
        with FunctionTestError.raise_if_fail(
                'double backward is not implemented correctly'):
            gradient_check.check_double_backward(
                f, inputs, grad_outputs, grad_grad_inputs,
                dtype=self.numerical_grad_dtype,
                detect_nondifferentiable=self.dodge_nondifferentiable,
                **self.check_double_backward_options)
def check_double_backward(self, xs_data, y_grad, xs_grad_grad):
    def func(*xs):
        return functions.dstack(xs)

    gradient_check.check_double_backward(
        func, xs_data, y_grad, xs_grad_grad, dtype='d',
        **self.check_double_backward_options)
def do_check():
    inputs = self._generate_inputs()
    outputs = self._forward_expected(inputs)
    grad_outputs = self._generate_grad_outputs(outputs)
    grad_grad_inputs = self._generate_grad_grad_inputs(inputs)

    inputs = backend_config.get_array(inputs)
    grad_outputs = backend_config.get_array(grad_outputs)
    grad_grad_inputs = backend_config.get_array(grad_grad_inputs)
    inputs = self._to_noncontiguous_as_needed(inputs)
    grad_outputs = self._to_noncontiguous_as_needed(grad_outputs)
    grad_grad_inputs = (
        self._to_noncontiguous_as_needed(grad_grad_inputs))

    with backend_config:
        with FunctionTestError.raise_if_fail(
                'double backward is not implemented correctly'):
            gradient_check.check_double_backward(
                f, inputs, grad_outputs, grad_grad_inputs,
                dtype=numpy.float64,
                detect_nondifferentiable=self.dodge_nondifferentiable,
                **self.check_double_backward_options)
def check_double_backward(self, rx, ix, rg, ig, grx, gix):
    def f(rx, ix):
        ry, iy = chainer.functions.fft((rx, ix))
        return ry * ry, iy * iy

    gradient_check.check_double_backward(
        f, (rx, ix), (rg, ig), (grx, gix), dtype='d',
        atol=1e-2, rtol=1e-3)


def check_double_backward(self, x_data, y_grad, x_grad_grad,
                          use_cudnn='always'):
    with chainer.using_config('use_cudnn', use_cudnn):
        gradient_check.check_double_backward(
            functions.sigmoid, x_data, y_grad, x_grad_grad,
            dtype=numpy.float64, **self.check_double_backward_options)
def check_double_backward(self, args, y_grad, x_grad_grad):
    def func(*args_):
        return functions.layer_normalization(*args_, eps=self.eps)

    gradient_check.check_double_backward(
        func, args, y_grad, x_grad_grad, eps=1e-2,
        **self.check_double_backward_options)


def check_double_backward(self, x_data, y_grad, x_grad_grad):
    def f(x):
        return functions.elu(x, alpha=self.alpha)

    gradient_check.check_double_backward(
        f, x_data, y_grad, x_grad_grad, dtype=numpy.float64,
        **self.check_backward_options)
def check_double_backward(self, x_data, gy_data, ggx_data):
    gradient_check.check_double_backward(
        chainer.functions.tanh, x_data, gy_data, ggx_data,
        dtype=numpy.float64, **self.check_double_backward_options)


def check_double_backward(self, x_data, y_grad, x_grad_grad):
    gradient_check.check_double_backward(
        distributions.utils._modified_xlogx, x_data, y_grad, x_grad_grad,
        dtype=numpy.float64, **self.backward_options)


def check_double_backward(self, x_data, y_grad, x_grad_grad, axis=None):
    gradient_check.check_double_backward(
        lambda x: functions.logsumexp(x, axis), x_data, y_grad,
        x_grad_grad, dtype=numpy.float64,
        **self.check_double_backward_option)
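# ---------------------------------------------------------------------------
# A minimal, self-contained sketch of the pattern the snippets above share
# (assuming only NumPy and Chainer are available): wrap the function under
# test in a closure that fixes its non-array arguments, then pass the
# inputs, the upstream gradients (y_grad), and the second-order upstream
# gradients (x_grad_grad) to gradient_check.check_double_backward, which
# compares the analytic second-order gradients against numerically computed
# ones. The shapes, random data, and tolerances below are illustrative
# assumptions, not values taken from any test class above.
import numpy

from chainer import functions
from chainer import gradient_check


def example_double_backward_check():
    shape = (3, 4)
    x = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
    y_grad = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float32)
    x_grad_grad = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)

    def f(x):
        # The closure fixes the non-array `axis` argument, exactly as the
        # snippets above do for stride, pad, slices, and similar settings.
        return functions.logsumexp(x, axis=1)

    # Raises an error if the analytic and numerical second-order gradients
    # disagree beyond the given tolerances; dtype=numpy.float64 casts the
    # inputs up before numerical differentiation for better accuracy.
    gradient_check.check_double_backward(
        f, x, y_grad, x_grad_grad, dtype=numpy.float64,
        atol=1e-4, rtol=1e-3)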