def test_double_backward_chainerx_cpu(self):
    """Double-backward check with ChainerX CPU arrays, cuDNN disabled."""
    def to_chx_list(arrays):
        # Convert each array in the sequence to ChainerX, preserving order.
        return [backend.to_chx(a) for a in arrays]

    self.check_double_backward(
        to_chx_list([self.x, self.W, self.b]),
        to_chx_list([self.gy]),
        to_chx_list([self.ggx, self.ggW, self.ggb]),
        use_cudnn='never')
def test_double_backward_chainerx_cuda_nobias(self):
    """Double-backward check on CUDA-backed ChainerX arrays without bias."""
    def gpu_chx(arr):
        # Move to GPU first, then wrap as a ChainerX array.
        return backend.to_chx(cuda.to_gpu(arr))

    # Bias (b) and its second-order gradient (ggb) are omitted (None).
    self.check_double_backward(
        gpu_chx(self.x), gpu_chx(self.W), None,
        gpu_chx(self.gy), gpu_chx(self.ggx), gpu_chx(self.ggW), None)
def to_chx(self):
    """Converts parameter variables and persistent values to ChainerX
    without any copy.

    Non-registered attributes are left untouched. If such attributes must
    also be converted, the link implementation must override this method.

    Returns:
        self
    """
    if not chainerx.is_available():
        raise RuntimeError('ChainerX is not available.')

    if self._device.xp is chainerx:
        # Already on a ChainerX device; nothing to convert.
        return self

    attrs = self.__dict__
    # Parameters know how to convert themselves in place.
    for name in self._params:
        attrs[name].to_chx()
    # Persistent values: only array-likes are converted; scalars stay as-is.
    for name in self._persistent:
        value = attrs[name]
        if not numpy.isscalar(value):
            attrs[name] = backend.to_chx(value)

    self._device = (
        backend.ChainerxDevice.from_fallback_device(self._device))
    return self
def update_core_chainerx(self, param):
    """Updates the ChainerX parameter.

    This method can be overridden to implement custom update logic.
    The default implementation is to convert the parameter to a
    memory-shared NumPy/CuPy parameter and call the corresponding update
    method. See :meth:`update_core` for details.

    Args:
        param (~chainer.Variable): Variable to be updated.
    """
    grad_array = param.grad
    backend_name = param.array.device.backend.name
    if backend_name not in ('native', 'cuda'):
        raise RuntimeError(
            'Default implementation of Optimizer.update_core_chainerx is '
            'only provided for native or cuda backends (actual: {}). '
            'Override Optimizer.update_core_chainerx() to implement '
            'custom update logic.'.format(backend_name))

    # Convert state arrays to NumPy/CuPy. Keep (original, fallback) pairs
    # so the ChainerX views can be restored after the update.
    # (Fixed: the loop previously re-fetched self.state[state_name] even
    # though items() already yields the value.)
    chainerx_state_arrays = {}
    for state_name, st in self.state.items():
        if isinstance(st, chainerx.ndarray):
            fallback_arr = backend.from_chx(st)
            self.state[state_name] = fallback_arr
            chainerx_state_arrays[state_name] = (st, fallback_arr)

    # Create a temporary parameter with memory-shared NumPy/CuPy array.
    # If the ChainerX parameter has a cached NumPy/CuPy copy, use the
    # cache and avoid redundant conversion. Else, create the cache here
    # and use it.
    if param._chainerx_fallback_array is None:
        param._chainerx_fallback_array = backend.from_chx(param.array)

    temp_param = variable.Variable._init_unchecked(
        param._chainerx_fallback_array, is_chainerx_array=False)

    if grad_array is not None:
        temp_param._set_grad_without_check(backend.from_chx(grad_array))

    # Update
    self.update_core(temp_param)

    # Restore state arrays
    for state_name, (arr, fallback_arr) in chainerx_state_arrays.items():
        cur_arr = self.state[state_name]
        if cur_arr is not fallback_arr:
            # The optimizer altered the reference of the state, instead of
            # updating it in-place. We need to convert the new state back
            # to ChainerX.
            arr = backend.to_chx(cur_arr)
        self.state[state_name] = arr
def check_mix_xp(self, xp):
    """Applies the function to a mix of an xp array and a ChainerX array
    and verifies the output is a correct ChainerX array."""
    lhs = xp.random.randn(2, 3).astype(numpy.float32)
    rhs = xp.random.randn(2, 3).astype(numpy.float32)

    outputs = self.SimpleFunctionNode(xp).apply((lhs, backend.to_chx(rhs)))
    y = outputs[0]

    assert isinstance(y.array, chainerx.ndarray)
    # The result must equal the elementwise product computed on the host.
    chainerx.testing.assert_array_equal(
        backend.CpuDevice().send(lhs * rhs), y.array)
def test_double_backward_chainerx_cuda(self):
    """Double-backward check on CUDA-backed ChainerX arrays with bias."""
    def gpu_chx(arr):
        # Transfer to GPU, then expose as a ChainerX array.
        return backend.to_chx(cuda.to_gpu(arr))

    self.check_double_backward(
        gpu_chx(self.x), gpu_chx(self.W), gpu_chx(self.b),
        gpu_chx(self.gy), gpu_chx(self.ggx), gpu_chx(self.ggW),
        gpu_chx(self.ggb))
def test_to_chx(self, backend_config):
    """`backend.to_chx` must return a memory-shared chainerx.ndarray."""
    src = backend_config.get_array(numpy.ones((2, 3), numpy.float32))
    converted = backend.to_chx(src)

    assert isinstance(converted, chainerx.ndarray)

    device = backend_config.device
    if device.xp is chainerx:
        # Converting an array that is already ChainerX is a no-op.
        assert src is converted
    elif device.xp is cuda.cupy:
        # A CuPy array must stay on the same GPU after conversion.
        assert src.device.id == converted.device.index

    self.check_equal_memory_shared(src, converted)
def backward(self, target_input_indexes, grad_outputs):
    """Computes input gradients by calling the wrapped old-style
    ``Function.backward``.

    Args:
        target_input_indexes (tuple of int): Indices of inputs for which
            gradients are requested.
        grad_outputs (tuple of Variable): Gradients w.r.t. the outputs
            (entries may be None).

    Returns:
        tuple of Variable or None: Gradients w.r.t. the requested inputs.
    """
    # Fixed: removed the `inputs` local list that was filled in the loop
    # below but never read afterwards.
    retained_inputs = self.get_retained_inputs()
    # Scatter retained input arrays into their original positions;
    # non-retained inputs remain None.
    in_data = [None] * len(self.inputs)
    for retained, i_in in six.moves.zip(
            retained_inputs, self._input_indexes_to_retain):
        in_data[i_in] = None if retained is None else retained.array
    in_data = tuple(in_data)
    grad_out_data = tuple([None if grad is None else grad.data
                           for grad in grad_outputs])

    is_chainerx_fallback_mode = self._is_chainerx_fallback_mode
    if is_chainerx_fallback_mode:
        # Convert input and output gradients to numpy/cupy
        in_data = backend.from_chx(in_data)
        grad_out_data = backend.from_chx(grad_out_data)

    # Call Function.backward
    with cuda.get_device_from_array(*(in_data + grad_out_data)):
        if is_chainerx_fallback_mode:
            # Enable attribute fallback
            with function_node._chainerx_attribute_fallback(
                    self._function, self.chainerx_device):
                gxs = self._function.backward(in_data, grad_out_data)
        else:
            gxs = self._function.backward(in_data, grad_out_data)

    # Check gradients
    for x, gx in six.moves.zip(self.inputs, gxs):
        if gx is not None:
            variable._check_grad_type(self, x, True, gx)

    # Convert input gradients back to ChainerX
    if is_chainerx_fallback_mode:
        gxs = backend.to_chx(gxs)

    ret = []
    for i in target_input_indexes:
        if gxs[i] is None:
            g = None
        else:
            # Intentionally not passing requires_grad=False so that
            # backprop routines can raise an error when a further backprop
            # is attempted against this gradient variable.
            g = variable.Variable(gxs[i])
            if g.xp is not chainerx:
                g.node._old_style_grad_generator = self._function.label
        ret.append(g)
    return tuple(ret)
def forward(self, inputs):
    """Computes batch normalization over ``x`` and updates running stats.

    Args:
        inputs: Tuple of arrays ``(x, gamma, beta)``.

    Returns:
        Tuple containing the normalized output array.
    """
    # x and gamma are needed again in the backward pass.
    self.retain_inputs((0, 1))
    x, gamma, beta = inputs
    xp = backend.get_array_module(x)
    if self.running_mean is None:
        # Lazily initialize running statistics on the first call.
        self.running_mean = xp.zeros_like(gamma, dtype=x.dtype)
        self.running_var = xp.zeros_like(gamma, dtype=x.dtype)

    self.axis = _compute_axis(x.ndim, gamma.ndim, self.axis)
    self.key_axis = _compute_key_axis(x.ndim, gamma.ndim, self.axis)

    # Degenerate batch: every aggregation axis has extent 1, so the
    # normalized output is identically zero. Warn accordingly.
    if all(x.shape[i] == 1 for i in self.axis):
        if 0 in self.axis:
            warnings.warn(
                'A batch with no more than one sample has been given'
                ' to F.batch_normalization. F.batch_normalization'
                ' will always output a zero tensor for such batches.'
                ' This could be caused by incorrect configuration in'
                ' your code (such as running evaluation while'
                ' chainer.config.train=True),'
                ' but could also happen in the last batch of training'
                ' if non-repeating iterator is used.',
                UserWarning)
        else:
            warnings.warn(
                'F.batch_normalization received a batch with single'
                ' dimensions along all axes that are used for aggregating'
                ' statistics. F.batch_normalization'
                ' will always output a zero tensor for such batches.',
                UserWarning)

    # TODO(niboshi): Refactor calculation of expander and axis into a
    # function and call it just before they are used.

    # expander inserts singleton dimensions to gamma and beta so that they
    # can be broadcasted with x.
    expander = [None for _ in range(x.ndim)]
    for i in self.key_axis:
        expander[i] = slice(None)
    expander = tuple(expander)
    self.expander = expander

    self.mode = _BNMode(x, gamma, self.key_axis)
    self.use_cudnn = self.mode.can_use_cudnn(xp)
    self.use_ideep = self.mode.can_use_ideep()

    if self.use_ideep:
        # TODO(niboshi): Refactor iDeep part into a separate method
        expand_dim = False
        if x.ndim == 2:
            # iDeep expects a 4-D input; add singleton H and W axes.
            expand_dim = True
            x = x[:, :, None, None]

        y, self.mean, self.var, self.inv_std = (
            intel64.ideep.batchNormalization.Forward(
                intel64.ideep.array(x.astype(gamma.dtype, copy=False)),
                intel64.ideep.array(gamma),
                intel64.ideep.array(beta),
                None,
                None,
                self.eps))
        y = y.astype(x.dtype, copy=False)

        m = x.size // gamma.size
        adjust = m / max(m - 1., 1.)

        # Update running_mean
        if isinstance(self.running_mean, intel64.ideep.mdarray):
            self.running_mean.inplace_axpby(
                self.decay, (1 - self.decay), self.mean)
        else:
            self.running_mean *= self.decay
            self.running_mean += self.mean * (1 - self.decay)

        # Update running_var
        if isinstance(self.running_var, intel64.ideep.mdarray):
            self.running_var.inplace_axpby(
                self.decay, (1 - self.decay), self.var * adjust)
        else:
            self.running_var *= self.decay
            self.running_var += self.var * adjust * (1 - self.decay)

        if expand_dim:
            y = numpy.squeeze(y, axis=(2, 3))

    elif self.use_cudnn:
        # self.mean and self.inv_std are used as buffers to save
        # intermediate results computed during forward pass. These buffers
        # are used to speed-up backward pass.
        y, self.mean, self.inv_std = (
            cudnn.batch_normalization_forward_training(
                x, gamma, beta, self.running_mean, self.running_var,
                None, None, self.eps, self.decay,
                self.mode.is_for_conv2d, self.mode.get_cudnn_mode(),
                chainer.is_debug()))
    else:
        # Generic CPU and GPU implementation

        # Fixed: compute statistics in the promoted dtype of x and gamma
        # (instead of gamma.dtype alone) so that mixed-precision inputs,
        # e.g. float16 x with float32 gamma, do not lose precision. This
        # matches the sibling implementation of this function.
        interm_dtype = numpy.promote_types(x.dtype, gamma.dtype)

        gamma = gamma[expander].astype(interm_dtype, copy=False)
        beta = beta[expander].astype(interm_dtype, copy=False)
        self.mean = x.mean(axis=self.axis, dtype=interm_dtype)
        var = x.var(axis=self.axis, dtype=interm_dtype)
        if xp is numpy:
            self.inv_std = numpy.reciprocal(numpy.sqrt(
                var + self.eps, dtype=interm_dtype))
        else:
            self.inv_std = cuda.cupyx.rsqrt(var + self.eps,
                                            dtype=interm_dtype)

        y = _apply_bn_fwd(xp, x, self.mean[expander],
                          self.inv_std[expander], gamma, beta)

        # Update running statistics
        m = x.size // gamma.size
        adjust = m / max(m - 1., 1.)  # unbiased estimation

        xp = backend.get_array_module(self.running_mean, self.running_var)
        if xp is chainerx:
            # ChainerX arrays do not support the in-place updates below;
            # temporarily fall back to NumPy/CuPy views.
            self.running_mean, self.running_var = backend.from_chx(
                (self.running_mean, self.running_var))

        self.running_mean *= self.decay
        self.running_mean += (1 - self.decay) * self.mean
        self.running_var *= self.decay
        self.running_var += (1 - self.decay) * adjust * var

        if xp is chainerx:
            self.running_mean = backend.to_chx(self.running_mean)
            self.running_var = backend.to_chx(self.running_var)

    return y,
def update_core_chainerx(self, param):
    """Updates the ChainerX parameter.

    This method can be overridden to implement custom update logic.
    The default implementation is to convert the parameter to a
    memory-shared NumPy/CuPy parameter and call the corresponding update
    method. See :meth:`update_core` for details.

    Args:
        param (~chainer.Variable): Variable to be updated.
    """
    grad_array = param.grad
    backend_name = param.array.device.backend.name
    # Dispatch to the CPU or GPU update routine based on the ChainerX
    # backend of the parameter.
    if backend_name == 'native':
        update_core = self.update_core_cpu
    elif backend_name == 'cuda':
        update_core = self.update_core_gpu
    else:
        raise RuntimeError(
            'Default implementation of Optimizer.update_core_chainerx is '
            'only provided for native or cuda backends (actual: {}). '
            'Override Optimizer.update_core_chainerx() to implement '
            'custom update logic.'.format(backend_name))

    # Convert state arrays to NumPy/CuPy. Keep (original, fallback) pairs
    # so the ChainerX views can be restored after the update.
    # (Fixed: the loop previously re-fetched self.state[state_name] even
    # though items() already yields the value.)
    chainerx_state_arrays = {}
    for state_name, st in self.state.items():
        if isinstance(st, chainerx.ndarray):
            fallback_arr = backend.from_chx(st)
            self.state[state_name] = fallback_arr
            chainerx_state_arrays[state_name] = (st, fallback_arr)

    # Create a temporary parameter with memory-shared NumPy/CuPy array.
    # If the ChainerX parameter has a cached NumPy/CuPy copy, use the
    # cache and avoid redundant conversion. Else, create the cache here
    # and use it.
    if param._chainerx_fallback_array is None:
        param._chainerx_fallback_array = backend.from_chx(param.array)

    temp_param = variable.Variable._init_unchecked(
        param._chainerx_fallback_array, is_chainerx_array=False)

    if grad_array is not None:
        temp_param._set_grad_without_check(backend.from_chx(grad_array))

    # Update
    update_core(temp_param)

    # Restore state arrays
    for state_name, (arr, fallback_arr) in chainerx_state_arrays.items():
        cur_arr = self.state[state_name]
        if cur_arr is not fallback_arr:
            # The optimizer altered the reference of the state, instead of
            # updating it in-place. We need to convert the new state back
            # to ChainerX.
            arr = backend.to_chx(cur_arr)
        self.state[state_name] = arr
def test_double_backward_chainerx_native_nobias(self):
    """Double-backward check on native ChainerX arrays without bias."""
    conv = backend.to_chx
    # Bias (b) and its second-order gradient (ggb) are omitted (None).
    self.check_double_backward(
        conv(self.x), conv(self.W), None,
        conv(self.gy), conv(self.ggx), conv(self.ggW), None)
def test_double_backward_chainerx_native(self):
    """Double-backward check on native ChainerX arrays with bias."""
    conv = backend.to_chx
    self.check_double_backward(
        conv(self.x), conv(self.W), conv(self.b),
        conv(self.gy), conv(self.ggx), conv(self.ggW), conv(self.ggb))
def test_forward_chainerx_native(self):
    """Forward check on native (CPU) ChainerX arrays."""
    x, t = (backend.to_chx(a) for a in (self.x, self.t))
    self.check_forward(x, t)
def test_forward_chainerx_cuda(self):
    """Forward check on CUDA-backed ChainerX arrays."""
    def gpu_chx(arr):
        # GPU transfer first, then ChainerX conversion.
        return backend.to_chx(cuda.to_gpu(arr))

    self.check_forward(gpu_chx(self.x), gpu_chx(self.t))
def test_backward_chainerx_native(self):
    """Backward check on native ChainerX arrays with bias."""
    conv = backend.to_chx
    self.check_backward(
        conv(self.x), conv(self.W), conv(self.b), conv(self.gy))
def test_backward_chainerx_native_nobias(self):
    """Backward check on native ChainerX arrays without bias."""
    conv = backend.to_chx
    # The bias argument is None in the no-bias variant.
    self.check_backward(conv(self.x), conv(self.W), None, conv(self.gy))
def test_forward_chainerx_cuda_nobias(self):
    """Forward-consistency check with a CUDA→ChainerX converter, no bias."""
    def converter(xs):
        # Each array goes to the GPU before the ChainerX conversion.
        return backend.to_chx(cuda.to_gpu(xs))

    self.check_forward_consistency(converter, nobias=True)
def visit_array(self, arr):
    """Converts a raw array encountered during traversal to ChainerX."""
    # Only recognized raw array types are expected here.
    assert isinstance(arr, chainer.get_array_types())
    converted = backend.to_chx(arr)
    return converted
def test_backward_chainerx_cpu(self):
    """Backward check on CPU-backed ChainerX arrays."""
    to_chx = backend.to_chx
    self.check_backward(
        to_chx(self.x), to_chx(self.W), to_chx(self.b), to_chx(self.gy))
def forward(self, inputs):
    # Batch-normalizes x with gamma/beta and updates running statistics.
    # inputs is the tuple (x, gamma, beta); returns the 1-tuple (y,).

    # x and gamma are needed again in the backward pass.
    self.retain_inputs((0, 1))
    x, gamma, beta = inputs
    xp = backend.get_array_module(x)
    if self.running_mean is None:
        # Lazily initialize running statistics on the first call.
        self.running_mean = xp.zeros_like(gamma, dtype=x.dtype)
        self.running_var = xp.zeros_like(gamma, dtype=x.dtype)

    self.axis = _compute_axis(x.ndim, gamma.ndim, self.axis)
    self.key_axis = _compute_key_axis(x.ndim, gamma.ndim, self.axis)

    # Degenerate batch: every aggregation axis has extent 1, so the
    # normalized output is identically zero. Warn accordingly.
    if all(x.shape[i] == 1 for i in self.axis):
        if 0 in self.axis:
            warnings.warn(
                'A batch with no more than one sample has been given'
                ' to F.batch_normalization. F.batch_normalization'
                ' will always output a zero tensor for such batches.'
                ' This could be caused by incorrect configuration in'
                ' your code (such as running evaluation while'
                ' chainer.config.train=True),'
                ' but could also happen in the last batch of training'
                ' if non-repeating iterator is used.',
                UserWarning)
        else:
            warnings.warn(
                'F.batch_normalization received a batch with single'
                ' dimensions along all axes that are used for aggregating'
                ' statistics. F.batch_normalization'
                ' will always output a zero tensor for such batches.',
                UserWarning)

    # TODO(niboshi): Refactor calculation of expander and axis into a
    # function and call it just before they are used.

    # expander inserts singleton dimensions to gamma and beta so that they
    # can be broadcasted with x.
    expander = [None for _ in range(x.ndim)]
    for i in self.key_axis:
        expander[i] = slice(None)
    expander = tuple(expander)
    self.expander = expander

    self.mode = _BNMode(x, gamma, self.key_axis)
    self.use_cudnn = self.mode.can_use_cudnn(xp)
    self.use_ideep = self.mode.can_use_ideep()

    if self.use_ideep:
        # TODO(niboshi): Refactor iDeep part into a separate method
        expand_dim = False
        if x.ndim == 2:
            # iDeep expects a 4-D input; add singleton trailing axes.
            expand_dim = True
            x = x[:, :, None, None]

        y, self.mean, self.var, self.inv_std = (
            intel64.ideep.batchNormalization.Forward(
                intel64.ideep.array(x.astype(gamma.dtype, copy=False)),
                intel64.ideep.array(gamma),
                intel64.ideep.array(beta),
                None,
                None,
                self.eps))
        y = y.astype(x.dtype, copy=False)

        # m is the number of elements aggregated per channel; adjust is
        # the bias-correction factor m / (m - 1) for the variance.
        m = x.size // gamma.size
        adjust = m / max(m - 1., 1.)

        # Update running_mean
        if isinstance(self.running_mean, intel64.ideep.mdarray):
            # iDeep mdarray supports a fused in-place a*x + b*y update.
            self.running_mean.inplace_axpby(self.decay, (1 - self.decay),
                                            self.mean)
        else:
            self.running_mean *= self.decay
            self.running_mean += self.mean * (1 - self.decay)

        # Update running_var
        if isinstance(self.running_var, intel64.ideep.mdarray):
            self.running_var.inplace_axpby(self.decay, (1 - self.decay),
                                           self.var * adjust)
        else:
            self.running_var *= self.decay
            self.running_var += self.var * adjust * (1 - self.decay)

        if expand_dim:
            # Undo the 4-D expansion applied above.
            y = numpy.squeeze(y, axis=(2, 3))

    elif self.use_cudnn:
        # self.mean and self.inv_std are used as buffers to save
        # intermediate results computed during forward pass. These buffers
        # are used to speed-up backward pass.
        y, self.mean, self.inv_std = (
            cudnn.batch_normalization_forward_training(
                x, gamma, beta, self.running_mean, self.running_var,
                None, None, self.eps, self.decay,
                self.mode.is_for_conv2d, self.mode.get_cudnn_mode(),
                chainer.is_debug()))
    else:
        # Generic CPU and GPU implementation

        # Statistics are computed in the promoted dtype of x and gamma so
        # low-precision inputs (e.g. float16 x) do not lose precision.
        interm_dtype = numpy.promote_types(x.dtype, gamma.dtype)

        gamma = gamma[expander].astype(interm_dtype, copy=False)
        beta = beta[expander].astype(interm_dtype, copy=False)
        self.mean = x.mean(axis=self.axis, dtype=interm_dtype)
        var = x.var(axis=self.axis, dtype=interm_dtype)
        if xp is numpy:
            self.inv_std = numpy.reciprocal(
                numpy.sqrt(var + self.eps, dtype=interm_dtype))
        else:
            self.inv_std = cuda.cupyx.rsqrt(var + self.eps,
                                            dtype=interm_dtype)

        y = _apply_bn_fwd(xp, x, self.mean[expander],
                          self.inv_std[expander], gamma, beta)
        # Update running statistics
        m = x.size // gamma.size
        adjust = m / max(m - 1., 1.)  # unbiased estimation

        xp = backend.get_array_module(self.running_mean, self.running_var)
        if xp is chainerx:
            # ChainerX arrays are swapped for NumPy/CuPy fallbacks so the
            # in-place updates below work; converted back afterwards.
            self.running_mean, self.running_var = backend.from_chx(
                (self.running_mean, self.running_var))

        self.running_mean *= self.decay
        self.running_mean += (1 - self.decay) * self.mean
        self.running_var *= self.decay
        self.running_var += (1 - self.decay) * adjust * var

        if xp is chainerx:
            self.running_mean = backend.to_chx(self.running_mean)
            self.running_var = backend.to_chx(self.running_var)

    return y,
def test_backward_chainerx_gpu(self):
    """Backward check on ChainerX arrays moved to the 'cuda' device."""
    def on_cuda(arr):
        # Convert to ChainerX, then transfer to the CUDA device.
        return backend.to_chx(arr).to_device('cuda')

    self.check_backward(
        on_cuda(self.x), on_cuda(self.W), on_cuda(self.b),
        on_cuda(self.gy))
def test_double_backward_chainerx(self):
    """Double-backward check on ChainerX-converted fixtures."""
    xs, g, ggs = (backend.to_chx(a) for a in (self.xs, self.g, self.ggs))
    self.check_double_backward(xs, g, ggs)