def update_core_chainerx(self, param):
    """Updates the ChainerX parameter.

    This method can be overridden to implement custom update logic.
    The default implementation is to convert the parameter to a
    memory-shared NumPy/CuPy parameter and call the corresponding update
    method. See :meth:`update_core` for details.

    Args:
        param (~chainer.Variable): Variable to be updated.

    """
    grad_array = param.grad
    backend_name = param.array.device.backend.name
    if backend_name not in ('native', 'cuda'):
        raise RuntimeError(
            'Default implementation of Optimizer.update_core_chainerx is '
            'only provided for native or cuda backends (actual: {}). '
            'Override Optimizer.update_core_chainerx() to implement '
            'custom update logic.'.format(backend_name))

    # Convert state arrays to NumPy/CuPy
    chainerx_state_arrays = {}
    for state_name, st in self.state.items():
        if isinstance(st, chainerx.ndarray):
            fallback_arr = backend.from_chx(st)
            self.state[state_name] = fallback_arr
            chainerx_state_arrays[state_name] = (st, fallback_arr)

    # Create a temporary parameter with memory-shared NumPy/CuPy array.
    # If the ChainerX parameter has a cached NumPy/CuPy copy, use the
    # cache and avoid redundant conversion. Else, create the cache here
    # and use it.
    if param._chainerx_fallback_array is None:
        param._chainerx_fallback_array = backend.from_chx(param.array)

    temp_param = variable.Variable._init_unchecked(
        param._chainerx_fallback_array, is_chainerx_array=False)

    if grad_array is not None:
        temp_param._set_grad_without_check(backend.from_chx(grad_array))

    # Update
    self.update_core(temp_param)

    # Restore state arrays
    for state_name, (arr, fallback_arr) in chainerx_state_arrays.items():
        cur_arr = self.state[state_name]
        if cur_arr is not fallback_arr:
            # The optimizer altered the reference of the state, instead of
            # updating it in-place. We need to convert the new state back
            # to ChainerX.
            arr = backend.to_chx(cur_arr)
        self.state[state_name] = arr
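# Hedged sketch (not Chainer source): the default implementation above
# relies on backend.from_chx returning a *memory-shared* NumPy array for
# the 'native' backend, so in-place updates made by update_core are
# visible through the original ChainerX parameter.
import chainerx
from chainer import backend

chx_arr = chainerx.zeros((2, 3), dtype=chainerx.float32, device='native')
np_arr = backend.from_chx(chx_arr)  # NumPy view over the same memory
np_arr += 1.0                       # in-place update, as an optimizer would do
assert float(chx_arr.sum()) == 6.0  # the update is visible via ChainerX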
def backward(self, target_input_indexes, grad_outputs):
    retained_inputs = self.get_retained_inputs()
    inputs = [None] * len(self.inputs)
    in_data = [None] * len(self.inputs)
    for retained, i_in in six.moves.zip(
            retained_inputs, self._input_indexes_to_retain):
        inputs[i_in] = retained
        in_data[i_in] = None if retained is None else retained.array
    in_data = tuple(in_data)

    grad_out_data = tuple(
        [None if grad is None else grad.array for grad in grad_outputs])

    is_chainerx_fallback_mode = self._is_chainerx_fallback_mode
    if is_chainerx_fallback_mode:
        # Convert input and output gradients to numpy/cupy
        in_data = backend.from_chx(in_data)
        grad_out_data = backend.from_chx(grad_out_data)

    # Call Function.backward
    with chainer.using_device(
            backend.get_device_from_array(*(in_data + grad_out_data))):
        if is_chainerx_fallback_mode:
            # Enable attribute fallback
            with function_node._chainerx_attribute_fallback(
                    self._function, self.chainerx_device):
                gxs = self._function.backward(in_data, grad_out_data)
        else:
            gxs = self._function.backward(in_data, grad_out_data)

    # Check gradients
    for x, gx in six.moves.zip(self.inputs, gxs):
        if gx is not None:
            variable._check_grad_type(self, x, True, gx)

    # Convert input gradients back to ChainerX
    if is_chainerx_fallback_mode:
        gxs = backend.to_chx(gxs)

    ret = []
    for i in target_input_indexes:
        if gxs[i] is None:
            g = None
        else:
            # Intentionally not passing requires_grad=False so that
            # backprop routines can raise an error when a further backprop
            # is attempted against this gradient variable.
            g = variable.Variable(gxs[i])
            if g.xp is not chainerx:
                g.node._old_style_grad_generator = self._function.label
        ret.append(g)

    return tuple(ret)
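# Hedged sketch (not Chainer source): backend.from_chx recurses into
# tuples and passes None through unchanged, which is what lets the tuples
# of input data and gradients above be converted in a single call.
import chainerx
from chainer import backend

grad_out_data = (chainerx.ones((2,), dtype=chainerx.float32), None)
fallback = backend.from_chx(grad_out_data)
assert isinstance(fallback, tuple)
assert fallback[1] is None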
def forward_cpu(self, x):
    func = self.func
    ndim = func.ndim
    ksize = func.ksize
    stride = func.stride
    pad = func.pad
    cover_all = func.cover_all
    indexes = backend.from_chx(func.indexes)

    col = conv_nd.im2col_nd_cpu(
        x[0], ksize, stride, pad, pval=-float('inf'), cover_all=cover_all)
    n, c = col.shape[:2]
    mid = (len(col.shape) - 2) // 2 + 2
    ksize = col.shape[2:mid]
    outs = col.shape[mid:]

    # (n, c, k_1 * k_2 * ... * k_N, out_1, out_2, ..., out_N)
    ksize_total = functools.reduce(mul, ksize)
    col_shape = (n, c) + (ksize_total,) + outs
    col = col.reshape(col_shape)

    # (n, c, out_1, ..., out_N, k_1 * ... * k_N)
    col_indexes = (0, 1) + tuple(six.moves.range(3, 3 + ndim)) + (2,)
    col = col.transpose(col_indexes)
    col = col.reshape(-1, ksize_total)

    indexes = indexes.ravel()
    col = col[numpy.arange(len(indexes)), indexes]
    return col.reshape((n, c) + outs),
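# Hedged sketch (not Chainer source) of the row-wise gather used above:
# after the reshape, each row of col holds one pooling window, and
# col[numpy.arange(len(indexes)), indexes] picks the argmax element of
# every window in a single fancy-indexing step.
import numpy

col = numpy.array([[10., 20., 30.],
                   [40., 50., 60.]])
indexes = numpy.array([2, 0])  # argmax position within each window
picked = col[numpy.arange(len(indexes)), indexes]
assert (picked == numpy.array([30., 40.])).all()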
def forward_gpu(self, gy):
    func = self.func
    if func.is_cudnn_used:
        return func.backward_cudnn(gy)

    ndim = func.ndim
    ksize = func.ksize
    stride = func.stride
    pad = func.pad
    in_shape = func._in_shape
    in_dtype = func._in_dtype
    indexes = backend.from_chx(func.indexes)

    n, c = in_shape[:2]
    dims = in_shape[2:]
    ys = gy[0].shape[2:]
    gx = cuda.cupy.empty(in_shape, in_dtype)

    in_params, out_params, operation, name = \
        max_pooling_nd_kernel.MaxPoolingNDKernelBackward.generate(ndim)
    cuda.elementwise(in_params, out_params, operation, name)(
        gy[0].reduced_view(), indexes.reduced_view(),
        *(dims + ys + ksize + stride + pad + (gx,)))

    return gx,
def forward_cpu(self, inputs):
    class_weight = backend.from_chx(self.class_weight)

    self.retain_inputs((0, 1))
    x, t = inputs
    if chainer.is_debug():
        _check_input_values(x, t, self.ignore_label)

    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = numpy.exp(log_y)
    if class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= _broadcast_to(class_weight.reshape(shape), x.shape)
    log_yd = numpy.rollaxis(log_y, 1)
    log_yd = log_yd.reshape(len(log_yd), -1)
    t_valid = t != self.ignore_label
    t = t * t_valid
    log_p = log_yd[t.ravel(), numpy.arange(t.size)]
    log_p *= t_valid.ravel()
    if self.reduce == 'mean':
        # deal with the case where the SoftmaxCrossEntropy is
        # unpickled from the old version
        if self.normalize:
            count = t_valid.sum()
        else:
            count = len(x)
        self._coeff = 1.0 / max(count, 1)
        y = log_p.sum(keepdims=True) * (-self._coeff)
        return y.reshape(()),
    else:
        return -log_p.reshape(t.shape),
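# Hedged sketch (not Chainer source) of the ignore_label masking above,
# assuming ignore_label == -1: invalid targets are clamped to index 0 so
# the gather stays in bounds, and their log-probabilities are zeroed
# afterwards so they do not contribute to the loss.
import numpy

ignore_label = -1
t = numpy.array([2, ignore_label, 0])
t_valid = t != ignore_label   # [True, False, True]
t = t * t_valid               # -1 becomes 0, safe for fancy indexing
log_p = numpy.array([-0.1, -0.2, -0.3])
log_p *= t_valid              # ignored position contributes zero
assert log_p[1] == 0.0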
def _chainerx_apply_fallback_preprocess(self, in_data, inputs):
    chainerx_in_data = in_data
    in_data = []
    device = None
    for data, x in six.moves.zip(chainerx_in_data, inputs):
        if data is None:
            fallback_data = None
        else:
            # Use the cached fallback arrays as inputs if they exist.
            x_is_variable = isinstance(x, variable.Variable)
            if x_is_variable and x._chainerx_fallback_array is not None:
                fallback_data = x._chainerx_fallback_array
                if device is None:
                    device = x.device
            else:
                fallback_data = backend.from_chx(data)
                if device is None:
                    device = backend.ChainerxDevice(data.device)

                # Update the fallback cache if possible.
                if x_is_variable:
                    x._chainerx_fallback_array = fallback_data

        in_data.append(fallback_data)

    in_data = tuple(in_data)
    return chainerx_in_data, in_data, device
def forward_gpu(self, gys):
    if self._used_cudnn:
        x, = self.apoolnd.get_retained_inputs()
        return self.apoolnd.backward_gpu((x.data,), gys)

    is_pad_value_none = self.pad_value is None

    gy, = gys
    n, c = self._in_shape[:2]
    idims = self._in_shape[2:]
    odims = gy.shape[2:]
    if is_pad_value_none:
        coeff = self.apoolnd.coeff
        # This conversion from chainerx to cupy exists here for
        # double backward of chainerx on cuda.
        coeff = backend.from_chx(coeff)
        gy *= coeff
    gx = cuda.cupy.empty(self._in_shape, self._in_dtype)

    in_params, out_params, operation, name = \
        average_pooling_nd_kernel.AveragePoolingNDKernelBackward.generate(
            self.ndim)
    cuda.elementwise(in_params, out_params, operation, name)(
        gy.reduced_view(),
        *(idims + odims + self.ksize + self.stride + self.pad + (gx,)))

    if not is_pad_value_none:
        gx /= functools.reduce(operator.mul, self.ksize)
    return gx,
def _make_samples(self, t):
    size = int(t.shape[0])
    # first one is the positive, and others are sampled negatives
    samples = self.sampler((size, self.sample_size + 1))
    samples = backend.from_chx(samples)
    samples[:, 0] = t
    return samples
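# Hedged sketch (hypothetical uniform sampler; the real sampler is
# supplied to the link, e.g. a unigram-based WalkerAlias sampler) of the
# sample layout produced above: column 0 holds the positive target id and
# the remaining sample_size columns hold negative samples.
import numpy

def sampler(shape):
    return numpy.random.randint(0, 1000, size=shape).astype(numpy.int32)

sample_size = 5
t = numpy.array([3, 7], dtype=numpy.int32)    # positive target ids
samples = sampler((t.shape[0], sample_size + 1))
samples[:, 0] = t                             # positives first
assert (samples[:, 0] == t).all()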
def forward_gpu(self, inputs):
    t = backend.from_chx(self.t)  # Workaround for ChainerX.

    gx = cuda.cupy.zeros(self.shape, self.dtype)
    gx = cuda.elementwise(
        'S t, T gloss', 'raw T gx',
        'int ind[] = {i, t}; gx[ind] = gloss;',
        'getitem_bwd')(t, inputs[0], gx)
    return gx,
def forward_gpu(self, inputs):
    class_weight = backend.from_chx(self.class_weight)

    self.retain_inputs((0, 1))
    cupy = cuda.cupy
    x, t = inputs
    if chainer.is_debug():
        _check_input_values(x, t, self.ignore_label)

    if x.size == 0:
        y = cupy.zeros(t.shape, dtype=x.dtype)
        if self.cache_score:
            self.y = y
        if self.reduce == 'mean':
            return y.sum(),
        else:
            return y,
    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = cupy.exp(log_y)
    if class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= cupy.broadcast_to(class_weight.reshape(shape), x.shape)
    log_y = cupy.rollaxis(log_y, 1, log_y.ndim)

    if self.reduce == 'mean':
        # Reduction is performed in a promoted dtype
        reduc_dtype = _reduction_dtype(x.dtype)
        if self.normalize:
            count = (t != self.ignore_label).sum(dtype=reduc_dtype)
            count = cupy.maximum(1, count)
            coeff = 1. / count
        else:
            coeff = cupy.array(1. / max(1, len(t)), dtype=reduc_dtype)
        self._coeff = coeff

        ret = cuda.reduce(
            'S t, raw T log_y, int32 n_channel, raw U coeff, '
            'S ignore_label',
            'U out',
            't == ignore_label ? T(0) : log_y[_j * n_channel + t]',
            'a + b', 'out = static_cast<U>(a * -coeff[0])', '0',
            'crossent_fwd')(t, log_y.reduced_view(), log_y.shape[-1],
                            self._coeff, self.ignore_label)
        ret = ret.astype(log_y.dtype, copy=False)
    else:
        ret = cuda.elementwise(
            'S t, raw T log_y, int32 n_channel, T ignore', 'T out',
            '''
            if (t == ignore) {
              out = 0;
            } else {
              out = -log_y[i * n_channel + t];
            }
            ''',
            'softmax_crossent_no_reduce_fwd')(
                t, log_y.reduced_view(), log_y.shape[-1], self.ignore_label)
        ret = ret.reshape(t.shape)
    return ret,
def getattribute(self, name):
    value = sup.__getattribute__(name)
    if isinstance(value, chainerx.ndarray):
        fallback_arr = fallback_array_cache.get(name)
        if fallback_arr is None:
            fallback_arr = backend.from_chx(value)
            fallback_array_cache[name] = fallback_arr
        return fallback_arr
    return value
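# Hedged, self-contained sketch (FallbackProxy is hypothetical, not part
# of Chainer) of the same lazy caching pattern: a ChainerX attribute is
# converted once on first access and the fallback array is reused for
# every later access.
import chainerx
from chainer import backend

class FallbackProxy(object):

    def __init__(self, target):
        self._target = target
        self._cache = {}  # attribute name -> fallback NumPy/CuPy array

    def __getattr__(self, name):
        value = getattr(self._target, name)
        if isinstance(value, chainerx.ndarray):
            if name not in self._cache:
                self._cache[name] = backend.from_chx(value)
            return self._cache[name]
        return value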
def output_data(self):
    """A tuple of the retained output arrays.

    It has the same length as the :attr:`outputs`. Elements that are
    not retained are set to ``None``.

    """
    if self.node._is_chainerx_fallback_mode:
        return backend.from_chx(self.node.output_data)
    return self.node.output_data
def forward(self, axis, gamma, x, x_layout, xp, expander,
            beta, eps, decay, running_mean, running_var):
    interm_dtype = numpy.promote_types(x.dtype, gamma.dtype)

    gamma = gamma[expander].astype(interm_dtype, copy=False)
    beta = beta[expander].astype(interm_dtype, copy=False)

    mean, var = self.get_mean_and_var(axis, gamma, x, xp, interm_dtype)

    if xp is numpy:
        inv_std = numpy.reciprocal(
            numpy.sqrt(var + eps, dtype=interm_dtype))
    else:
        inv_std = cuda.cupyx.rsqrt(var + eps, dtype=interm_dtype)

    y = _apply_bn_fwd(
        xp, x, mean[expander], inv_std[expander], gamma, beta)

    # Update running statistics if given
    if running_mean is not None:
        m = x.size // gamma.size
        adjust = m / max(m - 1., 1.)  # unbiased estimation

        xp = backend.get_array_module(running_mean, running_var)

        if xp is chainerx:
            running_mean, running_var = backend.from_chx(
                (running_mean, running_var))

        if xp is numpy:
            running_mean *= decay
            running_mean += (1 - decay) * mean
            running_var *= decay
            running_var += (1 - decay) * adjust * var
        else:
            # running_mean and running_var have the same dtype as x,
            # while mean and var are in interm_dtype, which is promoted
            # from x's dtype.
            cuda.elementwise(
                'T mean, T var, U decay, U adjust',
                'U r_mean, U r_var',
                '''
                r_mean = r_mean * decay + mean * (1 - decay);
                r_var = r_var * decay + var * (1 - decay) * adjust;
                ''',
                'update_mean_var')(
                    mean, var, decay, adjust, running_mean, running_var)

        if xp is chainerx:
            running_mean = backend.to_chx(running_mean)
            running_var = backend.to_chx(running_var)

    y_layout = x_layout
    return y, y_layout, running_mean, running_var, mean, var, inv_std, None
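# Hedged sketch (not Chainer source, illustrative values) of the
# running-statistics update above: an exponential moving average, with
# the factor m / (m - 1) correcting the biased batch variance.
import numpy

decay = 0.9
m = 8                          # elements averaged per statistic
adjust = m / max(m - 1., 1.)   # unbiased estimation factor, 8/7

running_mean = numpy.zeros(3)
running_var = numpy.ones(3)
mean = numpy.full(3, 0.5)      # batch mean
var = numpy.full(3, 0.25)      # biased batch variance

running_mean = running_mean * decay + (1 - decay) * mean
running_var = running_var * decay + (1 - decay) * adjust * var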
def from_chx(self):
    """Converts parameter variables and persistent values from ChainerX \
to NumPy/CuPy devices without any copy."""
    d = self.__dict__
    for name in self._params:
        d[name].from_chx()
    for name in self._persistent:
        if not numpy.isscalar(d[name]):
            d[name] = backend.from_chx(d[name])

    if isinstance(self._device, backend.ChainerxDevice):
        self._device = self._device.fallback_device

    return self
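# Hedged usage sketch (assuming ChainerX is available): parameters placed
# on a ChainerX 'native' device fall back to NumPy in place, without a
# copy of the underlying data.
import numpy
import chainer.links as L

link = L.Linear(3, 2)
link.to_device('native:0')  # ChainerX native (CPU) device
link.from_chx()             # parameters become NumPy-backed
assert isinstance(link.W.array, numpy.ndarray)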
def test_from_chx(self, backend_config):
    arr = backend_config.get_array(numpy.ones((2, 3), numpy.float32))
    arr_converted = backend.from_chx(arr)

    src_device = backend_config.device
    if src_device.xp is chainerx:
        dst_xp = src_device.fallback_device.xp
        assert isinstance(arr_converted, dst_xp.ndarray)
        if dst_xp is cuda.cupy:
            assert arr_converted.device.id == src_device.device.index
    else:
        assert arr is arr_converted

    with backend_config:
        self.check_equal_memory_shared(arr, arr_converted)
def forward(self, xs):
    a = xs[0]
    b = xs[1]
    y = a.copy()
    xp = backend.get_array_module(a)
    slices = tuple([
        backend.from_chx(s) if isinstance(s, chainerx.ndarray) else s
        for s in self.slices])
    if y[slices].shape != b.shape:
        raise ValueError(
            'Chainer does not support automatic broadcasting '
            'of variables.')
    if xp is numpy:
        numpy.add.at(y, slices, b)
    else:
        cuda.cupyx.scatter_add(y, slices, b)
    return y,
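# Hedged sketch (not Chainer source): the CPU path needs numpy.add.at
# rather than plain fancy-index assignment because the scatter must
# accumulate; with duplicate indices, assignment keeps only the last
# write while add.at sums all contributions.
import numpy

y = numpy.zeros(4)
numpy.add.at(y, [0, 0, 2], [1.0, 2.0, 5.0])
assert (y == numpy.array([3.0, 0.0, 5.0, 0.0])).all()

y2 = numpy.zeros(4)
y2[[0, 0, 2]] = [1.0, 2.0, 5.0]  # last write wins: y2[0] == 2.0
assert y2[0] == 2.0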
def forward_cpu(self, inputs):
    class_weight = backend.from_chx(self.class_weight)

    self.retain_inputs((0, 1))
    x, t = inputs
    if x.ndim == t.ndim and x.shape == t.shape:
        self.soft_target = True
    if chainer.is_debug() and not self.soft_target:
        _check_input_values(x, t, self.ignore_label)

    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = numpy.exp(log_y)
    if self.soft_target:
        return self._soft_target_loss(numpy, x, t, log_y)
    if class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= _broadcast_to(class_weight.reshape(shape), x.shape)
    log_yd = numpy.rollaxis(log_y, 1)
    log_yd = log_yd.reshape(len(log_yd), -1)
    t_valid = t != self.ignore_label
    t = t * t_valid
    log_p = log_yd[t.ravel(), numpy.arange(t.size)]
    log_p *= t_valid.ravel()
    if self.reduce == 'mean':
        if self.normalize:
            count = t_valid.sum()
        else:
            count = len(x)
        self._coeff = 1.0 / max(count, 1)
        # Perform reduction in a promoted dtype
        reduc_dtype = _reduction_dtype(x.dtype)
        y = log_p.sum(keepdims=True, dtype=reduc_dtype)
        y = y * (-self._coeff)
        y = y.astype(x.dtype, copy=False)
        return y.reshape(()),
    else:
        return -log_p.reshape(t.shape),
def forward(self, inputs):
    slices = tuple([
        backend.from_chx(s) if isinstance(s, chainerx.ndarray) else s
        for s in self.slices])

    gy, = inputs
    xp = backend.get_array_module(*inputs)
    gx = xp.zeros(self._in_shape, gy.dtype)
    if xp is numpy:
        try:
            numpy.add.at(gx, slices, gy)
        except IndexError:
            done = False
            # In numpy<1.13, 0-dim boolean index is not supported in
            # numpy.add.at and it's supported for 0-dim arr in
            # arr.__getitem__.
            if not _numpy_supports_0d_bool_index and len(slices) == 1:
                idx = numpy.asanyarray(slices[0])
                if idx.dtype == numpy.dtype(bool):
                    # Convert the array and the mask to 1-dim.
                    # numpy.add.at with them is supported in older numpy.
                    numpy.add.at(gx[None], idx[None], gy)
                    done = True

            if not done:
                msg = '''
GetItem does not support backward for these slices. The slices argument
is not supported by numpy.add.at, while it is supported by
numpy.ndarray.__getitem__.

Please report this error to the issue tracker with the stack trace,
the information of your environment, and your script:
https://github.com/chainer/chainer/issues/new.
'''
                raise IndexError(msg)
    else:
        gx.scatter_add(slices, inputs[0])
    return gx,
def forward_cpu(self, inputs):
    class_weight = backend.from_chx(self.class_weight)

    self.retain_inputs((0, 1))
    x, t = inputs
    if chainer.is_debug():
        _check_input_values(x, t, self.ignore_label)

    log_y = log_softmax._log_softmax(x)
    if self.cache_score:
        self.y = numpy.exp(log_y)
    if class_weight is not None:
        shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
        log_y *= _broadcast_to(class_weight.reshape(shape), x.shape)
    log_yd = numpy.rollaxis(log_y, 1)
    log_yd = log_yd.reshape(len(log_yd), -1)
    t_valid = t != self.ignore_label
    t = t * t_valid
    log_p = log_yd[t.ravel(), numpy.arange(t.size)]
    log_p *= t_valid.ravel()
    if self.reduce == 'mean':
        # deal with the case where the SoftmaxCrossEntropy is
        # unpickled from the old version
        if self.normalize:
            count = t_valid.sum()
        else:
            count = len(x)
        self._coeff = 1.0 / max(count, 1)
        # Perform reduction in a promoted dtype
        reduc_dtype = _reduction_dtype(x.dtype)
        y = log_p.sum(keepdims=True, dtype=reduc_dtype)
        y = y * (-self._coeff)
        y = y.astype(x.dtype, copy=False)
        return y.reshape(()),
    else:
        return -log_p.reshape(t.shape),
def forward_gpu(self, inputs):
    func = self.func
    if func.is_cudnn_used:
        x = func.get_retained_inputs()[0].array
        return self._forward_gpu_compute_indexes_again((x, inputs[0]))

    ndim = func.ndim
    ksize = func.ksize
    stride = func.stride
    pad = func.pad
    cover_all = func.cover_all
    indexes = backend.from_chx(func.indexes)

    x, = inputs
    in_shape = x.shape
    in_dtype = x.dtype

    n, c = in_shape[:2]
    dims = in_shape[2:]
    ys = tuple(conv_nd.get_conv_outsize(d, k, s, p, cover_all)
               for (d, k, s, p)
               in six.moves.zip(dims, ksize, stride, pad))

    # (n, c, y_1, y_2, ..., y_N)
    y_shape = (n, c) + ys
    y = cuda.cupy.empty(y_shape, dtype=x.dtype)

    cls = max_pooling_nd_kernel.MaxPoolingNDKernelForwardWithIndexes
    in_params, out_params, operation, name = cls.generate(ndim)
    cuda.elementwise(in_params, out_params, operation, name)(
        x.reduced_view(),
        *(dims + ys + ksize + stride + pad
          + (indexes.reduced_view(), y)))

    self._in_shape = in_shape
    self._in_dtype = in_dtype
    return y,
def forward_gpu(self, gys):
    func = self.func
    if func.is_cudnn_used:
        return func.backward_cudnn(gys)

    ndim = func.ndim
    pad_value = func.pad_value
    ksize = func.ksize
    stride = func.stride
    pad = func.pad
    in_shape = func._in_shape
    in_dtype = func._in_dtype

    is_pad_value_none = pad_value is None

    gy, = gys
    n, c = in_shape[:2]
    idims = in_shape[2:]
    odims = gy.shape[2:]
    if is_pad_value_none:
        # This conversion from chainerx to cupy exists here for
        # double backward of chainerx on cuda.
        coeff = backend.from_chx(func.coeff)
        gy *= coeff
    gx = cuda.cupy.empty(in_shape, in_dtype)

    in_params, out_params, operation, name = \
        average_pooling_nd_kernel.AveragePoolingNDKernelBackward.generate(
            ndim)
    cuda.elementwise(in_params, out_params, operation, name)(
        gy.reduced_view(),
        *(idims + odims + ksize + stride + pad + (gx,)))

    if not is_pad_value_none:
        gx /= functools.reduce(operator.mul, ksize)
    return gx,
def forward_cpu(self, inputs):
    t = backend.from_chx(self.t)  # Workaround for ChainerX.

    gx = numpy.zeros(self.shape, self.dtype)
    gx[six.moves.range(self.t.size), t] = inputs[0]
    return gx,
def forward(self, xs):
    slices = tuple([
        backend.from_chx(s) if isinstance(s, chainerx.ndarray) else s
        for s in self.slices])
    return utils.force_array(xs[0][slices]),
def visit_array(self, arr):
    assert isinstance(arr, chainer.get_array_types())
    return backend.from_chx(arr)
def forward(self, inputs):
    self.retain_inputs((0, 1))
    x, gamma, beta = inputs
    xp = backend.get_array_module(x)
    if self.running_mean is None:
        self.running_mean = xp.zeros_like(gamma, dtype=x.dtype)
        self.running_var = xp.zeros_like(gamma, dtype=x.dtype)

    self.axis = _compute_axis(x.ndim, gamma.ndim, self.axis)
    self.key_axis = _compute_key_axis(x.ndim, gamma.ndim, self.axis)

    if all(x.shape[i] == 1 for i in self.axis):
        if 0 in self.axis:
            warnings.warn(
                'A batch with no more than one sample has been given'
                ' to F.batch_normalization. F.batch_normalization'
                ' will always output a zero tensor for such batches.'
                ' This could be caused by incorrect configuration in'
                ' your code (such as running evaluation while'
                ' chainer.config.train=True),'
                ' but could also happen in the last batch of training'
                ' if non-repeating iterator is used.',
                UserWarning)
        else:
            warnings.warn(
                'F.batch_normalization received a batch with single'
                ' dimensions along all axes that are used for aggregating'
                ' statistics. F.batch_normalization'
                ' will always output a zero tensor for such batches.',
                UserWarning)

    # TODO(niboshi): Refactor calculation of expander and axis into a
    # function and call it just before they are used.

    # expander inserts singleton dimensions to gamma and beta so that they
    # can be broadcasted with x.
    expander = [None for _ in range(x.ndim)]
    for i in self.key_axis:
        expander[i] = slice(None)
    expander = tuple(expander)
    self.expander = expander

    self.mode = _BNMode(x, gamma, self.key_axis)
    self.use_cudnn = self.mode.can_use_cudnn(xp)
    self.use_ideep = self.mode.can_use_ideep()

    if self.use_ideep:
        # TODO(niboshi): Refactor iDeep part into a separate method
        expand_dim = False
        if x.ndim == 2:
            expand_dim = True
            x = x[:, :, None, None]

        y, self.mean, self.var, self.inv_std = (
            intel64.ideep.batchNormalization.Forward(
                intel64.ideep.array(x.astype(gamma.dtype, copy=False)),
                intel64.ideep.array(gamma),
                intel64.ideep.array(beta),
                None,
                None,
                self.eps))
        y = y.astype(x.dtype, copy=False)

        m = x.size // gamma.size
        adjust = m / max(m - 1., 1.)

        # Update running_mean
        if isinstance(self.running_mean, intel64.ideep.mdarray):
            self.running_mean.inplace_axpby(
                self.decay, (1 - self.decay), self.mean)
        else:
            self.running_mean *= self.decay
            self.running_mean += self.mean * (1 - self.decay)

        # Update running_var
        if isinstance(self.running_var, intel64.ideep.mdarray):
            self.running_var.inplace_axpby(
                self.decay, (1 - self.decay), self.var * adjust)
        else:
            self.running_var *= self.decay
            self.running_var += self.var * adjust * (1 - self.decay)

        if expand_dim:
            y = numpy.squeeze(y, axis=(2, 3))

    elif self.use_cudnn:
        # self.mean and self.inv_std are used as buffers to save
        # intermediate results computed during forward pass. These buffers
        # are used to speed-up backward pass.
        y, self.mean, self.inv_std = (
            cudnn.batch_normalization_forward_training(
                x, gamma, beta, self.running_mean, self.running_var,
                None, None, self.eps, self.decay,
                self.mode.is_for_conv2d, self.mode.get_cudnn_mode(),
                chainer.is_debug()))
    else:
        # Generic CPU and GPU implementation
        gamma = gamma[expander]
        beta = beta[expander]
        self.mean = x.mean(axis=self.axis, dtype=gamma.dtype)
        var = x.var(axis=self.axis, dtype=gamma.dtype)
        if xp is numpy:
            self.inv_std = numpy.reciprocal(
                numpy.sqrt(var + self.eps, dtype=gamma.dtype))
        else:
            self.inv_std = cuda.cupyx.rsqrt(var + self.eps,
                                            dtype=gamma.dtype)
        y = _apply_bn_fwd(xp, x, self.mean[expander],
                          self.inv_std[expander], gamma, beta)

        # Update running statistics
        m = x.size // gamma.size
        adjust = m / max(m - 1., 1.)  # unbiased estimation

        xp = backend.get_array_module(self.running_mean, self.running_var)
        if xp is chainerx:
            self.running_mean, self.running_var = backend.from_chx(
                (self.running_mean, self.running_var))

        self.running_mean *= self.decay
        self.running_mean += (1 - self.decay) * self.mean
        self.running_var *= self.decay
        self.running_var += (1 - self.decay) * adjust * var

        if xp is chainerx:
            self.running_mean = backend.to_chx(self.running_mean)
            self.running_var = backend.to_chx(self.running_var)

    return y,