def forward(self, inputs):
    """Run the forward pass of batch normalization.

    Args:
        inputs: Sequence unpacked as ``(x, gamma, beta)``.

    Returns:
        A one-element tuple holding the output produced by the selected
        implementation's ``forward``.

    Side effects visible in this method: ``self.axis``, ``self.key_axis``,
    ``self.expander``, ``self._impl``, the running/batch statistics
    attributes, ``self.forward_data`` and ``self.output_layouts`` are
    (re)assigned. A ``UserWarning`` is issued when every aggregated axis of
    ``x`` has length 1.
    """
    # Inputs 0 and 1 (x and gamma) are retained.
    self.retain_inputs((0, 1))
    x, gamma, beta = inputs
    x_layout, _, _ = self.input_layouts
    # Provisional value; replaced by the implementation's layout below.
    self.output_layouts = (x_layout,)

    self.axis = _compute_axis(x.ndim, gamma.ndim, self.axis)
    self.key_axis = _compute_key_axis(x.ndim, gamma.ndim, self.axis)

    x_dims = memory_layouts._transpose_shape(x.shape, x_layout, None)
    if all(x_dims[ax] == 1 for ax in self.axis):
        # Pick the message depending on whether axis 0 is aggregated.
        if 0 in self.axis:
            message = (
                'A batch with no more than one sample has been given'
                ' to F.batch_normalization. F.batch_normalization'
                ' will always output a zero tensor for such batches.'
                ' This could be caused by incorrect configuration in'
                ' your code (such as running evaluation while'
                ' chainer.config.train=True),'
                ' but could also happen in the last batch of training'
                ' if non-repeating iterator is used.')
        else:
            message = (
                'F.batch_normalization received a batch with single'
                ' dimensions along all axes that are used for aggregating'
                ' statistics. F.batch_normalization'
                ' will always output a zero tensor for such batches.')
        warnings.warn(message, UserWarning)

    # TODO(niboshi): Refactor calculation of expander and axis into a
    # function and call it just before they are used.

    # expander inserts singleton dimensions to gamma and beta so that they
    # can be broadcasted with x.
    index = [None] * x.ndim
    for ax in self.key_axis:
        index[ax] = slice(None)
    expander = memory_layouts._transpose_shape(tuple(index), None, x_layout)
    self.expander = expander

    xp = backend.get_array_module(x)
    self._impl = self._impl_selector(self, inputs)

    results = self._impl.forward(
        axis=self.axis, gamma=gamma, x=x, x_layout=x_layout, xp=xp,
        expander=expander, beta=beta, eps=self.eps, decay=self.decay,
        running_mean=self.running_mean, running_var=self.running_var)
    (
        y, y_layout, self.running_mean, self.running_var,
        self.mean, self.var, self.inv_std,
        self.forward_data,
    ) = results

    self.output_layouts = (y_layout,)
    return y,
def _forward_cudnn(self, x, W, b, input_layouts):
    """Compute the output through ``cuda.cudnn.convolution_backward_data``.

    Args:
        x: Input array.
        W: Filter array.
        b: Bias array, passed straight through to cuDNN.
        input_layouts: Pair ``(x_layout, w_layout)``.

    Returns:
        A one-element tuple holding the freshly allocated output array.

    Also sets ``self.output_layouts`` to ``(x_layout,)``.
    """
    x_layout, w_layout = input_layouts
    self.output_layouts = (x_layout, )

    batch = len(x)
    # Second entry of W's shape after _transpose_shape(..., w_layout, None).
    _, w_c, _, _ = memory_layouts._transpose_shape(
        W.shape, w_layout, None)
    out_shape = memory_layouts._transpose_shape(
        (batch, w_c * self.groups, self.outh, self.outw), None, x_layout)
    y = cuda.cupy.empty(out_shape, dtype=x.dtype)

    config = configuration.config
    cuda.cudnn.convolution_backward_data(
        W, x, b, y,
        (self.ph, self.pw),
        (self.sy, self.sx),
        (self.dy, self.dx),
        self.groups,
        deterministic=config.cudnn_deterministic,
        auto_tune=config.autotune,
        tensor_core=config.use_cudnn_tensor_core,
        d_layout=cuda._get_cudnn_tensor_layout_x(x_layout),
        w_layout=cuda._get_cudnn_tensor_layout_w(w_layout))
    return y,
def forward_gpu(self, inputs):
    """Run the GPU forward pass, choosing among three implementations.

    Args:
        inputs: Either ``(x, W)`` or ``(x, W, b)``.

    Returns:
        Whatever the selected implementation returns: ``_forward_cudnn``,
        ``_forward_grouped_convolution`` or ``_forward_gpu_core``.
    """
    self.retain_inputs((0, 1))  # only x and W are retained

    if len(inputs) == 2:
        x, W = inputs
        b = None
        x_layout, w_layout = self.input_layouts
    else:
        x, W, b = inputs
        x_layout, w_layout, _ = self.input_layouts

    x_dims = memory_layouts._transpose_shape(x.shape, x_layout, None)
    w_dims = memory_layouts._transpose_shape(W.shape, w_layout, None)
    self._calc_out_size(x_dims, w_dims)
    self._set_cover_all(x_dims, w_dims)

    # The chain is evaluated lazily, left to right; later terms are only
    # read once the earlier ones hold.
    use_cudnn = (
        chainer.should_use_cudnn('>=auto')
        and not self.cover_all
        and x.dtype == W.dtype
        and (
            (self.dy == 1 and self.dx == 1)
            or (_cudnn_version >= 6000
                and not configuration.config.cudnn_deterministic)
        )
        and (self.groups <= 1 or _cudnn_version >= 7000)
    )

    if not use_cudnn:
        if self.groups > 1:
            return self._forward_grouped_convolution(x, W, b)
        return self._forward_gpu_core(x, W, b)

    # cuDNN implementation
    return self._forward_cudnn(x, W, b, (x_layout, w_layout))
def forward_cpu(self, inputs):
    """Run the CPU forward pass, choosing among three implementations.

    Args:
        inputs: Either ``(x, W)`` or ``(x, W, b)``.

    Returns:
        Whatever the selected implementation returns:
        ``_forward_grouped_convolution``, ``_forward_ideep`` or
        ``_forward_cpu_core``.
    """
    # NOTE(review): this early flag is set only for dilation 1, while the
    # iDeep branch below has no dilation condition -- confirm intent.
    no_dilation = self.dy == 1 and self.dx == 1
    if (no_dilation
            and intel64.should_use_ideep('>=auto')
            and intel64.inputs_all_ready(inputs)):
        self._use_ideep = True

    self.retain_inputs((0, 1))  # only x and W are retained

    if len(inputs) == 2:
        x, W = inputs
        b = None
        x_layout, w_layout = self.input_layouts
    else:
        x, W, b = inputs
        x_layout, w_layout, _ = self.input_layouts

    x_dims = memory_layouts._transpose_shape(x.shape, x_layout, None)
    w_dims = memory_layouts._transpose_shape(W.shape, w_layout, None)
    self._calc_out_size(x_dims, w_dims)

    # Grouped convolution implementation takes precedence over iDeep.
    if self.groups > 1:
        return self._forward_grouped_convolution(x, W, b)

    # iDeep implementation
    if (intel64.should_use_ideep('>=auto')
            and intel64.inputs_all_ready(inputs)):
        self._use_ideep = True
        return self._forward_ideep(x, W, b)

    return self._forward_cpu_core(x, W, b)
def _forward_cudnn(self, x, gy):
    """Compute ``gW`` through ``cuda.cudnn.convolution_backward_filter``.

    Args:
        x: First array handed to cuDNN.
        gy: Second array handed to cuDNN.

    Returns:
        A one-element tuple holding the freshly allocated ``gW`` array,
        whose shape is ``self.W_shape`` transposed to ``self.w_layout`` and
        whose dtype is ``self.W_dtype``.
    """
    # Only the x layout is needed; the unpack still expects two layouts.
    x_layout, _ = self.input_layouts
    w_layout = self.w_layout

    gw_shape = memory_layouts._transpose_shape(
        self.W_shape, None, w_layout)
    gW = cuda.cupy.empty(gw_shape, dtype=self.W_dtype)

    config = configuration.config
    cuda.cudnn.convolution_backward_filter(
        x, gy, gW,
        (self.ph, self.pw),
        (self.sy, self.sx),
        (self.dy, self.dx),
        self.groups,
        deterministic=config.cudnn_deterministic,
        auto_tune=config.autotune,
        tensor_core=config.use_cudnn_tensor_core,
        d_layout=cuda._get_cudnn_tensor_layout_x(x_layout),
        w_layout=cuda._get_cudnn_tensor_layout_w(w_layout))
    return gW,
def forward_gpu(self, inputs):
    """Run the GPU forward pass, choosing among three implementations.

    Args:
        inputs: Either ``(x, W)`` or ``(x, W, b)``.

    Returns:
        Whatever the selected implementation returns: ``_forward_cudnn``,
        ``_forward_grouped_convolution`` or ``_forward_gpu_core``.

    Raises:
        RuntimeError: If ``self.cudnn_fast`` is set but the cuDNN path
            cannot be used.
    """
    self.retain_inputs((0, 1))  # retain only x and W
    if len(inputs) == 2:
        (x, W), b = inputs, None
        x_layout, w_layout = self.input_layouts
    else:
        x, W, b = inputs
        x_layout, w_layout, _ = self.input_layouts

    x_shape = memory_layouts._transpose_shape(x.shape, x_layout, None)
    w_shape = memory_layouts._transpose_shape(W.shape, w_layout, None)

    # Keep the 4-element unpacks so a shape of the wrong rank still fails
    # here, but bind only the entries that are actually used.
    n, _, _, _ = x_shape
    out_c, _, _, _ = w_shape
    # Called on every path so any error it raises is not path-dependent.
    out_h, out_w = self._get_out_size(x_shape, w_shape)

    # The chain is evaluated lazily, left to right.
    use_cudnn = (
        chainer.should_use_cudnn('>=auto')
        and not self.cover_all
        and x.dtype == W.dtype
        and ((self.dy == 1 and self.dx == 1) or _cudnn_version >= 6000)
        and (self.groups <= 1 or _cudnn_version >= 7000)
    )

    if self.cudnn_fast and not use_cudnn:
        raise RuntimeError('\'cudnn_fast\' requires cuDNN to work')

    if use_cudnn:
        # cuDNN implementation. The output buffer is only consumed here,
        # so it is allocated here rather than on every path.
        y_raw_shape = memory_layouts._transpose_shape(
            (n, out_c, out_h, out_w), None, x_layout)
        y = cuda.cupy.empty(y_raw_shape, dtype=x.dtype)
        return self._forward_cudnn(x, W, b, y, (x_layout, w_layout))

    elif self.groups > 1:
        return self._forward_grouped_convolution(x, W, b)

    else:
        return self._forward_gpu_core(x, W, b)