def setUp(self):
    self.xs = [cuda.cupy.random.uniform(
        -1, 1, (b, self.in_size)).astype('f') for b in self.batches]
    h_shape = (self.n_layers * 2, self.batches[0], self.out_size)
    self.hx = cuda.cupy.random.uniform(-1, 1, h_shape).astype('f')

    self.ws = []
    self.bs = []
    for i in range(self.n_layers):
        for di in [0, 1]:
            weights = []
            biases = []
            for j in range(6):
                if i == 0 and j < 3:
                    w_in = self.in_size
                elif i > 0 and j < 3:
                    w_in = self.out_size * 2
                else:
                    w_in = self.out_size
                weights.append(cuda.cupy.random.uniform(
                    -1, 1, (self.out_size, w_in)).astype('f'))
                biases.append(cuda.cupy.random.uniform(
                    -1, 1, (self.out_size,)).astype('f'))
            self.ws.append(weights)
            self.bs.append(biases)

    self.dys = [cuda.cupy.random.uniform(
        -1, 1, (b, self.out_size * 2)).astype('f') for b in self.batches]
    self.dhy = cuda.cupy.random.uniform(-1, 1, h_shape).astype('f')
    with chainer.using_config('use_cudnn', self.use_cudnn):
        self.expect = chainer.should_use_cudnn('>=auto', 5000)
def setUp(self):
    batches = 2
    in_channels_a_group = 3
    out_channels_a_group = 2
    in_channels = in_channels_a_group * self.group
    out_channels = out_channels_a_group * self.group
    kh, kw = (3, 3)
    self.stride = 2
    self.pad = (int(kh / 2) * self.dilate, int(kw / 2) * self.dilate)
    self.x = cuda.cupy.random.uniform(
        -1, 1, (batches, in_channels, 4, 3)).astype(self.dtype)
    self.W = cuda.cupy.random.normal(
        0, numpy.sqrt(1. / (kh * kw * in_channels_a_group)),
        (out_channels, in_channels_a_group, kh, kw)).astype(self.dtype)
    self.gy = cuda.cupy.random.uniform(
        -1, 1, (batches, out_channels, 2, 2)).astype(self.dtype)
    with chainer.using_config('use_cudnn', self.use_cudnn):
        self.should_call_cudnn = chainer.should_use_cudnn('>=auto')
        if self.dilate > 1 and cuda.cuda.cudnn.getVersion() < 6000:
            self.should_call_cudnn = False
        if self.group > 1 and cuda.cuda.cudnn.getVersion() < 7000:
            self.should_call_cudnn = False
        self.can_use_tensor_core = True
        if self.dilate > 1:
            self.can_use_tensor_core = False
def test_call_cudnn_forward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        with mock.patch('cupy.cudnn.cudnn.poolingForward') as func:
            self.forward()
        self.assertEqual(
            func.called,
            chainer.should_use_cudnn('>=auto') and self.ndim > 1)
def forward_gpu(self, inputs):
    self.retain_inputs((0, 1))  # only retain x and W
    if len(inputs) == 2:
        (x, W), b = inputs, None
    else:
        x, W, b = inputs

    self._calc_out_size(x, W)
    self._set_cover_all(x, W)

    use_cudnn = (
        chainer.should_use_cudnn('>=auto')
        and not self.cover_all
        and x.dtype == W.dtype
        and ((self.dy == 1 and self.dx == 1)
             or (_cudnn_version >= 6000
                 and not configuration.config.cudnn_deterministic))
        and (self.groups <= 1 or _cudnn_version >= 7000)
    )
    if use_cudnn:
        # cuDNN implementation
        return self._forward_cudnn(x, W, b)
    elif self.groups > 1:
        return self._forward_grouped_convolution(x, W, b)
    else:
        return self._forward_gpu_core(x, W, b)
def _use_cudnn(self, x, gy):
    return (
        chainer.should_use_cudnn('>=auto')
        and not self.cover_all
        and x.dtype == self.W_dtype
        and gy.dtype == self.W_dtype
        and self.ndim > 1)
def forward(self, inputs):
    if inputs[0].shape[1] % self.groups != 0:
        raise ValueError('The number of channels {} is not divisible by '
                         '\'groups\' argument {}.'
                         .format(inputs[0].shape[1], self.groups))
    xp = backend.get_array_module(*inputs)
    if xp is cuda.cupy and chainer.should_use_cudnn('>=auto', 5000):
        return self.forward_cudnn(inputs)

    self.retain_inputs((0, 1))
    x, gamma, beta = inputs
    orig_shape = x.shape
    batch_size, channels = orig_shape[:2]
    groups = self.groups
    reduced_shape = (batch_size * groups, -1)
    x = x.reshape(reduced_shape)

    self.mean = x.mean(axis=1)
    x_hat = x - self.mean[:, None]
    var = (x_hat * x_hat).mean(axis=1)
    var += self.eps
    self.inv_std = var
    del var
    xp.sqrt(self.inv_std, out=self.inv_std, dtype=x.dtype)
    xp.reciprocal(self.inv_std, out=self.inv_std)

    x_hat *= self.inv_std[:, None]
    y = x_hat.reshape((batch_size, channels, -1))
    y *= gamma[:, None]
    y += beta[:, None]

    y = y.reshape(orig_shape)
    return y,
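
# The non-cuDNN branch above normalizes each group of channels by its own
# mean and variance, then applies a per-channel scale and shift. Below is a
# minimal NumPy sketch of that math; the function name, argument names, and
# the eps default are illustrative, not Chainer's API.
import numpy as np

def group_norm_reference(x, gamma, beta, groups, eps=1e-5):
    batch_size, channels = x.shape[:2]
    orig_shape = x.shape
    # Collapse each group of channels into one row and normalize per row.
    xr = x.reshape(batch_size * groups, -1)
    mean = xr.mean(axis=1, keepdims=True)
    var = ((xr - mean) ** 2).mean(axis=1, keepdims=True)
    x_hat = (xr - mean) / np.sqrt(var + eps)
    # Scale and shift per channel, as in the function above.
    y = x_hat.reshape(batch_size, channels, -1)
    y = y * gamma[:, None] + beta[:, None]
    return y.reshape(orig_shape)

# Example: 2 samples, 4 channels split into 2 groups.
x_ref = np.random.randn(2, 4, 3, 3).astype(np.float32)
y_ref = group_norm_reference(x_ref, np.ones(4, np.float32),
                             np.zeros(4, np.float32), groups=2)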
def forward_gpu(self, inputs):
    if not chainer.should_use_cudnn('>=auto', 5000):
        return self._forward(inputs)
    x, grid = inputs
    out_shape = x.shape[:2] + grid.shape[2:]
    y = cuda.cupy.empty(out_shape, dtype=x.dtype)
    shape = numpy.array(out_shape, dtype=numpy.int32)
    x = cuda.cupy.ascontiguousarray(x)
    grid_t = cuda.cupy.transpose(grid, (0, 2, 3, 1))
    grid_t = cuda.cupy.ascontiguousarray(grid_t)

    handle = cudnn.get_handle()
    x_desc = cudnn.create_tensor_descriptor(x)
    y_desc = cudnn.create_tensor_descriptor(y)

    self.st_desc = \
        cuda.cupy.cudnn.create_spatial_transformer_descriptor(
            _sampler_type, grid.dtype, len(shape), shape.ctypes.data)

    one = numpy.array(1, dtype=x.dtype).ctypes
    zero = numpy.array(0, dtype=x.dtype).ctypes
    libcudnn.spatialTfSamplerForward(
        handle, self.st_desc.value, one.data,
        x_desc.value, x.data.ptr, grid_t.data.ptr,
        zero.data, y_desc.value, y.data.ptr)
    return y,
def backward_gpu(self, inputs, grad_outputs):
    if not chainer.should_use_cudnn('>=auto', 5000):
        return self._backward(inputs, grad_outputs)
    x, grid = inputs
    gy, = grad_outputs

    grid_t = cuda.cupy.transpose(grid, (0, 2, 3, 1))
    grid_t = cuda.cupy.ascontiguousarray(grid_t)
    x = cuda.cupy.ascontiguousarray(x)
    gy = cuda.cupy.ascontiguousarray(gy)
    gx = cuda.cupy.empty_like(x)
    ggrid_t = cuda.cupy.empty_like(grid_t)

    handle = cudnn.get_handle()
    x_desc = cudnn.create_tensor_descriptor(x)
    dx_desc = cudnn.create_tensor_descriptor(gx)
    dy_desc = cudnn.create_tensor_descriptor(gy)

    one = numpy.array(1, dtype=x.dtype).ctypes
    zero = numpy.array(0, dtype=x.dtype).ctypes
    libcudnn.spatialTfSamplerBackward(
        handle, self.st_desc.value,
        one.data, x_desc.value, x.data.ptr,
        zero.data, dx_desc.value, gx.data.ptr,
        one.data, dy_desc.value, gy.data.ptr,
        grid_t.data.ptr, zero.data, ggrid_t.data.ptr)
    ggrid = cuda.cupy.transpose(ggrid_t, axes=(0, 3, 1, 2))
    return gx, ggrid
def forward_gpu(self, x):
    if (chainer.should_use_cudnn('>=auto') and
            pooling_nd._check_cudnn_acceptable_type(x[0].dtype)):
        # With cuDNN v3 or greater, use cuDNN implementation for inputs
        # with spatial dimensions of two or more.
        if _cudnn_version >= 3000 and self.ndim >= 2:
            return super(AveragePoolingND, self).forward_gpu(x)
        # With cuDNN v2, use cuDNN implementation only for inputs with
        # spatial dimensions of two.
        elif self.ndim == 2:
            return super(AveragePoolingND, self).forward_gpu(x)

    self.retain_inputs(())
    self._in_shape = x[0].shape
    self._in_dtype = x[0].dtype

    n, c = x[0].shape[:2]
    dims = x[0].shape[2:]
    ys = tuple(conv_nd.get_conv_outsize(d, k, s, p,
                                        cover_all=self.cover_all)
               for (d, k, s, p) in six.moves.zip(
                   dims, self.ksize, self.stride, self.pad))
    # (n, c, y_1, y_2, ..., y_N)
    y_shape = (n, c) + ys
    y = cuda.cupy.empty(y_shape, dtype=x[0].dtype)
    coeff = 1. / functools.reduce(operator.mul, self.ksize)

    in_params, out_params, operation, name = \
        average_pooling_nd_kernel.AveragePoolingNDKernelForward.generate(
            self.ndim)
    cuda.elementwise(in_params, out_params, operation, name)(
        x[0].reduced_view(),
        *(dims + ys + self.ksize + self.stride + self.pad + (coeff, y)))
    return y,
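
# `get_conv_outsize` above determines the spatial output size per dimension
# that goes into `y_shape`. A hedged sketch of the usual arithmetic behind it
# (this mirrors the standard convolution/pooling formula under the stated
# assumptions, not Chainer's exact source; `d` is the dilation factor):
def conv_outsize(size, k, s, p, cover_all=False, d=1):
    dk = k + (k - 1) * (d - 1)          # effective (dilated) kernel size
    if cover_all:
        # cover_all rounds up so every input pixel is covered by a window
        return (size + p * 2 - dk + s - 1) // s + 1
    return (size + p * 2 - dk) // s + 1

# e.g. a 7-pixel axis, kernel 3, stride 2, no padding -> 3 outputs
assert conv_outsize(7, 3, 2, 0) == 3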
def forward_gpu(self, x):
    if (chainer.should_use_cudnn('==always', 5000)
            and x[0].flags.c_contiguous
            and self.mask is None):
        self._use_cudnn = True

        if hasattr(self, 'states'):
            # if we already have a dropout mask,
            # the forward operation is equal to backward.
            return cuda.get_cudnn_dropout_states().backward(
                None, x[0], self.dropout_ratio, self.states),

        self.states, y = cuda.get_cudnn_dropout_states().forward(
            None, x[0], self.dropout_ratio)
        return y,
    else:
        if self.mask is not None:
            y = x[0] * self.mask
        else:
            rand = cuda.cupy.random.rand(*x[0].shape, dtype=numpy.float32)
            scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
            self.mask, y = cuda.elementwise(
                'T x, R r, T scale, T ratio', 'T mask, T y',
                '''
                mask = (r >= ratio) * scale;
                y = x * mask;
                ''',
                'dropout_fwd',
            )(x[0], rand, scale, self.dropout_ratio)
        return y,
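
# NumPy sketch of the non-cuDNN dropout branch above: an element is kept when
# its random draw is >= ratio, and kept elements are scaled by 1 / (1 - ratio)
# so the expected value is unchanged. Names are illustrative only.
import numpy as np

def dropout_forward_reference(x, ratio, rng=np.random):
    scale = x.dtype.type(1. / (1 - ratio))
    rand = rng.rand(*x.shape).astype(np.float32)
    mask = (rand >= ratio) * scale
    return mask, x * mask.astype(x.dtype)

mask_ref, y_drop = dropout_forward_reference(
    np.ones((2, 3), np.float32), ratio=0.5)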
def __init__(self, comm, eps=2e-5, mean=None, var=None, decay=0.9):
    chainer.utils.experimental(
        'chainermn.functions.MultiNodeBatchNormalizationFunction')
    self.comm = comm
    self.running_mean = mean
    self.running_var = var

    # Note: cuDNN v5 requires that eps be greater than 1e-5. Otherwise, an
    # error will occur.
    # See CUDNN_BN_MIN_EPSILON value in cudnn.h to verify minimum allowable
    # value.
    self.eps = eps
    if chainer.should_use_cudnn('>=auto'):
        if eps < 1e-5:
            msg = 'cuDNN does not allow an eps value less than 1e-5.'
            raise RuntimeError(msg)

    self.mean_cache = None
    self.decay = decay

    # We need to delay importing MPI4py (and modules that import MPI4py)
    import chainermn.communicators._memory_utility as memory_utility_module
    from mpi4py import MPI as mpi4py_module
    self.memory_utility_module = memory_utility_module
    self.mpi4py_module = mpi4py_module
def __init__(self, eps=2e-5, mean=None, var=None, decay=0.9, axis=None):
    self.running_mean = mean
    self.running_var = var

    # Note: cuDNN requires that eps be greater than or equal to
    # CUDNN_BN_MIN_EPSILON. Otherwise, an error will occur.
    # See CUDNN_BN_MIN_EPSILON value in cudnn.h to verify minimum allowable
    # value.
    self.eps = eps
    if chainer.should_use_cudnn('>=auto'):
        if eps < libcudnn.CUDNN_BN_MIN_EPSILON:
            raise RuntimeError(
                'cuDNN does not allow an eps value '
                'less than {}.'.format(libcudnn.CUDNN_BN_MIN_EPSILON))
    self.decay = decay
    if isinstance(axis, collections_abc.Sequence):
        for i in range(1, len(axis)):
            if axis[i - 1] >= axis[i]:
                msg = 'numbers in axis must be sorted in ascending order'
                raise RuntimeError(msg)
    elif isinstance(axis, int):
        axis = axis,
    elif axis is not None:
        raise RuntimeError('axis must be int, tuple of int or None')
    self.axis = axis
def __init__(self, comm, eps=2e-5, mean=None, var=None, decay=0.9,
             communication_backend='auto'):
    chainer.utils.experimental(
        'chainermn.functions.MultiNodeBatchNormalizationFunction')
    self.comm = comm
    self.running_mean = mean
    self.running_var = var

    # Note: cuDNN v5 requires that eps be greater than 1e-5. Otherwise, an
    # error will occur.
    # See CUDNN_BN_MIN_EPSILON value in cudnn.h to verify minimum allowable
    # value.
    self.eps = eps
    if chainer.should_use_cudnn('>=auto'):
        if eps < 1e-5:
            msg = 'cuDNN does not allow an eps value less than 1e-5.'
            raise RuntimeError(msg)

    self.mean_cache = None
    self.decay = decay

    selected_communication_backend = \
        get_communication_backend(comm, communication_backend)

    if selected_communication_backend == 'nccl':
        self._backend = _NcclBackend(comm)
    else:
        self._backend = _MpiBackend(comm)
def can_use_cudnn(self, xp):
    # TODO(bkvogel): Check for float16 support again in next cuDNN version.
    # cuDNN v5 batch normalization does not seem to support float16.
    return (xp is not numpy and
            chainer.should_use_cudnn('>=auto', 5000) and
            self.cudnn_dim_ok and
            self.cudnn_dtype_ok)
def forward(self, inputs):
    self.retain_inputs((0, 1))
    y, gy = inputs
    xp = backend.get_array_module(*y)
    if xp is not numpy and chainer.should_use_cudnn('>=auto'):
        oz_dtype = 'd' if y[0].dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        handle = cudnn.get_handle()
        gx = xp.empty_like(y)
        gx_tensor4d = cuda.cupy.ascontiguousarray(
            gx.reshape(_get_tensor4d_shape(self.axis, gx.shape)))
        gy = cuda.cupy.ascontiguousarray(gy)
        desc = cudnn.create_tensor_descriptor(gx_tensor4d)
        cudnn_mode = _get_cudnn_mode(gx_tensor4d.shape)
        libcudnn.softmaxBackward(
            handle, _algorithm, cudnn_mode, one.data, desc.value,
            y.data.ptr, desc.value, gy.data.ptr, zero.data, desc.value,
            gx.data.ptr)
    else:
        gx = y * gy
        sumdx = gx.sum(axis=self.axis, keepdims=True)
        gx -= y * sumdx
    return gx,
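
# The fallback branch above is the softmax Jacobian-vector product:
# gx = y * gy - y * sum(y * gy, axis). A small NumPy sketch of that identity
# (axis=1 is assumed for illustration; names are not Chainer's).
import numpy as np

def softmax_backward_reference(y, gy, axis=1):
    gx = y * gy
    sumdx = gx.sum(axis=axis, keepdims=True)
    return gx - y * sumdx

y_sm = np.array([[0.2, 0.3, 0.5]])
gy_sm = np.array([[1.0, 0.0, 0.0]])
gx_sm = softmax_backward_reference(y_sm, gy_sm)
# Gradients of a softmax output sum to zero along the softmax axis.
assert abs(gx_sm.sum()) < 1e-12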
def forward_gpu(self, inputs):
    self.retain_inputs((0, 1))  # retain only x and W
    if len(inputs) == 2:
        (x, W), b = inputs, None
    else:
        x, W, b = inputs

    out_c, _, kh, kw = W.shape
    n, _, h, w = x.shape

    out_h, out_w = self._get_out_size(inputs)
    y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)

    use_cudnn = (
        chainer.should_use_cudnn('>=auto')
        and not self.cover_all
        and x.dtype == W.dtype
        and ((self.dy == 1 and self.dx == 1) or _cudnn_version >= 6000)
        and (self.groups <= 1 or _cudnn_version >= 7000)
    )
    if use_cudnn:
        # cuDNN implementation
        return self._forward_cudnn(x, W, b, y)
    elif self.groups > 1:
        return self._forward_grouped_convolution(x, W, b)
    else:
        return self._forward_gpu_core(x, W, b)
def forward_gpu(self, x):
    if chainer.should_use_cudnn('>=auto') and 2 <= self.ndim <= 3:
        # With cuDNN v3 or greater, use cuDNN implementation for inputs
        # with spatial dimensions of two or more.
        return super(MaxPoolingND, self).forward_gpu(x)

    self._in_shape = x[0].shape
    self._in_dtype = x[0].dtype

    n, c = x[0].shape[:2]
    dims = x[0].shape[2:]
    ys = tuple(conv_nd.get_conv_outsize(d, k, s, p, self.cover_all)
               for (d, k, s, p) in six.moves.zip(
                   dims, self.ksize, self.stride, self.pad))
    # (n, c, y_1, y_2, ..., y_N)
    y_shape = (n, c) + ys
    y = cuda.cupy.empty(y_shape, dtype=x[0].dtype)
    self.indexes = cuda.cupy.empty(y_shape, dtype=numpy.int32)

    in_params, out_params, operation, name = \
        max_pooling_nd_kernel.MaxPoolingNDKernelForward.generate(self.ndim)
    cuda.elementwise(in_params, out_params, operation, name)(
        x[0].reduced_view(),
        *(dims + ys + self.ksize + self.stride + self.pad +
          (y, self.indexes)))
    return y,
def setUp(self):
    self.x = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
    self.gy = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
    with chainer.using_config('use_cudnn', self.use_cudnn):
        self.expect = chainer.should_use_cudnn('==always') and (
            cuda.cudnn.cudnn.getVersion() >= 3000 or
            self.dtype != numpy.float16)
def backward(self, indexes, gy):
    y = self.get_retained_outputs()[0]
    if chainer.should_use_cudnn('==always') and self._use_cudnn:
        x = self.get_retained_inputs()[0]
        return ReLUGrad3(x, y).apply((gy[0],))
    else:
        return ReLUGrad2(y).apply((gy[0],))
def backward(self, indexes, grad_outputs):
    x, = self.get_retained_inputs()
    if chainer.should_use_cudnn('==always') and self._use_cudnn:
        y = self.get_retained_outputs()[0]
        return ClippedReLUGrad3(x.data, y.data, self.cap).apply(
            grad_outputs)
    else:
        return ClippedReLUGrad2(x.data, self.cap).apply(grad_outputs)
def check_call_cudnn_backward(self, use_cudnn):
    with chainer.using_config('use_cudnn', use_cudnn):
        expect = chainer.should_use_cudnn('>=auto', 5000)
        hy, ys = self.call_forward(True)
        hy.grad = _to_gpu(self.dhy)
        with mock.patch('cupy.cuda.cudnn.RNNBackwardWeights') as func:
            hy.backward()
        assert func.called == expect
def _log_softmax(x, axis=1):
    if chainer.should_use_cudnn('>=auto'):
        xp = backend.get_array_module(x)
        if xp is cuda.cupy:
            return cudnn.softmax_forward(x, axis, _algorithm)
    log_z = logsumexp(x, axis)
    y = x - log_z
    return y
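
# NumPy sketch of the non-cuDNN path above: log-softmax as x - logsumexp(x),
# with the per-row maximum subtracted first for numerical stability. The
# helper name is illustrative, not Chainer's internal `logsumexp`.
import numpy as np

def log_softmax_reference(x, axis=1):
    m = x.max(axis=axis, keepdims=True)
    log_z = m + np.log(np.exp(x - m).sum(axis=axis, keepdims=True))
    return x - log_z

x_ls = np.array([[1.0, 2.0, 3.0]])
y_ls = log_softmax_reference(x_ls)
assert np.allclose(np.exp(y_ls).sum(axis=1), 1.0)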
def test_call_cudnn_backward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        expect = chainer.should_use_cudnn('>=auto') and self.ndim > 1
        y = self.forward()
    # should be consistent to forward regardless of use_cudnn config
    y.grad = self.gy
    with testing.patch('cupy.cudnn.pooling_backward') as func:
        y.backward()
    assert func.called == expect
def test_call_cudnn_backward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        y = self.forward()
        y.grad = self.gy
        patch = 'cupy.cudnn.convolution_backward_data'
        with mock.patch(patch) as func:
            y.backward()
        self.assertEqual(func.called, chainer.should_use_cudnn('>=auto'))
def forward(self, inputs):
    self.retain_inputs((0, 1))
    y, gy = inputs
    xp = self._x_xp
    if xp is cuda.cupy and chainer.should_use_cudnn('>=auto'):
        gx = cudnn.softmax_backward(y, gy, self.axis, _algorithm)
    else:
        gx = gy - xp.exp(y) * gy.sum(axis=self.axis, keepdims=True)
    return gx,
def forward_gpu(self, inputs):
    x, = inputs
    if chainer.should_use_cudnn('>=auto') and x.flags.c_contiguous:
        self._use_cudnn = True
        y = cudnn.activation_forward(x, _mode)
    else:
        y = cuda.cupy.maximum(x, 0, dtype=x.dtype)
    self.retain_outputs((0,))
    return y,
def test_call_cudnn_backward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        expect = chainer.should_use_cudnn('>=auto')
        y = self.forward()
    y.grad = self.gy
    # should be consistent to forward regardless of use_cudnn config
    with mock.patch('cupy.cudnn.cudnn.poolingBackward') as func:
        y.backward()
    self.assertEqual(func.called, expect)
def forward_gpu(self, x):
    if (chainer.should_use_cudnn('>=auto') and
            pooling_2d._check_cudnn_acceptable_type(x[0].dtype)):
        return super(MaxPooling2D, self).forward_gpu(x)

    self.retain_inputs(())
    self._in_shape = x[0].shape
    self._in_dtype = x[0].dtype

    n, c, h, w = x[0].shape
    y_h = conv.get_conv_outsize(
        h, self.kh, self.sy, self.ph, self.cover_all)
    assert y_h > 0, 'Height in the output should be positive.'
    y_w = conv.get_conv_outsize(
        w, self.kw, self.sx, self.pw, self.cover_all)
    assert y_w > 0, 'Width in the output should be positive.'
    y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype)
    self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32)

    cuda.elementwise(
        'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw',
        'T out, S indexes',
        '''
        int c0 = i / (out_h * out_w);
        int out_y = i / out_w % out_h;
        int out_x = i % out_w;
        int in_y_0 = max(0, out_y * sy - ph);
        int in_y_1 = min(h, out_y * sy + kh - ph);
        int in_x_0 = max(0, out_x * sx - pw);
        int in_x_1 = min(w, out_x * sx + kw - pw);

        T maxval = in[in_x_0 + w * (in_y_0 + h * c0)];
        int argmax_y = in_y_0;
        int argmax_x = in_x_0;
        for (int y = in_y_0; y < in_y_1; ++y) {
            int offset_y = w * (y + h * c0);
            for (int x = in_x_0; x < in_x_1; ++x) {
                float v = in[x + offset_y];
                if (maxval < v) {
                    maxval = v;
                    argmax_y = y;
                    argmax_x = x;
                }
            }
        }
        out = maxval;

        int argmax_ky = argmax_y + ph - out_y * sy;
        int argmax_kx = argmax_x + pw - out_x * sx;
        indexes = argmax_kx + kw * argmax_ky;
        ''', 'max_pool_fwd')(
            x[0].reduced_view(),
            h, w, y_h, y_w, self.kh, self.kw, self.sy, self.sx,
            self.ph, self.pw, y, self.indexes)
    return y,
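
# NumPy sketch of the kernel's bookkeeping above: the argmax of each pooling
# window is stored relative to the window origin as argmax_kx + kw * argmax_ky,
# so the backward pass can scatter gradients without recomputing the max.
# Padding is omitted and all names are illustrative.
import numpy as np

def max_pool_2d_reference(x, kh, kw, sy, sx):
    n, c, h, w = x.shape
    out_h = (h - kh) // sy + 1
    out_w = (w - kw) // sx + 1
    y = np.empty((n, c, out_h, out_w), dtype=x.dtype)
    indexes = np.empty((n, c, out_h, out_w), dtype=np.int32)
    for oy in range(out_h):
        for ox in range(out_w):
            window = x[:, :, oy * sy:oy * sy + kh, ox * sx:ox * sx + kw]
            flat = window.reshape(n, c, -1)
            arg = flat.argmax(axis=2)
            ky, kx = np.unravel_index(arg, (kh, kw))
            y[:, :, oy, ox] = flat.max(axis=2)
            # argmax offset within the window, encoded as kx + kw * ky
            indexes[:, :, oy, ox] = kx + kw * ky
    return y, indexes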
def forward_gpu(self, x):
    if chainer.should_use_cudnn('==always') and x[0].flags.c_contiguous:
        y = cudnn.activation_forward(x[0], _mode)
    else:
        y = cuda.cupy.empty_like(x[0])
        cuda.cupy.tanh(x[0], out=y)
    self.retain_inputs(())
    self.retain_outputs((0,))
    return y,
def backward_gpu(self, x, gy):
    y = self.output_data[0]
    if chainer.should_use_cudnn('==always') and self._use_cudnn:
        gx = cudnn.activation_backward(x[0], y, gy[0], _mode)
    else:
        gx = cuda.elementwise(
            'T y, T gy', 'T gx',
            'gx = y > 0 ? gy : (T)0',
            'relu_bwd')(y, gy[0])
    return gx,
def _use_cudnn(self, x, W):
    return (not self.cover_all and
            chainer.should_use_cudnn('>=auto') and
            self.ndim > 1 and
            x.dtype == W.dtype)
def _use_cudnn(self, x, W):
    return (chainer.should_use_cudnn('>=auto') and
            self.ndim > 1 and
            x.dtype == W.dtype)
def test_no_cudnn_available(self):
    with chainer.using_config('use_cudnn', 'always'):
        self.assertFalse(chainer.should_use_cudnn('==always'))
        self.assertFalse(chainer.should_use_cudnn('>=auto'))
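
# A hedged sketch of how the `use_cudnn` configuration levels interact with
# should_use_cudnn, which the test above relies on. All checks return False
# when CuPy/cuDNN is unavailable; the True/False pattern below assumes the
# usual 'never' < 'auto' < 'always' ordering of the config levels.
import chainer

with chainer.using_config('use_cudnn', 'never'):
    assert not chainer.should_use_cudnn('>=auto')
    assert not chainer.should_use_cudnn('==always')

with chainer.using_config('use_cudnn', 'auto'):
    # '>=auto' may be True when cuDNN is available; '==always' stays False
    # because the config level is only 'auto'.
    assert not chainer.should_use_cudnn('==always')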
def check_call_cudnn_forward_inference(self, use_cudnn):
    with chainer.using_config('use_cudnn', use_cudnn):
        expect = chainer.should_use_cudnn('>=auto', 5000)
        with testing.patch('cupy.cuda.cudnn.RNNForwardInference') as func:
            self.call_forward(False)
        assert func.called == expect
def n_step_lstm_base( n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction, **kwargs): """Base function for Stack LSTM/BiLSTM functions. This function is used at :func:`chainer.functions.n_step_lstm` and :func:`chainer.functions.n_step_bilstm`. This function's behavior depends on following arguments, ``activation`` and ``use_bi_direction``. Args: n_layers(int): The number of layers. dropout_ratio(float): Dropout ratio. hx (:class:`~chainer.Variable`): Variable holding stacked hidden states. Its shape is ``(S, B, N)`` where ``S`` is the number of layers and is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N`` is the dimension of the hidden units. cx (:class:`~chainer.Variable`): Variable holding stacked cell states. It has the same shape as ``hx``. ws (list of list of :class:`~chainer.Variable`): Weight matrices. ``ws[i]`` represents the weights for the i-th layer. Each ``ws[i]`` is a list containing eight matrices. ``ws[i][j]`` corresponds to :math:`W_j` in the equation. Only ``ws[0][j]`` where ``0 <= j < 4`` are ``(N, I)``-shape as they are multiplied with input variables, where ``I`` is the size of the input and ``N`` is the dimension of the hidden units. All other matrices are ``(N, N)``-shaped. bs (list of list of :class:`~chainer.Variable`): Bias vectors. ``bs[i]`` represents the biases for the i-th layer. Each ``bs[i]`` is a list containing eight vectors. ``bs[i][j]`` corresponds to :math:`b_j` in the equation. The shape of each matrix is ``(N,)``. xs (list of :class:`~chainer.Variable`): A list of :class:`~chainer.Variable` holding input values. Each element ``xs[t]`` holds input value for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the mini-batch size for time ``t``. The sequences must be transposed. :func:`~chainer.functions.transpose_sequence` can be used to transpose a list of :class:`~chainer.Variable`\\ s each representing a sequence. When sequences has different lengths, they must be sorted in descending order of their lengths before transposing. So ``xs`` needs to satisfy ``xs[t].shape[0] >= xs[t + 1].shape[0]``. use_bi_direction (bool): If ``True``, this function uses Bi-directional LSTM. Returns: tuple: This function returns a tuple containing three elements, ``hy``, ``cy`` and ``ys``. - ``hy`` is an updated hidden states whose shape is the same as ``hx``. - ``cy`` is an updated cell states whose shape is the same as ``cx``. - ``ys`` is a list of :class:`~chainer.Variable` . Each element ``ys[t]`` holds hidden states of the last layer corresponding to an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is the mini-batch size for time ``t``. Note that ``B_t`` is the same value as ``xs[t]``. .. seealso:: :func:`chainer.functions.n_step_lstm` :func:`chainer.functions.n_step_bilstm` """ if kwargs: argument.check_unexpected_kwargs( kwargs, train='train argument is not supported anymore. ' 'Use chainer.using_config', use_cudnn='use_cudnn argument is not supported anymore. ' 'Use chainer.using_config') argument.assert_kwargs_empty(kwargs) # Check input size consistency with xs and ws here. 
x_in = xs[0].shape[1] w_in = ws[0][0].shape[1] if x_in != w_in: raise ValueError('Inconsistent input size in input values and weight ' 'parameters: {} != {}'.format(x_in, w_in)) xp = backend.get_array_module(hx, hx.data) use_cuda = xp is cuda.cupy or ( xp is chainerx and hx.device.device.backend.name == 'cuda') directions = 1 if use_bi_direction: directions = 2 combined = _combine_inputs(hx, cx, ws, bs, xs, n_layers, directions) has_chainerx_array, combined = _extract_apply_in_data(combined) hx_chx, cx_chx, ws_chx, bs_chx, xs_chx = _seperate_inputs( combined, n_layers, len(xs), directions) if has_chainerx_array and xp is chainerx and dropout_ratio == 0: if use_bi_direction: hy, cy, ys = chainerx.n_step_bilstm( n_layers, hx_chx, cx_chx, ws_chx, bs_chx, xs_chx) else: hy, cy, ys = chainerx.n_step_lstm( n_layers, hx_chx, cx_chx, ws_chx, bs_chx, xs_chx) hy = variable.Variable._init_unchecked( hy, requires_grad=hy.is_backprop_required(), is_chainerx_array=True) cy = variable.Variable._init_unchecked( cy, requires_grad=cy.is_backprop_required(), is_chainerx_array=True) ys = [variable.Variable._init_unchecked( y, requires_grad=y.is_backprop_required(), is_chainerx_array=True) for y in ys] return hy, cy, ys elif use_cuda and chainer.should_use_cudnn('>=auto', 5000): lengths = [len(x) for x in xs] xs = chainer.functions.concat(xs, axis=0) with chainer.using_device(xs.device): states = cuda.get_cudnn_dropout_states() states.set_dropout_ratio(dropout_ratio) w = n_step_rnn.cudnn_rnn_weight_concat( n_layers, states, use_bi_direction, 'lstm', ws, bs) if use_bi_direction: rnn = NStepBiLSTM else: rnn = NStepLSTM hy, cy, ys = rnn(n_layers, states, lengths)(hx, cx, w, xs) sections = numpy.cumsum(lengths[:-1]) ys = chainer.functions.split_axis(ys, sections, 0) return hy, cy, ys else: return n_step_rnn.n_step_rnn_impl( _lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction)
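
# A minimal usage sketch for the stacked-LSTM path documented above
# (uni-directional, n_layers=1, NumPy arrays on the CPU fallback; the sizes
# and the zero initial states are illustrative, not required values).
import numpy as np
import chainer.functions as F

n_layers, in_size, out_size = 1, 3, 4
batches = [3, 2, 1]                     # B_t must be non-increasing
xs = [np.random.randn(b, in_size).astype('f') for b in batches]
hx = np.zeros((n_layers, batches[0], out_size), 'f')
cx = np.zeros((n_layers, batches[0], out_size), 'f')
# Eight weight matrices and eight bias vectors per layer, as documented:
# ws[0][j] is (N, I) for j < 4 and (N, N) otherwise.
ws = [[np.random.randn(out_size, in_size if j < 4 else out_size).astype('f')
       for j in range(8)]]
bs = [[np.zeros(out_size, 'f') for _ in range(8)]]

hy, cy, ys = F.n_step_lstm(n_layers, 0.0, hx, cx, ws, bs, xs)
assert hy.shape == (n_layers, batches[0], out_size)
assert [y.shape for y in ys] == [(b, out_size) for b in batches]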
def can_use_cudnn(self, xp):
    # TODO(bkvogel): Check for float16 support again in next cuDNN version.
    # cuDNN v5 batch normalization does not seem to support float16.
    return (xp is cuda.cupy and
            chainer.should_use_cudnn('>=auto', 5000) and
            self.cudnn_dim_ok and
            self.cudnn_dtype_ok)
def forward_gpu(self, inputs): self.retain_inputs((0, 1)) # only retain x and W x, W = inputs[:2] b = inputs[2] if len(inputs) == 3 else None if not all([isinstance(i, cuda.ndarray) for i in inputs]): if b is not None: raise ValueError('numpy and cupy must not be used together\n' 'type(W): {0}, type(x): {1}, type(b): {2}' .format(type(W), type(x), type(b))) else: raise ValueError('numpy and cupy must not be used together\n' 'type(W): {0}, type(x): {1}' .format(type(W), type(x))) kh, kw = W.shape[2:] n, in_c, in_h, in_w = x.shape c = W.shape[1] # out_c if self.outh is None: self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph, d=self.dy) assert self.outh > 0, 'Height in the output should be positive.' if self.outw is None: self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw, d=self.dx) assert self.outw > 0, 'Width in the output should be positive.' self._set_cover_all(x, W) if (not self.cover_all and chainer.should_use_cudnn('>=auto') and x.dtype == W.dtype and ((self.dy == 1 and self.dx == 1) or _cudnn_version_ >= 6000)): x = cuda.cupy.ascontiguousarray(x) W = cuda.cupy.ascontiguousarray(W) if b is not None: b = cuda.cupy.ascontiguousarray(b) use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype) handle = cudnn.get_handle() x_desc = cudnn.create_tensor_descriptor(x) y = cuda.cupy.empty((n, c, self.outh, self.outw), dtype=x.dtype) y_desc = cudnn.create_tensor_descriptor(y) filter_desc = cudnn.create_filter_descriptor(W) conv_param = (self.ph, self.pw), (self.sy, self.sx), x.dtype dilation = (self.dy, self.dx) conv_desc = cudnn.create_convolution_descriptor( *conv_param, dilation=dilation, use_tensor_core=use_tensor_core) if b is not None: bias_desc = cudnn.create_tensor_descriptor( b[None, :, None, None]) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = np.array(1, dtype=oz_dtype).ctypes zero = np.array(0, dtype=oz_dtype).ctypes workspace_size = cuda.get_max_workspace_size() workspace = cuda.cupy.empty((workspace_size,), dtype='b') if configuration.config.cudnn_deterministic: algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 elif configuration.config.autotune and _cudnn_version_ >= 5000: algo = get_algorithm( W, x, y, conv_param + (dilation,), handle, filter_desc, x_desc, conv_desc, y_desc, workspace) else: algo = libcudnn.getConvolutionBackwardDataAlgorithm( handle, filter_desc.value, x_desc.value, conv_desc.value, y_desc.value, _bwd_data_pref, workspace_size) if use_tensor_core: # Only CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 supports # Tensor-Core in cuDNN7 algo = libcudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 libcudnn.convolutionBackwardData_v3( handle, one.data, filter_desc.value, W.data.ptr, x_desc.value, x.data.ptr, conv_desc.value, algo, workspace.data.ptr, workspace_size, zero.data, y_desc.value, y.data.ptr) if b is not None: cudnn.add_tensor( handle, one.data, bias_desc.value, b.data.ptr, one.data, y_desc.value, y.data.ptr) else: gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False) # - k, m, n: shape of out_channel # - b: number of inputs # - h, w: height and width of kernels # k, m, n, b, h, w -> b, k, m, n, h, w gcol = cuda.cupy.rollaxis(gcol, 3) y = conv.col2im_gpu( gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw, dy=self.dy, dx=self.dx) if b is not None: y += b.reshape(1, b.size, 1, 1) return y,
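
# Hedged sketch of the output-size arithmetic behind get_deconv_outsize as it
# is used above: a transposed convolution inverts the forward-convolution
# formula, and dilation enlarges the effective kernel. This mirrors the
# standard formula under these assumptions, not Chainer's exact source.
def deconv_outsize(size, k, s, p, d=1):
    dk = k + (k - 1) * (d - 1)          # effective (dilated) kernel size
    return s * (size - 1) + dk - 2 * p

# e.g. 3 inputs, kernel 3, stride 2, no padding -> 7 outputs
assert deconv_outsize(3, 3, 2, 0) == 7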
def forward_gpu(self, inputs): self.retain_inputs((0, 1)) x, gy = inputs _, out_c, out_h, out_w = gy.shape n, c, h, w = x.shape if (self.cover_all or not chainer.should_use_cudnn('>=auto') or x.dtype != self.W_dtype or ((self.dy > 1 or self.dx > 1) and _cudnn_version < 6000)): col = conv.im2col_gpu(x, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, cover_all=self.cover_all, dy=self.dy, dx=self.dx) gW = cuda.cupy.tensordot(gy, col, ((0, 2, 3), (0, 4, 5))).astype(self.W_dtype, copy=False) return gW, gW = cuda.cupy.empty((out_c, c, self.kh, self.kw), dtype=self.W_dtype) x = cuda.cupy.ascontiguousarray(x) gy = cuda.cupy.ascontiguousarray(gy) use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype) handle = cudnn.get_handle() x_desc = cudnn.create_tensor_descriptor(x) gy_desc = cudnn.create_tensor_descriptor(gy) filter_desc = cudnn.create_filter_descriptor(gW) conv_desc = cudnn.create_convolution_descriptor( (self.ph, self.pw), (self.sy, self.sx), x.dtype, dilation=(self.dy, self.dx), use_tensor_core=use_tensor_core) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = numpy.array(1, dtype=oz_dtype).ctypes zero = numpy.array(0, dtype=oz_dtype).ctypes workspace_size = cuda.get_max_workspace_size() workspace = cuda.cupy.empty((workspace_size, ), dtype='b') if configuration.config.cudnn_deterministic: algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 else: algo = libcudnn.getConvolutionBackwardFilterAlgorithm( handle, x_desc.value, gy_desc.value, conv_desc.value, filter_desc.value, _bwd_filter_pref, workspace_size) if use_tensor_core: # Only CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 supports # Tensor-Core in cuDNN7. algo = libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 libcudnn.convolutionBackwardFilter_v3(handle, one.data, x_desc.value, x.data.ptr, gy_desc.value, gy.data.ptr, conv_desc.value, algo, workspace.data.ptr, workspace_size, zero.data, filter_desc.value, gW.data.ptr) return gW,
def setUp(self):
    self.x = cuda.cupy.random.uniform(-1, 1, (4, 3)).astype(numpy.float32)
    self.t = cuda.cupy.random.randint(0, 3, (4, 3)).astype(numpy.int32)
    with chainer.using_config('use_cudnn', self.use_cudnn):
        self.expect = chainer.should_use_cudnn('==always')
def setUp(self):
    self.x = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
    self.gy = cuda.cupy.random.uniform(-1, 1, (2, 3)).astype(self.dtype)
    with chainer.using_config('use_cudnn', self.use_cudnn):
        self.expect = chainer.should_use_cudnn('==always')
def forward_gpu(self, inputs):
    a = cuda.to_gpu(self.a)
    b = cuda.to_gpu(self.b)
    assert chainer.should_use_cudnn('==always')
    return cudnn.activation_backward(a, b, inputs[0], _mode),
def _use_cudnn(self, x, W):
    return (not self.cover_all and
            chainer.should_use_cudnn('>=auto') and
            self.ndim > 1 and
            _check_cudnn_acceptable_type(x.dtype, W.dtype))
def test_call_cudnn_forward(self):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        with testing.patch('cupy.cuda.cudnn.poolingForward') as func:
            self.forward()
        self.assertEqual(func.called, chainer.should_use_cudnn('>=auto'))
def backward(self, x, gy):
    if chainer.should_use_cudnn('==always', 5000) and self._use_cudnn:
        return DropoutGradCuDNN(self.states, self.dropout_ratio).apply(gy)
    else:
        return DropoutGrad(self.mask).apply(gy)
def n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation, use_bi_direction, **kwargs): """n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation, use_bi_direction) Base function for Stack RNN/BiRNN functions. This function is used at :func:`chainer.functions.n_step_birnn` and :func:`chainer.functions.n_step_rnn`. This function's behavior depends on following arguments, ``activation`` and ``use_bi_direction``. .. warning:: ``train`` and ``use_cudnn`` arguments are not supported anymore since v2. Instead, use ``chainer.using_config('train', train)`` and ``chainer.using_config('use_cudnn', use_cudnn)`` respectively. See :func:`chainer.using_config`. Args: n_layers(int): Number of layers. dropout_ratio(float): Dropout ratio. hx (chainer.Variable): Variable holding stacked hidden states. Its shape is ``(S, B, N)`` where ``S`` is number of layers and is equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is dimention of hidden units. ws (list of list of chainer.Variable): Weight matrices. ``ws[i]`` represents weights for i-th layer. Each ``ws[i]`` is a list containing two matrices. ``ws[i][j]`` is corresponding with ``W_j`` in the equation. Only ``ws[0][j]`` where ``0 <= j < 1`` is ``(I, N)`` shape as they are multiplied with input variables. All other matrices has ``(N, N)`` shape. bs (list of list of chainer.Variable): Bias vectors. ``bs[i]`` represnents biases for i-th layer. Each ``bs[i]`` is a list containing two vectors. ``bs[i][j]`` is corresponding with ``b_j`` in the equation. Shape of each matrix is ``(N,)`` where ``N`` is dimention of hidden units. xs (list of chainer.Variable): A list of :class:`~chainer.Variable` holding input values. Each element ``xs[t]`` holds input value for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is mini-batch size for time ``t``, and ``I`` is size of input units. Note that this functions supports variable length sequences. When sequneces has different lengths, sort sequences in descending order by length, and transpose the sorted sequence. :func:`~chainer.functions.transpose_sequence` transpose a list of :func:`~chainer.Variable` holding sequence. So ``xs`` needs to satisfy ``xs[t].shape[0] >= xs[t + 1].shape[0]``. activation (str): Activation function name. Please select ``tanh`` or ``relu``. use_bi_direction (bool): If ``True``, this function uses Bi-directional RNN. Returns: tuple: This functions returns a tuple concaining three elements, ``hy`` and ``ys``. - ``hy`` is an updated hidden states whose shape is same as ``hx``. - ``ys`` is a list of :class:`~chainer.Variable` . Each element ``ys[t]`` holds hidden states of the last layer corresponding to an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is mini-batch size for time ``t``, and ``N`` is size of hidden units. Note that ``B_t`` is the same value as ``xs[t]``. .. seealso:: :func:`chainer.functions.n_step_rnn` :func:`chainer.functions.n_step_birnn` """ # NOQA argument.check_unexpected_kwargs( kwargs, train='train argument is not supported anymore. ' 'Use chainer.using_config', use_cudnn='use_cudnn argument is not supported anymore. ' 'Use chainer.using_config') argument.assert_kwargs_empty(kwargs) activation_list = ['tanh', 'relu'] if activation not in activation_list: candidate = ','.join(activation_list) raise ValueError('Invalid activation: "%s". 
Please select from [%s]' % (activation, candidate)) xp = cuda.get_array_module(hx) if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000): states = get_random_state().create_dropout_states(dropout_ratio) # flatten all input variables inputs = tuple( itertools.chain((hx, ), itertools.chain.from_iterable(ws), itertools.chain.from_iterable(bs), xs)) if use_bi_direction: # Bi-directional RNN if activation == 'tanh': rnn = NStepBiRNNTanh(n_layers, states) elif activation == 'relu': rnn = NStepBiRNNReLU(n_layers, states) else: # Uni-directional RNN if activation == 'tanh': rnn = NStepRNNTanh(n_layers, states) elif activation == 'relu': rnn = NStepRNNReLU(n_layers, states) ret = rnn(*inputs) hy, = ret[:1] ys = ret[1:] return hy, ys else: direction = 2 if use_bi_direction else 1 hx = split_axis.split_axis(hx, n_layers * direction, axis=0, force_tuple=True) hx = [reshape.reshape(h, h.shape[1:]) for h in hx] xws = [_stack_weight([w[0]]) for w in ws] hws = [_stack_weight([w[1]]) for w in ws] xbs = [_stack_weight([b[0]]) for b in bs] hbs = [_stack_weight([b[1]]) for b in bs] xs_next = xs hy = [] for layer in six.moves.range(n_layers): def _one_directional_loop(di): # di=0, forward RNN # di=1, backward RNN xs_list = xs_next if di == 0 else reversed(xs_next) layer_idx = direction * layer + di h = hx[layer_idx] h_list = [] for x in xs_list: batch = x.shape[0] if h.shape[0] > batch: h, h_rest = split_axis.split_axis(h, [batch], axis=0) else: h_rest = None if layer > 0: x = dropout.dropout(x, ratio=dropout_ratio) rnn_in = ( linear.linear(x, xws[layer_idx], xbs[layer_idx]) + linear.linear(h, hws[layer_idx], hbs[layer_idx])) if activation == 'tanh': h_bar = tanh.tanh(rnn_in) elif activation == 'relu': h_bar = relu.relu(rnn_in) if h_rest is not None: h = concat.concat([h_bar, h_rest], axis=0) else: h = h_bar h_list.append(h_bar) return h, h_list # Forward RNN h, h_forward = _one_directional_loop(di=0) hy.append(h) if use_bi_direction: # Backward RNN h, h_backward = _one_directional_loop(di=1) h_backward.reverse() # Concat xs_next = [ concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in six.moves.zip(h_forward, h_backward) ] hy.append(h) else: # Uni-directional RNN xs_next = h_forward ys = xs_next hy = stack.stack(hy) return hy, tuple(ys)
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction, **kwargs): """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction) Base function for Stack GRU/BiGRU functions. This function is used at :func:`chainer.functions.n_step_bigru` and :func:`chainer.functions.n_step_gru`. This function's behavior depends on argument ``use_bi_direction``. .. warning:: ``train`` and ``use_cudnn`` arguments are not supported anymore since v2. Instead, use ``chainer.using_config('train', train)`` and ``chainer.using_config('use_cudnn', use_cudnn)`` respectively. See :func:`chainer.using_config`. Args: n_layers(int): Number of layers. dropout_ratio(float): Dropout ratio. hx (chainer.Variable): Variable holding stacked hidden states. Its shape is ``(S, B, N)`` where ``S`` is number of layers and is equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is dimention of hidden units. ws (list of list of chainer.Variable): Weight matrices. ``ws[i]`` represents weights for i-th layer. Each ``ws[i]`` is a list containing six matrices. ``ws[i][j]`` is corresponding with ``W_j`` in the equation. Only ``ws[0][j]`` where ``0 <= j < 3`` is ``(I, N)`` shape as they are multiplied with input variables. All other matrices has ``(N, N)`` shape. bs (list of list of chainer.Variable): Bias vectors. ``bs[i]`` represnents biases for i-th layer. Each ``bs[i]`` is a list containing six vectors. ``bs[i][j]`` is corresponding with ``b_j`` in the equation. Shape of each matrix is ``(N,)`` where ``N`` is dimention of hidden units. xs (list of chainer.Variable): A list of :class:`~chainer.Variable` holding input values. Each element ``xs[t]`` holds input value for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is mini-batch size for time ``t``, and ``I`` is size of input units. Note that this functions supports variable length sequences. When sequneces has different lengths, sort sequences in descending order by length, and transpose the sorted sequence. :func:`~chainer.functions.transpose_sequence` transpose a list of :func:`~chainer.Variable` holding sequence. So ``xs`` needs to satisfy ``xs[t].shape[0] >= xs[t + 1].shape[0]``. activation (str): Activation function name. Please select ``tanh`` or ``relu``. use_bi_direction (bool): If ``True``, this function uses Bi-direction GRU. .. seealso:: :func:`chainer.functions.n_step_rnn` :func:`chainer.functions.n_step_birnn` """ # NOQA argument.check_unexpected_kwargs( kwargs, train='train argument is not supported anymore. ' 'Use chainer.using_config', use_cudnn='use_cudnn argument is not supported anymore. 
' 'Use chainer.using_config') argument.assert_kwargs_empty(kwargs) xp = cuda.get_array_module(hx, hx.data) if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000): states = get_random_state().create_dropout_states(dropout_ratio) # flatten all input variables inputs = tuple(itertools.chain( (hx, ), itertools.chain.from_iterable(ws), itertools.chain.from_iterable(bs), xs)) if use_bi_direction: rnn = NStepBiGRU(n_layers, states) else: rnn = NStepGRU(n_layers, states) ret = rnn(*inputs) hy, = ret[:1] ys = ret[1:] return hy, ys else: direction = 2 if use_bi_direction else 1 hx = split_axis.split_axis(hx, n_layers * direction, axis=0, force_tuple=True) hx = [reshape.reshape(h, h.shape[1:]) for h in hx] xws = [concat.concat([w[0], w[1], w[2]], axis=0) for w in ws] hws = [concat.concat([w[3], w[4], w[5]], axis=0) for w in ws] xbs = [concat.concat([b[0], b[1], b[2]], axis=0) for b in bs] hbs = [concat.concat([b[3], b[4], b[5]], axis=0) for b in bs] xs_next = xs hy = [] for layer in six.moves.range(n_layers): def _one_directional_loop(di): # di=0, forward GRU # di=1, backward GRU xs_list = xs_next if di == 0 else reversed(xs_next) layer_idx = direction * layer + di h = hx[layer_idx] h_list = [] for x in xs_list: batch = x.shape[0] if h.shape[0] > batch: h, h_rest = split_axis.split_axis(h, [batch], axis=0) else: h_rest = None if layer > 0: x = dropout.dropout(x, ratio=dropout_ratio) gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx]) gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx]) W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1) U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1) r = sigmoid.sigmoid(W_r_x + U_r_h) z = sigmoid.sigmoid(W_z_x + U_z_h) h_bar = tanh.tanh(W_x + r * U_x) h_bar = (1 - z) * h_bar + z * h if h_rest is not None: h = concat.concat([h_bar, h_rest], axis=0) else: h = h_bar h_list.append(h_bar) return h, h_list # Forward GRU h, h_forward = _one_directional_loop(di=0) hy.append(h) if use_bi_direction: # Backward GRU h, h_backward = _one_directional_loop(di=1) h_backward.reverse() # Concat xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in six.moves.zip(h_forward, h_backward)] hy.append(h) else: # Uni-directional GRU xs_next = h_forward ys = xs_next hy = stack.stack(hy) return hy, tuple(ys)
def forward_gpu(self, inputs):
    assert chainer.should_use_cudnn('==always')
    return cudnn.activation_backward(self.x, self.y, inputs[0], _mode,
                                     self.cap),
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction, **kwargs): """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction) Base function for Stack GRU/BiGRU functions. This function is used at :func:`chainer.functions.n_step_bigru` and :func:`chainer.functions.n_step_gru`. This function's behavior depends on argument ``use_bi_direction``. .. warning:: ``train`` and ``use_cudnn`` arguments are not supported anymore since v2. Instead, use ``chainer.using_config('train', train)`` and ``chainer.using_config('use_cudnn', use_cudnn)`` respectively. See :func:`chainer.using_config`. Args: n_layers(int): Number of layers. dropout_ratio(float): Dropout ratio. hx (chainer.Variable): Variable holding stacked hidden states. Its shape is ``(S, B, N)`` where ``S`` is number of layers and is equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is dimension of hidden units. Because of bi-direction, the first dimension length is ``2S``. ws (list of list of chainer.Variable): Weight matrices. ``ws[i]`` represents weights for i-th layer. Each ``ws[i]`` is a list containing six matrices. ``ws[i][j]`` is corresponding with ``W_j`` in the equation. Only ``ws[0][j]`` where ``0 <= j < 3`` is ``(I, N)`` shape as they are multiplied with input variables. All other matrices has ``(N, N)`` shape. bs (list of list of chainer.Variable): Bias vectors. ``bs[i]`` represnents biases for i-th layer. Each ``bs[i]`` is a list containing six vectors. ``bs[i][j]`` is corresponding with ``b_j`` in the equation. Shape of each matrix is ``(N,)`` where ``N`` is dimension of hidden units. xs (list of chainer.Variable): A list of :class:`~chainer.Variable` holding input values. Each element ``xs[t]`` holds input value for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is mini-batch size for time ``t``, and ``I`` is size of input units. Note that this function supports variable length sequences. When sequneces has different lengths, sort sequences in descending order by length, and transpose the sorted sequence. :func:`~chainer.functions.transpose_sequence` transpose a list of :func:`~chainer.Variable` holding sequence. So ``xs`` needs to satisfy ``xs[t].shape[0] >= xs[t + 1].shape[0]``. activation (str): Activation function name. Please select ``tanh`` or ``relu``. use_bi_direction (bool): If ``True``, this function uses Bi-direction GRU. .. seealso:: :func:`chainer.functions.n_step_rnn` :func:`chainer.functions.n_step_birnn` """ # NOQA if kwargs: argument.check_unexpected_kwargs( kwargs, train='train argument is not supported anymore. ' 'Use chainer.using_config', use_cudnn='use_cudnn argument is not supported anymore. ' 'Use chainer.using_config') argument.assert_kwargs_empty(kwargs) xp = backend.get_array_module(hx, hx.data) if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000): states = cuda.get_cudnn_dropout_states() states.set_dropout_ratio(dropout_ratio) lengths = [len(x) for x in xs] xs = chainer.functions.concat(xs, axis=0) w = n_step_rnn.cudnn_rnn_weight_concat(n_layers, states, use_bi_direction, 'gru', ws, bs) if use_bi_direction: rnn = NStepBiGRU else: rnn = NStepGRU hy, ys = rnn(n_layers, states, lengths)(hx, w, xs) sections = numpy.cumsum(lengths[:-1]) ys = chainer.functions.split_axis(ys, sections, 0) return hy, ys else: hy, _, ys = n_step_rnn.n_step_rnn_impl(_gru, n_layers, dropout_ratio, hx, None, ws, bs, xs, use_bi_direction) return hy, ys
def forward_gpu(self, inputs): x, W = inputs[:2] b = inputs[2] if len(inputs) == 3 else None if not type_check.same_types(*inputs): if b is not None: raise ValueError('numpy and cupy must not be used together\n' 'type(W): {0}, type(x): {1}, type(b): {2}' .format(type(W), type(x), type(b))) else: raise ValueError('numpy and cupy must not be used together\n' 'type(W): {0}, type(x): {1}' .format(type(W), type(x))) out_c, _, kh, kw = W.shape n, c, h, w = x.shape out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph, cover_all=self.cover_all) assert out_h > 0, 'Height in the output should be positive.' out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw, cover_all=self.cover_all) assert out_w > 0, 'Width in the output should be positive.' y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype) if (not self.cover_all and chainer.should_use_cudnn('>=auto') and _check_cudnn_acceptable_type(x.dtype, W.dtype)): x = cuda.cupy.ascontiguousarray(x) W = cuda.cupy.ascontiguousarray(W) if b is not None: b = cuda.cupy.ascontiguousarray(b) handle = cudnn.get_handle() x_desc = cudnn.create_tensor_descriptor(x) y_desc = cudnn.create_tensor_descriptor(y) self.filter_desc = cudnn.create_filter_descriptor(W) self.conv_desc = cudnn.create_convolution_descriptor( (self.ph, self.pw), (self.sy, self.sx), x.dtype) if b is not None: self.bias_desc = cudnn.create_tensor_descriptor( b[None, :, None, None]) workspace_size = cuda.get_max_workspace_size() workspace = cuda.cupy.empty((workspace_size,), dtype='b') algo = libcudnn.getConvolutionForwardAlgorithm( handle, x_desc.value, self.filter_desc.value, self.conv_desc.value, y_desc.value, _fwd_pref, workspace_size) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = numpy.array(1, dtype=oz_dtype).ctypes zero = numpy.array(0, dtype=oz_dtype).ctypes libcudnn.convolutionForward( handle, one.data, x_desc.value, x.data.ptr, self.filter_desc.value, W.data.ptr, self.conv_desc.value, algo, workspace.data.ptr, workspace_size, zero.data, y_desc.value, y.data.ptr) # TODO(beam2d): Support unshared bias if b is not None: cudnn.add_tensor( handle, one.data, self.bias_desc.value, b.data.ptr, one.data, y_desc.value, y.data.ptr) else: # Implementation using im2col self.col = conv.im2col_gpu( x, kh, kw, self.sy, self.sx, self.ph, self.pw, cover_all=self.cover_all) y = cuda.cupy.tensordot( self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False) # TODO(beam2d): Support unshared bias if b is not None: y += b y = cuda.cupy.rollaxis(y, 3, 1) return y,
def check_call_cudnn_forward_training(self, use_cudnn):
    with chainer.using_config('use_cudnn', use_cudnn):
        expect = chainer.should_use_cudnn('>=auto', 5000)
        with testing.patch('cupy.cudnn.rnn_forward_training') as func:
            self.call_forward(True)
        assert func.called == expect
def backward_gpu(self, inputs, grad_outputs): x, W = inputs[:2] b = inputs[2] if len(inputs) == 3 else None if not type_check.same_types(*inputs): if b is not None: raise ValueError('numpy and cupy must not be used together\n' 'type(W): {0}, type(x): {1}, type(b): {2}' .format(type(W), type(x), type(b))) else: raise ValueError('numpy and cupy must not be used together\n' 'type(W): {0}, type(x): {1}' .format(type(W), type(x))) gy = grad_outputs[0] _, out_c, out_h, out_w = gy.shape n, c, h, w = x.shape kh, kw = W.shape[2:] gW = cuda.cupy.empty_like(W) if (not self.cover_all and chainer.should_use_cudnn('>=auto') and _check_cudnn_acceptable_type(x.dtype, W.dtype)): x = cuda.cupy.ascontiguousarray(x) W = cuda.cupy.ascontiguousarray(W) gy = cuda.cupy.ascontiguousarray(gy) handle = cudnn.get_handle() x_desc = cudnn.create_tensor_descriptor(x) gy_desc = cudnn.create_tensor_descriptor(gy) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = numpy.array(1, dtype=oz_dtype).ctypes zero = numpy.array(0, dtype=oz_dtype).ctypes gx = cuda.cupy.empty_like(x) if _cudnn_version >= 3000: workspace_size = cuda.get_max_workspace_size() workspace = cuda.cupy.empty((workspace_size,), dtype='b') if configuration.config.cudnn_deterministic: algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 # NOQA else: algo = libcudnn.getConvolutionBackwardFilterAlgorithm( handle, x_desc.value, gy_desc.value, self.conv_desc.value, self.filter_desc.value, _bwd_filter_pref, workspace_size) libcudnn.convolutionBackwardFilter_v3( handle, one.data, x_desc.value, x.data.ptr, gy_desc.value, gy.data.ptr, self.conv_desc.value, algo, workspace.data.ptr, workspace_size, zero.data, self.filter_desc.value, gW.data.ptr) if configuration.config.cudnn_deterministic: algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 # NOQA else: algo = libcudnn.getConvolutionBackwardDataAlgorithm( handle, self.filter_desc.value, gy_desc.value, self.conv_desc.value, x_desc.value, _bwd_data_pref, workspace_size) libcudnn.convolutionBackwardData_v3( handle, one.data, self.filter_desc.value, W.data.ptr, gy_desc.value, gy.data.ptr, self.conv_desc.value, algo, workspace.data.ptr, workspace_size, zero.data, x_desc.value, gx.data.ptr) else: if configuration.config.cudnn_deterministic: raise ValueError( "`cudnn_deterministic` option must be False " "if the backpropagation of " "chainer.functions.Convolution2D " "uses cuDNN and cuDNN versions < v3. " "Turn off cudnn_deterministic option with " "`chainer.using_config('cudnn_deterministic', False)` " "context.") libcudnn.convolutionBackwardFilter_v2( handle, one.data, x_desc.value, x.data.ptr, gy_desc.value, gy.data.ptr, self.conv_desc.value, zero.data, self.filter_desc.value, gW.data.ptr) libcudnn.convolutionBackwardData_v2( handle, one.data, self.filter_desc.value, W.data.ptr, gy_desc.value, gy.data.ptr, self.conv_desc.value, zero.data, x_desc.value, gx.data.ptr) if b is not None: gb = cuda.cupy.empty_like(b) libcudnn.convolutionBackwardBias( handle, one.data, gy_desc.value, gy.data.ptr, zero.data, self.bias_desc.value, gb.data.ptr) else: gW = cuda.cupy.tensordot( gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False) gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False) gcol = cuda.cupy.rollaxis(gcol, 3) gx = conv.col2im_gpu( gcol, self.sy, self.sx, self.ph, self.pw, h, w) if b is not None: gb = gy.sum(axis=(0, 2, 3)) if b is None: return gx, gW else: return gx, gW, gb
def backward_gpu(self, inputs, grad_outputs): x, W = inputs[:2] b = inputs[2] if len(inputs) == 3 else None gy = grad_outputs[0] _, out_c, out_h, out_w = gy.shape n, c, h, w = x.shape kh, kw = W.shape[2:] dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1) gW = cuda.cupy.empty_like(W) if (not self.cover_all and chainer.should_use_cudnn('>=auto') and _check_cudnn_acceptable_type(x.dtype, W.dtype)): pad_x = cuda.cupy.zeros((n, c, h + 2 * self.ph, w + 2 * self.pw), dtype=x.dtype) pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x out_h_s1 = h + 2 * self.ph - dkh + 1 out_w_s1 = w + 2 * self.pw - dkw + 1 out_sh = out_h + (out_h - 1) * (self.sy - 1) out_sw = out_w + (out_w - 1) * (self.sx - 1) gy_ph = (h + dkh - out_sh - 1) / 2 gy_pw = (w + dkw - out_sw - 1) / 2 pad_gy = cuda.cupy.zeros((n, out_c, h + dkh - 1, w + dkw - 1), dtype=x.dtype) pad_gy[:, :, gy_ph:gy_ph + out_sh:self.sy, gy_pw:gy_pw + out_sw:self.sx] = gy for j in moves.range(kh): for i in moves.range(kw): xji = cuda.cupy.ascontiguousarray( pad_x[:, :, j * self.dy:j * self.dy + out_h_s1, i * self.dx:i * self.dx + out_w_s1]) gyji = cuda.cupy.ascontiguousarray( pad_gy[:, :, j * self.dy:j * self.dy + h, i * self.dx:i * self.dx + w]) Wji = cuda.cupy.ascontiguousarray(W[:, :, -1::-1, -1::-1][:, :, j:j + 1, i:i + 1]) if i == 0 and j == 0: x = cuda.cupy.ascontiguousarray(x) gy = cuda.cupy.ascontiguousarray(gy) handle = cudnn.get_handle() x_desc = cudnn.create_tensor_descriptor(x) xji_desc = cudnn.create_tensor_descriptor(xji) gy_desc = cudnn.create_tensor_descriptor(gy) gyji_desc = cudnn.create_tensor_descriptor(gyji) conv_desc_data = cudnn.create_convolution_descriptor( (0, 0), (1, 1), xji.dtype) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = numpy.array(1, dtype=oz_dtype).ctypes zero = numpy.array(0, dtype=oz_dtype).ctypes gx = cuda.cupy.zeros_like(x) gWji = cuda.cupy.empty((out_c, c, 1, 1), dtype=W.dtype) if _cudnn_version >= 4000: workspace_size = cuda.get_max_workspace_size() workspace = cuda.cupy.empty((workspace_size, ), dtype='b') algo_filter = ( libcudnn.getConvolutionBackwardFilterAlgorithm( handle, xji_desc.value, gy_desc.value, self.conv_desc.value, self.filter_desc.value, _bwd_filter_pref, workspace_size)) algo_data = ( libcudnn.getConvolutionBackwardDataAlgorithm( handle, self.filter_desc.value, gyji_desc.value, conv_desc_data.value, x_desc.value, _bwd_data_pref, workspace_size)) if _cudnn_version >= 4000: libcudnn.convolutionBackwardFilter_v3( handle, one.data, xji_desc.value, xji.data.ptr, gy_desc.value, gy.data.ptr, self.conv_desc.value, algo_filter, workspace.data.ptr, workspace_size, zero.data, self.filter_desc.value, gWji.data.ptr) libcudnn.convolutionBackwardData_v3( handle, one.data, self.filter_desc.value, Wji.data.ptr, gyji_desc.value, gyji.data.ptr, conv_desc_data.value, algo_data, workspace.data.ptr, workspace_size, one.data, x_desc.value, gx.data.ptr) else: libcudnn.convolutionBackwardFilter_v2( handle, one.data, xji_desc.value, xji.data.ptr, gy_desc.value, gy.data.ptr, self.conv_desc.value, zero.data, self.filter_desc.value, gWji.data.ptr) libcudnn.convolutionBackwardData_v2( handle, one.data, self.filter_desc.value, Wji.data.ptr, gyji_desc.value, gyji.data.ptr, conv_desc_data.value, one.data, x_desc.value, gx.data.ptr) gW[:, :, j:j + 1, i:i + 1] = gWji if b is not None: gb = cuda.cupy.empty_like(b) libcudnn.convolutionBackwardBias(handle, one.data, gy_desc.value, gy.data.ptr, zero.data, self.bias_desc.value, gb.data.ptr) else: gW = cuda.cupy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 
5))).astype(W.dtype, copy=False) gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False) gcol = cuda.cupy.rollaxis(gcol, 3) gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w, dy=self.dy, dx=self.dx) if b is not None: gb = gy.sum(axis=(0, 2, 3)) if b is None: return gx, gW else: return gx, gW, gb
def test_higher_version_required(self):
    with chainer.using_config('use_cudnn', 'always'):
        self.assertFalse(chainer.should_use_cudnn(
            '>=auto', cuda.cuda.cudnn.getVersion() + 1))
def forward_gpu(self, inputs): x, W = inputs[:2] b = inputs[2] if len(inputs) == 3 else None if not type_check.same_types(*inputs): if b is not None: raise ValueError( 'numpy and cupy must not be used together\n' 'type(W): {0}, type(x): {1}, type(b): {2}'.format( type(W), type(x), type(b))) else: raise ValueError('numpy and cupy must not be used together\n' 'type(W): {0}, type(x): {1}'.format( type(W), type(x))) out_c, _, kh, kw = W.shape n, c, h, w = x.shape dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1) out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph, cover_all=self.cover_all, d=self.dy) out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw, cover_all=self.cover_all, d=self.dx) y = cuda.cupy.zeros((n, out_c, out_h, out_w), dtype=x.dtype) if (not self.cover_all and chainer.should_use_cudnn('>=auto') and _check_cudnn_acceptable_type(x.dtype, W.dtype)): pad_x = cuda.cupy.zeros((n, c, h + 2 * self.ph, w + 2 * self.pw), dtype=x.dtype) pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x out_h_s1 = h + 2 * self.ph - dkh + 1 out_w_s1 = w + 2 * self.pw - dkw + 1 for j in moves.range(kh): for i in moves.range(kw): xji = cuda.cupy.ascontiguousarray( pad_x[:, :, j * self.dy:j * self.dy + out_h_s1, i * self.dx:i * self.dx + out_w_s1]) Wji = cuda.cupy.ascontiguousarray(W[:, :, j:j + 1, i:i + 1]) if i == 0 and j == 0: handle = cudnn.get_handle() xji_desc = cudnn.create_tensor_descriptor(xji) y_desc = cudnn.create_tensor_descriptor(y) self.filter_desc = cudnn.create_filter_descriptor(Wji) self.conv_desc = cudnn.create_convolution_descriptor( (0, 0), (self.sy, self.sx), xji.dtype) workspace_size = cuda.get_max_workspace_size() workspace = cuda.cupy.empty((workspace_size, ), dtype='b') algo = libcudnn.getConvolutionForwardAlgorithm( handle, xji_desc.value, self.filter_desc.value, self.conv_desc.value, y_desc.value, _fwd_pref, workspace_size) oz_dtype = 'd' if x.dtype == 'd' else 'f' one = numpy.array(1, dtype=oz_dtype).ctypes libcudnn.convolutionForward( handle, one.data, xji_desc.value, xji.data.ptr, self.filter_desc.value, Wji.data.ptr, self.conv_desc.value, algo, workspace.data.ptr, workspace_size, one.data, y_desc.value, y.data.ptr) if b is not None: b = cuda.cupy.ascontiguousarray(b) self.bias_desc = cudnn.create_tensor_descriptor(b[None, :, None, None]) cudnn.add_tensor(handle, one.data, self.bias_desc.value, b.data.ptr, one.data, y_desc.value, y.data.ptr) else: # Implementation using im2col self.col = conv.im2col_gpu(x, kh, kw, self.sy, self.sx, self.ph, self.pw, cover_all=self.cover_all, dy=self.dy, dx=self.dx) y = cuda.cupy.tensordot(self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False) # TODO(beam2d): Support unshared bias if b is not None: y += b y = cuda.cupy.rollaxis(y, 3, 1) return y,
def n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    activation, use_bi_direction, **kwargs):
    """n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation, use_bi_direction)

    Base function for Stack RNN/BiRNN functions.

    This function is used at :func:`chainer.functions.n_step_birnn` and
    :func:`chainer.functions.n_step_rnn`.
    This function's behavior depends on the following arguments:
    ``activation`` and ``use_bi_direction``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2. Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing two matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 1`` is ``(I, N)`` shape, as it
            is multiplied with the input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing two vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of the hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds the input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t`` and ``I`` is the size of the input
            units. Note that this function supports variable length sequences.
            When sequences have different lengths, sort the sequences in
            descending order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list of
            :class:`~chainer.Variable`\\ s holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        activation (str): Activation function name.
            Please select ``tanh`` or ``relu``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional RNN.

    Returns:
        tuple: This function returns a tuple containing two elements,
        ``hy`` and ``ys``.

        - ``hy`` is an updated hidden states whose shape is the same as
          ``hx``.
        - ``ys`` is a list of :class:`~chainer.Variable` . Each element
          ``ys[t]`` holds hidden states of the last layer corresponding to an
          input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is the
          mini-batch size for time ``t``, and ``N`` is the size of the hidden
          units. Note that ``B_t`` is the same value as the mini-batch size of
          ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """  # NOQA
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs,
            train='train argument is not supported anymore. '
                  'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
                      'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    activation_list = ['tanh', 'relu']
    if activation not in activation_list:
        candidate = ','.join(activation_list)
        raise ValueError('Invalid activation: "%s". Please select from [%s]'
                         % (activation, candidate))

    xp = backend.get_array_module(hx)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = cuda.get_cudnn_dropout_states()
        states.set_dropout_ratio(dropout_ratio)
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)

        rnn_mode = 'rnn_%s' % activation
        w = cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, rnn_mode, ws, bs)

        if use_bi_direction:
            # Bi-directional RNN
            if activation == 'tanh':
                rnn = NStepBiRNNTanh
            elif activation == 'relu':
                rnn = NStepBiRNNReLU
        else:
            # Uni-directional RNN
            if activation == 'tanh':
                rnn = NStepRNNTanh
            elif activation == 'relu':
                rnn = NStepRNNReLU

        hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, ys

    else:
        def f(x, h, c, w, b):
            xw, hw = w
            xb, hb = b
            rnn_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
            if activation == 'tanh':
                return tanh.tanh(rnn_in), None
            elif activation == 'relu':
                return relu.relu(rnn_in), None

        hy, _, ys = n_step_rnn_impl(
            f, n_layers, dropout_ratio, hx, None, ws, bs, xs,
            use_bi_direction)
        return hy, ys
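# A minimal usage sketch of the uni-directional tanh variant through
# chainer.links.NStepRNNTanh, which builds the ws/bs lists described in the
# docstring and calls this base function. The sizes are made up for
# illustration; with NumPy inputs the non-cuDNN branch is taken.


def sketch_n_step_rnn_usage():
    import numpy
    import chainer.links as L

    rnn = L.NStepRNNTanh(1, 4, 8, 0.0)   # n_layers=1, in_size=4, out_size=8
    xs = [numpy.random.randn(t, 4).astype('f') for t in (6, 4, 4)]
    hy, ys = rnn(None, xs)               # hx=None -> zero initial state
    assert hy.shape == (1, 3, 8)         # (n_layers, B, N)
    assert [y.shape for y in ys] == [(6, 8), (4, 8), (4, 8)]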
def forward(self, inputs):
    assert chainer.should_use_cudnn('==always')
    gy, = inputs
    return cudnn.activation_backward(self.x, self.y, gy, _mode),
def n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        **kwargs):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used at :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the ``use_bi_direction`` argument.

    Args:
        n_layers(int): The number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (~chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (~chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to :math:`W_j` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` are ``(I, N)``-shape as they
            are multiplied with input variables, where ``I`` is the size of
            the input and ``N`` is the dimension of the hidden units. All
            other matrices are ``(N, N)``-shaped.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to :math:`b_j` in the equation.
            The shape of each vector is ``(N,)``.
        xs (list of :class:`~chainer.Variable`): A list of
            :class:`~chainer.Variable` holding input values. Each element
            ``xs[t]`` holds the input value for time ``t``. Its shape is
            ``(B_t, I)``, where ``B_t`` is the mini-batch size for time ``t``.
            The sequences must be transposed.
            :func:`~chainer.functions.transpose_sequence` can be used to
            transpose a list of :class:`~chainer.Variable`\\ s each
            representing a sequence.
            When sequences have different lengths, they must be sorted in
            descending order of their lengths before transposing. So ``xs``
            needs to satisfy ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

        - ``hy`` is an updated hidden states whose shape is the same as
          ``hx``.
        - ``cy`` is an updated cell states whose shape is the same as ``cx``.
        - ``ys`` is a list of :class:`~chainer.Variable` . Each element
          ``ys[t]`` holds hidden states of the last layer corresponding to an
          input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is the
          mini-batch size for time ``t``. Note that ``B_t`` is the same value
          as the mini-batch size of ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.n_step_lstm`
       :func:`chainer.functions.n_step_bilstm`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs,
            train='train argument is not supported anymore. '
                  'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
                      'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        handle = cudnn.get_handle()
        states = cuda.get_cudnn_dropout_states()
        cudnn.set_dropout_descriptor(states._desc, handle, dropout_ratio)

        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)

        w = n_step_rnn.cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, 'lstm', ws, bs)

        if use_bi_direction:
            rnn = NStepBiLSTM
        else:
            rnn = NStepLSTM

        hy, cy, ys = rnn(n_layers, states, lengths)(hx, cx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, cy, ys

    else:
        return n_step_rnn.n_step_rnn_impl(
            _lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
            use_bi_direction)
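# A minimal usage sketch through chainer.links.NStepLSTM, which wraps this
# function. Whether the cuDNN branch above runs depends on the input array
# module and `should_use_cudnn('>=auto', 5000)`; forcing 'never' exercises the
# pure-Chainer fallback. The sizes below are made up for illustration.


def sketch_n_step_lstm_usage():
    import numpy
    import chainer
    import chainer.links as L

    lstm = L.NStepLSTM(2, 10, 16, 0.1)   # n_layers=2, in_size=10, out_size=16
    xs = [numpy.random.randn(t, 10).astype('f') for t in (5, 3, 2)]
    with chainer.using_config('use_cudnn', 'never'):
        hy, cy, ys = lstm(None, None, xs)
    assert hy.shape == (2, 3, 16) and cy.shape == (2, 3, 16)
    assert [y.shape for y in ys] == [(5, 16), (3, 16), (2, 16)]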
def n_step_lstm_base(n_layers, dropout_ratio, hx, cx, ws, bs, xs,
                     use_bi_direction, **kwargs):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used at :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the ``use_bi_direction`` argument.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` is ``(I, N)`` shape as they
            are multiplied with input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of the hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds the input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t`` and ``I`` is the size of the input
            units. Note that this function supports variable length sequences.
            When sequences have different lengths, sort the sequences in
            descending order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list of
            :class:`~chainer.Variable`\\ s holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

        - ``hy`` is an updated hidden states whose shape is the same as
          ``hx``.
        - ``cy`` is an updated cell states whose shape is the same as ``cx``.
        - ``ys`` is a list of :class:`~chainer.Variable` . Each element
          ``ys[t]`` holds hidden states of the last layer corresponding to an
          input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is the
          mini-batch size for time ``t``, and ``N`` is the size of the hidden
          units. Note that ``B_t`` is the same value as the mini-batch size of
          ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.n_step_lstm`
       :func:`chainer.functions.n_step_bilstm`

    """
    argument.check_unexpected_kwargs(
        kwargs,
        train='train argument is not supported anymore. '
              'Use chainer.using_config',
        use_cudnn='use_cudnn argument is not supported anymore. '
                  'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, cx),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        if use_bi_direction:
            rnn = NStepBiLSTM(n_layers, states)
        else:
            rnn = NStepLSTM(n_layers, states)

        ret = rnn(*inputs)
        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        direction = 2 if use_bi_direction else 1
        split_size = n_layers * direction
        hx = split_axis.split_axis(hx, split_size, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, split_size, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        xs_next = xs
        hy = []
        cy = []
        for layer in six.moves.range(n_layers):

            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)
                    lstm_in = linear.linear(
                        x, xws[layer_idx], xbs[layer_idx]) + linear.linear(
                        h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list

            h, c, h_forward, c_forward = _one_directional_loop(di=0)
            hy.append(h)
            cy.append(c)

            if use_bi_direction:
                # BiLSTM
                h, c, h_backward, c_backward = _one_directional_loop(di=1)
                hy.append(h)
                cy.append(c)
                h_backward.reverse()
                # concat
                xs_next = [concat.concat([hfi, hbi], axis=1)
                           for (hfi, hbi) in zip(h_forward, h_backward)]
            else:
                # Uni-directional LSTM
                xs_next = h_forward

        ys = xs_next
        hy = stack.stack(hy)
        cy = stack.stack(cy)
        return hy, cy, tuple(ys)
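# A minimal sketch of preparing variable-length input for the functional API:
# sort the sequences by length in descending order, then transpose them to
# time-major mini-batches so that xs[t].shape[0] >= xs[t + 1].shape[0] holds,
# as the docstring above requires. (Restoring the original sequence order
# after the call is omitted here.)


def sketch_transpose_sequence():
    import numpy
    import chainer.functions as F

    seqs = [numpy.random.randn(t, 3).astype('f') for t in (2, 5, 4)]
    seqs = sorted(seqs, key=len, reverse=True)   # lengths 5, 4, 2
    xs = F.transpose_sequence(seqs)              # one entry per time step
    assert [x.shape for x in xs] == [(3, 3), (3, 3), (2, 3), (2, 3), (1, 3)]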
def _use_cudnn(self, x, gy):
    return (chainer.should_use_cudnn('>=auto') and
            not self.cover_all and
            x.dtype == self.W_dtype and
            gy.dtype == self.W_dtype and
            self.ndim > 1)
def forward_gpu(self, inputs):
    self.retain_inputs((0, 1))  # retain only x and W
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None

    if not all([isinstance(i, cuda.ndarray) for i in inputs]):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

    out_c, _, kh, kw = W.shape
    n, c, h, w = x.shape

    out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                  cover_all=self.cover_all, d=self.dy)
    assert out_h > 0, 'Height in the output should be positive.'
    out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                  cover_all=self.cover_all, d=self.dx)
    assert out_w > 0, 'Width in the output should be positive.'

    y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
    if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
            x.dtype == W.dtype and
            ((self.dy == 1 and self.dx == 1) or _cudnn_version >= 6000)):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        use_tensor_core = chainer.should_use_cudnn_tensor_core(x.dtype)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)

        filter_desc = cudnn.create_filter_descriptor(W)
        conv_desc = cudnn.create_convolution_descriptor(
            (self.ph, self.pw), (self.sy, self.sx), x.dtype,
            dilation=(self.dy, self.dx),
            use_tensor_core=use_tensor_core)
        if b is not None:
            bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')
        algo = libcudnn.getConvolutionForwardAlgorithm(
            handle, x_desc.value, filter_desc.value,
            conv_desc.value, y_desc.value, _fwd_pref, workspace_size)

        if use_tensor_core:
            # Only CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
            # supports Tensor-Core in cuDNN7.
            algo = libcudnn.CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM  # NOQA

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        libcudnn.convolutionForward(
            handle, one.data, x_desc.value, x.data.ptr,
            filter_desc.value, W.data.ptr, conv_desc.value,
            algo, workspace.data.ptr, workspace_size, zero.data,
            y_desc.value, y.data.ptr)

        # TODO(beam2d): Support unshared bias
        if b is not None:
            cudnn.add_tensor(
                handle, one.data, bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)
    else:
        # Implementation using im2col
        col = conv.im2col_gpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all, dy=self.dy, dx=self.dx)
        y = cuda.cupy.tensordot(
            col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
        # TODO(beam2d): Support unshared bias
        if b is not None:
            y += b
        y = cuda.cupy.rollaxis(y, 3, 1)

    return y,
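# The cuDNN branch above picks the forward algorithm under the workspace limit
# returned by cuda.get_max_workspace_size(). Below is a minimal sketch
# (GPU-only, assuming CuPy is installed) of temporarily raising that limit,
# which can let cuDNN select a faster but more memory-hungry algorithm.


def sketch_workspace_limit():
    import chainer
    from chainer import cuda

    default_size = cuda.get_max_workspace_size()
    cuda.set_max_workspace_size(512 * 1024 * 1024)  # allow up to 512 MiB
    try:
        with chainer.using_config('use_cudnn', 'auto'):
            pass  # run convolution-heavy forward passes here
    finally:
        cuda.set_max_workspace_size(default_size)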