def forward_gpu(self, x):
    if (chainer.should_use_cudnn('==always', 5000)
            and x[0].flags.c_contiguous
            and self.mask is None):
        self._use_cudnn = True

        if hasattr(self, 'states'):
            # if we already have a dropout mask,
            # the forward operation is equal to backward.
            return cuda.get_cudnn_dropout_states().backward(
                None, x[0], self.dropout_ratio, self.states),

        self.states, y = cuda.get_cudnn_dropout_states().forward(
            None, x[0], self.dropout_ratio)
        return y,
    else:
        if self.mask is not None:
            y = x[0] * self.mask
        else:
            rand = cuda.cupy.random.rand(*x[0].shape, dtype=numpy.float32)
            scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
            self.mask, y = cuda.elementwise(
                'T x, R r, T scale, T ratio', 'T mask, T y',
                '''
                mask = (r >= ratio) * scale;
                y = x * mask;
                ''',
                'dropout_fwd',
            )(x[0], rand, scale, self.dropout_ratio)
        return y,
def forward_gpu(self, x):
    if (chainer.should_use_cudnn('==always', 5000)
            and x[0].flags.c_contiguous
            and self.mask is None):
        self._use_cudnn = True
        handle = cudnn.get_handle()

        if hasattr(self, 'states'):
            # if we already have a dropout mask,
            # the forward operation is equal to backward.
            return cuda.get_cudnn_dropout_states().backward(
                handle, x[0], self.dropout_ratio, self.states),

        self.states, y = cuda.get_cudnn_dropout_states().forward(
            handle, x[0], self.dropout_ratio)
        return y,
    else:
        if self.mask is not None:
            y = x[0] * self.mask
        else:
            rand = cuda.cupy.random.rand(*x[0].shape, dtype=numpy.float32)
            scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
            self.mask, y = cuda.elementwise(
                'T x, R r, T scale, T ratio', 'T mask, T y',
                '''
                mask = (r >= ratio) * scale;
                y = x * mask;
                ''',
                'dropout_fwd',
            )(x[0], rand, scale, self.dropout_ratio)
        return y,
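# Both forward_gpu variants above implement the same inverted-dropout scheme
# in their non-cuDNN branch: an element is kept when its random draw is at
# least ``dropout_ratio`` and kept elements are scaled by
# ``1 / (1 - dropout_ratio)`` so the expected output matches the input.
# A minimal NumPy-only sketch of that kernel's math follows; the helper name
# ``dropout_forward_sketch`` is illustrative and not part of the Chainer code.
import numpy as np


def dropout_forward_sketch(x, dropout_ratio, rng=np.random):
    # mask = (r >= ratio) * scale, mirroring the elementwise kernel above.
    r = rng.rand(*x.shape).astype(x.dtype)
    scale = x.dtype.type(1. / (1 - dropout_ratio))
    mask = (r >= dropout_ratio) * scale
    return x * mask, mask


x = np.ones((4, 3), dtype=np.float32)
y, mask = dropout_forward_sketch(x, dropout_ratio=0.5)
# On average y.mean() stays close to x.mean(), thanks to the 1/(1-ratio) scale.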
def n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation,
                    use_bi_direction, **kwargs):
    """n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation, \
use_bi_direction)

    Base function for Stack RNN/BiRNN functions.

    This function is used at :func:`chainer.functions.n_step_birnn` and
    :func:`chainer.functions.n_step_rnn`.
    This function's behavior depends on the following arguments,
    ``activation`` and ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing two matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 1`` is ``(I, N)`` shape as they
            are multiplied with input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for i-th layer.
            Each ``bs[i]`` is a list containing two vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is dimension of
            hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort sequences in
            descending order by length, and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        activation (str): Activation function name.
            Please select ``tanh`` or ``relu``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional RNN.

    Returns:
        tuple: This function returns a tuple containing two elements,
        ``hy`` and ``ys``.

        - ``hy`` is an updated hidden states whose shape is the same as
          ``hx``.
        - ``ys`` is a list of :class:`~chainer.Variable` . Each element
          ``ys[t]`` holds hidden states of the last layer corresponding to
          an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is
          mini-batch size for time ``t``, and ``N`` is size of hidden
          units. Note that ``B_t`` is the same value as the mini-batch size
          of ``xs[t]``.

    .. seealso::
        :func:`chainer.functions.n_step_rnn`
        :func:`chainer.functions.n_step_birnn`

    """  # NOQA
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    activation_list = ['tanh', 'relu']
    if activation not in activation_list:
        candidate = ','.join(activation_list)
        raise ValueError(
            'Invalid activation: "%s". Please select from [%s]'
            % (activation, candidate))

    xp = backend.get_array_module(hx)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = cuda.get_cudnn_dropout_states()
        states.set_dropout_ratio(dropout_ratio)
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)

        rnn_mode = 'rnn_%s' % activation
        w = cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, rnn_mode, ws, bs)

        if use_bi_direction:
            # Bi-directional RNN
            if activation == 'tanh':
                rnn = NStepBiRNNTanh
            elif activation == 'relu':
                rnn = NStepBiRNNReLU
        else:
            # Uni-directional RNN
            if activation == 'tanh':
                rnn = NStepRNNTanh
            elif activation == 'relu':
                rnn = NStepRNNReLU

        hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, ys

    else:
        def f(x, h, c, w, b):
            xw, hw = w
            xb, hb = b
            rnn_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
            if activation == 'tanh':
                return tanh.tanh(rnn_in), None
            elif activation == 'relu':
                return relu.relu(rnn_in), None

        hy, _, ys = n_step_rnn_impl(
            f, n_layers, dropout_ratio, hx, None, ws, bs, xs,
            use_bi_direction)
        return hy, ys
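# The ``xs`` argument of n_step_rnn_base (and of the other n_step_* base
# functions below) is time-major: ``xs[t]`` is the mini-batch for time step
# ``t`` and the batch size may only shrink over time. A minimal sketch, with
# assumed shapes and random data, of preparing such an ``xs`` from
# variable-length sequences via chainer.functions.transpose_sequence, as the
# docstring above suggests:
import numpy as np
import chainer
import chainer.functions as F

in_size = 10
seqs = [np.random.rand(t, in_size).astype(np.float32) for t in (3, 5, 2)]
seqs = sorted(seqs, key=len, reverse=True)          # longest sequence first
xs = F.transpose_sequence([chainer.Variable(s) for s in seqs])
# Per-time-step batch sizes are now non-increasing:
# [x.shape[0] for x in xs] == [3, 3, 2, 1, 1]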
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction,
                    **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, \
use_bi_direction)

    Base function for Stack GRU/BiGRU functions.

    This function is used at :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on argument ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (:class:`~chainer.Variable`):
            Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units. When ``use_bi_direction`` is ``True``,
            the first dimension length is ``2S`` instead of ``S``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents weights for i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` is ``(I, N)`` shape as they
            are multiplied with input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents biases for i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is dimension of
            hidden units.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable` holding input values.
            Each element ``xs[t]`` holds input value for time ``t``.
            Its shape is ``(B_t, I)``, where ``B_t`` is mini-batch size for
            time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort sequences in
            descending order by length, and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional GRU.

    .. seealso::
        :func:`chainer.functions.n_step_rnn`
        :func:`chainer.functions.n_step_birnn`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = backend.get_array_module(hx, hx.data)

    directions = 1
    if use_bi_direction:
        directions = 2

    combined = _combine_inputs(hx, ws, bs, xs, n_layers, directions)
    has_chainerx_array, combined = _extract_apply_in_data(combined)
    hx_chx, ws_chx, bs_chx, xs_chx = _seperate_inputs(
        combined, n_layers, len(xs), directions)

    if has_chainerx_array and xp is chainerx and dropout_ratio == 0:
        if use_bi_direction:
            hy, ys = chainerx.n_step_bigru(
                n_layers, hx_chx, ws_chx, bs_chx, xs_chx)
        else:
            hy, ys = chainerx.n_step_gru(
                n_layers, hx_chx, ws_chx, bs_chx, xs_chx)

        hy = variable.Variable._init_unchecked(
            hy, requires_grad=hy.is_backprop_required(),
            is_chainerx_array=True)
        ys = [variable.Variable._init_unchecked(
            y, requires_grad=y.is_backprop_required(),
            is_chainerx_array=True)
            for y in ys]
        return hy, ys

    if xp is cuda.cupy and chainer.should_use_cudnn('>=auto', 5000):
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)
        with chainer.using_device(xs.device):
            states = cuda.get_cudnn_dropout_states()
            states.set_dropout_ratio(dropout_ratio)
            w = n_step_rnn.cudnn_rnn_weight_concat(
                n_layers, states, use_bi_direction, 'gru', ws, bs)

            if use_bi_direction:
                rnn = NStepBiGRU
            else:
                rnn = NStepGRU

            hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)

            sections = numpy.cumsum(lengths[:-1])
            ys = chainer.functions.split_axis(ys, sections, 0)
            return hy, ys
    else:
        hy, _, ys = n_step_rnn.n_step_rnn_impl(
            _gru, n_layers, dropout_ratio, hx, None, ws, bs, xs,
            use_bi_direction)
        return hy, ys
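# A minimal usage sketch for the GRU variant. It goes through the high-level
# link chainer.links.NStepGRU, which builds the ``ws``/``bs`` lists internally
# and eventually reaches n_step_gru_base; the layer sizes, dropout value, and
# sequence lengths here are arbitrary examples, not values from the code above.
import numpy as np
import chainer
import chainer.links as L

gru = L.NStepGRU(2, 10, 16, 0.1)   # (n_layers, in_size, out_size, dropout)
xs = [chainer.Variable(np.random.rand(t, 10).astype(np.float32))
      for t in (5, 3, 2)]          # a batch of variable-length sequences
hy, ys = gru(None, xs)             # hx=None starts from a zero hidden state
# hy.shape == (2, 3, 16); ys[i].shape == (len(xs[i]), 16)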
def n_step_lstm_base(n_layers, dropout_ratio, hx, cx, ws, bs, xs,
                     use_bi_direction, **kwargs):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used at :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): The number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (:class:`~chainer.Variable`):
            Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (:class:`~chainer.Variable`): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to :math:`W_j` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` are ``(I, N)``-shape as
            they are multiplied with input variables, where ``I`` is the size
            of the input and ``N`` is the dimension of the hidden units. All
            other matrices are ``(N, N)``-shaped.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to :math:`b_j` in the equation.
            The shape of each vector is ``(N,)``.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable` holding input values.
            Each element ``xs[t]`` holds input value for time ``t``.
            Its shape is ``(B_t, I)``, where ``B_t`` is the mini-batch size
            for time ``t``. The sequences must be transposed.
            :func:`~chainer.functions.transpose_sequence` can be used to
            transpose a list of :class:`~chainer.Variable`\\ s each
            representing a sequence.
            When sequences have different lengths, they must be sorted in
            descending order of their lengths before transposing.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

        - ``hy`` is an updated hidden states whose shape is the same as
          ``hx``.
        - ``cy`` is an updated cell states whose shape is the same as
          ``cx``.
        - ``ys`` is a list of :class:`~chainer.Variable` . Each element
          ``ys[t]`` holds hidden states of the last layer corresponding to
          an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is the
          mini-batch size for time ``t``. Note that ``B_t`` is the same
          value as the mini-batch size of ``xs[t]``.

    .. seealso::
        :func:`chainer.functions.n_step_lstm`
        :func:`chainer.functions.n_step_bilstm`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    # Check input size consistency with xs and ws here.
    x_in = xs[0].shape[1]
    w_in = ws[0][0].shape[1]
    if x_in != w_in:
        raise ValueError(
            'Inconsistent input size in input values and weight '
            'parameters: {} != {}'.format(x_in, w_in))

    xp = backend.get_array_module(hx, hx.data)

    # TODO(imanishi): Support ChainerX n_step_rnn
    use_cuda = xp is cuda.cupy or (
        xp is chainerx and hx.device.device.backend.name == 'cuda')

    if use_cuda and chainer.should_use_cudnn('>=auto', 5000):
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)
        with chainer.using_device(xs.device):
            states = cuda.get_cudnn_dropout_states()
            states.set_dropout_ratio(dropout_ratio)
            w = n_step_rnn.cudnn_rnn_weight_concat(
                n_layers, states, use_bi_direction, 'lstm', ws, bs)

            if use_bi_direction:
                rnn = NStepBiLSTM
            else:
                rnn = NStepLSTM

            hy, cy, ys = rnn(n_layers, states, lengths)(hx, cx, w, xs)

            sections = numpy.cumsum(lengths[:-1])
            ys = chainer.functions.split_axis(ys, sections, 0)
            return hy, cy, ys

    else:
        return n_step_rnn.n_step_rnn_impl(
            _lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
            use_bi_direction)
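# A minimal usage sketch for the LSTM variant, going through the high-level
# link chainer.links.NStepLSTM, which constructs ``ws``/``bs`` and calls into
# n_step_lstm_base. Shapes and hyper-parameters are illustrative assumptions,
# not values taken from the code above.
import numpy as np
import chainer
import chainer.links as L

lstm = L.NStepLSTM(2, 10, 16, 0.1)   # (n_layers, in_size, out_size, dropout)
xs = [chainer.Variable(np.random.rand(t, 10).astype(np.float32))
      for t in (5, 3, 2)]
hy, cy, ys = lstm(None, None, xs)    # hx=None, cx=None start from zero states
# hy.shape == cy.shape == (2, 3, 16); ys[i].shape == (len(xs[i]), 16)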
def n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        **kwargs):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used at :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): The number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (~chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (~chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to :math:`W_j` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` are ``(I, N)``-shape as
            they are multiplied with input variables, where ``I`` is the size
            of the input and ``N`` is the dimension of the hidden units. All
            other matrices are ``(N, N)``-shaped.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to :math:`b_j` in the equation.
            The shape of each vector is ``(N,)``.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable` holding input values.
            Each element ``xs[t]`` holds input value for time ``t``.
            Its shape is ``(B_t, I)``, where ``B_t`` is the mini-batch size
            for time ``t``. The sequences must be transposed.
            :func:`~chainer.functions.transpose_sequence` can be used to
            transpose a list of :class:`~chainer.Variable`\\ s each
            representing a sequence.
            When sequences have different lengths, they must be sorted in
            descending order of their lengths before transposing.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

        - ``hy`` is an updated hidden states whose shape is the same as
          ``hx``.
        - ``cy`` is an updated cell states whose shape is the same as
          ``cx``.
        - ``ys`` is a list of :class:`~chainer.Variable` . Each element
          ``ys[t]`` holds hidden states of the last layer corresponding to
          an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is the
          mini-batch size for time ``t``. Note that ``B_t`` is the same
          value as the mini-batch size of ``xs[t]``.

    .. seealso::
        :func:`chainer.functions.n_step_lstm`
        :func:`chainer.functions.n_step_bilstm`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        handle = cudnn.get_handle()
        states = cuda.get_cudnn_dropout_states()
        cudnn.set_dropout_descriptor(states._desc, handle, dropout_ratio)
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)

        w = n_step_rnn.cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, 'lstm', ws, bs)

        if use_bi_direction:
            rnn = NStepBiLSTM
        else:
            rnn = NStepLSTM

        hy, cy, ys = rnn(n_layers, states, lengths)(hx, cx, w, xs)

        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, cy, ys

    else:
        return n_step_rnn.n_step_rnn_impl(
            _lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
            use_bi_direction)
def forward(self, inputs):
    return cuda.get_cudnn_dropout_states().backward(
        None, inputs[0], self.dropout_ratio, self.states),
def forward(self, inputs):
    handle = cudnn.get_handle()
    return cuda.get_cudnn_dropout_states().backward(
        handle, inputs[0], self.dropout_ratio, self.states),
def n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation,
                    use_bi_direction, **kwargs):
    """n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation, \
use_bi_direction)

    Base function for Stack RNN/BiRNN functions.

    This function is used at :func:`chainer.functions.n_step_birnn` and
    :func:`chainer.functions.n_step_rnn`.
    This function's behavior depends on the following arguments,
    ``activation`` and ``use_bi_direction``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2. Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing two matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 1`` is ``(I, N)`` shape as they
            are multiplied with input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for i-th layer.
            Each ``bs[i]`` is a list containing two vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is dimension of
            hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort sequences in
            descending order by length, and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        activation (str): Activation function name.
            Please select ``tanh`` or ``relu``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional RNN.

    Returns:
        tuple: This function returns a tuple containing two elements,
        ``hy`` and ``ys``.

        - ``hy`` is an updated hidden states whose shape is the same as
          ``hx``.
        - ``ys`` is a list of :class:`~chainer.Variable` . Each element
          ``ys[t]`` holds hidden states of the last layer corresponding to
          an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is
          mini-batch size for time ``t``, and ``N`` is size of hidden
          units. Note that ``B_t`` is the same value as the mini-batch size
          of ``xs[t]``.

    .. seealso::
        :func:`chainer.functions.n_step_rnn`
        :func:`chainer.functions.n_step_birnn`

    """  # NOQA
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    activation_list = ['tanh', 'relu']
    if activation not in activation_list:
        candidate = ','.join(activation_list)
        raise ValueError(
            'Invalid activation: "%s". Please select from [%s]'
            % (activation, candidate))

    xp = cuda.get_array_module(hx)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        handle = cudnn.get_handle()
        states = cuda.get_cudnn_dropout_states()
        cudnn.set_dropout_descriptor(states._desc, handle, dropout_ratio)
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)

        rnn_mode = 'rnn_%s' % activation
        w = cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, rnn_mode, ws, bs)

        if use_bi_direction:
            # Bi-directional RNN
            if activation == 'tanh':
                rnn = NStepBiRNNTanh
            elif activation == 'relu':
                rnn = NStepBiRNNReLU
        else:
            # Uni-directional RNN
            if activation == 'tanh':
                rnn = NStepRNNTanh
            elif activation == 'relu':
                rnn = NStepRNNReLU

        hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, ys

    else:
        def f(x, h, c, w, b):
            xw, hw = w
            xb, hb = b
            rnn_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
            if activation == 'tanh':
                return tanh.tanh(rnn_in), None
            elif activation == 'relu':
                return relu.relu(rnn_in), None

        hy, _, ys = n_step_rnn_impl(
            f, n_layers, dropout_ratio, hx, None, ws, bs, xs,
            use_bi_direction)
        return hy, ys
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction,
                    **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, \
use_bi_direction)

    Base function for Stack GRU/BiGRU functions.

    This function is used at :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on argument ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimension of hidden units. When ``use_bi_direction`` is ``True``,
            the first dimension length is ``2S`` instead of ``S``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` is ``(I, N)`` shape as they
            are multiplied with input variables. All other matrices have
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents biases for i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is dimension of
            hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort sequences in
            descending order by length, and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional GRU.

    .. seealso::
        :func:`chainer.functions.n_step_rnn`
        :func:`chainer.functions.n_step_birnn`

    """  # NOQA
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = backend.get_array_module(hx, hx.data)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = cuda.get_cudnn_dropout_states()
        states.set_dropout_ratio(dropout_ratio)
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)

        w = n_step_rnn.cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, 'gru', ws, bs)

        if use_bi_direction:
            rnn = NStepBiGRU
        else:
            rnn = NStepGRU

        hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)

        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, ys

    else:
        hy, _, ys = n_step_rnn.n_step_rnn_impl(
            _gru, n_layers, dropout_ratio, hx, None, ws, bs, xs,
            use_bi_direction)
        return hy, ys