def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    use_bi_direction, **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, \
use_bi_direction)

    Base function for Stack GRU/BiGRU functions.

    This function is used by :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (:class:`~chainer.Variable`):
            Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers
            and is equal to ``n_layers``, ``B`` is the mini-batch size, and
            ``N`` is the dimension of the hidden units. When
            ``use_bi_direction`` is ``True``, the first dimension length is
            ``2S`` instead of ``S``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to :math:`W_j` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` are ``(N, I)``-shaped as
            they are multiplied with input variables, where ``I`` is the size
            of the input and ``N`` is the dimension of the hidden units.
            All other matrices are ``(N, N)``-shaped.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to :math:`b_j` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of the hidden units.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t``, and ``I`` is the size of the
            input units. Note that this function supports variable length
            sequences. When sequences have different lengths, sort them in
            descending order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s each holding a sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional GRU.

    Returns:
        tuple: This function returns a tuple containing two elements,
        ``hy`` and ``ys``.

        - ``hy`` is the updated hidden states whose shape is the same as
          ``hx``.
        - ``ys`` is a list of :class:`~chainer.Variable`. Each element
          ``ys[t]`` holds hidden states of the last layer corresponding
          to an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t``
          is the mini-batch size for time ``t``.

    .. seealso::

        :func:`chainer.functions.n_step_rnn`
        :func:`chainer.functions.n_step_birnn`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = backend.get_array_module(hx, hx.data)

    directions = 1
    if use_bi_direction:
        directions = 2

    combined = _combine_inputs(hx, ws, bs, xs, n_layers, directions)
    has_chainerx_array, combined = _extract_apply_in_data(combined)
    hx_chx, ws_chx, bs_chx, xs_chx = _seperate_inputs(
        combined, n_layers, len(xs), directions)

    if has_chainerx_array and xp is chainerx and dropout_ratio == 0:
        # ChainerX fast path: use the fused n_step_(bi)gru routine when
        # dropout is disabled.
        if use_bi_direction:
            hy, ys = chainerx.n_step_bigru(
                n_layers, hx_chx, ws_chx, bs_chx, xs_chx)
        else:
            hy, ys = chainerx.n_step_gru(
                n_layers, hx_chx, ws_chx, bs_chx, xs_chx)
        hy = variable.Variable._init_unchecked(
            hy, requires_grad=hy.is_backprop_required(),
            is_chainerx_array=True)
        ys = [variable.Variable._init_unchecked(
            y, requires_grad=y.is_backprop_required(),
            is_chainerx_array=True)
            for y in ys]
        return hy, ys

    if xp is cuda.cupy and chainer.should_use_cudnn('>=auto', 5000):
        # cuDNN path: pack the time-major sequences into a single array, run
        # the fused cuDNN RNN kernel, then split the outputs to match ``xs``.
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)
        with chainer.using_device(xs.device):
            states = cuda.get_cudnn_dropout_states()
            states.set_dropout_ratio(dropout_ratio)
            w = n_step_rnn.cudnn_rnn_weight_concat(
                n_layers, states, use_bi_direction, 'gru', ws, bs)

            if use_bi_direction:
                rnn = NStepBiGRU
            else:
                rnn = NStepGRU

            hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)
            sections = numpy.cumsum(lengths[:-1])
            ys = chainer.functions.split_axis(ys, sections, 0)
            return hy, ys
    else:
        # Fallback: generic n-step RNN implementation built from per-step
        # GRU computations.
        hy, _, ys = n_step_rnn.n_step_rnn_impl(
            _gru, n_layers, dropout_ratio, hx, None, ws, bs, xs,
            use_bi_direction)
        return hy, ys
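

# The docstring above describes the expected shapes somewhat tersely, so the
# following usage sketch (illustrative, not part of the original module) shows
# how matching inputs can be assembled and passed to
# chainer.functions.n_step_gru, which wraps this base function with
# use_bi_direction=False. The helper name and all sizes are assumptions.
def _example_n_step_gru_usage():
    import numpy as np

    import chainer
    import chainer.functions as F

    n_layers, in_size, n_units = 2, 3, 4
    # Sequence lengths, already sorted in descending order.
    seq_lengths = [5, 4, 2]
    batch = len(seq_lengths)

    # Stacked initial hidden state: (n_layers, batch, n_units).
    # For n_step_bigru the first dimension would be 2 * n_layers.
    hx = chainer.Variable(
        np.zeros((n_layers, batch, n_units), dtype=np.float32))

    # Six weight matrices and six bias vectors per layer. W_0..W_2 of the
    # first layer are (n_units, in_size) because they multiply the inputs;
    # every other matrix is (n_units, n_units) and every bias is (n_units,).
    ws, bs = [], []
    for layer in range(n_layers):
        layer_in = in_size if layer == 0 else n_units
        ws.append([
            np.random.uniform(
                -0.1, 0.1,
                (n_units, layer_in if j < 3 else n_units)
            ).astype(np.float32)
            for j in range(6)])
        bs.append([np.zeros(n_units, dtype=np.float32) for _ in range(6)])

    # One array per sequence, transposed into the time-major layout the
    # docstring requires (xs[t].shape[0] >= xs[t + 1].shape[0]).
    seqs = [np.random.rand(length, in_size).astype(np.float32)
            for length in seq_lengths]
    xs = F.transpose_sequence(seqs)

    hy, ys = F.n_step_gru(n_layers, 0.0, hx, ws, bs, xs)
    assert hy.shape == (n_layers, batch, n_units)
    return hy, ys

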
def n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        **kwargs):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used by :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): The number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (:class:`~chainer.Variable`):
            Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers
            and is equal to ``n_layers``, ``B`` is the mini-batch size, and
            ``N`` is the dimension of the hidden units.
        cx (:class:`~chainer.Variable`):
            Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to :math:`W_j` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` are ``(N, I)``-shaped as
            they are multiplied with input variables, where ``I`` is the size
            of the input and ``N`` is the dimension of the hidden units.
            All other matrices are ``(N, N)``-shaped.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to :math:`b_j` in the equation.
            The shape of each vector is ``(N,)``.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable` holding input values.
            Each element ``xs[t]`` holds input value for time ``t``.
            Its shape is ``(B_t, I)``, where ``B_t`` is the mini-batch size
            for time ``t``. The sequences must be transposed.
            :func:`~chainer.functions.transpose_sequence` can be used to
            transpose a list of :class:`~chainer.Variable`\\ s each
            representing a sequence. When sequences have different lengths,
            they must be sorted in descending order of their lengths before
            transposing. So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

        - ``hy`` is the updated hidden states whose shape is the same as
          ``hx``.
        - ``cy`` is the updated cell states whose shape is the same as
          ``cx``.
        - ``ys`` is a list of :class:`~chainer.Variable`. Each element
          ``ys[t]`` holds hidden states of the last layer corresponding
          to an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t``
          is the mini-batch size for time ``t``. Note that ``B_t`` is the
          same as the mini-batch size of ``xs[t]``.

    .. seealso::

        :func:`chainer.functions.n_step_lstm`
        :func:`chainer.functions.n_step_bilstm`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    # Check input size consistency with xs and ws here.
    x_in = xs[0].shape[1]
    w_in = ws[0][0].shape[1]
    if x_in != w_in:
        raise ValueError('Inconsistent input size in input values and weight '
                         'parameters: {} != {}'.format(x_in, w_in))

    xp = backend.get_array_module(hx, hx.data)
    use_cuda = xp is cuda.cupy or (
        xp is chainerx and hx.device.device.backend.name == 'cuda')

    directions = 1
    if use_bi_direction:
        directions = 2

    combined = _combine_inputs(hx, cx, ws, bs, xs, n_layers, directions)
    has_chainerx_array, combined = _extract_apply_in_data(combined)
    hx_chx, cx_chx, ws_chx, bs_chx, xs_chx = _seperate_inputs(
        combined, n_layers, len(xs), directions)

    if has_chainerx_array and xp is chainerx and dropout_ratio == 0:
        # ChainerX fast path: use the fused n_step_(bi)lstm routine when
        # dropout is disabled.
        if use_bi_direction:
            hy, cy, ys = chainerx.n_step_bilstm(
                n_layers, hx_chx, cx_chx, ws_chx, bs_chx, xs_chx)
        else:
            hy, cy, ys = chainerx.n_step_lstm(
                n_layers, hx_chx, cx_chx, ws_chx, bs_chx, xs_chx)
        hy = variable.Variable._init_unchecked(
            hy, requires_grad=hy.is_backprop_required(),
            is_chainerx_array=True)
        cy = variable.Variable._init_unchecked(
            cy, requires_grad=cy.is_backprop_required(),
            is_chainerx_array=True)
        ys = [variable.Variable._init_unchecked(
            y, requires_grad=y.is_backprop_required(),
            is_chainerx_array=True)
            for y in ys]
        return hy, cy, ys
    elif use_cuda and chainer.should_use_cudnn('>=auto', 5000):
        # cuDNN path: pack the time-major sequences into a single array, run
        # the fused cuDNN RNN kernel, then split the outputs to match ``xs``.
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)
        with chainer.using_device(xs.device):
            states = cuda.get_cudnn_dropout_states()
            states.set_dropout_ratio(dropout_ratio)
            w = n_step_rnn.cudnn_rnn_weight_concat(
                n_layers, states, use_bi_direction, 'lstm', ws, bs)

            if use_bi_direction:
                rnn = NStepBiLSTM
            else:
                rnn = NStepLSTM

            hy, cy, ys = rnn(n_layers, states, lengths)(hx, cx, w, xs)
            sections = numpy.cumsum(lengths[:-1])
            ys = chainer.functions.split_axis(ys, sections, 0)
            return hy, cy, ys
    else:
        # Fallback: generic n-step RNN implementation built from per-step
        # LSTM computations.
        return n_step_rnn.n_step_rnn_impl(
            _lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
            use_bi_direction)
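

# A usage sketch (illustrative, not part of the original module) for
# chainer.functions.n_step_lstm, which wraps this base function with
# use_bi_direction=False. It also exercises the input-size consistency check
# above: ws[0][0].shape[1] must equal xs[0].shape[1]. The helper name and all
# sizes are assumptions.
def _example_n_step_lstm_usage():
    import numpy as np

    import chainer
    import chainer.functions as F

    n_layers, in_size, n_units = 2, 3, 4
    seq_lengths = [5, 4, 2]  # already sorted in descending order
    batch = len(seq_lengths)

    # Stacked hidden and cell states: (n_layers, batch, n_units) each.
    hx = chainer.Variable(
        np.zeros((n_layers, batch, n_units), dtype=np.float32))
    cx = chainer.Variable(
        np.zeros((n_layers, batch, n_units), dtype=np.float32))

    # Eight weight matrices and eight bias vectors per layer. W_0..W_3 of
    # the first layer are (n_units, in_size); all other matrices are
    # (n_units, n_units) and all biases are (n_units,).
    ws, bs = [], []
    for layer in range(n_layers):
        layer_in = in_size if layer == 0 else n_units
        ws.append([
            np.random.uniform(
                -0.1, 0.1,
                (n_units, layer_in if j < 4 else n_units)
            ).astype(np.float32)
            for j in range(8)])
        bs.append([np.zeros(n_units, dtype=np.float32) for _ in range(8)])

    # Build per-sequence arrays, then transpose to the time-major layout.
    seqs = [np.random.rand(length, in_size).astype(np.float32)
            for length in seq_lengths]
    xs = F.transpose_sequence(seqs)

    hy, cy, ys = F.n_step_lstm(n_layers, 0.0, hx, cx, ws, bs, xs)

    # transpose_sequence is its own inverse on descending-length inputs, so
    # applying it to ys recovers one (length, n_units) output per sequence.
    ys_per_seq = F.transpose_sequence(ys)
    assert [y.shape for y in ys_per_seq] == [
        (length, n_units) for length in seq_lengths]
    return hy, cy, ys_per_seq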