Example #1
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction,
                    **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, \
use_bi_direction)

    Base function for stacked GRU/BiGRU functions.

    This function is used by :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (:class:`~chainer.Variable`):
            Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units. When ``use_bi_direction`` is
            ``True``, the length of the first dimension is ``2S`` instead.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to :math:`W_j` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` are ``(N, I)``-shaped as they
            are multiplied with input variables, where ``I`` is the size of
            the input and ``N`` is the dimension of the hidden units. All
            other matrices are ``(N, N)``-shaped.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to :math:`b_j` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the dimension
            of the hidden units.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable` holding input values.
            Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort them in descending
            order of length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable`\\ s each holding a sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses a
            bi-directional GRU.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs,
            train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = backend.get_array_module(hx, hx.data)

    directions = 1
    if use_bi_direction:
        directions = 2

    combined = _combine_inputs(hx, ws, bs, xs, n_layers, directions)
    has_chainerx_array, combined = _extract_apply_in_data(combined)
    hx_chx, ws_chx, bs_chx, xs_chx = _seperate_inputs(combined, n_layers,
                                                      len(xs), directions)

    if has_chainerx_array and xp is chainerx and dropout_ratio == 0:
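        # ChainerX ships fused n-step GRU kernels; hand the unwrapped arrays
        # to them directly. This path does not apply dropout, hence the
        # dropout_ratio == 0 guard.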
        if use_bi_direction:
            hy, ys = chainerx.n_step_bigru(n_layers, hx_chx, ws_chx, bs_chx,
                                           xs_chx)
        else:
            hy, ys = chainerx.n_step_gru(n_layers, hx_chx, ws_chx, bs_chx,
                                         xs_chx)
        hy = variable.Variable._init_unchecked(
            hy,
            requires_grad=hy.is_backprop_required(),
            is_chainerx_array=True)
        ys = [
            variable.Variable._init_unchecked(
                y,
                requires_grad=y.is_backprop_required(),
                is_chainerx_array=True) for y in ys
        ]
        return hy, ys

    if xp is cuda.cupy and chainer.should_use_cudnn('>=auto', 5000):
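        # With CuPy arrays and cuDNN v5 or later, pack the sequences and run
        # the fused cuDNN RNN kernel instead of unrolling in Python.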
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)
        with chainer.using_device(xs.device):
            states = cuda.get_cudnn_dropout_states()
            states.set_dropout_ratio(dropout_ratio)

        w = n_step_rnn.cudnn_rnn_weight_concat(n_layers, states,
                                               use_bi_direction, 'gru', ws, bs)

        if use_bi_direction:
            rnn = NStepBiGRU
        else:
            rnn = NStepGRU

        hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, ys

    else:
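        # Generic fallback: unroll the GRU step by step with primitive
        # Chainer functions via n_step_rnn_impl.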
        hy, _, ys = n_step_rnn.n_step_rnn_impl(_gru, n_layers, dropout_ratio,
                                               hx, None, ws, bs, xs,
                                               use_bi_direction)
        return hy, ys
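
A minimal usage sketch may help make the weight and bias layout above concrete. It calls the public wrapper chainer.functions.n_step_gru, which forwards to n_step_gru_base with use_bi_direction=False; all sizes and values below are illustrative assumptions, not part of the example above.

# Hedged sketch: drive n_step_gru with toy NumPy inputs on the CPU fallback path.
import numpy as np
import chainer.functions as F

n_layers, dropout_ratio = 2, 0.0
in_size, out_size = 3, 2                  # I and N in the docstring
batch_sizes = [3, 2, 1]                   # B_t per step, already descending

# xs[t] has shape (B_t, I) and satisfies xs[t].shape[0] >= xs[t + 1].shape[0].
xs = [np.ones((b, in_size), dtype=np.float32) for b in batch_sizes]
hx = np.zeros((n_layers, batch_sizes[0], out_size), dtype=np.float32)

# Six weight matrices and six bias vectors per layer; only ws[0][0..2] take
# the input size, every other matrix is (out_size, out_size).
def w_in(i, j):
    return in_size if i == 0 and j < 3 else out_size

ws = [[np.ones((out_size, w_in(i, j)), dtype=np.float32) for j in range(6)]
      for i in range(n_layers)]
bs = [[np.zeros((out_size,), dtype=np.float32) for _ in range(6)]
      for i in range(n_layers)]

hy, ys = F.n_step_gru(n_layers, dropout_ratio, hx, ws, bs, xs)
assert hy.shape == hx.shape
assert [y.shape for y in ys] == [(3, 2), (2, 2), (1, 2)]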
Example #2
def n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        **kwargs):
    """Base function for Stack LSTM/BiLSTM functions.

    This function is used by :func:`chainer.functions.n_step_lstm` and
    :func:`chainer.functions.n_step_bilstm`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): The number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (:class:`~chainer.Variable`):
            Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (:class:`~chainer.Variable`): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to :math:`W_j` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` are ``(N, I)``-shaped as they
            are multiplied with input variables, where ``I`` is the size of
            the input and ``N`` is the dimension of the hidden units. All
            other matrices are ``(N, N)``-shaped.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to :math:`b_j` in the equation.
            The shape of each vector is ``(N,)``.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t``. The sequences must be transposed.
            :func:`~chainer.functions.transpose_sequence` can be used to
            transpose a list of :class:`~chainer.Variable`\\ s each
            representing a sequence.
            When sequences have different lengths, they must be
            sorted in descending order of their lengths before transposing.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses a
            bi-directional LSTM.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

            - ``hy`` is the updated hidden states whose shape is the same as
              that of ``hx``.
            - ``cy`` is the updated cell states whose shape is the same as
              that of ``cx``.
            - ``ys`` is a list of :class:`~chainer.Variable`. Each element
              ``ys[t]`` holds the hidden states of the last layer
              corresponding to the input ``xs[t]``. Its shape is ``(B_t, N)``
              where ``B_t`` is the mini-batch size for time ``t``. Note that
              ``B_t`` is the same as the mini-batch size of ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.n_step_lstm`
       :func:`chainer.functions.n_step_bilstm`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    # Check input size consistency with xs and ws here.
    x_in = xs[0].shape[1]
    w_in = ws[0][0].shape[1]
    if x_in != w_in:
        raise ValueError('Inconsistent input size in input values and weight '
                         'parameters: {} != {}'.format(x_in, w_in))

    xp = backend.get_array_module(hx, hx.data)

    use_cuda = xp is cuda.cupy or (
        xp is chainerx and hx.device.device.backend.name == 'cuda')

    directions = 1
    if use_bi_direction:
        directions = 2

    combined = _combine_inputs(hx, cx, ws, bs, xs, n_layers, directions)
    has_chainerx_array, combined = _extract_apply_in_data(combined)
    hx_chx, cx_chx, ws_chx, bs_chx, xs_chx = _seperate_inputs(
        combined, n_layers, len(xs), directions)

    if has_chainerx_array and xp is chainerx and dropout_ratio == 0:
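        # ChainerX ships fused n-step LSTM kernels; hand the unwrapped arrays
        # to them directly. This path does not apply dropout, hence the
        # dropout_ratio == 0 guard.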
        if use_bi_direction:
            hy, cy, ys = chainerx.n_step_bilstm(
                n_layers, hx_chx, cx_chx, ws_chx, bs_chx, xs_chx)
        else:
            hy, cy, ys = chainerx.n_step_lstm(
                n_layers, hx_chx, cx_chx, ws_chx, bs_chx, xs_chx)
        hy = variable.Variable._init_unchecked(
            hy, requires_grad=hy.is_backprop_required(),
            is_chainerx_array=True)
        cy = variable.Variable._init_unchecked(
            cy, requires_grad=cy.is_backprop_required(),
            is_chainerx_array=True)
        ys = [variable.Variable._init_unchecked(
            y, requires_grad=y.is_backprop_required(),
            is_chainerx_array=True)
            for y in ys]
        return hy, cy, ys
    elif use_cuda and chainer.should_use_cudnn('>=auto', 5000):
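        # On the CUDA backend with cuDNN v5 or later, pack the sequences and
        # run the fused cuDNN RNN kernel instead of unrolling in Python.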
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)
        with chainer.using_device(xs.device):
            states = cuda.get_cudnn_dropout_states()
            states.set_dropout_ratio(dropout_ratio)

        w = n_step_rnn.cudnn_rnn_weight_concat(
            n_layers, states, use_bi_direction, 'lstm', ws, bs)

        if use_bi_direction:
            rnn = NStepBiLSTM
        else:
            rnn = NStepLSTM

        hy, cy, ys = rnn(n_layers, states, lengths)(hx, cx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, cy, ys

    else:
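        # Generic fallback: unroll the LSTM step by step with primitive
        # Chainer functions via n_step_rnn_impl.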
        return n_step_rnn.n_step_rnn_impl(
            _lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
            use_bi_direction)
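
The same shapes extend to the LSTM case, with eight weight matrices per layer and an additional cell state. Below is a hedged sketch calling the public wrapper chainer.functions.n_step_lstm, which forwards to n_step_lstm_base with use_bi_direction=False; all sizes and values are illustrative assumptions.

# Hedged sketch: drive n_step_lstm with toy NumPy inputs on the CPU fallback path.
import numpy as np
import chainer.functions as F

n_layers, dropout_ratio = 2, 0.0
in_size, out_size = 3, 2                  # I and N in the docstring
batch_sizes = [3, 2, 1]                   # B_t per step, already descending

xs = [np.ones((b, in_size), dtype=np.float32) for b in batch_sizes]
state_shape = (n_layers, batch_sizes[0], out_size)
hx = np.zeros(state_shape, dtype=np.float32)   # initial hidden states
cx = np.zeros(state_shape, dtype=np.float32)   # initial cell states

# Eight weight matrices and eight bias vectors per layer; only ws[0][0..3]
# take the input size, every other matrix is (out_size, out_size).
def w_in(i, j):
    return in_size if i == 0 and j < 4 else out_size

ws = [[np.ones((out_size, w_in(i, j)), dtype=np.float32) for j in range(8)]
      for i in range(n_layers)]
bs = [[np.zeros((out_size,), dtype=np.float32) for _ in range(8)]
      for i in range(n_layers)]

hy, cy, ys = F.n_step_lstm(n_layers, dropout_ratio, hx, cx, ws, bs, xs)
assert hy.shape == cy.shape == state_shape
assert [y.shape for y in ys] == [(3, 2), (2, 2), (1, 2)]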