Example 1
def forward_chainerx(self, inputs):
    # Unpack the initial hidden state, per-layer weights/biases, and inputs.
    h, ws, bs, xs = self.process_input(inputs)
    # chainerx.n_step_gru returns (hy, ys): the final hidden states and the
    # list of per-step outputs.
    out = chainerx.n_step_gru(self.n_layers, h, ws, bs, xs)
    # Flatten (hy, ys) into a single tuple: (hy, ys[0], ..., ys[-1]).
    rets = [out[0]]
    rets.extend(out[1])
    return tuple(rets)
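
For comparison, here is a minimal sketch (not part of the example above) of the equivalent non-ChainerX forward pass: the public chainer.functions.n_step_gru takes the same inputs plus a dropout ratio and returns the final hidden state and the per-step outputs separately. process_input and self.n_layers are assumed to behave as in the example; the method name forward_chainer is hypothetical.

import chainer

def forward_chainer(self, inputs):
    # Same input preparation as forward_chainerx above (assumed helper).
    h, ws, bs, xs = self.process_input(inputs)
    # chainer.functions.n_step_gru additionally takes a dropout ratio
    # (0.0 here) and returns (hy, ys) without flattening.
    hy, ys = chainer.functions.n_step_gru(self.n_layers, 0.0, h, ws, bs, xs)
    # Flatten to the same tuple layout that forward_chainerx produces.
    return (hy,) + tuple(ys)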
Example 2
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, use_bi_direction,
                    **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, \
use_bi_direction)

    Base function for stacked GRU/BiGRU functions.

    This function is used by :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on argument ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (:class:`~chainer.Variable`):
            Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units. When ``use_bi_direction`` is
            ``True``, the length of the first dimension is ``2S``.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` are of shape ``(I, N)``, as
            they are multiplied with the input variables; all other matrices
            are of shape ``(N, N)``.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            Each vector has shape ``(N,)``, where ``N`` is the dimension of
            the hidden units.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable` holding input values.
            Each element ``xs[t]`` holds the input values
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t`` and ``I`` is the size of the input
            units. Note that this function supports variable-length sequences.
            When sequences have different lengths, sort them in descending
            order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable` objects holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            a bi-directional GRU.

    Returns:
        tuple: A tuple ``(hy, ys)`` where ``hy`` is the updated hidden states
        with the same shape as ``hx``, and ``ys`` is a list of
        :class:`~chainer.Variable` holding the output of the last layer for
        each time step.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs,
            train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = backend.get_array_module(hx, hx.data)

    directions = 1
    if use_bi_direction:
        directions = 2

    combined = _combine_inputs(hx, ws, bs, xs, n_layers, directions)
    has_chainerx_array, combined = _extract_apply_in_data(combined)
    hx_chx, ws_chx, bs_chx, xs_chx = _seperate_inputs(combined, n_layers,
                                                      len(xs), directions)

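    # ChainerX fast path: when every input is a ChainerX array and dropout is
    # disabled, dispatch to ChainerX's fused n_step_gru/n_step_bigru kernels
    # and wrap the raw results back into Variables.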
    if has_chainerx_array and xp is chainerx and dropout_ratio == 0:
        if use_bi_direction:
            hy, ys = chainerx.n_step_bigru(n_layers, hx_chx, ws_chx, bs_chx,
                                           xs_chx)
        else:
            hy, ys = chainerx.n_step_gru(n_layers, hx_chx, ws_chx, bs_chx,
                                         xs_chx)
        hy = variable.Variable._init_unchecked(
            hy,
            requires_grad=hy.is_backprop_required(),
            is_chainerx_array=True)
        ys = [
            variable.Variable._init_unchecked(
                y,
                requires_grad=y.is_backprop_required(),
                is_chainerx_array=True) for y in ys
        ]
        return hy, ys

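    # cuDNN path: concatenate the per-step inputs, pack the per-layer weights
    # into a single cuDNN weight blob, run the fused GPU kernel, then split
    # the output back into the per-step list.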
    if xp is cuda.cupy and chainer.should_use_cudnn('>=auto', 5000):
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)
        with chainer.using_device(xs.device):
            states = cuda.get_cudnn_dropout_states()
            states.set_dropout_ratio(dropout_ratio)

        w = n_step_rnn.cudnn_rnn_weight_concat(n_layers, states,
                                               use_bi_direction, 'gru', ws, bs)

        if use_bi_direction:
            rnn = NStepBiGRU
        else:
            rnn = NStepGRU

        hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)
        sections = numpy.cumsum(lengths[:-1])
        ys = chainer.functions.split_axis(ys, sections, 0)
        return hy, ys

    else:
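        # Fallback: unroll the GRU step by step with the generic pure-Chainer
        # n-step RNN implementation.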
        hy, _, ys = n_step_rnn.n_step_rnn_impl(_gru, n_layers, dropout_ratio,
                                               hx, None, ws, bs, xs,
                                               use_bi_direction)
        return hy, ys
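
A minimal usage sketch (not from the source above), assuming chainer.links.NStepGRU: the link builds the per-layer ws/bs lists described in the docstring and calls chainer.functions.n_step_gru internally, so it is the simplest way to exercise this code path end to end. The sizes and data below are made up for illustration.

import numpy as np
import chainer.links as L

n_layers, in_size, n_units = 1, 3, 4
# The link creates the six weight matrices and six bias vectors per layer
# described in the docstring above.
rnn = L.NStepGRU(n_layers, in_size, n_units, dropout=0.0)

# Three sequences of different lengths, each of shape (length, in_size).
# Internally they are sorted by descending length and transposed into the
# time-major xs layout (xs[t] of shape (B_t, I)) documented above.
xs = [np.random.randn(t, in_size).astype(np.float32) for t in (5, 4, 2)]
hy, ys = rnn(None, xs)          # hx=None starts from a zero hidden state

print(hy.shape)                 # (n_layers, batch, n_units) == (1, 3, 4)
print([y.shape for y in ys])    # [(5, 4), (4, 4), (2, 4)]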