    def forward_chainerx(self, inputs):
        # Run the fused ChainerX kernel and flatten its (hy, ys) output into
        # the flat tuple format expected by the caller: (hy, y_0, y_1, ...).
        h, ws, bs, xs = self.process_input(inputs)
        out = chainerx.n_step_bigru(self.n_layers, h, ws, bs, xs)
        rets = [out[0]]
        for y in out[1]:
            rets.append(y)
        return tuple(rets)
def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    use_bi_direction, **kwargs):
    """n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs, \
use_bi_direction)

    Base function for Stacked GRU/BiGRU functions.

    This function is used by :func:`chainer.functions.n_step_bigru` and
    :func:`chainer.functions.n_step_gru`.
    This function's behavior depends on the argument ``use_bi_direction``.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (:class:`~chainer.Variable`):
            Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers
            and is equal to ``n_layers``, ``B`` is the mini-batch size, and
            ``N`` is the dimension of the hidden units. When
            ``use_bi_direction`` is ``True``, the first dimension length is
            ``2S`` instead.
        ws (list of list of :class:`~chainer.Variable`): Weight matrices.
            ``ws[i]`` represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing six matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 3`` have ``(I, N)`` shape, as
            they are multiplied with input variables. All other matrices
            have ``(N, N)`` shape.
        bs (list of list of :class:`~chainer.Variable`): Bias vectors.
            ``bs[i]`` represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing six vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the
            dimension of the hidden units.
        xs (list of :class:`~chainer.Variable`):
            A list of :class:`~chainer.Variable` holding input values.
            Each element ``xs[t]`` holds the input value for time ``t``.
            Its shape is ``(B_t, I)``, where ``B_t`` is the mini-batch size
            for time ``t``, and ``I`` is the size of the input units.
            Note that this function supports variable length sequences.
            When sequences have different lengths, sort them in descending
            order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable` holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional GRU.

    .. seealso::

        :func:`chainer.functions.n_step_rnn`
        :func:`chainer.functions.n_step_birnn`

    """
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config',
            use_cudnn='use_cudnn argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

    xp = backend.get_array_module(hx, hx.data)

    directions = 1
    if use_bi_direction:
        directions = 2

    combined = _combine_inputs(hx, ws, bs, xs, n_layers, directions)
    has_chainerx_array, combined = _extract_apply_in_data(combined)
    hx_chx, ws_chx, bs_chx, xs_chx = _seperate_inputs(
        combined, n_layers, len(xs), directions)

    if has_chainerx_array and xp is chainerx and dropout_ratio == 0:
        # Fast path: dispatch directly to the fused ChainerX routines and
        # wrap the results back into Chainer variables.
        if use_bi_direction:
            hy, ys = chainerx.n_step_bigru(
                n_layers, hx_chx, ws_chx, bs_chx, xs_chx)
        else:
            hy, ys = chainerx.n_step_gru(
                n_layers, hx_chx, ws_chx, bs_chx, xs_chx)
        hy = variable.Variable._init_unchecked(
            hy, requires_grad=hy.is_backprop_required(),
            is_chainerx_array=True)
        ys = [variable.Variable._init_unchecked(
            y, requires_grad=y.is_backprop_required(),
            is_chainerx_array=True)
            for y in ys]
        return hy, ys

    if xp is cuda.cupy and chainer.should_use_cudnn('>=auto', 5000):
        # cuDNN path: concatenate the input sequences, run the fused cuDNN
        # RNN kernel, then split the output back into per-sequence pieces.
        lengths = [len(x) for x in xs]
        xs = chainer.functions.concat(xs, axis=0)
        with chainer.using_device(xs.device):
            states = cuda.get_cudnn_dropout_states()
            states.set_dropout_ratio(dropout_ratio)
            w = n_step_rnn.cudnn_rnn_weight_concat(
                n_layers, states, use_bi_direction, 'gru', ws, bs)

            if use_bi_direction:
                rnn = NStepBiGRU
            else:
                rnn = NStepGRU

            hy, ys = rnn(n_layers, states, lengths)(hx, w, xs)
            sections = numpy.cumsum(lengths[:-1])
            ys = chainer.functions.split_axis(ys, sections, 0)
            return hy, ys

    else:
        # Generic fallback built from element-wise Chainer functions.
        hy, _, ys = n_step_rnn.n_step_rnn_impl(
            _gru, n_layers, dropout_ratio, hx, None, ws, bs, xs,
            use_bi_direction)
        return hy, ys
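# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library API).  It shows
# the stacked-GRU shapes described in the docstring above by driving the
# computation through the high-level ``chainer.links.NStepGRU`` wrapper,
# which builds the ``ws``/``bs`` lists internally.  The concrete sizes and
# the helper name ``_example_n_step_gru_usage`` are assumptions made for this
# example only.
# ---------------------------------------------------------------------------
def _example_n_step_gru_usage():
    # Imported locally so the sketch stays self-contained and avoids any
    # import-time cycle between chainer.functions and chainer.links.
    import numpy
    import chainer.links as L

    n_layers, in_size, n_units = 2, 4, 8

    # Three variable-length input sequences, one array per sequence, sorted
    # in descending order by length to match the convention described above.
    xs = [numpy.random.rand(length, in_size).astype(numpy.float32)
          for length in (5, 4, 2)]

    rnn = L.NStepGRU(n_layers, in_size, n_units, 0.0)  # dropout ratio 0.0
    # Passing ``None`` for ``hx`` initialises the hidden state with zeros.
    hy, ys = rnn(None, xs)

    assert hy.shape == (n_layers, len(xs), n_units)  # (S, B, N)
    assert [y.shape for y in ys] == [
        (5, n_units), (4, n_units), (2, n_units)]
    return hy, ys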