Beispiel #1
0
    def __call__(self, x, src_hidden=None):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.
            src_hidden (~chainer.Variable): A batch of corresponding source language LSTM output.
                                            Which is taken using word alignments.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)

        # Attention_in is the input vector of corresponding words from source language
        # which is derived by word alignments
        attention_in = None
        if self.use_attention:
            attention_in = self.attention(src_hidden)

        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(xp.zeros((len(x.data), self.state_size), dtype=x.data.dtype), volatile="auto")
        if attention_in is not None:
            self.c, self.h = lstm.lstm(self.c, lstm_in + attention_in)
        else:
            self.c, self.h = lstm.lstm(self.c, lstm_in)
        return self.h
Beispiel #2
0
 def __call__(self, frame, prev_word, state, dropout_flag, dropout_ratio):
     i1 = self.xi1(dropout(frame, dropout_ratio, dropout_flag))
     c1, h1 = lstm(state['c1'], self.ih1(i1) + self.hh1(state['h1']))
     i2 = self.xi2(prev_word)
     concat = array.concat.concat((i2, h1))
     c2, h2 = lstm(state['c2'], self.ih2(concat) + self.hh2(state['h2']))
     state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
     return state
Beispiel #3
0
 def decode_nonatt(self, frame, prev_word, state):
     i1 = self.xi1(frame)
     c1, h1 = lstm(state['c1'], self.ih1(i1) + self.hh1(state['h1']))
     i2 = self.xi2(prev_word)
     concat = array.concat.concat((i2, h1))
     c2, h2 = lstm(state['c2'], self.ih2(concat) + self.hh2(state['h2']))
     y = self.hy(h2)
     state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
     return y, state
Beispiel #4
0
    def forward(self, c, h, x):
        """Returns new cell state and updated output of LSTM.

        Args:
            c (~chainer.Variable): Cell states of LSTM units.
            h (~chainer.Variable): Output at the previous time step.
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where
            ``c_new`` represents new cell state, and ``h_new`` is updated
            output of LSTM units.

        """
        if self.upward.W.data is None:
            in_size = x.size // x.shape[0]
            with cuda.get_device_from_id(self._device_id):
                self.upward._initialize_params(in_size)
                self._initialize_params()

        lstm_in = self.upward(x)
        if h is not None:
            lstm_in += self.lateral(h)
        if c is None:
            xp = self.xp
            with cuda.get_device_from_id(self._device_id):
                c = variable.Variable(
                    xp.zeros((x.shape[0], self.state_size), dtype=x.dtype))
        return lstm.lstm(c, lstm_in)
    def forward(self, x):
        x, a = x

        if self.upward.W.data is None:
            with cuda.get_device_from_id(self._device_id):
                in_size = functools.reduce(operator.mul, x.shape[1:], 1)
                self.upward._initialize_params(in_size)
                self._initialize_params()

        if self.Wa.W.array is None:
            in_size = a.size // a.shape[0]
            with cuda.get_device_from_id(self._device_id):
                self.Wa._initialize_params(in_size)
                self._initialize_fusion_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == h_size:
                vt = self.Wh(self.h) * self.Wa(a)
                lstm_in += self.lateral(vt)
            else:
                raise NotImplementedError()
        if self.c is None:
            xp = self.xp
            with cuda.get_device_from_id(self._device_id):
                self.c = variable.Variable(
                    xp.zeros((batch, self.state_size), dtype=x.dtype))
        self.c, y = lstm_activation.lstm(self.c, lstm_in)
        self.h = y
        return y
Beispiel #6
0
    def __call__(self, c, h, x):
        """Returns new cell state and updated output of LSTM.

        Args:
            c (~chainer.Variable): Cell states of LSTM units.
            h (~chainer.Variable): Output at the previous time step.
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where
                ``c_new`` represents new cell state, and ``h_new`` is updated
                output of LSTM units.

        """
        if self.upward.has_uninitialized_params:
            in_size = x.size // len(x.data)
            self.upward._initialize_params(in_size)
            self._initialize_params()

        lstm_in = self.upward(x)
        if h is not None:
            lstm_in += self.lateral(h)
        if c is None:
            xp = self.xp
            c = variable.Variable(
                xp.zeros((x.shape[0], self.state_size), dtype=x.dtype),
                volatile='auto')
        return lstm.lstm(c, lstm_in)
Beispiel #7
0
    def forward(self, c, h, x):
        """Returns new cell state and updated output of LSTM.

        Args:
            c (~chainer.Variable): Cell states of LSTM units.
            h (~chainer.Variable): Output at the previous time step.
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where
            ``c_new`` represents new cell state, and ``h_new`` is updated
            output of LSTM units.

        """
        if self.upward.W.array is None:
            in_size = x.size // x.shape[0]
            with chainer.using_device(self.device):
                self.upward._initialize_params(in_size)
                self._initialize_params()

        lstm_in = self.upward(x)
        if h is not None:
            lstm_in += self.lateral(h)
        if c is None:
            xp = self.xp
            with chainer.using_device(self.device):
                c = variable.Variable(
                    xp.zeros((x.shape[0], self.state_size), dtype=x.dtype))
        return lstm.lstm(c, lstm_in)
Beispiel #8
0
def _lstm(x, h, c, w, b):
    xw = _stack_weight([w[2], w[0], w[1], w[3]])
    hw = _stack_weight([w[6], w[4], w[5], w[7]])
    xb = _stack_weight([b[2], b[0], b[1], b[3]])
    hb = _stack_weight([b[6], b[4], b[5], b[7]])
    lstm_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
    c_bar, h_bar = lstm.lstm(c, lstm_in)
    return h_bar, c_bar
Beispiel #9
0
def _lstm(x, h, c, w, b):
    xw = _stack_weight([w[2], w[0], w[1], w[3]])
    hw = _stack_weight([w[6], w[4], w[5], w[7]])
    xb = _stack_weight([b[2], b[0], b[1], b[3]])
    hb = _stack_weight([b[6], b[4], b[5], b[7]])
    lstm_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
    c_bar, h_bar = lstm.lstm(c, lstm_in)
    return h_bar, c_bar
Beispiel #10
0
    def __call__(self, x, W_lateral, b_lateral):

        lstm_in = x
        if self.h is not None:
            lstm_in += linear.linear(self.h, W_lateral, b_lateral)
        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(
                xp.zeros((len(x.data), self.state_size), dtype=x.data.dtype))
        self.c, self.h = lstm.lstm(self.c, lstm_in)
        return self.h
Beispiel #11
0
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.
        Args:
            x (~chainer.Variable): A new batch from the input sequence.
        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.
        """
        if self.upward.has_uninitialized_params:
            in_size = x.size // x.shape[0]
            self.upward._initialize_params(in_size)
            self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than the '
                       'size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(self.h, [batch],
                                                         axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(xp.zeros((batch, self.state_size),
                                                dtype=x.dtype),
                                       volatile='auto')
        # self.c, y = lstm.lstm(self.c, lstm_in)

        c, y = lstm.lstm(self.c, lstm_in)
        enable = (x.data != -1)
        self.c = where(enable, c, self.c)
        if self.h is not None:
            y = where(enable, y, self.h)

        if h_rest is None:
            self.h = y
        elif len(y.data) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Beispiel #12
0
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.has_uninitialized_params:
            with cuda.get_device_from_id(self._device_id):
                in_size = x.size // x.shape[0]
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than'
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(
                    self.h, [batch], axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            with cuda.get_device_from_id(self._device_id):
                self.c = variable.Variable(
                    xp.zeros((batch, self.state_size), dtype=x.dtype),
                    volatile='auto')
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.data) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.W.data is None:
            with cuda.get_device_from_id(self._device_id):
                in_size = functools.reduce(operator.mul, x.shape[1:], 1)
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than'
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(self.h, [batch],
                                                         axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            with cuda.get_device_from_id(self._device_id):
                self.c = variable.Variable(
                    xp.zeros((batch, self.state_size), dtype=x.dtype))
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.data) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Beispiel #14
0
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.W.array is None:
            with chainer.using_device(self.device):
                in_size = utils.size_of_shape(x.shape[1:])
                self.upward._initialize_params(in_size)
                self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)
        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than'
                       'the size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(
                    self.h, [batch], axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    self.xp.zeros((batch, self.state_size), dtype=x.dtype))
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.array) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Beispiel #15
0
def test_t_is_10_nonzero_c_sequence_output():
    np.random.seed(2)
    N = 1
    T = 10
    C1 = 128
    C2 = 64
    vx = np.random.normal(size=(N, T, C1)).astype(np.float32)
    vw_input = np.random.normal(size=(C1, C2 * 4)).astype(np.float32)
    vw_hidden = np.random.normal(size=(C2, C2 * 4)).astype(np.float32)
    vb = np.random.normal(size=(C2 * 4,)).astype(np.float32)
    vc_in = np.random.normal(size=(N, C2)).astype(np.float32)
    vc_out = vc_in.copy()
    vh_in = np.random.normal(size=(N, C2)).astype(np.float32)
    vh = vh_in

    vw_input_c = _convert_to_chainer_order(vw_input)
    vw_hidden_c = _convert_to_chainer_order(vw_hidden)
    vb_c = _convert_to_chainer_order(vb[None, :])
    vh_sequence = []

    for i in range(T):
        vc_out, vh = lstm(vc_out, linear(vx[:, i, :], vw_input_c.T) + linear(vh, vw_hidden_c.T) + vb_c)
        vh_sequence.append(vh.data)

    vh = np.array(vh_sequence).transpose((1, 0, 2))  # TNC -> NTC
    vc_out = vc_out.data

    x = Variable(vx.shape, order=OrderNTC)
    c_in = ConstantVariable(vc_in, order=OrderNC)
    vh_in = ConstantVariable(vh_in, order=OrderNC)
    w_input = ConstantVariable(vw_input, order=OrderCN)
    w_hidden = ConstantVariable(vw_hidden, order=OrderCN)
    b = ConstantVariable(vb, order=OrderC)
    y, c_out = LSTM(None, return_sequences=True, use_bias=True, use_initial_c=True, use_initial_h=True,
                    activation="tanh", recurrent_activation="sigmoid")(x, w_input, w_hidden, b, initial_c=c_in,
                                                                       initial_h=vh_in)

    generate_kernel_test_case(
        description=f"LSTM t=10 initial_c,initial_h=nonzero sequence_out",
        backend=["webassembly", "webgpu"],
        graph=Graph([x], [y, c_out]),
        inputs={x: vx},
        expected={y: vh, c_out: vc_out},
        EPS=1e-3,
        ABS_EPS=1e-7
    )
Beispiel #16
0
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(xp.zeros((len(x.data), self.state_size), dtype=x.data.dtype), volatile="auto")
        self.c, self.h = lstm.lstm(self.c, lstm_in)
        return self.h
Beispiel #17
0
            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                counter = 0
                for x in xs_list:
                    counter += 1
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)
                    if counter == 4:
                        lstm_in = linear.linear(x, xws[layer_idx],
                                                xbs[layer_idx])
                    else:
                        lstm_in = linear.linear(
                            x, xws[layer_idx], xbs[layer_idx]) + linear.linear(
                                h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list
Beispiel #18
0
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(
                xp.zeros((len(x.data), self.state_size), dtype=x.data.dtype),
                volatile='auto')
        self.c, self.h = lstm.lstm(self.c, lstm_in)
        return self.h
Beispiel #19
0
    def __call__(self, c, h):
        """Returns new cell state and updated output of LSTM.

        Args:
            c (~chainer.Variable): Cell states of LSTM units.
            h (~chainer.Variable): Output at the previous timestep.
            For a grid LSTM the initial h and c should be a transform
            of the input x
            For a grid LSTM h acts as the input at each step which itself
            is a transform of the original input for the first step

        Returns:
            tuple of ~chainer.Variable: Returns ``(c_new, h_new)``, where
                ``c_new`` represents new cell state, and ``h_new`` is updated
                output of LSTM units.

        """
        assert c is not None
        assert h is not None
        lstm_in = self.lateral(h)
        return lstm.lstm(c, lstm_in)
Beispiel #20
0
            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio,
                                            train=train)
                    lstm_in = linear.linear(x, xws[layer_idx],
                                            xbs[layer_idx]) + \
                        linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list
Beispiel #21
0
    def __call__(self, x,Whx,Wmx,Wmh,Whm):
        """Updates the internal state and returns the LSTM outputs.
        Args:
            x (~chainer.Variable): A new batch from the input sequence.
        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.
        """
    #    if self.upward.has_uninitialized_params:
    #        in_size = x.size // x.shape[0]
    #        self.upward._initialize_params(in_size)
    #        self._initialize_params()
    #    if self.upward2.has_uninitialized_params:
    #        in_size = x.size // x.shape[0]
    #        self.upward2._initialize_params(in_size)
    #        self._initialize_params()

        batch = x.shape[0]
    #    Whx = self.upward()

    #    Wmx = self.upward2()

        factor_in = F.linear(x,Wmx)
        lstm_in = F.linear(x,Whx,self.b)

        h_rest = None
        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than the '
                       'size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(
                    self.h, [batch], axis=0)
    #            Wmh = self.lateral1()

                mult_in = F.linear(h_update,Wmh)

                mult_out = mult_in*factor_in
        #        Whm = self.lateral2()
                lstm_in += F.linear(mult_out,Whm)

            else:
    #            Wmh = self.lateral1()

                mult_in = F.linear(self.h,Wmh)

                mult_out = mult_in*factor_in
        #        Whm = self.lateral2()
                lstm_in += F.linear(mult_out,Whm)

        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(xp.zeros((batch, self.state_size), dtype=x.dtype),volatile='auto')
        self.c, y = lstm.lstm(self.c, lstm_in)

        if h_rest is None:
            self.h = y
        elif len(y.data) == 0:
            self.h = h_rest
        else:
            self.h = concat.concat([y, h_rest], axis=0)

        return y
Beispiel #22
0
def n_step_lstm(n_layers,
                dropout_ratio,
                hx,
                cx,
                ws,
                bs,
                xs,
                train=True,
                use_cudnn=True):
    """Stacked Long Short-Term Memory function for sequence inputs.

    This function calculates stacked LSTM with sequences. This function gets
    an initial hidden state :math:`h_0`, an initial cell state :math:`c_0`,
    an input sequence :math:`x`, weight matrices :math:`W`, and bias vectors
    :math:`b`.
    This function calculates hidden states :math:`h_t` and :math:`c_t` for each
    time :math:`t` from input :math:`x_t`.

    .. math::

       i_t = \sigma(W_0 x_t + W_4 h_{t-1} + b_0 + b_4)
       f_t = \sigma(W_1 x_t + W_5 h_{t-1} + b_1 + b_5)
       o_t = \sigma(W_2 x_t + W_6 h_{t-1} + b_2 + b_6)
       a_t = \tanh(W_3 x_t + W_7 h_{t-1} + b_3 + b_7)
       c_t = f_t \dot c_{t-1} + i_t \dot a_t
       h_t = o_t \dot \tanh(c_t)

    As the function accepts a sequence, it calculates :math:`h_t` for all
    :math:`t` with one call. Eight weight matrices and eight bias vectors are
    required for each layers. So, when :math:`S` layers exists, you need to
    prepare :math:`8S` weigth matrices and :math:`8S` bias vectors.

    If the number of layers ``n_layers`` is greather than :math:`1`, input
    of ``k``-th layer is hidden state ``h_t`` of ``k-1``-th layer.
    Note that all input variables except first layer may have different shape
    from the first layer.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimention of hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` is corresponding with ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` is ``(I, N)`` shape as they
            are multiplied with input variables. All other matrices has
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represnents biases for i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` is corresponding with ``b_j`` in the equation.
            Shape of each matrix is ``(N,)`` where ``N`` is dimention of
            hidden units.
        xs (list of chainer.Variable): A list of :class:`chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this functions supports variable length sequences.
            When sequneces has different lengths, sort sequences in descending
            order by length, and transpose the sorted sequence.
            :func:`~chainer.functions.transpose_sequence` transpose a list
            of :func:`~chainer.Variable` holding sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        train (bool): If ``True``, this function executes dropout.
        use_cudnn (bool): If ``True``, this function uses cuDNN if available.

    Returns:
        tuple: This functions returns a tuple concaining three elements,
            ``hy``, ``cy`` and ``ys``.

            - ``hy`` is an updated hidden states whose shape is same as ``hx``.
            - ``cy`` is an updated cell states whose shape is same as ``cx``.
            - ``ys`` is a list of :class:~chainer.Variable. Each element
              ``ys[t]`` holds hidden states of the last layer corresponding
              to an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is
              mini-batch size for time ``t``, and ``N`` is size of hidden
              units. Note that ``B_t`` is the same value as ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.lstm`

    """

    xp = cuda.get_array_module(hx, hx.data)

    if use_cudnn and xp is not numpy and cuda.cudnn_enabled and \
       _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(
            itertools.chain((hx, cx), itertools.chain.from_iterable(ws),
                            itertools.chain.from_iterable(bs), xs))
        rnn = NStepLSTM(n_layers, states, train=train)
        ret = rnn(*inputs)
        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        hx = split_axis.split_axis(hx, n_layers, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, n_layers, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        ys = []
        for x in xs:
            batch = x.shape[0]
            h_next = []
            c_next = []
            for layer in six.moves.range(n_layers):
                h = hx[layer]
                c = cx[layer]
                if h.shape[0] > batch:
                    h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                else:
                    h_rest = None

                x = dropout.dropout(x, ratio=dropout_ratio, train=train)
                h = dropout.dropout(h, ratio=dropout_ratio, train=train)
                lstm_in = linear.linear(x, xws[layer], xbs[layer]) + \
                    linear.linear(h, hws[layer], hbs[layer])

                c_bar, h_bar = lstm.lstm(c, lstm_in)
                if h_rest is not None:
                    h = concat.concat([h_bar, h_rest], axis=0)
                    c = concat.concat([c_bar, c_rest], axis=0)
                else:
                    h = h_bar
                    c = c_bar
                h_next.append(h)
                c_next.append(c)
                x = h_bar
            hx = h_next
            cx = c_next
            ys.append(x)

        hy = stack.stack(hx)
        cy = stack.stack(cx)
        return hy, cy, tuple(ys)
Beispiel #23
0
def n_step_lstm(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, train=True,
        use_cudnn=True):
    """Stacked Long Short-Term Memory function for sequence inputs.

    This function calculates stacked LSTM with sequences. This function gets
    an initial hidden state :math:`h_0`, an initial cell state :math:`c_0`,
    an input sequence :math:`x`, weight matrices :math:`W`, and bias vectors
    :math:`b`.
    This function calculates hidden states :math:`h_t` and :math:`c_t` for each
    time :math:`t` from input :math:`x_t`.

    .. math::

       i_t &= \\sigma(W_0 x_t + W_4 h_{t-1} + b_0 + b_4) \\\\
       f_t &= \\sigma(W_1 x_t + W_5 h_{t-1} + b_1 + b_5) \\\\
       o_t &= \\sigma(W_2 x_t + W_6 h_{t-1} + b_2 + b_6) \\\\
       a_t &= \\tanh(W_3 x_t + W_7 h_{t-1} + b_3 + b_7) \\\\
       c_t &= f_t \\dot c_{t-1} + i_t \\dot a_t \\\\
       h_t &= o_t \\dot \\tanh(c_t)

    As the function accepts a sequence, it calculates :math:`h_t` for all
    :math:`t` with one call. Eight weight matrices and eight bias vectors are
    required for each layers. So, when :math:`S` layers exists, you need to
    prepare :math:`8S` weigth matrices and :math:`8S` bias vectors.

    If the number of layers ``n_layers`` is greather than :math:`1`, input
    of ``k``-th layer is hidden state ``h_t`` of ``k-1``-th layer.
    Note that all input variables except first layer may have different shape
    from the first layer.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is number of layers and is
            equal to ``n_layers``, ``B`` is mini-batch size, and ``N`` is
            dimention of hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents weights for i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` is corresponding with ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 4`` is ``(I, N)`` shape as they
            are multiplied with input variables. All other matrices has
            ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represnents biases for i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` is corresponding with ``b_j`` in the equation.
            Shape of each matrix is ``(N,)`` where ``N`` is dimention of
            hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this functions supports variable length sequences.
            When sequneces has different lengths, sort sequences in descending
            order by length, and transpose the sorted sequence.
            :func:`~chainer.functions.transpose_sequence` transpose a list
            of :func:`~chainer.Variable` holding sequence.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        train (bool): If ``True``, this function executes dropout.
        use_cudnn (bool): If ``True``, this function uses cuDNN if available.

    Returns:
        tuple: This functions returns a tuple concaining three elements,
            ``hy``, ``cy`` and ``ys``.

            - ``hy`` is an updated hidden states whose shape is same as ``hx``.
            - ``cy`` is an updated cell states whose shape is same as ``cx``.
            - ``ys`` is a list of :class:`~chainer.Variable` . Each element
              ``ys[t]`` holds hidden states of the last layer corresponding
              to an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is
              mini-batch size for time ``t``, and ``N`` is size of hidden
              units. Note that ``B_t`` is the same value as ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.lstm`

    """

    xp = cuda.get_array_module(hx, hx.data)

    if use_cudnn and xp is not numpy and cuda.cudnn_enabled and \
       _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, cx),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        rnn = NStepLSTM(n_layers, states, train=train)
        ret = rnn(*inputs)
        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        hx = split_axis.split_axis(hx, n_layers, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, n_layers, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        ys = []
        for x in xs:
            batch = x.shape[0]
            h_next = []
            c_next = []
            for layer in six.moves.range(n_layers):
                h = hx[layer]
                c = cx[layer]
                if h.shape[0] > batch:
                    h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                else:
                    h_rest = None

                x = dropout.dropout(x, ratio=dropout_ratio, train=train)
                h = dropout.dropout(h, ratio=dropout_ratio, train=train)
                lstm_in = linear.linear(x, xws[layer], xbs[layer]) + \
                    linear.linear(h, hws[layer], hbs[layer])

                c_bar, h_bar = lstm.lstm(c, lstm_in)
                if h_rest is not None:
                    h = concat.concat([h_bar, h_rest], axis=0)
                    c = concat.concat([c_bar, c_rest], axis=0)
                else:
                    h = h_bar
                    c = c_bar
                h_next.append(h)
                c_next.append(c)
                x = h_bar
            hx = h_next
            cx = c_next
            ys.append(x)

        hy = stack.stack(hx)
        cy = stack.stack(cx)
        return hy, cy, tuple(ys)
Beispiel #24
0
def fixed_length_n_step_lstm(
    n_layers,
    dropout_ratio,
    hx,
    cx,
    ws,
    bs,
    xs,
    train=True,
):

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and cuda.cudnn_enabled and _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(
            itertools.chain((hx, cx), itertools.chain.from_iterable(ws),
                            itertools.chain.from_iterable(bs), (xs, )))
        rnn = FixedLengthNStepLSTMFunction(n_layers, states, train=train)
        ret = rnn(*inputs)
        hy, cy, ys = ret
        _, batch_size, dim = hy.shape
        ys_reshape = F.reshape(ys,
                               (-1, batch_size, dim))  # (length, batch, dim)
        return hy, cy, ys_reshape

    else:
        hx = split_axis.split_axis(hx, n_layers, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, n_layers, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        ys = []
        for x in xs:
            batch = x.shape[0]
            h_next = []
            c_next = []
            for layer in six.moves.range(n_layers):
                h = hx[layer]
                c = cx[layer]
                if h.shape[0] > batch:
                    h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                else:
                    h_rest = None

                x = dropout.dropout(x, ratio=dropout_ratio)
                h = dropout.dropout(h, ratio=dropout_ratio)
                lstm_in = linear.linear(x, xws[layer], xbs[layer]) + \
                          linear.linear(h, hws[layer], hbs[layer])

                c_bar, h_bar = lstm.lstm(c, lstm_in)
                if h_rest is not None:
                    h = concat.concat([h_bar, h_rest], axis=0)
                    c = concat.concat([c_bar, c_rest], axis=0)
                else:
                    h = h_bar
                    c = c_bar
                h_next.append(h)
                c_next.append(c)
                x = h_bar
            hx = h_next
            cx = c_next
            ys.append(x)

        hy = stack.stack(hx)
        cy = stack.stack(cx)
        #return hy, cy, tuple(ys)
        ys_concat = F.concat(ys, axis=0)
        ys_reshape = F.reshape(
            ys_concat,
            (-1, ys[0].shape[0], ys[0].shape[1]))  # (length, batch, dim)

        return hy, cy, ys_reshape