def __call__(self, x, h, c):
     ft = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h))
     ct = tanh.tanh(self.W_cx(x) + self.W_ch(h))
     ot = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h))
     c = ft * c + (1 - ft) * ct
     h = ot * tanh.tanh(c)
     return h, c
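For reference, here is a minimal NumPy sketch of the coupled-gate (CIFG-style) update this snippet computes, where the input gate is tied to the forget gate as (1 - f); the weight names are illustrative, not taken from the original class.

import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def cifg_step(x, h, c, W_fx, W_fh, W_cx, W_ch, W_ox, W_oh):
    # Coupled input/forget gate: the input gate is (1 - f).
    f = np_sigmoid(x @ W_fx + h @ W_fh)
    c_tilde = np.tanh(x @ W_cx + h @ W_ch)
    c_new = f * c + (1.0 - f) * c_tilde
    h_new = np_sigmoid(x @ W_ox + h @ W_oh) * np.tanh(c_new)
    return h_new, c_new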
Example #2
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    xp.zeros((len(x), self.state_size), dtype=x.dtype))
        lstm_in = reshape.reshape(lstm_in,
                                  (len(lstm_in), lstm_in.shape[1] // 4, 4))
        a, i, f, o = split_axis.split_axis(lstm_in, 4, 2)
        a = reshape.reshape(a, a.shape[:2])
        i = reshape.reshape(i, i.shape[:2])
        f = reshape.reshape(f, f.shape[:2])
        o = reshape.reshape(o, o.shape[:2])
        peep_in_i = self.peep_i(self.c)
        peep_in_f = self.peep_f(self.c)
        a = tanh.tanh(a)
        i = sigmoid.sigmoid(i + peep_in_i)
        f = sigmoid.sigmoid(f + peep_in_f)
        self.c = a * i + f * self.c
        peep_in_o = self.peep_o(self.c)
        o = sigmoid.sigmoid(o + peep_in_o)
        self.h = o * tanh.tanh(self.c)
        return self.h
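As a rough sketch of the gating order used above (input and forget gates peek at the previous cell, the output gate at the updated cell), here is a NumPy version with diagonal (elementwise) peephole weights for brevity; the Chainer example uses full linear links peep_i, peep_f, peep_o.

import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def peephole_lstm_step(a_in, i_in, f_in, o_in, c, p_i, p_f, p_o):
    # a_in, i_in, f_in, o_in: pre-activation gate inputs (from x and h).
    a = np.tanh(a_in)
    i = np_sigmoid(i_in + p_i * c)      # input gate peeks at the old cell
    f = np_sigmoid(f_in + p_f * c)      # forget gate peeks at the old cell
    c_new = a * i + f * c
    o = np_sigmoid(o_in + p_o * c_new)  # output gate peeks at the new cell
    return o * np.tanh(c_new), c_new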
Example #3
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(
                xp.zeros((x.shape[0], self.state_size), dtype=x.dtype),
                volatile="auto")
        lstm_in = reshape.reshape(
            lstm_in, (len(lstm_in.data), lstm_in.shape[1] // 4, 4))
        a, i, f, o = split_axis.split_axis(lstm_in, 4, 2)
        a = reshape.reshape(a, (len(a.data), a.shape[1]))
        i = reshape.reshape(i, (len(i.data), i.shape[1]))
        f = reshape.reshape(f, (len(f.data), f.shape[1]))
        o = reshape.reshape(o, (len(o.data), o.shape[1]))
        peep_in_i = self.peep_i(self.c)
        peep_in_f = self.peep_f(self.c)
        a = tanh.tanh(a)
        i = sigmoid.sigmoid(i + peep_in_i)
        f = sigmoid.sigmoid(f + peep_in_f)
        self.c = a * i + f * self.c
        peep_in_o = self.peep_o(self.c)
        o = sigmoid.sigmoid(o + peep_in_o)
        self.h = o * tanh.tanh(self.c)
        return self.h
Example #4
    def forward(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            with chainer.using_device(self.device):
                self.c = variable.Variable(
                    xp.zeros((len(x), self.state_size), dtype=x.dtype))
        lstm_in = reshape.reshape(
            lstm_in, (len(lstm_in), lstm_in.shape[1] // 4, 4))
        a, i, f, o = split_axis.split_axis(lstm_in, 4, 2)
        a = reshape.reshape(a, a.shape[:2])
        i = reshape.reshape(i, i.shape[:2])
        f = reshape.reshape(f, f.shape[:2])
        o = reshape.reshape(o, o.shape[:2])
        peep_in_i = self.peep_i(self.c)
        peep_in_f = self.peep_f(self.c)
        a = tanh.tanh(a)
        i = sigmoid.sigmoid(i + peep_in_i)
        f = sigmoid.sigmoid(f + peep_in_f)
        self.c = a * i + f * self.c
        peep_in_o = self.peep_o(self.c)
        o = sigmoid.sigmoid(o + peep_in_o)
        self.h = o * tanh.tanh(self.c)
        return self.h
Example #5
    def __call__(self, *cshsx):
        """Returns new cell state and output of Child-Sum TreeLSTM.

        Args:
            cshsx (list of :class:`~chainer.Variable`): Variable arguments
                which include all cell vectors and all output vectors of
                variable children, and an input vector.

        Returns:
            tuple of ~chainer.Variable: Returns
            :math:`(c_{new}, h_{new})`, where :math:`c_{new}` represents
            the new cell state vector, and :math:`h_{new}` is the new
            output vector.

        """

        cs = cshsx[:len(cshsx) // 2]
        hs = cshsx[len(cshsx) // 2:-1]
        x = cshsx[-1]
        assert (len(cshsx) % 2 == 1)
        assert (len(cs) == len(hs))

        if x is None:
            if any(c is not None for c in cs):
                base = [c for c in cs if c is not None][0]
            elif any(h is not None for h in hs):
                base = [h for h in hs if h is not None][0]
            else:
                raise ValueError('All inputs (cs, hs, x) are None.')
            batchsize, dtype = base.shape[0], base.dtype
            x = self.xp.zeros((batchsize, self.in_size), dtype=dtype)

        W_x_in = self.W_x(x)
        W_x_aio_in, W_x_f_in = split_axis.split_axis(W_x_in,
                                                     [3 * self.state_size],
                                                     axis=1)

        if len(hs) == 0:
            aio_in = W_x_aio_in
            a, i, o = split_axis.split_axis(aio_in, 3, axis=1)
            c = sigmoid.sigmoid(i) * tanh.tanh(a)
            h = sigmoid.sigmoid(o) * tanh.tanh(c)
            return c, h

        hs = self._pad_zero_nodes(hs, (x.shape[0], self.state_size),
                                  dtype=x.dtype)
        cs = self._pad_zero_nodes(cs, (x.shape[0], self.state_size),
                                  dtype=x.dtype)

        aio_in = self.W_h_aio(sum(hs)) + W_x_aio_in
        W_h_fs_in = concat.concat(split_axis.split_axis(self.W_h_f(
            concat.concat(hs, axis=0)),
                                                        len(hs),
                                                        axis=0),
                                  axis=1)
        f_in = W_h_fs_in + \
            concat.concat([W_x_f_in] * len(hs), axis=1)
        tree_lstm_in = concat.concat([aio_in, f_in], axis=1)

        return tree_lstm.tree_lstm(*(cs + (tree_lstm_in, )))
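The Child-Sum TreeLSTM above computes the a/i/o gates from the input plus the sum of the children's outputs, and one forget gate per child. A compact NumPy sketch of that cell update, assuming the gate pre-activations have already been computed:

import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def child_sum_treelstm_step(cs, a_in, i_in, o_in, f_ins):
    # cs: child cell states; f_ins[k]: forget pre-activation for child k.
    a, i, o = np.tanh(a_in), np_sigmoid(i_in), np_sigmoid(o_in)
    c_new = a * i + sum(np_sigmoid(f_k) * c_k for f_k, c_k in zip(f_ins, cs))
    return c_new, o * np.tanh(c_new)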
 def __call__(self, x, h, c):
     ft = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h) + self.W_fc(c))
     ct = tanh.tanh(self.W_cx(x) + self.W_ch(h))
     c = ft * c + (1 - ft) * ct
     ot = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h) + self.W_oc(c))
     h = ot * tanh.tanh(c)
     return h, c
Example #7
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(xp.zeros((len(x.data), self.state_size),
                                                dtype=x.data.dtype),
                                       volatile='auto')
        lstm_in = reshape.reshape(
            lstm_in, (len(lstm_in.data), lstm_in.data.shape[1] // 4, 4))
        a, i, f, o = split_axis.split_axis(lstm_in, 4, 2)
        a = reshape.reshape(a, (len(a.data), a.data.shape[1]))
        i = reshape.reshape(i, (len(i.data), i.data.shape[1]))
        f = reshape.reshape(f, (len(f.data), f.data.shape[1]))
        o = reshape.reshape(o, (len(o.data), o.data.shape[1]))
        peep_in_i = self.peep_i(self.c)
        peep_in_f = self.peep_f(self.c)
        a = tanh.tanh(a)
        i = sigmoid.sigmoid(i + peep_in_i)
        f = sigmoid.sigmoid(f + peep_in_f)
        self.c = a * i + f * self.c
        peep_in_o = self.peep_o(self.c)
        o = sigmoid.sigmoid(o + peep_in_o)
        self.h = o * tanh.tanh(self.c)
        return self.h
 def __call__(self, x, h, c):
     ft = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h))
     it = sigmoid.sigmoid(self.W_ix(x) + self.W_ih(h))
     ct = tanh.tanh(self.W_cx(x) + self.W_ch(h))
     ot = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h))
     c = ft * c + it * ct
     h = ot * tanh.tanh(c)
     return h, c
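A minimal sketch of how a stateless cell like the one above might be driven over a sequence, carrying (h, c) manually; cell, xs, and hidden_size are illustrative names, not part of the original code.

import numpy as np

def run_cell_over_sequence(cell, xs, hidden_size):
    # xs: list of (batch, in_size) arrays; cell(x, h, c) returns (h, c).
    batch = xs[0].shape[0]
    h = np.zeros((batch, hidden_size), dtype=xs[0].dtype)
    c = np.zeros((batch, hidden_size), dtype=xs[0].dtype)
    outputs = []
    for x in xs:
        h, c = cell(x, h, c)
        outputs.append(h)
    return outputs, (h, c)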
 def __call__(self, x, h, c):
     ft = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h) + self.W_fc(c))
     it = sigmoid.sigmoid(self.W_ix(x) + self.W_ih(h) + self.W_ic(c))
     ct = tanh.tanh(self.W_cx(x) + self.W_ch(h))
     c = ft * c + it * ct
     ot = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h) + self.W_oc(c))
     h = ot * tanh.tanh(c)
     return h, c
Example #10
    def forward(self, *cshsx):
        """Returns new cell state and output of Child-Sum TreeLSTM.

        Args:
            cshsx (list of :class:`~chainer.Variable`): Variable arguments
                which include all cell vectors and all output vectors of
                variable children, and an input vector.

        Returns:
            tuple of ~chainer.Variable: Returns
            :math:`(c_{new}, h_{new})`, where :math:`c_{new}` represents
            the new cell state vector, and :math:`h_{new}` is the new
            output vector.

        """

        cs = cshsx[:len(cshsx) // 2]
        hs = cshsx[len(cshsx) // 2:-1]
        x = cshsx[-1]
        assert(len(cshsx) % 2 == 1)
        assert(len(cs) == len(hs))

        if x is None:
            if any(c is not None for c in cs):
                base = [c for c in cs if c is not None][0]
            elif any(h is not None for h in hs):
                base = [h for h in hs if h is not None][0]
            else:
                raise ValueError('All inputs (cs, hs, x) are None.')
            batchsize, dtype = base.shape[0], base.dtype
            x = self.xp.zeros(
                (batchsize, self.in_size), dtype=dtype)

        W_x_in = self.W_x(x)
        W_x_aio_in, W_x_f_in = split_axis.split_axis(
            W_x_in, [3 * self.state_size], axis=1)

        if len(hs) == 0:
            aio_in = W_x_aio_in
            a, i, o = split_axis.split_axis(aio_in, 3, axis=1)
            c = sigmoid.sigmoid(i) * tanh.tanh(a)
            h = sigmoid.sigmoid(o) * tanh.tanh(c)
            return c, h

        hs = self._pad_zero_nodes(
            hs, (x.shape[0], self.state_size), dtype=x.dtype)
        cs = self._pad_zero_nodes(
            cs, (x.shape[0], self.state_size), dtype=x.dtype)

        aio_in = self.W_h_aio(sum(hs)) + W_x_aio_in
        W_h_fs_in = concat.concat(split_axis.split_axis(
            self.W_h_f(concat.concat(hs, axis=0)), len(hs), axis=0),
            axis=1)
        f_in = W_h_fs_in + \
            concat.concat([W_x_f_in] * len(hs), axis=1)
        tree_lstm_in = concat.concat([aio_in, f_in], axis=1)

        return tree_lstm.tree_lstm(*(cs + (tree_lstm_in, )))
Example #11
    def forward(self, x, y):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        if self.upward.has_uninitialized_params:
            in_size = x.size // x.shape[0]
            self.upward._initialize_params(in_size)
            self._initialize_params()

        batch = x.shape[0]
        lstm_in = self.upward(x)

        if self.h is not None:
            h_size = self.h.shape[0]
            if batch == 0:
                h_rest = self.h
            elif h_size < batch:
                msg = ('The batch size of x must be equal to or less than the '
                       'size of the previous state h.')
                raise TypeError(msg)
            elif h_size > batch:
                h_update, h_rest = split_axis.split_axis(
                    self.h, [batch], axis=0)
                lstm_in += self.lateral(h_update)
            else:
                lstm_in += self.lateral(self.h)
        if self.c is None:
            xp = self.xp
            self.c = variable.Variable(
                xp.zeros((batch, self.state_size), dtype=x.dtype),
                volatile='auto')

        r = reshape.reshape(
            lstm_in,
            (len(lstm_in.data), lstm_in.data.shape[1] // 4, 4)
            + lstm_in.data.shape[2:])
        a, i, f, o = [r[:, :, i] for i in range(4)]

        # self.c, y = lstm.lstm(self.c,lstm_in)

        a = tanh.tanh(a)  # tanh.tanh(a)
        i = sigmoid.sigmoid(i)
        f = sigmoid.sigmoid(f)
        o = sigmoid.sigmoid(o)

        self.c = a * i + f * self.c + tanh.tanh(self.w_y(y))
        self.h = o * tanh.tanh(self.c)

        return self.h
Example #12
	def __call__(self, h, x):
		x_g = self.W_xh(x)
		z_g = tanh.tanh(self.W_zxh(x_g * h))
		z_out = sigmoid.sigmoid(self.W_go(z_g * h))
		z_t = hard_sigmoid(self.W_xz(x) + self.W_hz(h))
		h_t = (1 - z_t) * h + z_t * z_out
		return h_t
            def _one_directional_loop(di):
                # di=0, forward RNN
                # di=1, backward RNN
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    rnn_in = (
                        linear.linear(x, xws[layer_idx], xbs[layer_idx]) +
                        linear.linear(h, hws[layer_idx], hbs[layer_idx]))
                    if activation == 'tanh':
                        h_bar = tanh.tanh(rnn_in)
                    elif activation == 'relu':
                        h_bar = relu.relu(rnn_in)

                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
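In this loop, xs_next is assumed to be sorted by decreasing length: when the current mini-batch is smaller than the carried state, h is split into the active rows and h_rest, and the inactive rows are concatenated back after the step so that shorter, later time steps still carry a full-size state.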
Example #14
            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #15
 def __call__(self, h, x):
     x_g = self.W_xh(x)
     z_g = tanh.tanh(self.W_zxh(x_g * h))
     z_out = sigmoid.sigmoid(self.W_go(z_g * h))
     z_t = hard_sigmoid(self.W_xz(x) + self.W_hz(h))
     h_t = (1 - z_t) * h + z_t * z_out
     return h_t
Example #16
            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #17
            def _one_directional_loop(di):
                # di=0, forward RNN
                # di=1, backward RNN
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    rnn_in = (linear.linear(x, xws[layer_idx],
                                            xbs[layer_idx]) +
                              linear.linear(h, hws[layer_idx], hbs[layer_idx]))
                    if activation == 'tanh':
                        h_bar = tanh.tanh(rnn_in)
                    elif activation == 'relu':
                        h_bar = relu.relu(rnn_in)

                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #18
	def __call__(self, h, x):
		x_g = self.W_xh(x)
		z_g = tanh.tanh(self.W_zxh(x_g * h))
		z_out = sigmoid.sigmoid(self.W_go(z_g * h))
		z_t = hard_sigmoid(self.W_xz(x) + self.W_hz(h))
		h_t = linear_interpolate(z_t, z_out, h)
		return h_t
Example #19
 def f(x, h, c, w, b):
     xw, hw = w
     xb, hb = b
     rnn_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
     if activation == 'tanh':
         return tanh.tanh(rnn_in), None
     elif activation == 'relu':
         return relu.relu(rnn_in), None
Example #20
 def f(x, h, c, w, b):
     xw, hw = w
     xb, hb = b
     rnn_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
     if activation == 'tanh':
         return tanh.tanh(rnn_in), None
     elif activation == 'relu':
         return relu.relu(rnn_in), None
Example #21
    def __call__(self, x):
        """Updates the internal state and returns the LSTM outputs.

        Args:
            x (~chainer.Variable): A new batch from the input sequence.

        Returns:
            ~chainer.Variable: Outputs of updated LSTM units.

        """
        lstm_in = self.upward(x)
        if self.h is not None:
            lstm_in += self.lateral(self.h)
        else:
            xp = self.xp
            with cuda.get_device(self._device_id):
                self.h = variable.Variable(
                    xp.zeros((len(x.data), self.state_size),
                             dtype=x.data.dtype),
                    volatile='auto')
        if self.c is None:
            xp = self.xp
            with cuda.get_device(self._device_id):
                self.c = variable.Variable(
                    xp.zeros((len(x.data), self.state_size),
                             dtype=x.data.dtype),
                    volatile='auto')

        lstm_in = reshape.reshape(lstm_in, (len(lstm_in.data),
                                            lstm_in.data.shape[1] // 4,
                                            4))

        a, i, f, o = split_axis.split_axis(lstm_in, 4, 2)
        a = reshape.reshape(a, (len(a.data), self.state_size))
        i = reshape.reshape(i, (len(i.data), self.state_size))
        f = reshape.reshape(f, (len(f.data), self.state_size))
        o = reshape.reshape(o, (len(o.data), self.state_size))

        c_tmp = tanh.tanh(a) * sigmoid.sigmoid(i) + sigmoid.sigmoid(f) * self.c
        self.c = zoneout.zoneout(self.c, c_tmp, self.c_ratio, self.train)
        self.h = zoneout.zoneout(self.h,
                                 sigmoid.sigmoid(o) * tanh.tanh(c_tmp),
                                 self.h_ratio, self.train)
        return self.h
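For intuition, a rough NumPy sketch of the zoneout mixing used above, assuming the common formulation in which each unit keeps its previous value with probability ratio during training and uses the expectation at test time (a sketch, not Chainer's exact implementation):

import numpy as np

def np_zoneout(previous, current, ratio, train=True):
    if train:
        keep = np.random.rand(*current.shape) < ratio   # per-unit mask
        return np.where(keep, previous, current)
    return ratio * previous + (1.0 - ratio) * current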
    def __call__(self, x):
        ft = self.W_fx(x)
        ct = self.W_cx(x)
        ot = self.W_ox(x)

        if self.h is not None and self.c is not None:
            ft += self.W_fh(self.h) + self.W_fc(self.c)
            ct += self.W_ch(self.h)
            ot += self.W_oh(self.h)
        ft = sigmoid.sigmoid(ft)
        ct = tanh.tanh(ct)
        ot = sigmoid.sigmoid(ot + self.W_oc(ct))

        c = (1 - ft) * ct
        if self.c is not None:
            c += ft * self.c
        self.c = c

        self.h = ot * tanh.tanh(self.c)
        return self.h
    def __call__(self, x):
        ft = self.W_fx(x)
        ct = self.W_cx(x)
        ot = self.W_ox(x)

        if self.h is not None and self.c is not None:
            ft += self.W_fh(self.h) + self.W_fc(self.c)
            ct += self.W_ch(self.h)
            ot += self.W_oh(self.h)
        ft = sigmoid.sigmoid(ft)
        ct = tanh.tanh(ct)
        ot = sigmoid.sigmoid(ot + self.W_oc(ct))

        c = (1 - ft) * ct
        if self.c is not None:
            c += ft * self.c
        self.c = c

        self.h = ot * tanh.tanh(self.c)
        return self.h
    def __call__(self, x):
        ft = self.W_fx(x)
        it = self.W_ix(x)
        ct = self.W_cx(x)
        ot = self.W_ox(x)

        if self.h is not None:
            ft += self.W_fh(self.h)
            it += self.W_ih(self.h)
            ct += self.W_ch(self.h)
            ot += self.W_oh(self.h)
        ft = sigmoid.sigmoid(ft)
        it = sigmoid.sigmoid(it)
        ct = tanh.tanh(ct)
        ot = sigmoid.sigmoid(ot)

        c = it * ct
        if self.c is not None:
            c += ft * self.c
        self.c = c
        self.h = ot * tanh.tanh(self.c)
        return self.h
    def __call__(self, x):
        ft = self.W_fx(x)
        it = self.W_ix(x)
        ct = self.W_cx(x)
        ot = self.W_ox(x)

        if self.h is not None:
            ft += self.W_fh(self.h)
            it += self.W_ih(self.h)
            ct += self.W_ch(self.h)
            ot += self.W_oh(self.h)
        ft = sigmoid.sigmoid(ft)
        it = sigmoid.sigmoid(it)
        ct = tanh.tanh(ct)
        ot = sigmoid.sigmoid(ot)

        c = it * ct
        if self.c is not None:
            c += ft * self.c
        self.c = c
        self.h = ot * tanh.tanh(self.c)
        return self.h
Example #26
    def __call__(self, x):
        z = self.W_z(x)
        h_bar = self.W(x)
        if self.h is not None:
            r = sigmoid.sigmoid(self.W_r(x) + self.U_r(self.h))
            z += self.U_z(self.h)
            h_bar += self.U(r * self.h)
        z = sigmoid.sigmoid(z)
        h_bar = tanh.tanh(h_bar)

        h_new = z * h_bar
        if self.h is not None:
            h_new += (1 - z) * self.h
        self.h = h_new
        return self.h
Example #27
    def __call__(self, x):
        z = self.W_z(x)
        h_bar = self.W(x)
        if self.h is not None:
            r = sigmoid.sigmoid(self.W_r(x) + self.U_r(self.h))
            z += self.U_z(self.h)
            h_bar += self.U(r * self.h)
        z = sigmoid.sigmoid(z)
        h_bar = tanh.tanh(h_bar)

        h_new = z * h_bar
        if self.h is not None:
            h_new += (1 - z) * self.h
        self.h = h_new
        return self.h
Example #28
def _gru(x, h, c, w, b):
    xw = concat.concat([w[0], w[1], w[2]], axis=0)
    hw = concat.concat([w[3], w[4], w[5]], axis=0)
    xb = concat.concat([b[0], b[1], b[2]], axis=0)
    hb = concat.concat([b[3], b[4], b[5]], axis=0)

    gru_x = linear.linear(x, xw, xb)
    gru_h = linear.linear(h, hw, hb)

    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

    r = sigmoid.sigmoid(W_r_x + U_r_h)
    z = sigmoid.sigmoid(W_z_x + U_z_h)
    h_bar = tanh.tanh(W_x + r * U_x)
    return (1 - z) * h_bar + z * h, None
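The gate algebra in _gru is the standard GRU update; a self-contained NumPy sketch with illustrative weight names:

import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x, h, W_r, U_r, W_z, U_z, W, U):
    r = np_sigmoid(x @ W_r + h @ U_r)       # reset gate
    z = np_sigmoid(x @ W_z + h @ U_z)       # update gate
    h_bar = np.tanh(x @ W + r * (h @ U))    # candidate state
    return (1.0 - z) * h_bar + z * h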
Example #29
    def __call__(self, a_list, state, batch_size, xp):
        e_list = []
        sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
        for a in a_list:
            v = tanh(self.av(array.concat.concat((a, state['h2']), axis=1)))
            w = self.vw(v)
            e = exp(w)
            e_list.append(e)
            sum_e = sum_e + e

        context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)
        for a, e in zip(a_list, e_list):
            e /= sum_e
            context = context + reshape(batch_matmul(a, e),
                                        (batch_size, self.hidden_size))
        return context, e_list, sum_e
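The snippet above is additive attention computed term by term: each score is exponentiated, the running sum normalizes them, and the context is the weighted sum of the annotations. A self-contained NumPy sketch with illustrative parameter names (W_av, b_av, w_v):

import numpy as np

def additive_attention(a_list, h2, W_av, b_av, w_v):
    # a_list: annotations, each (batch, hidden); h2: decoder state (batch, d2).
    scores = [np.exp(np.tanh(np.concatenate([a, h2], axis=1) @ W_av + b_av) @ w_v)
              for a in a_list]                       # each (batch, 1)
    total = sum(scores)
    context = sum((e / total) * a for e, a in zip(scores, a_list))
    return context, scores, total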
Example #30
    def faster_call(self, h, x):
        r_z_h_x = self.W_r_z_h(x)
        r_x, z_x, h_x = split_axis(r_z_h_x, (self.n_units, self.n_units * 2),
                                   axis=1)
        assert r_x.data.shape[1] == self.n_units
        assert z_x.data.shape[1] == self.n_units
        assert h_x.data.shape[1] == self.n_units

        r_z_h = self.U_r_z(h)
        r_h, z_h = split_axis(r_z_h, (self.n_units, ), axis=1)

        r = sigmoid.sigmoid(r_x + r_h)
        z = sigmoid.sigmoid(z_x + z_h)
        h_bar = tanh.tanh(h_x + self.U(r * h))
        h_new = (1 - z) * h + z * h_bar
        return h_new
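faster_call fuses the three input-side projections into a single link (W_r_z_h) and then splits the result; this computes the same gates as separate per-gate links while doing one larger matrix multiply on the input side, which is typically cheaper than three small ones.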
Example #31
    def __call__(self, x):
        z = self.W_z(x)
        h_bar = self.W(x)
        if self.h is not None:
            r = sigmoid.sigmoid(self.W_r(x) + self.U_r(self.h))
            z += self.U_z(self.h)
            h_bar += self.U(r * self.h)
        z = sigmoid.sigmoid(z)
        h_bar = tanh.tanh(h_bar)

        if self.h is not None:
            h_new = linear_interpolate.linear_interpolate(z, h_bar, self.h)
        else:
            h_new = z * h_bar
        self.h = h_new
        return self.h
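Here linear_interpolate.linear_interpolate(p, x, y) computes p * x + (1 - p) * y elementwise, so linear_interpolate(z, h_bar, self.h) is the same update as the explicit z * h_bar + (1 - z) * h written out in other examples.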
Example #32
def _gru(x, h, c, w, b):
    xw = concat.concat([w[0], w[1], w[2]], axis=0)
    hw = concat.concat([w[3], w[4], w[5]], axis=0)
    xb = concat.concat([b[0], b[1], b[2]], axis=0)
    hb = concat.concat([b[3], b[4], b[5]], axis=0)

    gru_x = linear.linear(x, xw, xb)
    gru_h = linear.linear(h, hw, hb)

    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

    r = sigmoid.sigmoid(W_r_x + U_r_h)
    z = sigmoid.sigmoid(W_z_x + U_z_h)
    h_bar = tanh.tanh(W_x + r * U_x)
    return (1 - z) * h_bar + z * h, None
Example #33
    def forward(self, x):
        z = self.W_z(x)
        h_bar = self.W(x)
        if self.h is not None:
            r = sigmoid.sigmoid(self.W_r(x) + self.U_r(self.h))
            z += self.U_z(self.h)
            h_bar += self.U(r * self.h)
        z = sigmoid.sigmoid(z)
        h_bar = tanh.tanh(h_bar)

        if self.h is not None:
            h_new = linear_interpolate.linear_interpolate(z, h_bar, self.h)
        else:
            h_new = z * h_bar
        self.h = h_new
        return self.h
Example #34
    def __call__(self, x):
        z = self.W_z(x)
        h_bar = self.W(x)
        if self.h is not None:
            r = hard_sigmoid.hard_sigmoid(self.W_r(x) + self.U_r(self.h))
            z += self.U_z(self.h)
            h_bar += self.U(r * self.h)  # this may differ by version
        z = hard_sigmoid.hard_sigmoid(z)
        h_bar = tanh.tanh(h_bar)

        if self.h is not None:
            h_new = linear_interpolate.linear_interpolate(z, self.h, h_bar)  #(z, h_bar, self.h)
        else:
            h_new = (1 - z) * h_bar
        self.h = h_new
        
        return self.h
Example #35
 def _call_mgu(self, h, x):
     f = sigmoid.sigmoid(self.W_f(concat.concat([h, x])))
     h_bar = tanh.tanh(self.W_h(concat.concat([f * h, x])))
     h_new = linear_interpolate.linear_interpolate(f, h_bar, h)
     return h_new
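The minimal gated unit (MGU) above uses a single forget gate both to reset the history and to interpolate the new state. A compact NumPy sketch with illustrative weight names:

import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def mgu_step(h, x, W_f, W_h):
    f = np_sigmoid(np.concatenate([h, x], axis=-1) @ W_f)       # forget/update gate
    h_bar = np.tanh(np.concatenate([f * h, x], axis=-1) @ W_h)  # candidate state
    return f * h_bar + (1.0 - f) * h   # same as linear_interpolate(f, h_bar, h)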
Example #36
 def __call__(self, h, x):
     r = sigmoid.sigmoid(self.W_r(x) + self.U_r(h))
     z = sigmoid.sigmoid(self.W_z(x) + self.U_z(h))
     h_bar = tanh.tanh(self.W(x) + self.U(r * h))
     h_new = linear_interpolate.linear_interpolate(z, h_bar, h)
     return h_new
Example #37
 def __call__(self, h, x):
     r = sigmoid.sigmoid(self.W_r(x) + self.U_r(h))
     z = sigmoid.sigmoid(self.W_z(x) + self.U_z(h))
     h_bar = tanh.tanh(self.W(x) + self.U(r * h))
     h_new = (1 - z) * h + z * h_bar
     return h_new
Example #38
            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = h0[layer_idx]

                # h:d_bar_s_1
                # h_bar:d_s
                '''
                print(len(xs_list))
                print(len(xs_list[0]))
                print(len(xs_list[0][0]))
                '''
                h_list = []
                h_bar_list = []
                c_s_list = []
                z_s_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h

                    phi_d = linear.linear(h_bar, W2, B2)
                    '''
                    print(type(phi_ht), len(phi_ht))
                    print(type(phi_ht[0]), len(phi_ht[0]))
                    print(type(phi_ht[0][0]), len(phi_ht[0][0]))
                    
                    print(type(phi_d), len(phi_d))
                    print(type(phi_d[0]), len(phi_d[0]), phi_d[0].shape)
                    '''
                    #phi_ht_len = [t.shape[1] for t in phi_ht]
                    #phi_ht_section = np.cumsum(phi_ht_len[:-1])
                    #concat_phi_ht  = F.concat(phi_ht, axis=1)
                    #concat_phi_d = [F.concat([phi_d[i]]*phi_ht_len[i], axis=0) for i in range(batch)]
                    #concat_phi_d = F.concat(concat_phi_d, axis=0)
                    #concat_phi_d = F.concat(F.transpose(phi_d), axis=0)

                    u_st = list(
                        map(
                            lambda x, y: reshape.reshape((linear.linear(
                                x, reshape.reshape(y, (1, len(y))))),
                                                         (len(x), )), phi_ht,
                            phi_d))  #(4)

                    sum_u = list(map(F.sum, u_st))
                    alpha_st = list(
                        map(lambda x, y: x / F.broadcast_to(y, x.shape), u_st,
                            sum_u))  #(3)
                    z_s = list(map(F.argmax, alpha_st))
                    z_s = list(map(lambda x: F.broadcast_to(x, (1, )), z_s))
                    z_s = F.concat(z_s, axis=0)
                    '''
                    print(type(alpha_st),len(alpha_st))
                    print(type(alpha_st[0]),len(alpha_st[0]))
                    
                    print(alpha_st[0].shape)
                    print(ht[0].shape)
                    '''
                    c_s = list(
                        map(
                            lambda x, y: F.sum(F.broadcast_to(
                                reshape.reshape(x,
                                                (x.shape[0], 1)), y.shape) * y,
                                               axis=0), alpha_st, ht))  #(2)

                    c_s_2d = list(
                        map(lambda x: reshape.reshape(x, (1, len(x))), c_s))
                    concat_c_s = F.concat(c_s_2d, axis=0)

                    c_s = list(
                        map(lambda x: F.broadcast_to(x, (1, len(x))), c_s))
                    c_s = F.concat(c_s, axis=0)
                    '''
                    print(type(c_s), len(c_s))
                    print(type(c_s[0]), len(c_s[0]), c_s[0].shape)
                    '''
                    h = F.relu(
                        linear.linear(F.concat([concat_c_s, h_bar], axis=1),
                                      W3, B3))

                    h_list.append(h)
                    h_bar_list.append(h_bar)
                    c_s_list.append(c_s)
                    z_s_list.append(z_s)

                    # handle differing sequence lengths (restore the full batch)
                    if h_rest is not None:
                        h = concat.concat([h, h_rest], axis=0)
                        h_bar = concat.concat([h_bar, h_rest], axis=0)

                return h_list, h_bar_list, c_s_list, z_s_list
Example #39
def compute_output(z_x, z_h, h_x, h, hh):
    z = sigmoid.sigmoid(z_x + z_h)
    h_bar = tanh.tanh(h_x + hh)
    h_new = (1 - z) * h + z * h_bar
    return h_new
Example #40
 def forward(self, h, x):
     r = sigmoid.sigmoid(self.W_r(x) + self.U_r(h))
     z = sigmoid.sigmoid(self.W_z(x) + self.U_z(h))
     h_bar = tanh.tanh(self.W(x) + self.U(r * h))
     h_new = linear_interpolate.linear_interpolate(z, h_bar, h)
     return h_new
Example #41
 def __call__(self, h, x):
     r = sigmoid.sigmoid(self.W_r(x) + self.U_r(h))
     z = sigmoid.sigmoid(self.W_z(x) + self.U_z(h))
     h_bar = tanh.tanh(self.W(x) + self.U(r * h))
     h_new = (1 - z) * h + z * h_bar
     return h_new
Example #42
 def _call_mgu(self, h, x):
     f = sigmoid.sigmoid(self.W_f(concat.concat([h, x])))
     h_bar = tanh.tanh(self.W_h(concat.concat([f * h, x])))
     h_new = linear_interpolate.linear_interpolate(f, h_bar, h)
     return h_new