Example #1
 def functions(self):
     return collections.OrderedDict([
         ('conv1_1', [self.conv1_1, relu]),
         ('conv1_2', [self.conv1_2, relu]),
         ('pool1', [_max_pooling_2d]),
         ('conv2_1', [self.conv2_1, relu]),
         ('conv2_2', [self.conv2_2, relu]),
         ('pool2', [_max_pooling_2d]),
         ('conv3_1', [self.conv3_1, relu]),
         ('conv3_2', [self.conv3_2, relu]),
         ('conv3_3', [self.conv3_3, relu]),
         ('conv3_4', [self.conv3_4, relu]),
         ('pool3', [_max_pooling_2d]),
         ('conv4_1', [self.conv4_1, relu]),
         ('conv4_2', [self.conv4_2, relu]),
         ('conv4_3', [self.conv4_3, relu]),
         ('conv4_4', [self.conv4_4, relu]),
         ('pool4', [_max_pooling_2d]),
         ('conv5_1', [self.conv5_1, relu]),
         ('conv5_2', [self.conv5_2, relu]),
         ('conv5_3', [self.conv5_3, relu]),
         ('conv5_4', [self.conv5_4, relu]),
         ('pool5', [_max_pooling_2d]),
         # ('fc6', [self.fc6, relu, dropout]),
         ('fc6', [self.fc6, relu, lambda x: dropout(x, ratio=0.0)]),
         # ('fc7', [self.fc7, relu, dropout]),
         ('fc7', [self.fc7, relu, lambda x: dropout(x, ratio=0.0)]),
         ('fc8', [self.fc8]),
         ('prob', [softmax]),
     ])
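The OrderedDict maps each stage name to the list of callables that implement
it, so a caller can run the network stage by stage and stop at any named
layer. A minimal driver sketch (hypothetical; the helper ``extract`` and its
``layer`` argument are not part of the original project):

 def extract(model, x, layer='fc7'):
     # run the input through each named stage in order, stopping once
     # the requested layer has been computed
     h = x
     for name, funcs in model.functions().items():
         for f in funcs:
             h = f(h)
         if name == layer:
             return h
     raise ValueError('unknown layer: {}'.format(layer))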
Example #2
    def __call__(self, x):
        h = x

        h = relu(self.conv1_1(h))
        h = relu(self.conv1_2(h))
        h = _max_pooling_2d(h)

        h = relu(self.conv2_1(h))
        h = relu(self.conv2_2(h))
        h = _max_pooling_2d(h)

        h = relu(self.conv3_1(h))
        h = relu(self.conv3_2(h))
        h = relu(self.conv3_3(h))
        h = _max_pooling_2d(h)

        h = relu(self.conv4_1(h))
        h = relu(self.conv4_2(h))
        h = relu(self.conv4_3(h))
        h = _max_pooling_2d(h)

        h = relu(self.conv5_1(h))
        h = relu(self.conv5_2(h))
        h = relu(self.bn1(self.conv5_3(h)))
        h = _max_pooling_2d(h)

        h = dropout(relu(self.bn2(self.fc6(h))))
        h = dropout(relu(self.bn3(self.fc7(h))))
        h = self.fc8(h)

        return h
Example #3
 def feed_lstm(self, word, embed_layer, lstm_layer_list, train):
     # get embedding for word
     embed_id = N.dropout(embed_layer(word),
                          ratio=DROPOUT_RATIO,
                          train=train)
     # feed into first LSTM layer
     hs = N.dropout(self[lstm_layer_list[0]](embed_id),
                    ratio=DROPOUT_RATIO,
                    train=train)
     # feed into remaining LSTM layers
     for lstm_layer in lstm_layer_list[1:]:
         hs = N.dropout(self[lstm_layer](hs),
                        ratio=DROPOUT_RATIO,
                        train=train)
Example #4
    def __call__(self, x, **kwargs):
        """Applies the lstm layer.

        Args:
            x (~chainer.Variable): Time-Batch of input vectors.

        Returns:
            ~chainer.Variable: Output of the LSTM layer.

        """

        dropout_rate = kwargs.get('dropout', 0.)
        dropout_rate_hidden_hidden = kwargs.get('dropout_hidden_hidden', 0.)
        x = dropout(x, dropout_rate)
        lstm_in = sequence_linear_function(x, self.W_x, self.b)
        if self.normalized:
            lstm_in = sequence_batch_normalization_function(
                lstm_in, self.gamma, self.beta)
        if self.stateful:
            c_prev = self.c_prev
            h_prev = self.h_prev
        else:
            c_prev = None
            h_prev = None
        lstm_out, self.h_prev, self.c_prev = \
            sequence_lstm_function(lstm_in, self.W_h, c_prev, h_prev,
                                   self.reverse, dropout_rate_hidden_hidden)
        return lstm_out
Example #5
            def _one_directional_loop(di):
                # di=0, forward RNN
                # di=1, backward RNN
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    rnn_in = (
                        linear.linear(x, xws[layer_idx], xbs[layer_idx]) +
                        linear.linear(h, hws[layer_idx], hbs[layer_idx]))
                    if activation == 'tanh':
                        h_bar = tanh.tanh(rnn_in)
                    elif activation == 'relu':
                        h_bar = relu.relu(rnn_in)

                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #6
            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #7
    def __call__(self, x, **kwargs):
        """Applies the lstm layer.

        Args:
            x (~chainer.Variable): Time-Batch of input vectors.

        Returns:
            ~chainer.Variable: Output of the LSTM layer.

        """

        dropout_rate = kwargs.get('dropout', 0.)
        dropout_rate_hidden_hidden = kwargs.get('dropout_hidden_hidden', 0.)
        x = dropout(x, dropout_rate)
        lstm_in = sequence_linear_function(x, self.W_x, self.b)
        if self.normalized:
            lstm_in = sequence_batch_normalization_function(lstm_in, self.gamma,
                                                            self.beta)
        if self.stateful:
            c_prev = self.c_prev
            h_prev = self.h_prev
        else:
            c_prev = None
            h_prev = None
        lstm_out, self.h_prev, self.c_prev = \
            sequence_lstm_function(lstm_in, self.W_h, c_prev, h_prev,
                                   self.reverse, dropout_rate_hidden_hidden)
        return lstm_out
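Because both dropout rates are read from ``**kwargs`` with a default of
``0.``, dropout is off unless the caller opts in. A hypothetical usage
sketch, assuming ``layer`` is an instance of this class:

    y_train = layer(x, dropout=0.2, dropout_hidden_hidden=0.1)  # with dropout
    y_eval = layer(x)  # both rates default to 0., i.e. no dropout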
Example #8
            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #9
            def _one_directional_loop(di):
                # di=0, forward RNN
                # di=1, backward RNN
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    rnn_in = (
                        linear.linear(x, xws[layer_idx], xbs[layer_idx]) +
                        linear.linear(h, hws[layer_idx], hbs[layer_idx]))
                    if activation == 'tanh':
                        h_bar = tanh.tanh(rnn_in)
                    elif activation == 'relu':
                        h_bar = relu.relu(rnn_in)

                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list
Example #10
 def __call__(self, frame, prev_word, state, dropout_flag, dropout_ratio):
     i1 = self.xi1(dropout(frame, dropout_ratio, dropout_flag))
     c1, h1 = lstm(state['c1'], self.ih1(i1) + self.hh1(state['h1']))
     i2 = self.xi2(prev_word)
     concat = array.concat.concat((i2, h1))
     c2, h2 = lstm(state['c2'], self.ih2(concat) + self.hh2(state['h2']))
     state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
     return state
Example #11
 def __call__(self, x):
     x = self.embed(x)
     xs = split_axis.split_axis(x, x.data.shape[1], 1)
     ret = []
     for x in xs:
         for l in self.rnns:
             x = l(x)
             x = dropout.dropout(x, 0.25, self.train)
         for l in self.linears:
             x = l(x)
         x = reshape.reshape(x, x.data.shape + (-1, ))
         ret.append(x)
     ret = concat.concat(ret, axis=2)
     return ret
Example #12
File: Models.py Project: ebsrn/CORE
 def ASPP(x):
     # image-level branch: globally pooled features tiled back to the
     # spatial size of x
     y = [
         F.tile(
             self.ASPP[0](F.average_pooling_2d(x, ksize=x.shape[-2:])),
             x.shape[-2:])
     ]
     # remaining parallel ASPP branches applied directly to x
     y.extend([self.ASPP[i](x) for i in range(1, len(self.ASPP) - 1)])
     y = F.concat(y, axis=1)
     y = dropout.dropout(y, ratio=0.5)
     # final projection layer
     y = self.ASPP[-1](y)
     return y
Example #13
 def __init__(self, pretrained_model='auto'):
     super(GoogLeNet, self).__init__(
         conv1=Convolution2D(3, 64, 7, stride=2, pad=3),
         conv2_reduce=Convolution2D(64, 64, 1),
         conv2=Convolution2D(64, 192, 3, stride=1, pad=1),
         inc3a=Inception(192, 64, 96, 128, 16, 32, 32),
         inc3b=Inception(256, 128, 128, 192, 32, 96, 64),
         inc4a=Inception(480, 192, 96, 208, 16, 48, 64),
         inc4b=Inception(512, 160, 112, 224, 24, 64, 64),
         inc4c=Inception(512, 128, 128, 256, 24, 64, 64),
         inc4d=Inception(512, 112, 144, 288, 32, 64, 64),
         inc4e=Inception(528, 256, 160, 320, 32, 128, 128),
         inc5a=Inception(832, 256, 160, 320, 32, 128, 128),
         inc5b=Inception(832, 384, 192, 384, 48, 128, 128),
         loss3_fc=Linear(1024, 1000),
         loss1_conv=Convolution2D(512, 128, 1),
         loss1_fc1=Linear(4 * 4 * 128, 1024),
         loss1_fc2=Linear(1024, 1000),
         loss2_conv=Convolution2D(528, 128, 1),
         loss2_fc1=Linear(4 * 4 * 128, 1024),
         loss2_fc2=Linear(1024, 1000),
     )
     if pretrained_model == 'auto':
         _retrieve(
             'bvlc_googlenet.npz',
             'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel',
             self)
     elif pretrained_model:
         npz.load_npz(pretrained_model, self)
     self.functions = OrderedDict([
         ('conv1', [self.conv1, relu]),
         ('pool1', [
             lambda x: max_pooling_2d(x, ksize=3, stride=2),
             lambda x: local_response_normalization(x, n=5)
         ]), ('conv2_reduce', [self.conv2_reduce, relu]),
         ('conv2', [self.conv2, relu]),
         ('pool2', [
             lambda x: local_response_normalization(x, n=5),
             lambda x: max_pooling_2d(x, ksize=3, stride=2)
         ]), ('inc3a', [self.inc3a]), ('inc3b', [self.inc3b]),
         ('pool3', [lambda x: max_pooling_2d(x, ksize=3, stride=2)]),
         ('inc4a', [self.inc4a]), ('inc4b', [self.inc4b]),
         ('inc4c', [self.inc4c]), ('inc4d', [self.inc4d]),
         ('inc4e', [self.inc4e]),
         ('pool4', [lambda x: max_pooling_2d(x, ksize=3, stride=2)]),
         ('inc5a', [self.inc5a]), ('inc5b', [self.inc5b]),
         ('pool6', [lambda x: average_pooling_2d(x, ksize=7, stride=1)]),
         ('prob', [lambda x: dropout(x, ratio=0.4), self.loss3_fc])
     ])
Example #14
    def __call__(self, x, **kwargs):
        """Applies the linear layer.

        Args:
            x (~chainer.Variable): Time-Batch of input vectors.

        Returns:
            ~chainer.Variable: Output of the linear layer.

        """

        dropout_rate = kwargs.get('dropout', 0.)
        x = dropout(x, dropout_rate)
        x = sequence_linear_function(x, self.W, self.b)
        if self.normalized:
            x = sequence_batch_normalization_function(x, self.gamma, self.beta)
        return x
Example #15
    def __call__(self, x, **kwargs):
        """Applies the linear layer.

        Args:
            x (~chainer.Variable): Time-Batch of input vectors.

        Returns:
            ~chainer.Variable: Output of the linear layer.

        """

        dropout_rate = kwargs.get('dropout', 0.)
        x = dropout(x, dropout_rate)
        x = sequence_linear_function(x, self.W, self.b)
        if self.normalized:
            x = sequence_batch_normalization_function(x, self.gamma, self.beta)
        return x
Example #16
            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                counter = 0
                for x in xs_list:
                    counter += 1
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)
                    if counter == 4:
                        # on the fourth time step only the input-to-hidden
                        # projection is used; the hidden-to-hidden term is
                        # skipped
                        lstm_in = linear.linear(x, xws[layer_idx],
                                                xbs[layer_idx])
                    else:
                        lstm_in = linear.linear(
                            x, xws[layer_idx], xbs[layer_idx]) + linear.linear(
                                h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list
Example #17
            def _one_directional_loop(di):
                # di=0, forward LSTM
                # di=1, backward LSTM
                h_list = []
                c_list = []
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                c = cx[layer_idx]
                if di == 0:
                    xs_list = xs_next
                else:
                    xs_list = reversed(xs_next)
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                        c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                    else:
                        h_rest = None
                        c_rest = None

                    if layer != 0:
                        x = dropout.dropout(x, ratio=dropout_ratio,
                                            train=train)
                    lstm_in = linear.linear(x, xws[layer_idx],
                                            xbs[layer_idx]) + \
                        linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    c_bar, h_bar = lstm.lstm(c, lstm_in)
                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                        c = concat.concat([c_bar, c_rest], axis=0)
                    else:
                        h = h_bar
                        c = c_bar
                    h_list.append(h_bar)
                    c_list.append(c_bar)
                return h, c, h_list, c_list
Example #18
 def classifier(x, train):
     x = A.average_pooling_2d(x, 8)
     x = dropout.dropout(x, train=train)
     x = self.linear(x)
     return x
Example #19
def _dropout_sequence(xs, dropout_ratio):
    return [dropout.dropout(x, ratio=dropout_ratio) for x in xs]
Example #20
def _dropout(x):
    return dropout(x, ratio=0.4)
Example #21
            def _one_directional_loop(di):
                # di=0, forward GRU
                # di=1, backward GRU
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = h0[layer_idx]

                # h:d_bar_s_1
                # h_bar:d_s
                h_list = []
                h_bar_list = []
                c_s_list = []
                z_s_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
                    gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

                    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
                    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

                    r = sigmoid.sigmoid(W_r_x + U_r_h)
                    z = sigmoid.sigmoid(W_z_x + U_z_h)
                    h_bar = tanh.tanh(W_x + r * U_x)
                    h_bar = (1 - z) * h_bar + z * h

                    phi_d = linear.linear(h_bar, W2, B2)

                    u_st = list(
                        map(
                            lambda x, y: reshape.reshape((linear.linear(
                                x, reshape.reshape(y, (1, len(y))))),
                                                         (len(x), )), phi_ht,
                            phi_d))  #(4)

                    sum_u = list(map(F.sum, u_st))
                    alpha_st = list(
                        map(lambda x, y: x / F.broadcast_to(y, x.shape), u_st,
                            sum_u))  #(3)
                    z_s = list(map(F.argmax, alpha_st))
                    z_s = list(map(lambda x: F.broadcast_to(x, (1, )), z_s))
                    z_s = F.concat(z_s, axis=0)
                    c_s = list(
                        map(
                            lambda x, y: F.sum(F.broadcast_to(
                                reshape.reshape(x,
                                                (x.shape[0], 1)), y.shape) * y,
                                               axis=0), alpha_st, ht))  #(2)

                    c_s_2d = list(
                        map(lambda x: reshape.reshape(x, (1, len(x))), c_s))
                    concat_c_s = F.concat(c_s_2d, axis=0)

                    c_s = list(
                        map(lambda x: F.broadcast_to(x, (1, len(x))), c_s))
                    c_s = F.concat(c_s, axis=0)
                    h = F.relu(
                        linear.linear(F.concat([concat_c_s, h_bar], axis=1),
                                      W3, B3))

                    h_list.append(h)
                    h_bar_list.append(h_bar)
                    c_s_list.append(c_s)
                    z_s_list.append(z_s)

                    # preserve entries for sequences of different lengths
                    if h_rest is not None:
                        h = concat.concat([h, h_rest], axis=0)
                        h_bar = concat.concat([h_bar, h_rest], axis=0)

                return h_list, h_bar_list, c_s_list, z_s_list
Example #22
def _dropout(x, train):
    return dropout(x, ratio=0.4, train=train)
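The ``train`` flag in wrappers like this one belongs to the Chainer v1 API.
From Chainer v2 on, ``dropout`` no longer takes a ``train`` argument; the
switch moved to the global configuration. A v2-style sketch of the same
wrapper (assuming ``chainer`` and ``dropout`` are imported as in the
surrounding examples):

def _dropout_v2(x):
    # Chainer v2+: dropout consults chainer.config.train internally,
    # so only the ratio is passed explicitly
    return dropout(x, ratio=0.4)

# at inference time, disable dropout through the config scope:
#     with chainer.using_config('train', False):
#         y = _dropout_v2(x)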
Example #23
def n_step_lstm(n_layers,
                dropout_ratio,
                hx,
                cx,
                ws,
                bs,
                xs,
                train=True,
                use_cudnn=True):
    """Stacked Long Short-Term Memory function for sequence inputs.

    This function calculates stacked LSTM with sequences. This function gets
    an initial hidden state :math:`h_0`, an initial cell state :math:`c_0`,
    an input sequence :math:`x`, weight matrices :math:`W`, and bias vectors
    :math:`b`.
    This function calculates hidden states :math:`h_t` and :math:`c_t` for each
    time :math:`t` from input :math:`x_t`.

    .. math::

       i_t &= \\sigma(W_0 x_t + W_4 h_{t-1} + b_0 + b_4) \\\\
       f_t &= \\sigma(W_1 x_t + W_5 h_{t-1} + b_1 + b_5) \\\\
       o_t &= \\sigma(W_2 x_t + W_6 h_{t-1} + b_2 + b_6) \\\\
       a_t &= \\tanh(W_3 x_t + W_7 h_{t-1} + b_3 + b_7) \\\\
       c_t &= f_t \\cdot c_{t-1} + i_t \\cdot a_t \\\\
       h_t &= o_t \\cdot \\tanh(c_t)

    As the function accepts a sequence, it calculates :math:`h_t` for all
    :math:`t` with one call. Eight weight matrices and eight bias vectors are
    required for each layer. So, when :math:`S` layers exist, you need to
    prepare :math:`8S` weight matrices and :math:`8S` bias vectors.

    If the number of layers ``n_layers`` is greater than :math:`1`, the input
    of the ``k``-th layer is the hidden state ``h_t`` of the ``(k-1)``-th
    layer. Note that all input variables except those of the first layer may
    have a different shape from the first layer's inputs.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers
            and equals ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only the ``ws[0][j]`` where ``0 <= j < 4`` have shape ``(I, N)``,
            as they are multiplied with input variables. All other matrices
            have shape ``(N, N)``.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the
            dimension of the hidden units.
        xs (list of chainer.Variable): A list of :class:`chainer.Variable`
            holding input values. Each element ``xs[t]`` holds the input
            value for time ``t``. Its shape is ``(B_t, I)``, where ``B_t``
            is the mini-batch size for time ``t``, and ``I`` is the size of
            the input units. Note that this function supports variable-length
            sequences. When sequences have different lengths, sort them in
            descending order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable` objects holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        train (bool): If ``True``, this function executes dropout.
        use_cudnn (bool): If ``True``, this function uses cuDNN if available.

    Returns:
        tuple: This function returns a tuple containing three elements,
            ``hy``, ``cy`` and ``ys``.

            - ``hy`` is the updated hidden states, whose shape is the same
              as that of ``hx``.
            - ``cy`` is the updated cell states, whose shape is the same as
              that of ``cx``.
            - ``ys`` is a list of :class:`~chainer.Variable`. Each element
              ``ys[t]`` holds the hidden states of the last layer
              corresponding to input ``xs[t]``. Its shape is ``(B_t, N)``
              where ``B_t`` is the mini-batch size for time ``t``, and ``N``
              is the size of the hidden units. Note that ``B_t`` equals the
              mini-batch size of ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.lstm`

    """

    xp = cuda.get_array_module(hx, hx.data)

    if use_cudnn and xp is not numpy and cuda.cudnn_enabled and \
       _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(
            itertools.chain((hx, cx), itertools.chain.from_iterable(ws),
                            itertools.chain.from_iterable(bs), xs))
        rnn = NStepLSTM(n_layers, states, train=train)
        ret = rnn(*inputs)
        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        hx = split_axis.split_axis(hx, n_layers, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, n_layers, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        ys = []
        for x in xs:
            batch = x.shape[0]
            h_next = []
            c_next = []
            for layer in six.moves.range(n_layers):
                h = hx[layer]
                c = cx[layer]
                if h.shape[0] > batch:
                    h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                else:
                    h_rest = None
                    c_rest = None

                x = dropout.dropout(x, ratio=dropout_ratio, train=train)
                h = dropout.dropout(h, ratio=dropout_ratio, train=train)
                lstm_in = linear.linear(x, xws[layer], xbs[layer]) + \
                    linear.linear(h, hws[layer], hbs[layer])

                c_bar, h_bar = lstm.lstm(c, lstm_in)
                if h_rest is not None:
                    h = concat.concat([h_bar, h_rest], axis=0)
                    c = concat.concat([c_bar, c_rest], axis=0)
                else:
                    h = h_bar
                    c = c_bar
                h_next.append(h)
                c_next.append(c)
                x = h_bar
            hx = h_next
            cx = c_next
            ys.append(x)

        hy = stack.stack(hx)
        cy = stack.stack(cx)
        return hy, cy, tuple(ys)
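The variable-length convention on ``xs`` is the easiest part of this API to
get wrong. A small hedged sketch of preparing ``xs`` with
:func:`~chainer.functions.transpose_sequence`; the sequence lengths and
feature size below are made up:

    import numpy
    from chainer import Variable
    import chainer.functions as F

    # three sequences, already sorted by descending length
    seqs = [Variable(numpy.random.randn(n, 3).astype('f'))
            for n in (5, 4, 2)]
    # xs[t] then has shape (B_t, I) with B_t non-increasing over t
    xs = F.transpose_sequence(seqs)
    assert all(a.shape[0] >= b.shape[0] for a, b in zip(xs, xs[1:]))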
Example #24
0
def n_step_lstm(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, train=True,
        use_cudnn=True):
    """Stacked Long Short-Term Memory function for sequence inputs.

    This function calculates stacked LSTM with sequences. This function gets
    an initial hidden state :math:`h_0`, an initial cell state :math:`c_0`,
    an input sequence :math:`x`, weight matrices :math:`W`, and bias vectors
    :math:`b`.
    This function calculates hidden states :math:`h_t` and :math:`c_t` for each
    time :math:`t` from input :math:`x_t`.

    .. math::

       i_t &= \\sigma(W_0 x_t + W_4 h_{t-1} + b_0 + b_4) \\\\
       f_t &= \\sigma(W_1 x_t + W_5 h_{t-1} + b_1 + b_5) \\\\
       o_t &= \\sigma(W_2 x_t + W_6 h_{t-1} + b_2 + b_6) \\\\
       a_t &= \\tanh(W_3 x_t + W_7 h_{t-1} + b_3 + b_7) \\\\
       c_t &= f_t \\cdot c_{t-1} + i_t \\cdot a_t \\\\
       h_t &= o_t \\cdot \\tanh(c_t)

    As the function accepts a sequence, it calculates :math:`h_t` for all
    :math:`t` with one call. Eight weight matrices and eight bias vectors are
    required for each layer. So, when :math:`S` layers exist, you need to
    prepare :math:`8S` weight matrices and :math:`8S` bias vectors.

    If the number of layers ``n_layers`` is greater than :math:`1`, the input
    of the ``k``-th layer is the hidden state ``h_t`` of the ``(k-1)``-th
    layer. Note that all input variables except those of the first layer may
    have a different shape from the first layer's inputs.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers
            and equals ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing eight matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only the ``ws[0][j]`` where ``0 <= j < 4`` have shape ``(I, N)``,
            as they are multiplied with input variables. All other matrices
            have shape ``(N, N)``.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing eight vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)`` where ``N`` is the
            dimension of the hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds the input
            value for time ``t``. Its shape is ``(B_t, I)``, where ``B_t``
            is the mini-batch size for time ``t``, and ``I`` is the size of
            the input units. Note that this function supports variable-length
            sequences. When sequences have different lengths, sort them in
            descending order by length and transpose the sorted sequences.
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable` objects holding sequences.
            So ``xs`` needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        train (bool): If ``True``, this function executes dropout.
        use_cudnn (bool): If ``True``, this function uses cuDNN if available.

    Returns:
        tuple: This function returns a tuple containing three elements,
            ``hy``, ``cy`` and ``ys``.

            - ``hy`` is the updated hidden states, whose shape is the same
              as that of ``hx``.
            - ``cy`` is the updated cell states, whose shape is the same as
              that of ``cx``.
            - ``ys`` is a list of :class:`~chainer.Variable`. Each element
              ``ys[t]`` holds the hidden states of the last layer
              corresponding to input ``xs[t]``. Its shape is ``(B_t, N)``
              where ``B_t`` is the mini-batch size for time ``t``, and ``N``
              is the size of the hidden units. Note that ``B_t`` equals the
              mini-batch size of ``xs[t]``.

    .. seealso::

       :func:`chainer.functions.lstm`

    """

    xp = cuda.get_array_module(hx, hx.data)

    if use_cudnn and xp is not numpy and cuda.cudnn_enabled and \
       _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, cx),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        rnn = NStepLSTM(n_layers, states, train=train)
        ret = rnn(*inputs)
        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        hx = split_axis.split_axis(hx, n_layers, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, n_layers, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        ys = []
        for x in xs:
            batch = x.shape[0]
            h_next = []
            c_next = []
            for layer in six.moves.range(n_layers):
                h = hx[layer]
                c = cx[layer]
                if h.shape[0] > batch:
                    h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                else:
                    h_rest = None
                    c_rest = None

                x = dropout.dropout(x, ratio=dropout_ratio, train=train)
                h = dropout.dropout(h, ratio=dropout_ratio, train=train)
                lstm_in = linear.linear(x, xws[layer], xbs[layer]) + \
                    linear.linear(h, hws[layer], hbs[layer])

                c_bar, h_bar = lstm.lstm(c, lstm_in)
                if h_rest is not None:
                    h = concat.concat([h_bar, h_rest], axis=0)
                    c = concat.concat([c_bar, c_rest], axis=0)
                else:
                    h = h_bar
                    c = c_bar
                h_next.append(h)
                c_next.append(c)
                x = h_bar
            hx = h_next
            cx = c_next
            ys.append(x)

        hy = stack.stack(hx)
        cy = stack.stack(cx)
        return hy, cy, tuple(ys)
Example #25
def _dropout(x):
    return dropout(x, ratio=0.4)
Example #26
def fixed_length_n_step_lstm(
    n_layers,
    dropout_ratio,
    hx,
    cx,
    ws,
    bs,
    xs,
    train=True,
):

    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and cuda.cudnn_enabled and _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(
            itertools.chain((hx, cx), itertools.chain.from_iterable(ws),
                            itertools.chain.from_iterable(bs), (xs, )))
        rnn = FixedLengthNStepLSTMFunction(n_layers, states, train=train)
        ret = rnn(*inputs)
        hy, cy, ys = ret
        _, batch_size, dim = hy.shape
        ys_reshape = F.reshape(ys,
                               (-1, batch_size, dim))  # (length, batch, dim)
        return hy, cy, ys_reshape

    else:
        hx = split_axis.split_axis(hx, n_layers, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, n_layers, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        ys = []
        for x in xs:
            batch = x.shape[0]
            h_next = []
            c_next = []
            for layer in six.moves.range(n_layers):
                h = hx[layer]
                c = cx[layer]
                if h.shape[0] > batch:
                    h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                else:
                    h_rest = None
                    c_rest = None

                x = dropout.dropout(x, ratio=dropout_ratio)
                h = dropout.dropout(h, ratio=dropout_ratio)
                lstm_in = linear.linear(x, xws[layer], xbs[layer]) + \
                          linear.linear(h, hws[layer], hbs[layer])

                c_bar, h_bar = lstm.lstm(c, lstm_in)
                if h_rest is not None:
                    h = concat.concat([h_bar, h_rest], axis=0)
                    c = concat.concat([c_bar, c_rest], axis=0)
                else:
                    h = h_bar
                    c = c_bar
                h_next.append(h)
                c_next.append(c)
                x = h_bar
            hx = h_next
            cx = c_next
            ys.append(x)

        hy = stack.stack(hx)
        cy = stack.stack(cx)
        ys_concat = F.concat(ys, axis=0)
        ys_reshape = F.reshape(
            ys_concat,
            (-1, ys[0].shape[0], ys[0].shape[1]))  # (length, batch, dim)

        return hy, cy, ys_reshape
Example #27
def _dropout(x, train):
    return dropout(x, ratio=0.4, train=train)
Example #28
def _dropout_sequence(xs, dropout_ratio):
    return [dropout.dropout(x, ratio=dropout_ratio) for x in xs]