Python NStepGRU 예제들, chainer.links.NStepGRU Python 예제들

예제 #1

0

파일 보기

    def __init__(self, n_layers, n_vocab, n_genre, pretrained_w2v,
                 is_update_w2v, dropout, genre_units=5):
        """Set up the shared embedding layer and the two GRU encoders.

        Args:
            n_layers: Number of stacked layers in each ``NStepGRU``.
            n_vocab: Forwarded to ``BaseEmbeddingLayer`` as ``n_vocab``.
            n_genre: Forwarded to ``BaseEmbeddingLayer`` as ``n_genre``.
            pretrained_w2v: Forwarded to ``BaseEmbeddingLayer``.
            is_update_w2v: Forwarded to ``BaseEmbeddingLayer``.
            dropout: Dropout ratio given to the embedding layer and to
                both GRU encoders.
            genre_units: Forwarded to ``BaseEmbeddingLayer`` and counted
                in ``out_units``.
        """
        super(GRUEncoder, self).__init__()
        with self.init_scope():
            self.base_embedding_layer = BaseEmbeddingLayer(
                n_vocab=n_vocab, n_genre=n_genre, genre_units=genre_units,
                pretrained_w2v=pretrained_w2v, is_update_w2v=is_update_w2v,
                dropout=dropout)

            # Both encoders take and produce vectors of the embedding
            # layer's ``n_units`` width.
            self.title_encoder = L.NStepGRU(
                n_layers,
                self.base_embedding_layer.n_units,
                self.base_embedding_layer.n_units,
                dropout)
            self.content_encoder = L.NStepGRU(
                n_layers,
                self.base_embedding_layer.n_units,
                self.base_embedding_layer.n_units,
                dropout)

        # Twice the embedding width, plus genre_units and the constant
        # const.PREPROCESS_GENDER_TARGET_NUM.
        self.out_units = (self.base_embedding_layer.n_units * 2
                          + genre_units
                          + const.PREPROCESS_GENDER_TARGET_NUM)

        self.n_layers = n_layers
        self.dropout = dropout

예제 #2

0

파일 보기

    def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_embed,
                 n_units, n_latent, type_unit, word_dropout, denoising_rate):
        """Create the encoder/decoder GRUs and the latent projections.

        Args:
            n_layers: Number of stacked layers in both GRUs.
            n_source_vocab: Size of the source embedding table.
            n_target_vocab: Size of the target embedding table and of the
                output layer.
            n_embed: Embedding width (input size of both GRUs).
            n_units: Hidden size of both GRUs.
            n_latent: Output size of ``W_mu`` / ``W_ln_var`` and input
                size of ``W_h``.
            type_unit: Accepted but not referenced in this constructor.
            word_dropout: Stored on the instance.
            denoising_rate: Stored on the instance.
        """
        super(Seq2seq, self).__init__()

        # n_units * n_layers is the size used on the hidden-state side of
        # every latent projection.
        stacked_units = n_units * n_layers
        rnn_dropout = 0.5

        with self.init_scope():
            self.embed_x = L.EmbedID(n_source_vocab, n_embed)
            self.encoder = L.NStepGRU(n_layers, n_embed, n_units,
                                      rnn_dropout)
            self.W_mu = L.Linear(stacked_units, n_latent)
            self.W_ln_var = L.Linear(stacked_units, n_latent)

            self.W_h = L.Linear(n_latent, stacked_units)
            self.decoder = L.NStepGRU(n_layers, n_embed, n_units,
                                      rnn_dropout)
            self.W = L.Linear(n_units, n_target_vocab)
            self.embed_y = L.EmbedID(n_target_vocab, n_embed)

        # Plain (non-link) attributes.
        self.n_layers = n_layers
        self.n_units = n_units
        self.n_embed = n_embed
        self.n_latent = n_latent
        self.n_target_vocab = n_target_vocab
        self.word_dropout = word_dropout
        self.denoising_rate = denoising_rate
        # Initial values of C and k; their meaning is not visible here.
        self.C = 0
        self.k = 10

예제 #3

0

파일 보기

파일: ONT.py 프로젝트: s1230128/AutomateOntology

 def __init__(self, n_cell, size_hidden, rate_dropout):
     """Build two GRUs, a highway layer and a 4-output linear layer.

     Args:
         n_cell: Number of stacked layers in each ``NStepGRU``.
         size_hidden: Output size of each GRU.
         rate_dropout: Dropout ratio given to both GRUs; also kept on
             ``self.rate_dropout``.
     """
     super(ONT_GRU, self).__init__()
     self.rate_dropout = rate_dropout

     gru_input_size = 300  # fixed input width of both GRUs
     paired_size = size_hidden * 2  # width used by the highway/linear layers
     with self.init_scope():
         self.rnn_a = L.NStepGRU(n_cell, gru_input_size, size_hidden,
                                 rate_dropout)
         self.rnn_b = L.NStepGRU(n_cell, gru_input_size, size_hidden,
                                 rate_dropout)
         self.l1 = L.Highway(paired_size)
         self.l2 = L.Linear(paired_size, 4)

예제 #4

0

파일 보기

파일: GRU.py 프로젝트: xiaoshengjun/NANHM-for-GEC

 def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_units):
     """Register the embeddings, encoder/decoder GRUs and output layer.

     Args:
         n_layers: Number of stacked layers in both GRUs.
         n_source_vocab: Size of the source embedding table.
         n_target_vocab: Size of the target embedding table and of the
             output layer.
         n_units: Embedding width and GRU hidden size.
     """
     rnn_dropout = 0.1

     # Links are created in the same order in which they are registered.
     source_embed = L.EmbedID(n_source_vocab, n_units)
     target_embed = L.EmbedID(n_target_vocab, n_units)
     enc = L.NStepGRU(n_layers, n_units, n_units, rnn_dropout)
     dec = L.NStepGRU(n_layers, n_units, n_units, rnn_dropout)
     out_layer = L.Linear(n_units, n_target_vocab)

     super(Seq2seq, self).__init__(
         embed_x=source_embed,
         embed_y=target_embed,
         encoder=enc,
         decoder=dec,
         W=out_layer,
     )
     self.n_layers = n_layers
     self.n_units = n_units

예제 #5

0

파일 보기

파일: ima.py 프로젝트: nuric/softuni

 def __init__(self):
     """Create the embedding, two single-layer GRUs and the dense layers.

     Sizes come from the module-level names ``word2idx``, ``EMBED`` and
     ``DROPOUT``.
     """
     super().__init__()
     # Model parameters are registered inside the init scope.
     with self.init_scope():
         vocab_size = len(word2idx)
         half_embed = EMBED // 2
         self.embed = L.EmbedID(vocab_size, EMBED, ignore_label=0)
         self.pred_rnn = L.NStepGRU(1, EMBED, EMBED, DROPOUT)
         self.att_dense1 = L.Linear(5 * EMBED, half_embed)
         self.att_dense2 = L.Linear(half_embed, 1)
         self.unifier = L.NStepGRU(1, EMBED, EMBED, DROPOUT)
         self.out_linear = L.Linear(EMBED, 1)
     self.log = None

예제 #6

0

파일 보기

 def __init__(self, encoder_input_channels=5, decoder_input_channels=5,
              n_layers=3, hidden_units=1024, dropout=0.1):
     """Create the encoder GRU, decoder GRU and the output dense layer.

     Args:
         encoder_input_channels: Input size of the encoder GRU.
         decoder_input_channels: Input size of the decoder GRU.
         n_layers: Number of stacked layers in both GRUs.
         hidden_units: Hidden size of both GRUs and input size of the
             dense layer.
         dropout: Dropout ratio given to both GRUs.
     """
     super(GRUEncoderDecoder, self).__init__()
     with self.init_scope():
         self.encoder = L.NStepGRU(
             n_layers, encoder_input_channels, hidden_units, dropout)
         self.decoder = L.NStepGRU(
             n_layers, decoder_input_channels, hidden_units, dropout)
         # Maps each hidden vector to a single output value.
         self.decoder_dense = TimeDistributedDense(hidden_units, 1)

예제 #7

0

파일 보기

파일: Att_BiGRU.py 프로젝트: xiaoshengjun/NANHM-for-GEC

 def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_source_char,
              n_target_char, n_units):
     """Register word/char embeddings, two encoders, a decoder and ``W``.

     Args:
         n_layers: Number of stacked layers in every GRU.
         n_source_vocab: Size of the source word embedding table.
         n_target_vocab: Size of the target word embedding table and of
             the output layer.
         n_source_char: Size of the source character embedding table.
         n_target_char: Size of the target character embedding table.
         n_units: Encoder width; decoder-side links use twice this width.
     """
     rnn_dropout = 0.1
     wide_units = n_units * 2

     # Links are created in the same order in which they are registered.
     word_src = L.EmbedID(n_source_vocab, n_units)
     word_tgt = L.EmbedID(n_target_vocab, wide_units)
     char_src = L.EmbedID(n_source_char, n_units)
     char_tgt = L.EmbedID(n_target_char, wide_units)
     enc_fwd = L.NStepGRU(n_layers, n_units, n_units, rnn_dropout)
     enc_bwd = L.NStepGRU(n_layers, n_units, n_units, rnn_dropout)
     dec = My.NStepGRU(n_layers, wide_units, wide_units, rnn_dropout)
     out_layer = L.Linear(wide_units, n_target_vocab)

     super(Seq2seq, self).__init__(
         embed_x=word_src,
         embed_y=word_tgt,
         embed_xc=char_src,
         embed_yc=char_tgt,
         encoder_f=enc_fwd,
         encoder_b=enc_bwd,
         decoder=dec,
         W=out_layer,
     )
     self.n_layers = n_layers
     self.n_units = n_units

예제 #8

0

파일 보기

파일: test_stateless_recurrent_sequential.py 프로젝트: williamd4112/chainerrl

    def _test_three_recurrent_children(self, gpu):
        """Feed a recurrent state back into a three-RNN sequential link.

        Regression check for https://github.com/chainer/chainer/issues/6053.
        """
        in_size = 2
        out_size = 6

        rseq = StatelessRecurrentSequential(
            L.NStepLSTM(1, in_size, 3, 0),
            L.NStepGRU(2, 3, 4, 0),
            L.NStepRNNTanh(5, 4, out_size, 0),
        )

        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            rseq.to_gpu()
        xp = rseq.xp

        # Three sequences of lengths 4, 1 and 3.
        seqs_x = [
            xp.random.uniform(-1, 1, size=(length, in_size)).astype(np.float32)
            for length in (4, 1, 3)
        ]

        # Produce a recurrent state and feed it back in, to check that the
        # order of the states is correct, for each output mode.
        for mode in ('concat', 'split'):
            _, state = rseq.n_step_forward(seqs_x, None, output_mode=mode)
            rseq.n_step_forward(seqs_x, state, output_mode=mode)

예제 #9

0

파일 보기

    def setUp(self):
        """Create random inputs, states, gradients and the GRU under test."""
        lengths = self.lengths
        state_shape = (self.n_layer, len(lengths), self.out_size)

        def draw(shape):
            # float32 samples from uniform(-1, 1)
            return numpy.random.uniform(-1, 1, shape).astype('f')

        # Initial hidden state: zeros when hidden_none is set, random
        # otherwise.
        if self.hidden_none:
            self.h = numpy.zeros(state_shape, 'f')
        else:
            self.h = draw(state_shape)

        # One input array per sequence length.
        self.xs = [draw((length, self.in_size)) for length in lengths]

        # Gradients matching the state and per-sequence output shapes.
        self.gh = draw(state_shape)
        self.gys = [draw((length, self.out_size)) for length in lengths]

        self.rnn = links.NStepGRU(
            self.n_layer, self.in_size, self.out_size, self.dropout,
            use_cudnn=self.use_cudnn)

        # Overwrite every parameter in place with random values.
        for layer in self.rnn:
            for param in layer.params():
                param.data[...] = numpy.random.uniform(
                    -1, 1, param.data.shape)
        self.rnn.zerograds()

예제 #10

0

파일 보기

 def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_source_char,
              n_units):
     """Register word/char embeddings, four encoders, a decoder and ``W``.

     Args:
         n_layers: Number of stacked layers in every GRU.
         n_source_vocab: Size of the source word embedding table.
         n_target_vocab: Size of the target embedding table and of the
             output layer.
         n_source_char: Size of the source character embedding table.
         n_units: Encoder width; decoder-side links use twice this width.
     """
     rnn_dropout = 0.1
     wide_units = n_units * 2

     def make_encoder():
         # Every encoder shares the same configuration.
         return L.NStepGRU(n_layers, n_units, n_units, rnn_dropout)

     # Links are created in the same order in which they are registered.
     word_src = L.EmbedID(n_source_vocab, n_units)
     char_src = L.EmbedID(n_source_char, n_units)
     word_tgt = L.EmbedID(n_target_vocab, wide_units)
     enc_fw = make_encoder()
     enc_bw = make_encoder()
     enc_fc = make_encoder()
     enc_bc = make_encoder()
     dec = My.NStepGRU(n_layers, wide_units, wide_units, rnn_dropout)
     out_layer = L.Linear(wide_units, n_target_vocab)

     super(Seq2seq, self).__init__(
         embed_xw=word_src,
         embed_xc=char_src,
         embed_y=word_tgt,
         encoder_fw=enc_fw,
         encoder_bw=enc_bw,
         encoder_fc=enc_fc,
         encoder_bc=enc_bc,
         decoder=dec,
         W=out_layer,
     )
     self.n_layers = n_layers
     self.n_units = n_units
     self.n_params = 5

예제 #11

0

파일 보기

파일: train_imdb.py 프로젝트: exbracer/imdb

	def __init__(self, maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN):
		"""Create the embedding, a one-layer GRU and a 2-output linear layer.

		Args:
			maxf: Size of the embedding table.
			edim: Embedding width and GRU input size.
			nhid: GRU hidden size and input size of the output layer.
		"""
		super(SymbolModule, self).__init__()
		with self.init_scope():
			# TODO (placeholder carried over from the original code)
			self.embedding = L.EmbedID(in_size=maxf, out_size=edim)
			self.gru = L.NStepGRU(
				n_layers=1, in_size=edim, out_size=nhid, dropout=0)
			self.l_out = L.Linear(in_size=nhid * 1, out_size=2)

예제 #12

0

파일 보기

    def __init__(self, n_layers, n_vocab, n_units, dropout=0.1,
                 same_network=False):
        """Create the embedding and two GRU encoders.

        Args:
            n_layers: Number of stacked layers in each GRU.
            n_vocab: Size of the embedding table.
            n_units: Embedding width and GRU hidden size.
            dropout: Dropout ratio given to the GRUs.
            same_network: When truthy, ``encoder2`` is the very same link
                object as ``encoder1`` instead of a second GRU.
        """
        super(GRUEncoder, self).__init__()
        with self.init_scope():
            self.embed = L.EmbedID(n_vocab, n_units, initialW=embed_init)

            primary = L.NStepGRU(n_layers, n_units, n_units, dropout)
            self.encoder1 = primary
            # Either share the first encoder or build an independent one.
            self.encoder2 = (
                primary if same_network
                else L.NStepGRU(n_layers, n_units, n_units, dropout))

        self.n_layers = n_layers
        self.out_units = n_units * 2
        self.dropout = dropout

예제 #13

0

파일 보기

 def __init__(self, encoder_input_channels=5, decoder_input_channels=5,
              n_layers=3, hidden_units=1024, dropout=0.1, clf_target_num=3):
     """Create the encoder/decoder GRUs and the two decoder heads.

     Args:
         encoder_input_channels: Input size of the encoder GRU.
         decoder_input_channels: Input size of the decoder GRU.
         n_layers: Number of stacked layers in both GRUs.
         hidden_units: Hidden size of both GRUs and input size of both
             dense heads.
         dropout: Dropout ratio given to both GRUs.
         clf_target_num: Length of ``clf_targets``; the classification
             head has ``clf_target_num + 1`` outputs.
     """
     super(GRUEncoderDecoderTwin, self).__init__()
     self.clf_target_num = clf_target_num

     # log1p of 1..clf_target_num as float32, shaped
     # (1, clf_target_num, 1).
     target_values = range(1, clf_target_num + 1)
     log_targets = np.log1p(target_values).reshape(1, clf_target_num, 1)
     self.clf_targets = log_targets.astype(np.float32)

     with self.init_scope():
         self.encoder = L.NStepGRU(
             n_layers, encoder_input_channels, hidden_units, dropout)
         self.decoder = L.NStepGRU(
             n_layers, decoder_input_channels, hidden_units, dropout)
         # Regression head (1 output) and classification head.
         self.decoder_reg_dense = TimeDistributedDense(hidden_units, 1)
         self.decoder_clf_dense = TimeDistributedDense(
             hidden_units, clf_target_num + 1)

예제 #14

0

파일 보기

파일: Encoder.py 프로젝트: KFleee/HITSZ_SZ160110112

 def __init__(self, item_size, embed_size, hidden_size):
     """Register the item embedding and a single-layer GRU.

     Args:
         item_size: Size of the embedding table.
         embed_size: Embedding width and GRU input size.
         hidden_size: GRU hidden size; also kept on ``self.hidden_size``.
     """
     # Item embedding (original note, translated: "embed the items using
     # word2vector"). Weights use the GlorotNormal initializer and
     # ignore_label is -1.
     item_embed = L.EmbedID(
         item_size,
         embed_size,
         initialW=chainer.initializers.GlorotNormal(),
         ignore_label=-1)
     encoder = L.NStepGRU(1, embed_size, hidden_size, 0.5)

     super(NStepGRUEncoder, self).__init__(xe=item_embed, gru=encoder)
     self.hidden_size = hidden_size

예제 #15

0

파일 보기

파일: ggnn_dev_jknet.py 프로젝트: Minys233/GCN-BMP

    def __init__(self, n_layers, in_size, out_size, dropout=0.0):
        """Wrap a single ``NStepGRU`` and remember its configuration.

        Args:
            n_layers: Number of stacked GRU layers.
            in_size: GRU input size.
            out_size: GRU hidden size.
            dropout: Dropout ratio given to the GRU.
        """
        super(GRUAggregator, self).__init__()
        with self.init_scope():
            self.gru_layer = links.NStepGRU(
                n_layers, in_size, out_size, dropout)

        # Keep the constructor arguments on the instance.
        self.n_layers, self.in_size = n_layers, in_size
        self.out_size, self.dropout = out_size, dropout

예제 #16

0

파일 보기

파일: Hybrid_BiGRU.py 프로젝트: xiaoshengjun/NANHM-for-GEC

 def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_source_char,
              n_target_char, n_units):
     """Register the word/char embeddings, GRUs and linear layers.

     Args:
         n_layers: Number of stacked layers in every GRU.
         n_source_vocab: Size of the source word embedding table.
         n_target_vocab: Size of the target word embedding table and of
             ``W``.
         n_source_char: Size of the source character embedding table.
         n_target_char: Size of the target character embedding table and
             of ``W_char``.
         n_units: Base width; the word decoder side uses twice this width.
     """
     rnn_dropout = 0.1
     wide_units = n_units * 2

     # Links are created in the same order in which they are registered.
     word_src = L.EmbedID(n_source_vocab, n_units)
     word_tgt = L.EmbedID(n_target_vocab, wide_units)
     char_src = L.EmbedID(n_source_char, n_units)
     char_tgt = L.EmbedID(n_target_char, n_units)
     enc_fwd = L.NStepGRU(n_layers, n_units, n_units, rnn_dropout)
     enc_bwd = L.NStepGRU(n_layers, n_units, n_units, rnn_dropout)
     char_enc = L.NStepGRU(n_layers, n_units, n_units, rnn_dropout)
     word_dec = My.NStepGRU(n_layers, wide_units, wide_units, rnn_dropout)
     char_dec = L.NStepGRU(n_layers, n_units, n_units, rnn_dropout)
     char_att_dec = My.NStepGRU(n_layers, n_units, n_units, rnn_dropout)
     word_out = L.Linear(wide_units, n_target_vocab)
     hat_layer = L.Linear(n_units * 4, n_units)
     char_out = L.Linear(n_units, n_target_char)

     super(Seq2seq, self).__init__(
         embed_x=word_src,
         embed_y=word_tgt,
         embed_xc=char_src,
         embed_yc=char_tgt,
         encoder_f=enc_fwd,
         encoder_b=enc_bwd,
         char_encoder=char_enc,
         decoder=word_dec,
         char_decoder=char_dec,
         char_att_decoder=char_att_dec,
         W=word_out,
         W_hat=hat_layer,
         W_char=char_out,
     )
     self.n_layers = n_layers
     self.n_units = n_units
     self.n_params = 6

예제 #17

0

파일 보기

파일: seq2seq.py 프로젝트: eggplant60/seq2seq_arXiv

    def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_units,
                 type_unit, word_dropout, denoising_rate, direc, attr,
                 loss_type):
        """Create embeddings, the selected encoder/decoder and output layers.

        Args:
            n_layers: Number of stacked layers in the encoder and decoder.
            n_source_vocab: Size of the source embedding table.
            n_target_vocab: Size of the target embedding table and of ``W``.
            n_units: Embedding width and RNN hidden size.
            type_unit: ``'lstm'`` or ``'gru'``; for any other value no
                encoder/decoder is created.
            word_dropout: Stored on the instance.
            denoising_rate: Stored on the instance.
            direc: ``'uni'`` or ``'bi'``; for any other value neither the
                encoder/decoder nor ``W`` is created.
            attr: When truthy an extra ``Wc`` layer is created.
            loss_type: Stored on the instance.
        """
        super(Seq2seq, self).__init__()

        is_uni = direc == 'uni'
        is_bi = direc == 'bi'

        with self.init_scope():
            self.embed_x = L.EmbedID(n_source_vocab, n_units)
            self.embed_y = L.EmbedID(n_target_vocab, n_units)

            # Pick the RNN link class from (type_unit, direc); it stays
            # None for unrecognised combinations.
            rnn_cls = None
            if type_unit == 'lstm':
                if is_uni:
                    rnn_cls = L.NStepLSTM
                elif is_bi:
                    rnn_cls = L.NStepBiLSTM
            elif type_unit == 'gru':
                if is_uni:
                    rnn_cls = L.NStepGRU
                elif is_bi:
                    rnn_cls = L.NStepBiGRU

            if rnn_cls is not None:
                self.encoder = rnn_cls(n_layers, n_units, n_units, 0.1)
                self.decoder = rnn_cls(n_layers, n_units, n_units, 0.1)

            # The output layer takes 2 * n_units inputs for 'bi'.
            if is_uni:
                self.W = L.Linear(n_units, n_target_vocab)
            elif is_bi:
                self.W = L.Linear(2 * n_units, n_target_vocab)

            if attr:
                self.Wc = L.Linear(2 * n_units, n_units)

        # Plain (non-link) attributes; note that direc is not stored.
        self.n_layers = n_layers
        self.n_units = n_units
        self.type_unit = type_unit
        self.word_dropout = word_dropout
        self.denoising_rate = denoising_rate
        self.attr = attr
        self.loss_type = loss_type

예제 #18

0

파일 보기

	def __init__(self, n_vocab, embed_size, n_fvocab, n_units, n_labels, w2vfrg, embeddings):
		"""Register two embeddings, a one-layer GRU and two linear layers.

		Despite the class name and the ``bigru`` link name, the recurrent
		link created here is a plain ``L.NStepGRU``.

		Args:
			n_vocab: Size of the ``embed`` table.
			embed_size: Embedding width and GRU input/hidden size.
			n_fvocab: Size of the ``f_embed`` table.
			n_units: Output size of ``l2``.
			n_labels: Output size of ``l3``.
			w2vfrg: When equal to 0, ``embeddings`` is ignored.
			embeddings: Initial weights for ``embed``.
		"""
		# w2vfrg == 0 turns off the supplied initial weights.
		initial_weights = None if w2vfrg == 0 else embeddings

		# Links are created in the same order in which they are registered.
		word_embed = L.EmbedID(n_vocab, embed_size, initialW=initial_weights, ignore_label=-1)
		feature_embed = L.EmbedID(n_fvocab, embed_size, ignore_label=-1)
		gru = L.NStepGRU(1, embed_size, embed_size, dropout=0)
		hidden_layer = L.Linear(None, n_units)
		label_layer = L.Linear(None, n_labels)

		super(BiLSTM, self).__init__(
			embed=word_embed,
			f_embed=feature_embed,
			bigru=gru,
			l2=hidden_layer,
			l3=label_layer,
		)
		self.reset_state()

예제 #19

0

파일 보기

    def __init__(self, opt, shared=None):
        """Initialize the agent and, unless shared, build model and optimizer.

        Args:
            opt: Option mapping. The keys read here are ``no_cuda``,
                ``gpu``, ``rnntype``, ``hiddensize``, ``numlayers``,
                ``dropout``, ``learningrate`` and ``model_file``. The key
                ``cuda`` is written by this constructor.
            shared: Passed to the parent constructor; when truthy, all
                model setup below is skipped.
        """
        super(RNNAgent, self).__init__(opt, shared)
        if not shared:
            # don't enter this loop for shared instantiations
            # CUDA is used only if not disabled and chainer reports it
            # as available; the result is written back into opt.
            opt['cuda'] = not opt['no_cuda'] and chainer.cuda.available
            # Rebinds the module-level array module (cupy or numpy).
            global xp
            if opt['cuda']:
                print('[ Using CUDA ]')
                cuda.get_device(opt['gpu']).use()
                xp = cuda.cupy
            else:
                xp = np

            self.id = 'RNN'
            self.dict = DictionaryAgent(opt)
            self.observation = {}
            self.rnn_type = opt['rnntype']
            self.hidden_size = opt['hiddensize']
            self.num_layers = opt['numlayers']
            self.dropout_rate = opt['dropout']
            self.learning_rate = opt['learningrate']
            self.use_cuda = opt.get('cuda', False)
            self.path = opt.get('model_file', None)
            # Short aliases: vocabulary size, hidden size, layer count,
            # dropout rate.
            vs = len(self.dict)
            hs = self.hidden_size
            nl = self.num_layers
            dr = self.dropout_rate

            # NOTE(review): super(Agent, self) skips Agent in the MRO;
            # presumably this reaches a chainer Chain base - confirm
            # against the class definition.
            super(Agent, self).__init__(embedding=L.EmbedID(vs, hs),
                                        projection=L.Linear(hs, vs))
            # No 'rnn' link is registered when rnn_type is neither 'GRU'
            # nor 'LSTM'.
            if self.rnn_type == 'GRU':
                super(Agent, self).add_link('rnn', L.NStepGRU(nl, hs, hs, dr))
            elif self.rnn_type == 'LSTM':
                super(Agent, self).add_link('rnn', L.NStepLSTM(nl, hs, hs, dr))
            self.dropout = F.dropout
            self.softmax = F.softmax
            self.loss = F.softmax_cross_entropy

            # SGD with gradient clipping at 5.
            self.optimizer = chainer.optimizers.SGD(lr=self.learning_rate)
            self.optimizer.setup(self)
            self.optimizer.add_hook(chainer.optimizer.GradientClipping(5))

            if self.use_cuda:
                self.cuda()
            # Restore saved parameters when the model file already exists.
            if opt.get('model_file') and os.path.isfile(opt['model_file']):
                print('Loading existing model parameters from ' +
                      opt['model_file'])
                self.load(opt['model_file'])

        # Set for shared and non-shared instances alike.
        self.episode_done = True

예제 #20

0

파일 보기

파일: nets_con.py 프로젝트: H-Tsuyuki/SemSeq

    def __init__(self, config):
        """Read hyper-parameters from ``config`` and build the encoder.

        Args:
            config: Mapping providing ``n_words``, ``word_emb_dim``,
                ``dpout_word``, ``enc_dim``, ``n_enc_layers``,
                ``dpout_enc`` and ``glove``; each value is copied onto an
                attribute of the same name.
        """
        super(GRUEncoder, self).__init__()

        for key in ('n_words', 'word_emb_dim', 'dpout_word', 'enc_dim',
                    'n_enc_layers', 'dpout_enc', 'glove'):
            setattr(self, key, config[key])

        with self.init_scope():
            # An embedding link is only created when ``glove`` is falsy.
            if not self.glove:
                self.embed = L.EmbedID(self.n_words, self.word_emb_dim)
            self.encoder = L.NStepGRU(
                self.n_enc_layers,
                self.word_emb_dim,
                self.enc_dim,
                self.dpout_enc)

예제 #21

0

파일 보기

파일: model.py 프로젝트: alantian/kanshi

    def __init__(self, charset_size, hidden_size, n_layers, dropout):
        """Create the latent projection, embedding, GRU and output layer.

        Args:
            charset_size: Size of the embedding table and of the output
                layer.
            hidden_size: Embedding width and GRU input/hidden size.
            n_layers: Number of stacked GRU layers.
            dropout: Dropout ratio given to the GRU.
        """
        super(Decoder, self).__init__()

        self.charset_size, self.hidden_size = charset_size, hidden_size
        self.n_layers = n_layers

        with self.init_scope():
            # Only one size is passed to this Linear link; see the
            # chainer L.Linear documentation for how a lone size argument
            # is interpreted.
            self.latent_rep_lin = L.Linear(n_layers * hidden_size)
            self.embedid = L.EmbedID(
                in_size=charset_size, out_size=hidden_size)
            self.gru = L.NStepGRU(
                n_layers=n_layers,
                in_size=hidden_size,
                out_size=hidden_size,
                dropout=dropout,
            )
            self.W = L.Linear(hidden_size, charset_size)

예제 #22

0

파일 보기

def construct_RNN(unit_type, bidirection, n_layers, n_input, n_units, dropout):
    """Build an NStep RNN link and log its parameter shapes to stderr.

    Args:
        unit_type: ``'lstm'`` or ``'gru'``; any other value selects the
            tanh RNN.
        bidirection: Truthy to use the bidirectional variant of the link.
        n_layers: Passed to the link constructor as the layer count.
        n_input: Passed to the link constructor as the input size.
        n_units: Passed to the link constructor as the output size.
        dropout: Passed to the link constructor as the dropout ratio.

    Returns:
        The constructed link.
    """
    # Choose the link class and how many (w<j>, b<j>) pairs each child
    # link is expected to expose: 8 for LSTM, 6 for GRU, 2 otherwise.
    if unit_type == 'lstm':
        rnn_cls = L.NStepBiLSTM if bidirection else L.NStepLSTM
        n_weights = 8
    elif unit_type == 'gru':
        rnn_cls = L.NStepBiGRU if bidirection else L.NStepGRU
        n_weights = 6
    else:
        rnn_cls = L.NStepBiRNNTanh if bidirection else L.NStepRNNTanh
        n_weights = 2

    rnn = rnn_cls(n_layers, n_input, n_units, dropout)

    print('# RNN unit: {}, dropout={}'.format(rnn, rnn.dropout),
          file=sys.stderr)
    for i, c in enumerate(rnn._children):
        print('#   {}-th param'.format(i), file=sys.stderr)
        for j in range(n_weights):
            w = getattr(c, 'w{}'.format(j))
            b = getattr(c, 'b{}'.format(j))
            print('#      {} - W={}, b={}'.format(j, w.shape, b.shape),
                  file=sys.stderr)

    return rnn

예제 #23

0

파일 보기

 def __init__(self, n_vocab, n_emb, n_units, n_layers=1, dropout=0.1,
              rnn='LSTM', initialW=None):
     """Create the embedding and the selected recurrent link.

     Args:
         n_vocab: Size of the embedding table.
         n_emb: Embedding width and RNN input size.
         n_units: RNN hidden size; stored as ``n_out``.
         n_layers: Number of stacked RNN layers.
         dropout: Dropout ratio given to the RNN.
         rnn: ``'LSTM'`` or ``'GRU'``; for any other value no ``rnn`` link
             is created.
         initialW: Initial weights passed to the embedding.
     """
     super().__init__()
     with self.init_scope():
         self.embed = L.EmbedID(
             n_vocab, n_emb, initialW, ignore_label=IGNORE_ID)

         # Resolve the link class first; it stays None for other names.
         rnn_cls = None
         if rnn == 'LSTM':
             rnn_cls = L.NStepLSTM
         elif rnn == 'GRU':
             rnn_cls = L.NStepGRU
         if rnn_cls is not None:
             self.rnn = rnn_cls(n_layers, n_emb, n_units, dropout)

     self.n_layers = n_layers
     self.n_out = n_units
     self.dropout = dropout
     self.rnn_type = rnn

예제 #24

0

파일 보기

    def __init__(self,
                 input_size,
                 rnn_type,
                 bidirectional,
                 num_units,
                 num_proj,
                 num_layers,
                 dropout_input,
                 dropout_hidden,
                 subsample_list=[],
                 subsample_type='drop',
                 use_cuda=False,
                 merge_bidirectional=False,
                 num_stack=1,
                 splice=1,
                 input_channel=1,
                 conv_channels=[],
                 conv_kernel_sizes=[],
                 conv_strides=[],
                 poolings=[],
                 activation='relu',
                 batch_norm=False,
                 residual=False,
                 dense_residual=False,
                 num_layers_sub=0):
        """Build an optional CNN front-end and a stack of one-layer RNNs.

        Every RNN layer is a separate single-layer ``NStep*`` link
        registered as ``<rnn_type>_l<index>``. When ``num_proj`` is
        positive, a ``LinearND`` projection named ``proj_l<index>`` is
        registered after every layer except the last.

        Args:
            input_size: Input feature size. Multiplied by
                ``splice * num_stack`` when no CNN is built; replaced by
                the CNN's ``output_size`` otherwise.
            rnn_type: ``'lstm'``, ``'gru'`` or ``'rnn'``.
            bidirectional: Selects the bidirectional link variants.
            num_units: Output size of every RNN layer.
            num_proj: Projection size; ``None`` is stored as 0.
            num_layers: Number of RNN layers.
            dropout_input: Stored on the instance.
            dropout_hidden: Stored; also passed to the CNN and the
                projections.
            subsample_list: Empty, or one flag per layer.
            subsample_type: ``'drop'`` or ``'concat'``.
            use_cuda: When truthy each created link is moved to the GPU.
            merge_bidirectional: Stored on the instance.
            num_stack: Must be 1 when a CNN is built.
            splice: Must be 1 when a CNN is built.
            input_channel: Passed to ``CNNEncoder``.
            conv_channels: A CNN is built only when this is non-empty and
                has the same length as ``conv_kernel_sizes`` and
                ``conv_strides``.
            conv_kernel_sizes: Passed to ``CNNEncoder``.
            conv_strides: Passed to ``CNNEncoder``.
            poolings: Passed to ``CNNEncoder``.
            activation: Passed to ``CNNEncoder``.
            batch_norm: Passed to ``CNNEncoder``.
            residual: Stored; mutually exclusive with ``dense_residual``.
            dense_residual: Stored; mutually exclusive with ``residual``.
            num_layers_sub: Stored as the hierarchical-encoder setting.

        Raises:
            ValueError: If ``subsample_list`` is non-empty with a length
                other than ``num_layers``, if ``num_layers_sub`` is
                negative or (when above 1) exceeds ``num_layers``, or if
                ``rnn_type`` is not recognised.
            TypeError: If ``subsample_type`` is not ``'drop'`` or
                ``'concat'``.
        """

        super(RNNEncoder, self).__init__()

        # Validate arguments before any state is set.
        if len(subsample_list) > 0 and len(subsample_list) != num_layers:
            raise ValueError(
                'subsample_list must be the same size as num_layers.')
        if subsample_type not in ['drop', 'concat']:
            raise TypeError('subsample_type must be "drop" or "concat".')
        if num_layers_sub < 0 or (num_layers_sub > 1
                                  and num_layers < num_layers_sub):
            raise ValueError('Set num_layers_sub between 1 to num_layers.')

        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.num_units = num_units
        self.num_proj = num_proj if num_proj is not None else 0
        self.num_layers = num_layers
        self.dropout_input = dropout_input
        self.dropout_hidden = dropout_hidden
        self.merge_bidirectional = merge_bidirectional
        self.use_cuda = use_cuda

        # TODO: self.clip_activation = clip_activation

        # Setting for hierarchical encoder
        self.num_layers_sub = num_layers_sub

        # Setting for subsampling
        if len(subsample_list) == 0:
            self.subsample_list = [False] * num_layers
        else:
            self.subsample_list = subsample_list
        self.subsample_type = subsample_type
        # This implementation is based on
        # https://arxiv.org/abs/1508.01211
        #     Chan, William, et al. "Listen, attend and spell."
        #         arXiv preprint arXiv:1508.01211 (2015).

        # Setting for residual connection
        assert not (residual and dense_residual)
        self.residual = residual
        self.dense_residual = dense_residual
        # Find the 1-based index of the last layer flagged for
        # subsampling (0 when none is flagged).
        subsample_last_layer = 0
        for l_reverse, is_subsample in enumerate(subsample_list[::-1]):
            if is_subsample:
                subsample_last_layer = num_layers - l_reverse
                break
        self.residual_start_layer = subsample_last_layer + 1
        # NOTE: residual connection starts from the last subsampling layer

        with self.init_scope():
            # Setting for CNNs before RNNs
            if len(conv_channels) > 0 and len(conv_channels) == len(
                    conv_kernel_sizes) and len(conv_kernel_sizes) == len(
                        conv_strides):
                assert num_stack == 1 and splice == 1
                self.conv = CNNEncoder(input_size,
                                       input_channel=input_channel,
                                       conv_channels=conv_channels,
                                       conv_kernel_sizes=conv_kernel_sizes,
                                       conv_strides=conv_strides,
                                       poolings=poolings,
                                       dropout_input=0,
                                       dropout_hidden=dropout_hidden,
                                       activation=activation,
                                       use_cuda=use_cuda,
                                       batch_norm=batch_norm)
                # The RNN stack consumes the CNN output.
                input_size = self.conv.output_size
            else:
                input_size = input_size * splice * num_stack
                self.conv = None

            # NOTE(review): these two lists are never appended to in the
            # code visible here; the links are registered via setattr.
            self.rnns = []
            self.projections = []
            for l in range(num_layers):
                # Input size of layer l: raw input for the first layer,
                # otherwise the previous projection size or RNN output
                # size, doubled after a 'concat' subsampling layer.
                if l == 0:
                    encoder_input_size = input_size
                elif self.num_proj > 0:
                    encoder_input_size = num_proj
                    if subsample_type == 'concat' and l > 0 and self.subsample_list[
                            l - 1]:
                        encoder_input_size *= 2
                else:
                    encoder_input_size = num_units * self.num_directions
                    if subsample_type == 'concat' and l > 0 and self.subsample_list[
                            l - 1]:
                        encoder_input_size *= 2

                if rnn_type == 'lstm':
                    if bidirectional:
                        rnn_i = L.NStepBiLSTM(n_layers=1,
                                              in_size=encoder_input_size,
                                              out_size=num_units,
                                              dropout=0)
                    else:
                        rnn_i = L.NStepLSTM(n_layers=1,
                                            in_size=encoder_input_size,
                                            out_size=num_units,
                                            dropout=0)

                elif rnn_type == 'gru':
                    if bidirectional:
                        rnn_i = L.NStepBiGRU(n_layers=1,
                                             in_size=encoder_input_size,
                                             out_size=num_units,
                                             dropout=0)
                    else:
                        rnn_i = L.NStepGRU(n_layers=1,
                                           in_size=encoder_input_size,
                                           out_size=num_units,
                                           dropout=0)

                elif rnn_type == 'rnn':
                    if bidirectional:
                        # rnn_i = L.NStepBiRNNReLU(
                        rnn_i = L.NStepBiRNNTanh(n_layers=1,
                                                 in_size=encoder_input_size,
                                                 out_size=num_units,
                                                 dropout=0)
                    else:
                        # rnn_i = L.NStepRNNReLU(
                        rnn_i = L.NStepRNNTanh(n_layers=1,
                                               in_size=encoder_input_size,
                                               out_size=num_units,
                                               dropout=0)
                else:
                    raise ValueError(
                        'rnn_type must be "lstm" or "gru" or "rnn".')

                if use_cuda:
                    rnn_i.to_gpu()
                # Registered as e.g. 'lstm_l0', 'gru_l1', ...
                setattr(self, rnn_type + '_l' + str(l), rnn_i)

                # A projection follows every layer except the last one.
                if l != self.num_layers - 1 and self.num_proj > 0:
                    proj_i = LinearND(num_units * self.num_directions,
                                      num_proj,
                                      dropout=dropout_hidden,
                                      use_cuda=use_cuda)

                    if use_cuda:
                        proj_i.to_gpu()
                    setattr(self, 'proj_l' + str(l), proj_i)

예제 #25

0

파일 보기

파일: test_rnn.py 프로젝트: zwh930712/chainer

 def __init__(self):
     """Register a single ``NStepGRU`` built from enclosing-scope settings.

     ``n_layers``, ``input_size``, ``hidden_size`` and ``dropout_ratio``
     are not parameters; they are looked up from the surrounding scope.
     """
     super().__init__()
     with self.init_scope():
         self.gru = L.NStepGRU(
             n_layers,
             input_size,
             hidden_size,
             dropout_ratio,
         )

예제 #26

0

파일 보기

파일: test_stateless_recurrent_sequential.py 프로젝트: williamd4112/chainerrl

    def _test_mask_recurrent_state_at(self, gpu):
        """Check recurrent-state masking, extraction and concatenation.

        A two-step, two-sequence batch is run once with ``n_step_forward``
        and then step by step. The second-step outputs are compared while
        the recurrent state of selected batch entries is masked between
        the two steps.

        Args:
            gpu: Device id; the link is moved to that GPU when ``>= 0``.
        """
        in_size = 2
        out_size = 4
        rseq = StatelessRecurrentSequential(
            L.Linear(in_size, 3),
            F.elu,
            L.NStepGRU(1, 3, out_size, 0),
            F.softmax,
        )
        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            rseq.to_gpu()
        xp = rseq.xp
        # Two sequences, each with two time steps.
        seqs_x = [
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
        ]
        # Regroup by time step so each step can be fed separately.
        transposed_x = F.transpose_sequence(seqs_x)
        print('transposed_x[0]', transposed_x[0])

        # Reference run: the whole batch in a single n_step_forward call.
        def no_mask_n_step_forward():
            nomask_nstep_out, nstep_rs = rseq.n_step_forward(
                seqs_x, None, output_mode='concat')
            return F.reshape(nomask_nstep_out, (2, 2, out_size)), nstep_rs
        nstep_out, nstep_rs = no_mask_n_step_forward()

        # Check if n_step_forward and forward twice results are same
        def no_mask_forward_twice():
            _, rs = rseq(transposed_x[0], None)
            return rseq(transposed_x[1], rs)
        nomask_out, nomask_rs = no_mask_forward_twice()
        xp.testing.assert_allclose(
            nstep_out.array[:, 1],
            nomask_out.array,
        )
        xp.testing.assert_allclose(nstep_rs[0].array, nomask_rs[0].array)

        # 1st-only mask forward twice: only 2nd should be the same
        def mask0_forward_twice():
            _, rs = rseq(transposed_x[0], None)
            rs = rseq.mask_recurrent_state_at(rs, 0)
            return rseq(transposed_x[1], rs)
        mask0_out, mask0_rs = mask0_forward_twice()
        # assertRaises(AssertionError) asserts that the values differ.
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out.array[0, 1],
                mask0_out.array[0],
            )
        xp.testing.assert_allclose(
            nstep_out.array[1, 1],
            mask0_out.array[1],
        )

        # 2nd-only mask forward twice: only 1st should be the same
        def mask1_forward_twice():
            _, rs = rseq(transposed_x[0], None)
            rs = rseq.mask_recurrent_state_at(rs, 1)
            return rseq(transposed_x[1], rs)
        mask1_out, mask1_rs = mask1_forward_twice()
        xp.testing.assert_allclose(
            nstep_out.array[0, 1],
            mask1_out.array[0],
        )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out.array[1, 1],
                mask1_out.array[1],
            )

        # both 1st and 2nd mask forward twice: both should be different
        def mask01_forward_twice():
            _, rs = rseq(transposed_x[0], None)
            rs = rseq.mask_recurrent_state_at(rs, [0, 1])
            return rseq(transposed_x[1], rs)
        mask01_out, mask01_rs = mask01_forward_twice()
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out.array[0, 1],
                mask01_out.array[0],
            )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out.array[1, 1],
                mask01_out.array[1],
            )

        # get and concat recurrent states and resume forward
        # (splitting the state per batch entry and concatenating it again
        # must give the same result as the unmasked run)
        def get_and_concat_rs_forward():
            _, rs = rseq(transposed_x[0], None)
            rs0 = rseq.get_recurrent_state_at(rs, 0, unwrap_variable=True)
            rs1 = rseq.get_recurrent_state_at(rs, 1, unwrap_variable=True)
            concat_rs = rseq.concatenate_recurrent_states([rs0, rs1])
            return rseq(transposed_x[1], concat_rs)
        getcon_out, getcon_rs = get_and_concat_rs_forward()
        xp.testing.assert_allclose(getcon_rs[0].array, nomask_rs[0].array)
        xp.testing.assert_allclose(
            nstep_out.array[0, 1], getcon_out.array[0])
        xp.testing.assert_allclose(
            nstep_out.array[1, 1], getcon_out.array[1])

예제 #27

0

파일 보기

def main():
    """Train or demo a PPO agent on an Atari env with batched environments.

    Parses command-line options, seeds ChainerRL and the per-process
    environments, builds either a feed-forward actor-critic model or (with
    ``--recurrent``) one containing an ``NStepGRU`` layer, and then either
    evaluates the agent (``--demo``) or trains it with periodic evaluation
    while the learning rate is linearly interpolated from ``--lr`` to 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--env',
                        type=str,
                        default='BreakoutNoFrameskip-v4',
                        help='Gym Env ID.')
    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU device ID. Set to -1 to use CPUs only.')
    parser.add_argument('--num-envs',
                        type=int,
                        default=8,
                        help='Number of env instances run in parallel.')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='Random seed [0, 2 ** 32)')
    parser.add_argument('--outdir',
                        type=str,
                        default='results',
                        help='Directory path to save output files.'
                        ' If it does not exist, it will be created.')
    parser.add_argument('--steps',
                        type=int,
                        default=10**7,
                        help='Total time steps for training.')
    parser.add_argument(
        '--max-frames',
        type=int,
        default=30 * 60 * 60,  # 30 minutes with 60 fps
        help='Maximum number of frames for each episode.')
    parser.add_argument('--lr',
                        type=float,
                        default=2.5e-4,
                        help='Learning rate.')
    parser.add_argument('--eval-interval',
                        type=int,
                        default=100000,
                        help='Interval (in timesteps) between evaluation'
                        ' phases.')
    parser.add_argument('--eval-n-runs',
                        type=int,
                        default=10,
                        help='Number of episodes ran in an evaluation phase.')
    parser.add_argument('--demo',
                        action='store_true',
                        default=False,
                        help='Run demo episodes, not training.')
    parser.add_argument('--load',
                        type=str,
                        default='',
                        help='Directory path to load a saved agent data from'
                        ' if it is a non-empty string.')
    parser.add_argument('--logging-level',
                        type=int,
                        default=20,
                        help='Logging level. 10:DEBUG, 20:INFO etc.')
    parser.add_argument('--render',
                        action='store_true',
                        default=False,
                        help='Render env states in a GUI window.')
    parser.add_argument('--monitor',
                        action='store_true',
                        default=False,
                        help='Monitor env. Videos and additional information'
                        ' are saved as output files.')
    parser.add_argument('--update-interval',
                        type=int,
                        default=128 * 8,
                        help='Interval (in timesteps) between PPO iterations.')
    parser.add_argument('--batchsize',
                        type=int,
                        default=32 * 8,
                        help='Size of minibatch (in timesteps).')
    parser.add_argument('--epochs',
                        type=int,
                        default=4,
                        help='Number of epochs used for each PPO iteration.')
    parser.add_argument('--log-interval',
                        type=int,
                        default=10000,
                        help='Interval (in timesteps) of printing logs.')
    parser.add_argument('--recurrent',
                        action='store_true',
                        default=False,
                        help='Use a recurrent model. See the code for the'
                        ' model definition.')
    parser.add_argument('--flicker',
                        action='store_true',
                        default=False,
                        help='Use so-called flickering Atari, where each'
                        ' screen is blacked out with probability 0.5.')
    parser.add_argument('--no-frame-stack',
                        action='store_true',
                        default=False,
                        help='Disable frame stacking so that the agent can'
                        ' only see the current screen.')
    parser.add_argument('--checkpoint-frequency',
                        type=int,
                        default=None,
                        help='Frequency at which agents are stored.')
    args = parser.parse_args()

    import logging
    logging.basicConfig(level=args.logging_level)

    # Set a random seed used in ChainerRL.
    misc.set_random_seed(args.seed, gpus=(args.gpu, ))

    # Set different random seeds for different subprocesses.
    # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
    # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
    process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
    assert process_seeds.max() < 2**32

    args.outdir = experiments.prepare_output_dir(args, args.outdir)
    print('Output files are saved in {}'.format(args.outdir))

    def make_env(idx, test):
        """Build the wrapped Atari env for subprocess ``idx``."""
        # Use different random seeds for train and test envs
        process_seed = int(process_seeds[idx])
        env_seed = 2**32 - 1 - process_seed if test else process_seed
        env = atari_wrappers.wrap_deepmind(
            atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
            episode_life=not test,
            clip_rewards=not test,
            flicker=args.flicker,
            frame_stack=not args.no_frame_stack,
        )
        env.seed(env_seed)
        if args.monitor:
            env = chainerrl.wrappers.Monitor(
                env, args.outdir, mode='evaluation' if test else 'training')
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env

    def make_batch_env(test):
        """Build a vectorized env with one factory per subprocess index."""
        # Bind idx as a default argument. A plain
        # ``lambda: make_env(idx, test)`` looks idx up only when it is
        # called, which is after the comprehension has finished, so every
        # factory would use the last index and all envs would share one seed.
        return chainerrl.envs.MultiprocessVectorEnv([
            (lambda idx=idx: make_env(idx, test))
            for idx in range(args.num_envs)
        ])

    # A throwaway env, used only to read the observation/action spaces.
    sample_env = make_env(0, test=False)
    print('Observation space', sample_env.observation_space)
    print('Action space', sample_env.action_space)
    n_actions = sample_env.action_space.n

    winit_last = chainer.initializers.LeCunNormal(1e-2)
    if args.recurrent:
        # Same conv trunk as the feed-forward model, with an NStepGRU layer
        # inserted before the policy/value heads.
        model = chainerrl.links.StatelessRecurrentSequential(
            L.Convolution2D(None, 32, 8, stride=4), F.relu,
            L.Convolution2D(None, 64, 4, stride=2), F.relu,
            L.Convolution2D(None, 64, 3, stride=1), F.relu,
            L.Linear(None, 512), F.relu, L.NStepGRU(1, 512, 512, 0),
            chainerrl.links.Branched(
                chainer.Sequential(
                    L.Linear(None, n_actions, initialW=winit_last),
                    chainerrl.distribution.SoftmaxDistribution,
                ),
                L.Linear(None, 1),
            ))
    else:
        model = chainer.Sequential(
            L.Convolution2D(None, 32, 8, stride=4), F.relu,
            L.Convolution2D(None, 64, 4, stride=2), F.relu,
            L.Convolution2D(None, 64, 3, stride=1), F.relu,
            L.Linear(None, 512), F.relu,
            chainerrl.links.Branched(
                chainer.Sequential(
                    L.Linear(None, n_actions, initialW=winit_last),
                    chainerrl.distribution.SoftmaxDistribution,
                ),
                L.Linear(None, 1),
            ))

    # Draw the computational graph and save it in the output directory.
    # The [None] index adds a leading axis of size 1 to the zero observation.
    fake_obss = np.zeros(sample_env.observation_space.shape,
                         dtype=np.float32)[None]
    if args.recurrent:
        # The recurrent model also takes a recurrent state (None here) and
        # returns a pair whose second element is discarded.
        fake_out, _ = model(fake_obss, None)
    else:
        fake_out = model(fake_obss)
    chainerrl.misc.draw_computational_graph([fake_out],
                                            os.path.join(args.outdir, 'model'))

    opt = chainer.optimizers.Adam(alpha=args.lr, eps=1e-5)
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(0.5))

    def phi(x):
        # Feature extractor: cast to float32 and divide by 255.
        return np.asarray(x, dtype=np.float32) / 255

    agent = PPO(
        model,
        opt,
        gpu=args.gpu,
        phi=phi,
        update_interval=args.update_interval,
        minibatch_size=args.batchsize,
        epochs=args.epochs,
        clip_eps=0.1,
        clip_eps_vf=None,
        standardize_advantages=True,
        entropy_coef=1e-2,
        recurrent=args.recurrent,
    )
    if args.load:
        agent.load(args.load)

    if args.demo:
        eval_stats = experiments.eval_performance(
            env=make_batch_env(test=True),
            agent=agent,
            n_steps=None,
            n_episodes=args.eval_n_runs)
        print('n_runs: {} mean: {} median: {} stdev: {}'.format(
            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
    else:
        step_hooks = []

        # Linearly decay the learning rate to zero
        def lr_setter(env, agent, value):
            agent.optimizer.alpha = value

        step_hooks.append(
            experiments.LinearInterpolationHook(args.steps, args.lr, 0,
                                                lr_setter))

        experiments.train_agent_batch_with_evaluation(
            agent=agent,
            env=make_batch_env(False),
            eval_env=make_batch_env(True),
            outdir=args.outdir,
            steps=args.steps,
            eval_n_steps=None,
            eval_n_episodes=args.eval_n_runs,
            checkpoint_freq=args.checkpoint_frequency,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            save_best_so_far_agent=False,
            step_hooks=step_hooks,
        )

예제 #28

0

파일 보기

def main(args):
    """Run PPO on an Atari env in the mode selected by ``args.mode``.

    Args:
        args: Either an already-built options object exposing the attributes
            read below (``outdir``, ``seed``, ``gpu``, ``num_envs``, ``env``,
            ``mode``, ...), or a ``list``, which is first converted with
            ``make_args`` (defined elsewhere).

    Returns:
        Whatever ``tools.make_video.check`` / ``tools.make_video.growth``
        return in the ``'check'`` / ``'growth'`` modes; ``None`` in
        ``'train'`` mode or for any other mode value.
    """
    import logging
    logging.basicConfig(level=logging.INFO, filename='log')

    if isinstance(args, list):
        args = make_args(args)
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # Set a random seed used in ChainerRL.
    misc.set_random_seed(args.seed, gpus=(args.gpu,))

    # Set different random seeds for different subprocesses.
    # If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
    # If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
    process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
    assert process_seeds.max() < 2 ** 32

    def make_env(idx, test):
        """Build the wrapped Atari env for subprocess ``idx``."""
        # Use different random seeds for train and test envs
        process_seed = int(process_seeds[idx])
        env_seed = 2 ** 32 - 1 - process_seed if test else process_seed
        env = atari_wrappers.wrap_deepmind(
            atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
            episode_life=not test,
            clip_rewards=not test,
            flicker=args.flicker,
            frame_stack=not args.no_frame_stack,
        )
        env.seed(env_seed)
        if args.monitor:
            env = chainerrl.wrappers.Monitor(
                env, args.outdir,
                mode='evaluation' if test else 'training')
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env

    def make_env_check():
        """Build a single env for the 'check' and 'growth' modes."""
        # Seeded directly with args.seed; no per-process or train/test offset.
        env_seed = args.seed
        env = atari_wrappers.wrap_deepmind(
            atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
            episode_life=True,
            clip_rewards=True)
        env.seed(int(env_seed))
        return env

    def make_batch_env(test):
        """Build a vectorized env with one factory per subprocess index."""
        # Bind idx as a default argument. A plain
        # ``lambda: make_env(idx, test)`` looks idx up only when it is
        # called, which is after the comprehension has finished, so every
        # factory would use the last index and all envs would share one seed.
        return chainerrl.envs.MultiprocessVectorEnv(
            [(lambda idx=idx: make_env(idx, test))
             for idx in range(args.num_envs)])

    # A throwaway env, used only to read the observation/action spaces.
    sample_env = make_env(0, test=False)
    print('Observation space', sample_env.observation_space)
    print('Action space', sample_env.action_space)
    n_actions = sample_env.action_space.n

    winit_last = chainer.initializers.LeCunNormal(1e-2)
    if args.recurrent:
        # Same conv trunk as the feed-forward model, with an NStepGRU layer
        # inserted before the policy/value heads.
        model = chainerrl.links.StatelessRecurrentSequential(
            L.Convolution2D(None, 32, 8, stride=4),
            F.relu,
            L.Convolution2D(None, 64, 4, stride=2),
            F.relu,
            L.Convolution2D(None, 64, 3, stride=1),
            F.relu,
            L.Linear(None, 512),
            F.relu,
            L.NStepGRU(1, 512, 512, 0),
            chainerrl.links.Branched(
                chainer.Sequential(
                    L.Linear(None, n_actions, initialW=winit_last),
                    chainerrl.distribution.SoftmaxDistribution,
                ),
                L.Linear(None, 1),
            )
        )
    else:
        model = chainer.Sequential(
            L.Convolution2D(None, 32, 8, stride=4),
            F.relu,
            L.Convolution2D(None, 64, 4, stride=2),
            F.relu,
            L.Convolution2D(None, 64, 3, stride=1),
            F.relu,
            L.Linear(None, 512),
            F.relu,
            chainerrl.links.Branched(
                chainer.Sequential(
                    L.Linear(None, n_actions, initialW=winit_last),
                    chainerrl.distribution.SoftmaxDistribution,
                ),
                L.Linear(None, 1),
            )
        )

    # Draw the computational graph and save it in the output directory.
    # The [None] index adds a leading axis of size 1 to the zero observation.
    fake_obss = np.zeros(
        sample_env.observation_space.shape, dtype=np.float32)[None]
    if args.recurrent:
        # The recurrent model also takes a recurrent state (None here) and
        # returns a pair whose second element is discarded.
        fake_out, _ = model(fake_obss, None)
    else:
        fake_out = model(fake_obss)
    chainerrl.misc.draw_computational_graph(
        [fake_out], os.path.join(args.outdir, 'model'))

    opt = chainer.optimizers.Adam(alpha=args.lr, eps=1e-5)
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(0.5))

    def phi(x):
        # Feature extractor: cast to float32 and divide by 255.
        return np.asarray(x, dtype=np.float32) / 255

    agent = PPO(
        model,
        opt,
        gpu=args.gpu,
        phi=phi,
        update_interval=args.update_interval,
        minibatch_size=args.batchsize,
        epochs=args.epochs,
        clip_eps=0.1,
        clip_eps_vf=None,
        standardize_advantages=True,
        entropy_coef=1e-2,
        recurrent=args.recurrent,
    )

    if args.load_agent:
        agent.load(args.load_agent)

    if args.mode == 'train':
        step_hooks = []

        # Linearly decay the learning rate to zero
        def lr_setter(env, agent, value):
            agent.optimizer.alpha = value

        step_hooks.append(
            experiments.LinearInterpolationHook(
                args.steps, args.lr, 0, lr_setter))

        experiments.train_agent_batch_with_evaluation(
            agent=agent,
            env=make_batch_env(False),
            eval_env=make_batch_env(True),
            outdir=args.outdir,
            steps=args.steps,
            eval_n_steps=None,
            eval_n_episodes=args.eval_n_runs,
            step_offset=args.step_offset,
            checkpoint_freq=args.checkpoint_frequency,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            save_best_so_far_agent=False,
            step_hooks=step_hooks,
            log_type=args.log_type
        )
    elif args.mode == 'check':
        return tools.make_video.check(
            env=make_env_check(), agent=agent, save_mp4=args.save_mp4)

    elif args.mode == 'growth':
        return tools.make_video.growth(
            env=make_env_check(),
            agent=agent,
            outdir=args.outdir,
            max_num=args.max_frames,
            save_mp4=args.save_mp4)

예제 #29

0

파일 보기

파일: test_stateless_recurrent_branched.py 프로젝트: daniellawson9999/quick_start

    def _test_mask_recurrent_state_at(self, gpu):
        """Check state masking/get/concat on a StatelessRecurrentBranched.

        Builds a branched link with an NStepGRU branch and a
        StatelessRecurrentSequential-wrapped NStepLSTM branch, runs it once
        via ``n_step_forward`` on two random input arrays, and compares those
        outputs with stepping the link twice while the recurrent state is
        left alone, masked at index 0, masked at index 1, masked at both, or
        split with ``get_recurrent_state_at`` and re-joined with
        ``concatenate_recurrent_states``.

        Args:
            gpu: Device id. When ``gpu >= 0`` that CUDA device is selected and
                the link is moved to it with ``to_gpu()``.
        """
        in_size = 2
        out0_size = 2
        out1_size = 3
        par = StatelessRecurrentBranched(
            L.NStepGRU(1, in_size, out0_size, 0),
            StatelessRecurrentSequential(L.NStepLSTM(1, in_size, out1_size,
                                                     0), ),
        )
        if gpu >= 0:
            chainer.cuda.get_device_from_id(gpu).use()
            par.to_gpu()
        # Array module of the link, so inputs are created on the same device.
        xp = par.xp
        seqs_x = [
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
            xp.random.uniform(-1, 1, size=(2, in_size)).astype(np.float32),
        ]
        # presumably transposed_x[t] batches timestep t of both sequences
        # -- TODO confirm against F.transpose_sequence
        transposed_x = F.transpose_sequence(seqs_x)

        # Reference result: process both inputs in a single call.
        nstep_out, nstep_rs = par.n_step_forward(seqs_x,
                                                 None,
                                                 output_mode='concat')

        # Check if n_step_forward and forward twice results are same
        def no_mask_forward_twice():
            _, rs = par(transposed_x[0], None)
            return par(transposed_x[1], rs)

        nomask_out, nomask_rs = no_mask_forward_twice()
        # NOTE(review): rows 1 and 3 of the concatenated output are assumed to
        # be the second timestep of sequence 0 and sequence 1 -- confirm
        # against the 'concat' output layout.
        # GRU
        xp.testing.assert_allclose(
            nstep_out[0].array[[1, 3]],
            nomask_out[0].array,
        )
        # LSTM
        xp.testing.assert_allclose(
            nstep_out[1].array[[1, 3]],
            nomask_out[1].array,
        )
        xp.testing.assert_allclose(nstep_rs[0].array, nomask_rs[0].array)
        # The second branch's state is a 1-tuple holding a pair of arrays
        # (presumably the LSTM's hidden and cell states -- TODO confirm).
        self.assertIsInstance(nomask_rs[1], tuple)
        self.assertEqual(len(nomask_rs[1]), 1)
        self.assertEqual(len(nomask_rs[1][0]), 2)
        xp.testing.assert_allclose(nstep_rs[1][0][0].array,
                                   nomask_rs[1][0][0].array)
        xp.testing.assert_allclose(nstep_rs[1][0][1].array,
                                   nomask_rs[1][0][1].array)

        # 1st-only mask forward twice: only 2nd should be the same
        def mask0_forward_twice():
            _, rs = par(transposed_x[0], None)
            rs = par.mask_recurrent_state_at(rs, 0)
            return par(transposed_x[1], rs)

        mask0_out, mask0_rs = mask0_forward_twice()
        # assertRaises(AssertionError) around assert_allclose expresses
        # "these outputs must differ".
        # GRU
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[0].array[1],
                mask0_out[0].array[0],
            )
        xp.testing.assert_allclose(
            nstep_out[0].array[3],
            mask0_out[0].array[1],
        )
        # LSTM
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[1].array[1],
                mask0_out[1].array[0],
            )
        xp.testing.assert_allclose(
            nstep_out[1].array[3],
            mask0_out[1].array[1],
        )

        # 2nd-only mask forward twice: only 1st should be the same
        def mask1_forward_twice():
            _, rs = par(transposed_x[0], None)
            rs = par.mask_recurrent_state_at(rs, 1)
            return par(transposed_x[1], rs)

        mask1_out, mask1_rs = mask1_forward_twice()
        # GRU
        xp.testing.assert_allclose(
            nstep_out[0].array[1],
            mask1_out[0].array[0],
        )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[0].array[3],
                mask1_out[0].array[1],
            )
        # LSTM
        xp.testing.assert_allclose(
            nstep_out[1].array[1],
            mask1_out[1].array[0],
        )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[1].array[3],
                mask1_out[1].array[1],
            )

        # both 1st and 2nd mask forward twice: both should be different
        def mask01_forward_twice():
            _, rs = par(transposed_x[0], None)
            rs = par.mask_recurrent_state_at(rs, [0, 1])
            return par(transposed_x[1], rs)

        mask01_out, mask01_rs = mask01_forward_twice()
        # GRU
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[0].array[1],
                mask01_out[0].array[0],
            )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[0].array[3],
                mask01_out[0].array[1],
            )
        # LSTM
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[1].array[1],
                mask01_out[1].array[0],
            )
        with self.assertRaises(AssertionError):
            xp.testing.assert_allclose(
                nstep_out[1].array[3],
                mask01_out[1].array[1],
            )

        # get and concat recurrent states and resume forward
        def get_and_concat_rs_forward():
            _, rs = par(transposed_x[0], None)
            rs0 = par.get_recurrent_state_at(rs, 0, unwrap_variable=True)
            rs1 = par.get_recurrent_state_at(rs, 1, unwrap_variable=True)
            concat_rs = par.concatenate_recurrent_states([rs0, rs1])
            return par(transposed_x[1], concat_rs)

        getcon_out, getcon_rs = get_and_concat_rs_forward()
        # Splitting the state per index and re-joining it must leave the
        # second-step outputs unchanged.
        # GRU
        xp.testing.assert_allclose(
            nstep_out[0].array[1],
            getcon_out[0].array[0],
        )
        xp.testing.assert_allclose(
            nstep_out[0].array[3],
            getcon_out[0].array[1],
        )
        # LSTM
        xp.testing.assert_allclose(
            nstep_out[1].array[1],
            getcon_out[1].array[0],
        )
        xp.testing.assert_allclose(
            nstep_out[1].array[3],
            getcon_out[1].array[1],
        )