Example #1
def make_stacked_gru(input_dim, hidden_dim, out_dim, layer_num):
    grus = [L.StatefulGRU(input_dim, hidden_dim)]
    grus.extend([
        L.StatefulGRU(hidden_dim * 2, hidden_dim) for _ in range(layer_num - 2)
    ])
    grus.append(L.StatefulGRU(hidden_dim * 2, out_dim))
    return chainer.ChainList(*grus)
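A minimal usage sketch, assuming the doubled input sizes (hidden_dim * 2) are there because the caller concatenates each layer's output with another vector (for example a backward pass) before feeding the next layer; the loop below is hypothetical and not part of the original snippet:

# Hypothetical: build a 4-layer stack and clear each stateful hidden state
# before processing a new sequence.
grus = make_stacked_gru(input_dim=50, hidden_dim=100, out_dim=20, layer_num=4)
for gru in grus:          # ChainList is iterable over its child links
    gru.reset_state()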
Example #2
    def __init__(self, n_units, n_vocab, n_turn,
                 share=False, listener=False, speaker=False):
        if share:
            super(NaiveLanguage, self).__init__(
                definition=L.EmbedID(n_vocab, n_units),
                interpreter=L.StatefulGRU(n_units, n_units),
                decoder=L.StatefulGRU(n_units, n_units),
            )
        elif listener:
            super(NaiveLanguage, self).__init__(
                definition=L.EmbedID(n_vocab, n_units),
                interpreter=L.StatefulGRU(n_units, n_units),
            )
        elif speaker:
            super(NaiveLanguage, self).__init__(
                definition=L.EmbedID(n_vocab, n_units),
                decoder=L.StatefulGRU(n_units, n_units),
            )
        else:
            print('choose language type. [share, listener, speaker]')
            exit()

        self.n_vocab = n_vocab
        self.n_units = n_units

        self.add_param('eos', (n_units,), dtype='f')
        self.eos.data[:] = 0
        self.add_param('bos', (n_units,), dtype='f')
        self.bos.data[:] = 0
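The eos and bos parameters above are trainable end/beginning-of-message vectors. A hedged sketch of how the bos vector might prime the decoder; the batch handling and F.broadcast_to are assumptions, since the forward pass is not part of the snippet:

# Hypothetical first decoder step: broadcast the learned BOS vector over the batch.
bos = F.broadcast_to(self.bos, (batch_size, self.n_units))
h = self.decoder(bos)  # the StatefulGRU keeps h as its internal state for later steps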
Example #3
 def __init__(self, n_layer, input_dim, n_units, dropout=0.2, cudnn=False):
     super(SMARNN,
           self).__init__(W_vp=L.Linear(input_dim, n_units),
                          W_vpa=L.Linear(n_units, n_units),
                          W_v=L.Linear(n_units, 1),
                          W_f_gru=L.StatefulGRU(input_dim * 2, n_units),
                          W_b_gru=L.StatefulGRU(input_dim * 2, n_units))
Example #4
 def __init__(self, n_vocab, n_units, train=True):
     super(RNNLMGRU, self).__init__(
         embed=L.EmbedID(n_vocab, n_units),
         l1=L.StatefulGRU(n_units, n_units),
         l2=L.StatefulGRU(n_units, n_units),
         l3=L.Linear(n_units, n_vocab),
     )
     self.train = train
Example #5
 def __init__(self):
     super(Encoder_Decoder_Model, self).__init__()
     with self.init_scope():
         self.hidden1 = L.Linear(4, 6)
         self.encoder = L.StatefulGRU(6, 2)
         self.hidden2 = L.Linear(4, 6)
         self.decoder = L.StatefulGRU(6, 2)
         self.output = L.Linear(2, 4)
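A hedged encode/decode sketch for this model; the original forward pass is not shown, so the activation (F.relu) and the state handoff via set_state are assumptions that merely follow the layer sizes in the constructor:

# Hypothetical: x and x_dec are (batch, 4) float32 arrays.
model.encoder.reset_state()
h = model.encoder(F.relu(model.hidden1(x)))        # (batch, 4) -> (batch, 6) -> (batch, 2)
model.decoder.set_state(h)                         # seed the decoder with the encoder state
y = model.output(model.decoder(F.relu(model.hidden2(x_dec))))  # -> (batch, 4)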
Example #6
 def __init__(self, n_layer, input_dim, n_units, batch_size, dropout=0.2, cudnn=False):
     super(MGRU, self).__init__(
         W_up=L.Linear(input_dim, n_units // 2),
         W_vp=L.Linear(n_units // 2, n_units // 2),
         W_uq=L.Linear(input_dim, n_units // 2),
         W_v=L.Linear(n_units // 2, 1),
         # W_g=L.Linear(input_dim * 2, input_dim * 2),
         # floor division keeps the layer sizes integers under Python 3
         W_gru_f=L.StatefulGRU(input_dim * 2, n_units // 2),
         W_gru_b=L.StatefulGRU(input_dim * 2, n_units // 2),
     )
     self.batch_size = batch_size
     self.n_units = n_units
Example #7
    def __init__(self, n_units, n_vocab, agent_type='sender'):
        super(Language, self).__init__(
            meanings=L.EmbedID(n_vocab, n_units))

        if agent_type == 'sender':
            self.add_link('decoder', L.StatefulGRU(n_units, n_units))
            self.add_param('bos', (n_units,), dtype='f')
            self.bos.data[:] = 0
        else:
            self.add_link('encoder', L.StatefulGRU(n_units, n_units))

        self.n_vocab = n_vocab
        self.n_units = n_units
Example #8
 def __init__(self, n_layer, input_dim, n_units, dropout=0.2, cudnn=False):
     super(OutputLayer,
           self).__init__(Wp_h=L.Linear(input_dim, n_units),
                          Wa_h=L.Linear(n_units, n_units),
                          W_v=L.Linear(n_units, 1),
                          W_vQVQ=L.Linear(100, 37),
                          W_f_gru=L.StatefulGRU(input_dim, n_units))
Example #9
File: example.py Project: rhythm92/gan-rl
 def __init__(self, obs_size, layer_sz, act_sz):
     super(RNNAgent, self).__init__(
         ipt=L.StatefulGRU(obs_size, layer_sz),
         out=L.Linear(layer_sz, act_sz),
     )
     self.noise_probability = 0.0  # probability to output noise
     self.action_size = act_sz
Example #10
File: example.py Project: rhythm92/gan-rl
 def __init__(self, x_sz, rand_sz, layer_sz, output_sz):
     super(GeneratorNet, self).__init__(
         ipt=L.StatefulGRU(x_sz + rand_sz, layer_sz),
         out=L.Linear(layer_sz, output_sz + 2),
     )
     self.rand_sz = rand_sz
     self.act_size = x_sz
     self.spec = fitter.EnvSpec(4)
Example #11
 def __init__(self, n_layer, input_dim, n_units, batch_size, dropout=0.2, cudnn=False):
     super(PointLayer, self).__init__(
         Wp_h=L.Linear(input_dim, n_units),
         Wa_h=L.Linear(n_units, n_units),
         W_v=L.Linear(input_dim, 1),
         # W_vQVQ=L.Linear(input_dim, q_max_word),
         # W_rq=L.Linear(input_dim, n_units),
         W_f_gru=L.StatefulGRU(input_dim, n_units))
     self.n_units = n_units
     self.batch_size = batch_size
Example #12
    def __init__(self, n_units, n_vocab):
        super(Language, self).__init__(
            definition=L.EmbedID(n_vocab, n_units),
            expression=L.Linear(n_units, n_vocab, nobias=True),
            interpreter=L.StatefulGRU(n_units, n_units),
            decoder=L.StatefulGRU(n_units, n_units),
            bn_first_interpreter=L.BatchNormalization(n_units,
                                                      use_cudnn=False),
            bn_next_interpreter=L.BatchNormalization(n_units, use_cudnn=False),
            bn_first_expression=L.BatchNormalization(n_vocab, use_cudnn=False),
            bn_next_expression=L.BatchNormalization(n_vocab, use_cudnn=False),
        )
        self.n_vocab = n_vocab
        self.n_units = n_units

        self.add_param('eos', (n_units, ), dtype='f')
        self.eos.data[:] = 0
        self.add_param('bos', (n_units, ), dtype='f')
        self.bos.data[:] = 0
Example #13
 def __init__(self,
              n_layer,
              input_dim,
              n_units,
              batch_size,
              dropout=0.2,
              cudnn=False):
     super(GARNN,
           self).__init__(W_up=L.Linear(input_dim, n_units),
                          W_vp=L.Linear(n_units, n_units),
                          W_uq=L.Linear(input_dim, n_units),
                          W_v=L.Linear(n_units, 1),
                          W_g=L.Linear(input_dim * 2, input_dim * 2),
                          W_gru=L.StatefulGRU(input_dim * 2, n_units))
     self.batch_size = batch_size
Example #14
 def __init__(self, dim):
     super(GRULearner, self).__init__(gru0=L.StatefulGRU(dim, dim))
Example #15
 def __init__(self, v, eos_id, k):
     super(Gru, self).__init__(embed=L.EmbedID(v, k),
                               H=L.StatefulGRU(k, k),
                               W=L.Linear(k, v))
     self.eos_id = eos_id
Example #16
 def setUp(self):
     in_size, out_size = 10, 8
     self.link = links.StatefulGRU(in_size, out_size)
     self.h = chainer.Variable(
         numpy.random.uniform(-1, 1, (3, out_size)).astype(numpy.float32))
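A minimal continuation sketch of how such a unit test might drive the link; the body below is an assumption, not the actual test:

# Hypothetical test step: seed the hidden state, run one GRU step, check the shape.
x = chainer.Variable(
    numpy.random.uniform(-1, 1, (3, 10)).astype(numpy.float32))
self.link.set_state(self.h)
y = self.link(x)
assert y.shape == (3, 8)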
Example #17
 def __init__(self):
     super(Tempture_NN, self).__init__()
     with self.init_scope():
         self.c1 = L.StatefulGRU(1, 32)
         self.c2 = L.StatefulGRU(32, 32)
         self.c3 = L.StatefulGRU(32, 10)
Example #18
    def __init__(self, conf, name="mono"):
        self.name = name
        conf.check()
        wscale = 0.1

        forward_lstm_attributes = {}
        forward_lstm_units = [(conf.ndim_char_embed, conf.lstm_hidden_units[0])]
        forward_lstm_units += zip(conf.lstm_hidden_units[:-1],
                                  conf.lstm_hidden_units[1:])

        for i, (n_in, n_out) in enumerate(forward_lstm_units):
            if conf.rnn_type == "dsgu":
                forward_lstm_attributes["layer_%i" % i] = StatefulDSGU(
                    n_in, n_out)
            elif conf.rnn_type == "lstm":
                if conf.lstm_apply_batchnorm:
                    forward_lstm_attributes["layer_%i" % i] = BNLSTM(
                        n_in, n_out)
                else:
                    forward_lstm_attributes["layer_%i" % i] = L.LSTM(
                        n_in, n_out)
            elif conf.rnn_type == "gru":
                forward_lstm_attributes["layer_%i" % i] = L.StatefulGRU(
                    n_in, n_out)
            else:
                raise NotImplementedError()

        self.forward_lstm = StackedLSTM(**forward_lstm_attributes)
        self.forward_lstm.n_layers = len(forward_lstm_units)
        self.forward_lstm.apply_dropout = conf.lstm_apply_dropout

        backward_lstm_attributes = {}
        backward_lstm_units = [(conf.ndim_char_embed,
                                conf.lstm_hidden_units[0])]
        backward_lstm_units += zip(conf.lstm_hidden_units[:-1],
                                   conf.lstm_hidden_units[1:])

        for i, (n_in, n_out) in enumerate(backward_lstm_units):
            if conf.rnn_type == "dsgu":
                backward_lstm_attributes["layer_%i" % i] = StatefulDSGU(
                    n_in, n_out)
            elif conf.rnn_type == "lstm":
                if conf.lstm_apply_batchnorm:
                    backward_lstm_attributes["layer_%i" % i] = BNLSTM(
                        n_in, n_out)
                else:
                    backward_lstm_attributes["layer_%i" % i] = L.LSTM(
                        n_in, n_out)
            elif conf.rnn_type == "gru":
                backward_lstm_attributes["layer_%i" % i] = L.StatefulGRU(
                    n_in, n_out)
            else:
                raise NotImplementedError()

        self.backward_lstm = StackedLSTM(**backward_lstm_attributes)
        self.backward_lstm.n_layers = len(backward_lstm_units)
        self.backward_lstm.apply_dropout = conf.lstm_apply_dropout

        self.char_embed = L.EmbedID(conf.n_vocab,
                                    conf.ndim_char_embed,
                                    ignore_label=-1)

        self.f_ym = L.Linear(conf.lstm_hidden_units[-1],
                             conf.ndim_m,
                             nobias=True)
        self.f_um = L.Linear(conf.lstm_hidden_units[-1],
                             conf.ndim_m,
                             nobias=True)

        attention_fc_attributes = {}
        if len(conf.attention_fc_hidden_units) == 0:
            attention_fc_hidden_units = [(conf.ndim_m, 1)]
        else:
            attention_fc_hidden_units = [(conf.ndim_m,
                                          conf.attention_fc_hidden_units[0])]
            attention_fc_hidden_units += zip(
                conf.attention_fc_hidden_units[:-1],
                conf.attention_fc_hidden_units[1:])
            attention_fc_hidden_units += [(conf.attention_fc_hidden_units[-1],
                                           1)]
        for i, (n_in, n_out) in enumerate(attention_fc_hidden_units):
            attention_fc_attributes["layer_%i" % i] = L.Linear(n_in,
                                                               n_out,
                                                               wscale=wscale)
        self.attention_fc = FullyConnectedNetwork(**attention_fc_attributes)
        self.attention_fc.n_layers = len(attention_fc_hidden_units)
        self.attention_fc.hidden_activation_function = conf.attention_fc_hidden_activation_function
        self.attention_fc.output_activation_function = conf.attention_fc_output_activation_function
        self.attention_fc.apply_dropout = conf.attention_fc_apply_dropout

        self.f_rg = L.Linear(conf.lstm_hidden_units[-1],
                             conf.ndim_g,
                             nobias=True)
        self.f_ug = L.Linear(conf.lstm_hidden_units[-1],
                             conf.ndim_g,
                             nobias=True)

        reader_fc_attributes = {}
        if len(conf.reader_fc_hidden_units) == 0:
            reader_fc_hidden_units = [(conf.ndim_g, conf.n_vocab)]
        else:
            reader_fc_hidden_units = [(conf.ndim_g,
                                       conf.reader_fc_hidden_units[0])]
            reader_fc_hidden_units += zip(conf.reader_fc_hidden_units[:-1],
                                          conf.reader_fc_hidden_units[1:])
            reader_fc_hidden_units += [(conf.reader_fc_hidden_units[-1],
                                        conf.n_vocab)]
        for i, (n_in, n_out) in enumerate(reader_fc_hidden_units):
            reader_fc_attributes["layer_%i" % i] = L.Linear(n_in,
                                                            n_out,
                                                            wscale=wscale)
        self.reader_fc = FullyConnectedNetwork(**reader_fc_attributes)
        self.reader_fc.n_layers = len(reader_fc_hidden_units)
        self.reader_fc.hidden_activation_function = conf.reader_fc_hidden_activation_function
        self.reader_fc.output_activation_function = conf.reader_fc_output_activation_function
        self.reader_fc.apply_dropout = conf.attention_fc_apply_dropout

        if conf.use_gpu:
            self.forward_lstm.to_gpu()
            self.backward_lstm.to_gpu()
            self.char_embed.to_gpu()
            self.attention_fc.to_gpu()
            self.reader_fc.to_gpu()
            self.f_ym.to_gpu()
            self.f_um.to_gpu()
            self.f_rg.to_gpu()
            self.f_ug.to_gpu()

        self.optimizer_char_embed = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_char_embed.setup(self.char_embed)
        self.optimizer_char_embed.add_hook(GradientClipping(10.0))

        self.optimizer_forward_lstm = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_forward_lstm.setup(self.forward_lstm)
        self.optimizer_forward_lstm.add_hook(GradientClipping(10.0))

        self.optimizer_backward_lstm = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_backward_lstm.setup(self.backward_lstm)
        self.optimizer_backward_lstm.add_hook(GradientClipping(10.0))

        self.optimizer_f_um = optimizers.Adam(alpha=conf.learning_rate,
                                              beta1=conf.gradient_momentum)
        self.optimizer_f_um.setup(self.f_um)
        self.optimizer_f_um.add_hook(GradientClipping(10.0))

        self.optimizer_f_ym = optimizers.Adam(alpha=conf.learning_rate,
                                              beta1=conf.gradient_momentum)
        self.optimizer_f_ym.setup(self.f_ym)
        self.optimizer_f_ym.add_hook(GradientClipping(10.0))

        self.optimizer_attention_fc = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_attention_fc.setup(self.attention_fc)
        self.optimizer_attention_fc.add_hook(GradientClipping(10.0))

        self.optimizer_f_rg = optimizers.Adam(alpha=conf.learning_rate,
                                              beta1=conf.gradient_momentum)
        self.optimizer_f_rg.setup(self.f_rg)
        self.optimizer_f_rg.add_hook(GradientClipping(10.0))

        self.optimizer_f_ug = optimizers.Adam(alpha=conf.learning_rate,
                                              beta1=conf.gradient_momentum)
        self.optimizer_f_ug.setup(self.f_ug)
        self.optimizer_f_ug.add_hook(GradientClipping(10.0))

        self.optimizer_reader_fc = optimizers.Adam(
            alpha=conf.learning_rate, beta1=conf.gradient_momentum)
        self.optimizer_reader_fc.setup(self.reader_fc)
        self.optimizer_reader_fc.add_hook(GradientClipping(10.0))
Example #19
        self.f = F.sigmoid(f)
        self.o = F.sigmoid(o)

        self.c = self.a * self.i + self.f * c
        self.h = self.o * F.tanh(self.c)
        return self.h



if __name__ == "__main__":
    import numpy, chainer, chainer.functions as F
    from chainer import links as L

    x = chainer.Variable(numpy.ones((10, 10), dtype=numpy.float32))
    # gru = TreeLSTM(lateral_init=1, upward_init=1,bias_init=0, forget_bias_init=0,in_size=10, out_size=10,children=2)
    gru = L.StatefulGRU(bias_init=0, inner_init=1, in_size=10, out_size=10)
    lstm = L.LSTM(lateral_init=1, upward_init=1,
                  bias_init=0, forget_bias_init=0, in_size=10, out_size=10)
    y1 = gru(x)[1]
    y2 = lstm(x)
    y3 = F.sigmoid(x)
    y3 += F.sigmoid(x)

    print(y1.data)
    print(y2.data)

    print(y1.data.shape)
    print(y2.data.shape)

    print(numpy.allclose(y1.data, y2.data))
    y3.backward()
Example #20
 def __init__(self, v, k):
     super(MyGRU, self).__init__(
         embed=L.EmbedID(v, k),
         H=L.StatefulGRU(k, k),
         W=L.Linear(k, v),
     )
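A hedged one-step forward sketch for this embed -> GRU -> linear pattern; MyGRU's actual __call__ is not part of the snippet, so the step function below is an assumption:

# Hypothetical: word_ids is an int32 array of shape (batch,).
def step(model, word_ids):
    e = model.embed(word_ids)   # (batch,) -> (batch, k)
    h = model.H(e)              # the StatefulGRU carries its hidden state across calls
    return model.W(h)           # unnormalized scores over the vocabulary, (batch, v)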
Example #21
File: example.py Project: rhythm92/gan-rl
 def __init__(self, action_size, observation_size, layer_sz):
     super(DiscriminatorNet, self).__init__(
         ipt=L.StatefulGRU(action_size + observation_size + 2,
                           layer_sz),  # the first linear layer
         out=L.Linear(layer_sz, 1),  # the feed-forward output layer
     )