Example no. 1
    def __init__(self,
                 src_vocab_size,
                 input_size,
                 output_size,
                 bidirectional=False,
                 with_ln=False,
                 prefix='Encoder', **kwargs):

        super(Encoder, self).__init__()

        self.output_size = output_size
        f = lambda name: str_cat(prefix, name)  # return 'Encoder_' + parameters name

        self.src_lookup_table = nn.Embedding(src_vocab_size, wargs.src_wemb_size, padding_idx=PAD)

        if wargs.enc_rnn_type == 'gru':
            self.forw_gru = GRU(input_size, output_size, with_ln=with_ln, prefix=f('Forw'))
            self.back_gru = GRU(output_size, output_size, with_ln=with_ln, prefix=f('Back'))
        elif wargs.enc_rnn_type == 'sru':
            self.rnn = SRU(
                    input_size=input_size,
                    hidden_size=output_size,
                    num_layers=wargs.enc_layer_cnt,
                    dropout=wargs.drop_rate,
                    bidirectional=bidirectional)
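
A minimal, self-contained sketch of the same encoder pattern, for readers without the project's code: the custom GRU/SRU wrappers and the wargs config are replaced by the standard torch.nn.GRU, and all sizes (and the PAD index) are illustrative assumptions, not values from the original.

import torch
import torch.nn as nn

PAD = 0  # assumed padding index

class ToyEncoder(nn.Module):
    def __init__(self, src_vocab_size=1000, emb_size=64, hid_size=128):
        super().__init__()
        self.src_lookup_table = nn.Embedding(src_vocab_size, emb_size, padding_idx=PAD)
        # one bidirectional GRU instead of the separate forward/backward passes above
        self.rnn = nn.GRU(emb_size, hid_size, batch_first=True, bidirectional=True)

    def forward(self, src_ids):
        emb = self.src_lookup_table(src_ids)   # (batch, seq, emb_size)
        outputs, _ = self.rnn(emb)             # (batch, seq, 2 * hid_size)
        return outputs

enc = ToyEncoder()
print(enc(torch.randint(1, 1000, (2, 7))).shape)  # torch.Size([2, 7, 256])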
Example no. 2
    def __init__(self,
                 src_vocab_size,
                 input_size,
                 output_size,
                 with_ln=False,
                 prefix='Encoder',
                 **kwargs):

        super(Encoder, self).__init__()

        self.output_size = output_size
        f = lambda name: str_cat(prefix, name)  # return 'Encoder_' + parameters name

        self.src_lookup_table = nn.Embedding(src_vocab_size,
                                             wargs.src_wemb_size,
                                             padding_idx=PAD)

        self.forw_gru = GRU(input_size,
                            output_size,
                            with_ln=with_ln,
                            prefix=f('Forw'))
        self.back_gru = GRU(output_size,
                            output_size,
                            with_ln=with_ln,
                            prefix=f('Back'))
Example no. 3
    def __init__(self,
                 src_vocab_size,
                 input_size,
                 output_size,
                 with_ln=False,
                 prefix='Encoder',
                 **kwargs):

        super(Encoder, self).__init__()

        self.output_size = output_size
        f = lambda name: str_cat(prefix, name)  # return 'Encoder_' + parameters name

        self.src_lookup_table = nn.Embedding(src_vocab_size,
                                             wargs.src_wemb_size,
                                             padding_idx=PAD)

        self.forw_gru = GRU(input_size,
                            output_size,
                            with_ln=with_ln,
                            prefix=f('Forw'))

        #self.relay0 = RelationLayer(output_size, output_size, wargs.filter_window_size,
        #                            wargs.filter_feats_size, wargs.mlp_size)
        #self.laynorm0 = LayerNormalization(wargs.enc_hid_size)

        self.back_gru = GRU(output_size,
                            output_size,
                            with_ln=with_ln,
                            prefix=f('Back'))

        self.rn = RelationLayer(output_size, output_size,
                                wargs.filter_window_size,
                                wargs.filter_feats_size, wargs.mlp_size)
Example no. 4
    def __init__(self,
                 trg_vocab_size,
                 trg_lookup_table,
                 max_out=True,
                 com=True):

        super(Decoder, self).__init__()

        self.max_out = max_out
        self.attention = Attention(
            wargs.dec_hid_size if com else wargs.dec_hid_size_pri,
            wargs.align_size if com else wargs.align_size_pri)
        self.trg_lookup_table = trg_lookup_table
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.gru1 = GRU(
            wargs.trg_wemb_size if com else wargs.trg_wemb_size_pri,
            wargs.dec_hid_size if com else wargs.dec_hid_size_pri)
        self.gru2 = GRU(wargs.enc_hid_size if com else wargs.enc_hid_size_pri,
                        wargs.dec_hid_size if com else wargs.dec_hid_size_pri)

        out_size = 2 * wargs.out_size if max_out else wargs.out_size
        self.ls = nn.Linear(
            wargs.dec_hid_size if com else wargs.dec_hid_size_pri, out_size)
        self.ly = nn.Linear(
            wargs.trg_wemb_size if com else wargs.trg_wemb_size_pri, out_size)
        self.lc = nn.Linear(
            wargs.enc_hid_size if com else wargs.enc_hid_size_pri, out_size)
Example no. 5
    def __init__(self, trg_vocab_size, with_ln=False, max_out=True):

        super(Decoder, self).__init__()

        self.max_out = max_out
        self.attention = Attention(wargs.dec_hid_size, wargs.align_size)
        self.trg_lookup_table = nn.Embedding(trg_vocab_size, wargs.trg_wemb_size, padding_idx=PAD)
        self.tanh = nn.Tanh()

        if wargs.dec_rnn_type == 'gru':
            self.gru1 = GRU(wargs.trg_wemb_size, wargs.dec_hid_size, with_ln=with_ln)
            self.gru2 = GRU(wargs.enc_hid_size, wargs.dec_hid_size, with_ln=with_ln)
        elif wargs.dec_rnn_type == 'sru':
            self.gru1 = SRU(input_size=wargs.trg_wemb_size, hidden_size=wargs.dec_hid_size,
                    num_layers=wargs.dec_layer_cnt, dropout=0., bidirectional=False)
            self.gru2 = SRU(input_size=2*wargs.enc_hid_size, hidden_size=wargs.dec_hid_size,
                    num_layers=wargs.dec_layer_cnt, dropout=0., bidirectional=False)

        out_size = 2 * wargs.out_size if max_out else wargs.out_size
        self.ls = nn.Linear(wargs.dec_hid_size, out_size)
        self.ly = nn.Linear(wargs.trg_wemb_size, out_size)
        self.lc = nn.Linear(2*wargs.enc_hid_size, out_size)

        self.classifier = Classifier(wargs.out_size, trg_vocab_size,
                                     self.trg_lookup_table if wargs.proj_share_weight is True else None)
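
The proj_share_weight flag above suggests the output projection reuses the target embedding matrix. A minimal sketch of that weight tying, with a hypothetical ToyClassifier standing in for the project's Classifier; tying only works when the classifier input width equals the embedding dimension (32 in this toy setup).

import torch
import torch.nn as nn

class ToyClassifier(nn.Module):
    def __init__(self, out_size, trg_vocab_size, shared_embedding=None):
        super().__init__()
        self.proj = nn.Linear(out_size, trg_vocab_size, bias=False)
        if shared_embedding is not None:
            # tie the softmax projection to the target embedding matrix
            self.proj.weight = shared_embedding.weight

    def forward(self, x):
        return self.proj(x)

emb = nn.Embedding(500, 32, padding_idx=0)
clf = ToyClassifier(32, 500, shared_embedding=emb)
print(clf(torch.randn(4, 32)).shape)  # torch.Size([4, 500])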
Example no. 6
    def __init__(self,
                 src_vocab_size,
                 d_in,
                 d_out,
                 with_ln=False,
                 prefix='Encoder', **kwargs):

        super(Encoder, self).__init__()

        self.d_out = d_out
        f = lambda name: str_cat(prefix, name)  # return 'Encoder_' + parameters name

        self.src_lookup_table = nn.Embedding(src_vocab_size, d_in, padding_idx=PAD)

        self.forw_gru = GRU(d_in, d_out, with_ln=with_ln, prefix=f('Forw'))

        self.rnlay0 = RelationLayer(d_out, d_out, wargs.fltr_windows,
                                    wargs.d_fltr_feats, wargs.d_mlp)
        #self.map_in_out = nn.Linear(d_in, d_out)
        #self.laynorm0 = Layer_Norm(wargs.enc_hid_size)

        self.back_gru = GRU(d_out, d_out, with_ln=with_ln, prefix=f('Back'))

        self.rnlay1 = RelationLayer(d_out, d_out, wargs.fltr_windows,
                                    wargs.d_fltr_feats, wargs.d_mlp)
        #self.laynorm1 = LayerNormalization(wargs.enc_hid_size)
        #self.dropout = nn.Dropout(0.1)

        self.down0 = nn.Linear(d_in + d_out, d_out)
        self.down1 = nn.Linear(d_in + 2 * d_out, d_out)
        self.down2 = nn.Linear(d_in + 3 * d_out, d_out)
        self.down3 = nn.Linear(d_in + 4 * d_out, d_out)
Example no. 7
class CHAR_RNN(nn.Module):
    def __init__(self,
                 vocab_size,
                 hidden_size=256,
                 lr=2e-3,
                 rnn='gru',
                 sampling='sample'):
        super(CHAR_RNN, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.sampling = sampling

        if rnn == 'rnn':
            self.rnn = RNN(self.vocab_size, self.hidden_size)
        elif rnn == 'gru':
            self.rnn = GRU(self.vocab_size, self.hidden_size)
        else:
            raise NotImplementedError()

        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, idxs):
        pred = self.rnn(idxs)
        return pred

    def lossFn(self, inputs, targets):
        loss = torch.tensor(0.)
        for in_idxs, trg_idxs in zip(inputs.transpose(0, 1),
                                     targets.transpose(0, 1)):
            preds = self(in_idxs)
            loss += self.criterion(input=preds, target=trg_idxs)
        return loss

    def sample(self, seed_ix, n, ix_to_char):
        ixes = []
        self.init_hidden()
        current_ix = seed_ix
        for i in range(n):
            # sample
            if self.sampling == 'sample':
                probs = torch.softmax(self(current_ix), dim=-1)
                pred_ix = np.random.choice(len(probs),
                                           p=probs.detach().numpy())
            elif self.sampling == 'max':
                pred_ix = torch.argmax(self(current_ix))
            else:
                raise NotImplementedError()

            ixes.append(pred_ix)
            current_ix = torch.tensor(pred_ix)
        pred_chars = ''.join([ix_to_char[int(ix)] for ix in ixes])
        return pred_chars

    def init_hidden(self):
        self.rnn.init_hidden()
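
The sampling branch above either draws the next index from the softmax distribution ('sample') or takes the arg-max ('max'). A tiny stand-alone illustration of both strategies on dummy logits, independent of the CHAR_RNN class:

import numpy as np
import torch

logits = torch.randn(5)                     # scores over a 5-symbol vocabulary
probs = torch.softmax(logits, dim=-1)

p = probs.double().numpy()                  # float64 so the probabilities sum to 1 for NumPy
sampled_ix = np.random.choice(len(p), p=p)  # 'sample': draw from the distribution
greedy_ix = int(torch.argmax(logits))       # 'max': always pick the most likely symbol
print(sampled_ix, greedy_ix)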
Example no. 8
    def __init__(self, trg_vocab_size, max_out=True):

        super(Decoder, self).__init__()

        self.max_out = max_out
        self.attention = Attention(wargs.dec_hid_size, wargs.align_size)
        self.trg_lookup_table = nn.Embedding(trg_vocab_size,
                                             wargs.trg_wemb_size,
                                             padding_idx=PAD)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.gru1 = GRU(wargs.trg_wemb_size, wargs.dec_hid_size)
        #self.gru1 = GRU(wargs.trg_wemb_size, wargs.dec_hid_size, enc_hid_size=wargs.trg_wemb_size)
        self.gru2 = GRU(wargs.enc_hid_size, wargs.dec_hid_size)

        out_size = 2 * wargs.out_size if max_out else wargs.out_size
        self.ls = nn.Linear(wargs.dec_hid_size, out_size)
        self.ly = nn.Linear(wargs.trg_wemb_size, out_size)
        self.lc = nn.Linear(wargs.enc_hid_size, out_size)
        #self.map_vocab = nn.Linear(wargs.out_size, trg_vocab_size)

        self.classifier = Classifier(
            wargs.out_size, trg_vocab_size,
            self.trg_lookup_table if wargs.proj_share_weight is True else None)

        if wargs.dynamic_cyk_decoding is True:
            self.gru2 = GRU(wargs.trg_wemb_size,
                            wargs.dec_hid_size,
                            enc_hid_size=wargs.dec_hid_size)
            self.fwz = wargs.filter_window_size
            self.ffs = wargs.filter_feats_size

            #self.ha = nn.Linear(wargs.enc_hid_size, wargs.align_size)
            self.ha_btg = nn.Linear(wargs.enc_hid_size, wargs.align_size)
            self.U_att1 = nn.Linear(wargs.enc_hid_size, wargs.enc_hid_size)
            self.U_att2 = nn.Linear(wargs.enc_hid_size, wargs.enc_hid_size)
            #self.ha = nn.Sequential(
            #    nn.Linear(wargs.enc_hid_size, wargs.mlp_size),
            #nn.LeakyReLU(0.1),
            #nn.Linear(wargs.mlp_size, wargs.mlp_size),
            #nn.LeakyReLU(0.1),
            #    nn.Linear(wargs.mlp_size, wargs.align_size)
            #nn.LeakyReLU(0.1)
            #)

            for i in range(len(self.fwz)):
                self.l_f1_0 = nn.Linear(wargs.enc_hid_size, wargs.enc_hid_size)
                self.l_f1_1 = nn.Linear(wargs.enc_hid_size, wargs.enc_hid_size)
                self.l_conv = nn.Sequential(
                    nn.Conv1d(wargs.enc_hid_size,
                              self.ffs[i],
                              kernel_size=self.fwz[i],
                              stride=1),
                    nn.ReLU()
                    #nn.BatchNorm2d(self.ffs[i])
                )
                self.l_f2 = nn.Linear(self.ffs[i], wargs.enc_hid_size)
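
The out_size = 2 * wargs.out_size line used by these decoders when max_out is set points to a max-out readout: the pre-activation is produced at twice the target width and then reduced by taking the max over consecutive pairs. A small sketch of that reduction with purely illustrative shapes (the decoders' actual forward pass is not shown in these snippets):

import torch

out_size = 6                        # assumed readout width
pre = torch.randn(4, 2 * out_size)  # e.g. ls(s) + ly(y) + lc(c) at double width
maxout = pre.view(4, out_size, 2).max(dim=-1)[0]
print(maxout.shape)                 # torch.Size([4, 6])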
Example no. 9
    def __init__(self,
                 input_size,
                 output_size,
                 with_ln=False,
                 prefix='Encoder', **kwargs):

        super(Encoder, self).__init__()

        self.output_size = output_size
        f = lambda name: str_cat(prefix, name)  # return 'Encoder_' + parameters name

        self.forw_gru = GRU(input_size, output_size, with_ln=with_ln, prefix=f('Forw'))
        self.back_gru = GRU(output_size, output_size, with_ln=with_ln, prefix=f('Back'))
Example no. 10
    def build(self):
        print('\t building rnn cell...')
        if self.cell=='gru':
            hidden_layer=GRU(self.rng,
                             self.n_input,self.n_hidden,self.n_batch,
                             self.x,self.E,self.x_mask,
                             self.is_train,self.p)
        else:
            hidden_layer=LSTM(self.rng,
                              self.n_input,self.n_hidden,self.n_batch,
                              self.x,self.E,self.x_mask,
                              self.is_train,self.p)
        print('\t building softmax output layer...')
        softmax_shape=(self.n_hidden,self.n_output)
        output_layer=H_Softmax(softmax_shape,
                               hidden_layer.activation,
                               self.y_node,self.y_choice,self.y_bit_mask,self.y_mask)
        self.params=[self.E,]
        self.params+=hidden_layer.params
        self.params+=output_layer.params

        cost=output_layer.activation
        lr=T.scalar("lr")
        gparams=[T.clip(T.grad(cost,p),-10,10) for p in self.params]
        updates=sgd(self.params,gparams,lr)

        self.train=theano.function(inputs=[self.x,self.x_mask,self.y_node,self.y_choice,self.y_bit_mask,self.y_mask,self.n_batch,lr],
                                   outputs=cost,
                                   updates=updates,
                                   givens={self.is_train:np.cast['int32'](1)})

        self.test=theano.function(inputs=[self.x,self.x_mask,self.y_node,self.y_choice,self.y_bit_mask,self.y_mask,self.n_batch],
                                   outputs=cost,
                                   givens={self.is_train:np.cast['int32'](0)})
Example no. 11
    def __init__(self, trg_vocab_size, max_out=True):

        super(Decoder, self).__init__()

        self.max_out = max_out
        self.attention = Attention(wargs.dec_hid_size, wargs.align_size)
        self.trg_lookup_table = nn.Embedding(trg_vocab_size,
                                             wargs.trg_wemb_size, padding_idx=PAD)
        self.tanh = nn.Tanh()
        self.gru1 = GRU(wargs.trg_wemb_size, wargs.dec_hid_size)
        #self.gru2 = GRU(wargs.enc_hid_size, wargs.dec_hid_size, with_two_attents=True)
        self.gru2 = GRU(wargs.enc_hid_size, wargs.dec_hid_size)

        out_size = 2 * wargs.out_size if max_out else wargs.out_size
        self.ls = nn.Linear(wargs.dec_hid_size, out_size)
        self.ly = nn.Linear(wargs.trg_wemb_size, out_size)
        self.lc = nn.Linear(wargs.enc_hid_size, out_size)
Example no. 12
def create_model():
    if args.model_type == 'lstm':
        return LSTM(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'rnn':
        return RNN(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'irnn':
        return IRNN(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'gru':
        return GRU(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'rnn+':
        if args.layers == 1:
            args.layers = 2
        return IntersectionRNN(input_size=dset.input_dimension,
                                      hidden_size=args.hx,
                                      output_size=dset.output_dimension,
                                      layers=args.layers,
                                      drop=args.drop,
                                      rec_drop=args.rec_drop)
    elif args.model_type == 'peephole':
        return Peephole(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    elif args.model_type == 'ugrnn':
        return UGRNN(input_size=dset.input_dimension,
                                          hidden_size=args.hx,
                                          output_size=dset.output_dimension,
                                          layers=args.layers,
                                          drop=args.drop,
                                          rec_drop=args.rec_drop)
    else:
        raise Exception
Example no. 13
    def __init__(self, trg_vocab_size, max_out=True):

        super(Decoder, self).__init__()

        self.max_out = max_out
        self.attention = Attention(wargs.dec_hid_size, wargs.align_size)
        self.trg_lookup_table = nn.Embedding(trg_vocab_size, wargs.trg_wemb_size, padding_idx=PAD)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.gru1 = GRU(wargs.trg_wemb_size, wargs.dec_hid_size)
        #self.gru1 = GRU(wargs.trg_wemb_size, wargs.dec_hid_size, enc_hid_size=wargs.trg_wemb_size)
        self.gru2 = GRU(wargs.enc_hid_size, wargs.dec_hid_size)

        out_size = 2 * wargs.out_size if max_out else wargs.out_size
        self.ls = nn.Linear(wargs.dec_hid_size, out_size)
        self.ly = nn.Linear(wargs.trg_wemb_size, out_size)
        self.lc = nn.Linear(wargs.enc_hid_size, out_size)

        self.classifier = Classifier(wargs.out_size, trg_vocab_size,
                                     self.trg_lookup_table if wargs.proj_share_weight is True else None)
Example no. 14
    def __init__(self,
                 vocab_size,
                 hidden_size=256,
                 lr=2e-3,
                 rnn='gru',
                 sampling='sample'):
        super(CHAR_RNN, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.sampling = sampling

        if rnn == 'rnn':
            self.rnn = RNN(self.vocab_size, self.hidden_size)
        elif rnn == 'gru':
            self.rnn = GRU(self.vocab_size, self.hidden_size)
        else:
            raise NotImplementedError()

        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.criterion = nn.CrossEntropyLoss()
Example no. 15
def run_model(which='all'):
    if which in ['ann', 'all', 'main', 'standard']:
        model = ANN(emb_size, vocab_size, hid_dim, hid_num, class_num,
                    sent_len).cuda()
        ann_loss = train(model, x, target, ann=True)
        plt.plot(ann_loss, label='ann')
    if which in ['wann', 'all', 'standard']:
        model = WANN(emb_size, vocab_size, hid_dim, hid_num, class_num,
                     sent_len).cuda()
        wann_loss = train(model, x, target, ann=True)
        plt.plot(wann_loss, label='wann')
    if which in ['rnn', 'all', 'main']:
        model = RNN(emb_size, vocab_size, hid_dim, hid_num, class_num).cuda()
        rnn_loss = train(model, x, target)
        plt.plot(rnn_loss, label='rnn')
    if which in ['exrnn', 'all']:
        model = EXRNN(emb_size, vocab_size, hid_dim, hid_num, class_num, 2000,
                      2000).cuda()
        exrnn_loss = train(model, x, target)
        plt.plot(exrnn_loss, label='exrnn')
    if which in ['exmem', 'all']:
        model = EXRNN(emb_size,
                      vocab_size,
                      hid_dim,
                      hid_num,
                      class_num,
                      2000,
                      forget_dim=None).cuda()
        exmem_loss = train(model, x, target)
        plt.plot(exmem_loss, label='exmem')
    if which in ['lstm', 'all', 'main']:
        model = LSTM(emb_size, vocab_size, hid_dim, hid_num, class_num).cuda()
        lstm_loss = train(model, x, target)
        plt.plot(lstm_loss, label='lstm')
    if which in ['gru', 'all', 'main']:
        model = GRU(emb_size, vocab_size, hid_dim, hid_num, class_num).cuda()
        gru_loss = train(model, x, target)
        plt.plot(gru_loss, label='gru')
    # plt.ylim([0, 2])
    plt.legend()
    plt.grid(True)
    plt.show()
Example no. 16
    def build(self):
        print('building rnn cell...')
        if self.cell == 'gru':
            hidden_layer = GRU(self.rng, self.n_input, self.n_hidden,
                               self.n_batch, self.x, self.E, self.x_mask,
                               self.is_train, self.p)
        else:
            hidden_layer = LSTM(self.rng, self.n_input, self.n_hidden,
                                self.n_batch, self.x, self.E, self.x_mask,
                                self.is_train, self.p)
        print('building softmax output layer...')
        output_layer = level_softmax(self.n_hidden, self.n_output,
                                     hidden_layer.activation, self.y)
        cost = self.categorical_crossentropy(output_layer.activation)

        self.params = [
            self.E,
        ]
        self.params += hidden_layer.params
        self.params += output_layer.params

        lr = T.scalar("lr")
        gparams = [T.clip(T.grad(cost, p), -10, 10) for p in self.params]
        updates = sgd(self.params, gparams, lr)

        self.train = theano.function(
            inputs=[
                self.x, self.x_mask, self.y, self.y_mask, self.n_batch, lr
            ],
            outputs=cost,
            updates=updates,
            givens={self.is_train: np.cast['int32'](1)})

        self.predict = theano.function(
            inputs=[self.x, self.x_mask, self.n_batch],
            outputs=output_layer.predicted,
            givens={self.is_train: np.cast['int32'](0)})
        self.test = theano.function(
            inputs=[self.x, self.x_mask, self.y, self.y_mask, self.n_batch],
            outputs=cost,
            givens={self.is_train: np.cast['int32'](0)})
Example no. 17
def gru(x, a, rew, rnn_state, n_hidden, n, activation, output_size):
    hidden = tf.concat([x, a, rew], 1)
    # use layer normalization for gru
    gru_cell = GRU(n_hidden, activation=activation)
    #    gru_cell = tf.nn.rnn_cell.GRUCell(n_hidden, activation=activation, kernel_initializer=tf.initializers.orthogonal(), bias_initializer=tf.initializers.zeros())
    rnn_in = tf.expand_dims(hidden, [0])
    step_size = tf.minimum(tf.shape(rew)[:1], n)
    gru_outputs, gru_state = tf.nn.dynamic_rnn(gru_cell,
                                               rnn_in,
                                               initial_state=rnn_state,
                                               sequence_length=step_size,
                                               time_major=False)
    state_out = gru_state[:1, :]
    rnn_out = tf.reshape(gru_outputs, [-1, n_hidden])
    out = tf.layers.dense(
        rnn_out,
        units=output_size,
        kernel_initializer=tf.initializers.glorot_normal(),
        bias_initializer=tf.zeros_initializer(),
    )
    # layer normalization for dense layer
    norm_out = tf.contrib.layers.layer_norm(out)
    return norm_out, state_out
Example no. 18
    def train(self, epochs, learning_rate, kernel_size, hidden_size, model_cls,
              interaction, dropout):
        if model_cls == "cnn":
            model = CNN(embedding=self.data_train.vocab_embedding,
                        embedding_size=self.data_train.vocab_embedding_size,
                        lengths=self.data_train.lengths(),
                        kernel_size=kernel_size,
                        hidden_size=hidden_size,
                        interaction=interaction,
                        dropout=dropout)
        else:
            model = GRU(embedding=self.data_train.vocab_embedding,
                        embedding_size=self.data_train.vocab_embedding_size,
                        encoding_size=hidden_size,
                        interaction=interaction,
                        dropout=dropout)
        if self.use_gpu:
            model = model.cuda()

        loader = self.data_train.get_loader()
        loss_fn = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        losses = []
        accuracies = []
        for epoch in range(1, epochs + 1):
            e_loss = []
            print("\nStarting epoch {}".format(epoch))
            for i, (s1, s2, labels) in enumerate(loader):
                if self.use_gpu:
                    s1, s2, labels = s1.cuda(), s2.cuda(), labels.cuda()
                model.train()
                optimizer.zero_grad()

                # Forward pass
                logits = model(s1, s2)
                instance_loss = loss_fn(logits, labels)

                # Backward and optimize
                instance_loss.backward()
                optimizer.step()

                losses.append(instance_loss.item())
                e_loss.append(instance_loss.item())

                # validate every 100 iterations
                if i > 0 and i % 100 == 0:
                    val_acc = self.validate(model)
                    accuracies.append(val_acc)
                    print(
                        'Epoch: [{}/{}]\tStep: [{}/{}]\tValidation Acc: {:.4f}'
                        .format(epoch, epochs, i, len(loader), val_acc))
#             self.analyzer.plot_live_lr(e_loss, title="Epoch {}".format(epoch))

        avg_acc = sum(accuracies[-5:]) / 5

        self.analyzer.record(model.cpu(),
                             losses,
                             epochs=epochs,
                             accuracies=accuracies,
                             learning_rate=learning_rate,
                             hidden_size=hidden_size,
                             kernel_size=kernel_size,
                             validation_accuracy=avg_acc,
                             model_name=model_cls,
                             dropout=dropout,
                             interaction=interaction,
                             data_length=32 * len(loader))
        self.analyzer.print_validation_results(self, model_cls, model)
        print("Final Accuracy: {}".format(avg_acc))
Example no. 19
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text


haikus_dataset = sequences.map(split_input_target)

BATCH_SIZE = 64
BUFFER_SIZE = 1000

haikus_dataset = (haikus_dataset.shuffle(BUFFER_SIZE).batch(
    BATCH_SIZE, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE))

model = GRU(vocab_size=len(ids_from_chars.get_vocabulary()),
            embedding_dim=256,
            rnn_units=1024)

for input_example_batch, target_example_batch in haikus_dataset.take(1):
    print(input_example_batch.shape,
          "# (batch_size, sequence_length)")

    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape,
          "# (batch_size, sequence_length, vocab_size)")

    example_sequence_input = input_example_batch[0]
    example_sequence_prediction_logits = example_batch_predictions[0]
    example_sequence_prediction_indice = tf.squeeze(tf.random.categorical(
        example_sequence_prediction_logits, num_samples=1),
                                                    axis=-1).numpy()
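
A tiny runnable sketch of the tf.random.categorical sampling step used above, on dummy logits; each row is treated as an unnormalized distribution over the vocabulary, and the shapes are illustrative.

import tensorflow as tf

logits = tf.random.normal([3, 5])                   # (sequence_length, vocab_size)
ids = tf.random.categorical(logits, num_samples=1)  # (3, 1) sampled indices
print(tf.squeeze(ids, axis=-1).numpy())             # e.g. [2 0 4]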
Example no. 20
print(INPUT_DATA_FILE)
PRINT_EVERY = int(os.environ.get("PRINT_EVERY", "25000"))

if not MODEL_OUTPUT_FILE:
    ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
    MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (ts, VOCABULARY_SIZE,
                                                 EMBEDDING_DIM, HIDDEN_DIM)

# Load data
x_train, y_train, word_to_index, index_to_word = load_data(INPUT_DATA_FILE,
                                                           VOCABULARY_SIZE,
                                                           max_sents=1000000)

if not FLAGS.print_sentences:
    # Build model
    model = GRU(VOCABULARY_SIZE, hidden_dim=HIDDEN_DIM, bptt_truncate=-1)

    # Print SGD step time
    def sgd_callback(model, num_examples_seen):
        dt = datetime.now().isoformat()
        loss = model.calculate_loss(x_train[:10000], y_train[:10000])
        print("\n%s (%d)" % (dt, num_examples_seen))
        print("--------------------------------------------------")
        print("Loss: %f" % loss)
        generate_sentences_from_scratch(model, 10, index_to_word,
                                        word_to_index)
        save_model_parameters_theano(model, MODEL_OUTPUT_FILE)
        print("\n")
        sys.stdout.flush()

    for epoch in range(NEPOCH):
Example no. 21
def train(config, seed):
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)

    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim, config.num_hidden,
                    config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    losses = []
    train_accuracies = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length,1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()
        losses.append(loss.item())

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)
        train_accuracies.append(accuracy)

        # print(predictions[0, ...], batch_targets[0, ...])

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                   Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

        # Stop early if the last 100 losses were all low enough
        if all(x < 0.001 for x in losses[-100:]):
            break

    print('Done training.')

    # evaluate the model on new random data
    model.eval()
    test_accuracies = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Move to GPU
        batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length,1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Forward pass
        with torch.no_grad():
            log_probs = model(batch_inputs)
            predictions = torch.argmax(log_probs, dim=1)
            correct = (predictions == batch_targets).sum().item()
            accuracy = correct / log_probs.size(0)
        test_accuracies.append(accuracy)

        if step >= 5000 / config.batch_size:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

    return losses, train_accuracies, torch.tensor(
        test_accuracies).mean().item()
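
As a quick answer to the "check for yourself" comment in the loop above, a minimal demonstration of what clip_grad_norm_ does: it rescales the gradients in place so that their global L2 norm is at most max_norm, which keeps a single bad batch from blowing up the recurrent weights.

import torch

w = torch.nn.Parameter(torch.ones(3))
loss = (100.0 * w).sum()
loss.backward()
print(w.grad.norm())                              # ~173.2 before clipping
torch.nn.utils.clip_grad_norm_([w], max_norm=1.0)
print(w.grad.norm())                              # ~1.0 after clipping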
Example no. 22
model = RNN(session_layers=session_layers,
            user_layers=user_layers,
            loss=args.loss,
            item_embedding=args.item_embedding,
            init_item_embeddings=item_embedding_values,
            hidden_act=args.hidden_act,
            dropout_p_hidden_usr=args.dropout_p_hidden_usr,
            dropout_p_hidden_ses=args.dropout_p_hidden_ses,
            dropout_p_init=args.dropout_p_init,
            lmbd=args.lmbd,
            decay=args.decay,
            grad_cap=args.grad_cap,
            sigma=args.sigma,
            adapt=args.adapt,
            batch_size=args.batch_size,
            learning_rate=args.learning_rate,
            momentum=args.momentum,
            init_as_normal=bool(args.init_as_normal),
            reset_after_session=bool(args.reset_after_session),
            train_random_order=bool(args.train_random_order),
            n_epochs=args.n_epochs,
            user_key=args.user_key,
            session_key=args.session_key,
            item_key=args.item_key,
            time_key=args.time_key,
            seed=args.rnd_seed,
            user_to_session_act=args.user_to_ses_act,
            user_propagation_mode=args.user_propagation_mode,
            user_to_output=bool(args.user_to_output))
Example no. 23
def train(config, seed=0, seq_length=0):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    if seq_length != 0:
        config.input_length = seq_length

    # Initialize tensorboard writer
    # writer = SummaryWriter()

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)

    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim, config.num_hidden,
                    config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    loss_history = []
    acc_history = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length,1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)
        # print('log', log_probs.size())
        # print('batch', batch_targets.size)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        loss_history.append(loss.item())
        acc_history.append(accuracy)

        if step % 200 == 0:
            print('\nLoss:', loss.item())
            print('Acc:', accuracy)
        # writer.add_scalar("Loss", loss, step)
        # writer.add_scalar("Accuracy", accuracy, step)

        # print(predictions[0, ...], batch_targets[0, ...])

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                   Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # writer.flush()
    # writer.close()
    print(f'Done training with seed {seed} and seq_length {seq_length}')
    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])
    return loss_history, acc_history
Example no. 24
    def fit(self, X, Y, activation=T.tanh, learning_rate=1e-1, mu=0.5, reg=0, epochs=120, show_fig=False):
        N, t, D = X.shape

        self.hidden_layers = []
        Mi = D
        for Mo in self.hidden_layer_sizes:
            ru = GRU(Mi, Mo, activation)
            self.hidden_layers.append(ru)
            Mi = Mo

        Wo = np.random.randn(Mi) / np.sqrt(Mi)
        bo = 0.0
        self.Wo = theano.shared(Wo)
        self.bo = theano.shared(bo)
        self.params = [self.Wo, self.bo]
        for ru in self.hidden_layers:
            self.params += ru.params

        lr = T.scalar('lr')
        thX = T.matrix('X')
        thY = T.scalar('Y')
        Yhat = self.forward(thX)[-1]

        # let's return py_x too so we can draw a sample instead
        self.predict_op = theano.function(
            inputs=[thX],
            outputs=Yhat,
            allow_input_downcast=True,
        )

        cost = T.mean((thY - Yhat) * (thY - Yhat))
        grads = T.grad(cost, self.params)
        dparams = [theano.shared(p.get_value() * 0) for p in self.params]

        updates = [
                      (p, p + mu * dp - lr * g) for p, dp, g in zip(self.params, dparams, grads)
                  ] + [
                      (dp, mu * dp - lr * g) for dp, g in zip(dparams, grads)
                  ]

        self.train_op = theano.function(
            inputs=[lr, thX, thY],
            outputs=cost,
            updates=updates
        )

        costs = []
        for i in range(epochs):
            t0 = datetime.now()
            X, Y = shuffle(X, Y)
            n_correct = 0
            n_total = 0
            cost = 0
            for j in range(N):
                c = self.train_op(learning_rate, X[j], Y[j])
                cost += c
            if i % 10 == 0:
                print("i:", i, "cost:", cost, "time for epoch:", (datetime.now() - t0))
            if (i + 1) % 500 == 0:
                learning_rate /= 10
            costs.append(cost)

        if show_fig:
            plt.plot(costs)
            plt.show()
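
The Theano updates built above implement classical momentum: the velocity becomes mu * dp - lr * g and the parameter then moves by that velocity. A plain NumPy sketch of the same rule on a toy quadratic objective:

import numpy as np

def momentum_step(p, dp, grad, lr=0.1, mu=0.5):
    dp = mu * dp - lr * grad   # new velocity, mirroring the (dp, ...) update above
    return p + dp, dp          # parameter moves by the new velocity

p, dp = np.array([1.0, -2.0]), np.zeros(2)
for _ in range(5):
    grad = 2 * p               # gradient of the toy objective ||p||^2
    p, dp = momentum_step(p, dp, grad)
print(p)                       # shrinks toward the origin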
Example no. 25
model_dim = 64
batch_size = 128
epochs = 10

print("Data downloading and pre-processing ... ")
(x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=max_len,
                                                      num_words=vocab_size)
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print('Model building ... ')
inputs = Input(shape=(max_len, ), name="inputs")
embeddings = Embedding(vocab_size, model_dim, scale=False)(inputs)
outputs = BiDirectional(GRU(model_dim, return_outputs=True))(embeddings)
x = GlobalAveragePooling1D()(outputs)
x = Dropout(0.2)(x)
x = Dense(10, activation='relu')(x)
outputs = Dense(2, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=Adam(beta_1=0.9, beta_2=0.98, epsilon=1e-9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print("Model Training ... ")
es = EarlyStopping(patience=5)
model.fit(x_train,
          y_train,
          batch_size=batch_size,
Example no. 26
File: copy.py Project: kirk86/gru
import sys

# Comment out to remove determinism
np.random.seed(0)

# Set to True to perform gradient checking
GRAD_CHECK = False

vec_size = 8
out_size = vec_size # Size of output bit vector at each time step
in_size = vec_size + 2 # Input vector size, bigger because of start+stop bits
hidden_size = 100 # Size of hidden layer of neurons
learning_rate = 1e-1

# An object that keeps the network state during training.
model = GRU(in_size, out_size, hidden_size)

# An object that keeps the optimizer state during training
optimizer = Adagrad(model.weights,learning_rate)

n = 0 # counts the number of sequences trained on

while True:

  # train on sequences of length from 1 to 4
  seq_length = np.random.randint(1,5)
  i, t = sequences.copy_sequence(seq_length, vec_size) 
  inputs = np.matrix(i)
  targets = np.matrix(t)

  # forward seq_length characters through the net and fetch gradient
Example no. 27
def train(config):
    #np.random.seed(24)
    #torch.manual_seed(24)

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset,
                                 config.batch_size,
                                 num_workers=1,
                                 drop_last=True)

        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)

    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim, config.num_hidden,
                    config.num_classes, config.batch_size, device).to(device)

    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    accuracy_list = []
    loss_list = []
    old_loss = 1.0
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU

        batch_inputs = batch_inputs.to(device)  # [batch_size, seq_length,1]
        batch_targets = batch_targets.to(device)  # [batch_size]
        #print(batch_inputs[:,0,:].shape)
        #embedding = nn.Embedding(3, config.input_dim)
        #print(embedding(batch_inputs[:,0,:].long()).shape)
        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)
        accuracy_list.append(accuracy)
        loss_list.append(loss.item())
        # print(predictions[0, ...], batch_targets[0, ...])

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                   Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished
        if step == config.train_steps or old_loss == loss.item():
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break
        else:
            old_loss = loss.item()
    print('Done training.')
    ###########################################################################
    ###########################################################################

    print('Evaluating...')
    acc = []
    for i in range(3):
        acc_sublist = []
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            model.eval()
            batch_inputs = batch_inputs.to(
                device)  # [batch_size, seq_length,1]
            batch_targets = batch_targets.to(device)
            pred = model(batch_inputs)
            predictions = torch.argmax(pred, dim=1)
            correct = (predictions == batch_targets).sum().item()
            accuracy = correct / pred.size(0)
            acc_sublist.append(accuracy)
            if step == 25:
                break
        acc.append(np.mean(acc_sublist))
    print('Mean accuracy is {} and standard deviation is {}'.format(
        np.mean(acc), np.std(acc)))
    return accuracy_list, loss_list
Example no. 28
vocabulary_size=2000
embedding_dim=48
hidden_dim=128
nepochs=20
model_output_file="model_output_file.mof"
input_data_file="./data/reddit-comments-2015.csv"
print_every=25000

if not model_output_file:
  ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
  MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (ts, vocabulary_size, embedding_dim, hidden_dim)

# Load data
x_train, y_train, word2index, index2word = load_data(input_data_file, vocabulary_size)

model=GRU(vocabulary_size,hidden_dim=hidden_dim,bptt_truncate=-1)

t1=time.time()
model.sgd_step(x_train[10],y_train[10],learning_rate)
t2=time.time()
print "SGD Step time: %f (ms)" %((t2-t1)*1000)




def sgd_callback(model,num_examples_seen):
    loss=model.calculate_loss(x_train[:10000],y_train[:10000])
    print("train instance: ",num_examples_seen,"Loss:",loss)
    generate_sentences(model,10,index2word,word2index)

Example no. 29
import sys

# Comment out to remove determinism
np.random.seed(0)

# Set to True to perform gradient checking
GRAD_CHECK = False

vec_size = 8
out_size = vec_size  # Size of output bit vector at each time step
in_size = vec_size + 2  # Input vector size, bigger because of start+stop bits
hidden_size = 100  # Size of hidden layer of neurons
learning_rate = 1e-1

# An object that keeps the network state during training.
model = GRU(in_size, out_size, hidden_size)

# An object that keeps the optimizer state during training
optimizer = Adagrad(model.weights, learning_rate)

n = 0  # counts the number of sequences trained on

while True:

    # train on sequences of length from 1 to 4
    seq_length = np.random.randint(1, 5)
    i, t = sequences.copy_sequence(seq_length, vec_size)
    inputs = np.matrix(i)
    targets = np.matrix(t)

    # forward seq_length characters through the net and fetch gradient
Example no. 30
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        with tf.Session(config=session_conf).as_default() as sess:
            initializer = tf.random_uniform_initializer(
                -1 * FLAGS.init_scale, 1 * FLAGS.init_scale)
            with tf.variable_scope("model",
                                   reuse=None,
                                   initializer=initializer):
                model = GRU(FLAGS.batch_size,
                            FLAGS.sequence_len,
                            embedding,
                            FLAGS.embedding_size,
                            FLAGS.attention_dim,
                            FLAGS.rnn_size,
                            FLAGS.num_rnn_layers,
                            num_classes,
                            FLAGS.max_grad_norm,
                            dropout=FLAGS.dropout,
                            is_training=True)

            with tf.variable_scope("model",
                                   reuse=True,
                                   initializer=initializer):
                valid_model = GRU(FLAGS.batch_size,
                                  FLAGS.sequence_len,
                                  embedding,
                                  FLAGS.embedding_size,
                                  FLAGS.attention_dim,
                                  FLAGS.rnn_size,
Example no. 31
                    help='whether to use combined policy and value nets')
args = parser.parse_args()

env = gym.make(args.env_name)

num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.0003)
else:
    policy_net = GRU(num_inputs, num_actions)
    old_policy_net = GRU(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.0003)
    opt_value = optim.Adam(value_net.parameters(), lr=0.0003)


def create_batch_inputs(batch_states_list, batch_actions_list,
                        batch_advantages_list, batch_targets_list):
    lengths = []
    for states in batch_states_list:
        lengths.append(states.size(0))

    max_length = max(lengths)
    batch_states = torch.zeros(len(batch_states_list), max_length, num_inputs)
    batch_actions = torch.zeros(len(batch_actions_list), max_length,
Example no. 32
    def build(self):
        log.info('building rnn cell....')
        if self.cell == 'gru':
            recurent_x = GRU(self.rng, self.n_input, self.n_hidden, self.x,
                             self.E, self.xmask, self.is_train, self.dropout)

            recurent_y = GRU(self.rng, self.n_input, self.n_hidden, self.y,
                             self.E, self.ymask, self.is_train, self.dropout)
        elif self.cell == 'lstm':
            recurent_x = LSTM(self.rng, self.n_input, self.n_hidden, self.x,
                              self.E, self.xmask, self.is_train, self.dropout)

            recurent_y = LSTM(self.rng, self.n_input, self.n_hidden, self.y,
                              self.E, self.ymask, self.is_train, self.dropout)
        log.info('build the sim matrix....')
        sim_layer = Similarity(recurent_x.activation,
                               recurent_y.activation,
                               metrics=self.sim)

        log.info('building convolution pooling layer....')
        conv_pool_layer = ConvPool(
            input=sim_layer.activation,
            filter_shape=(2, 1, 3, 3),  # feature_maps, 1, filter_h, filter_w
            input_shape=(self.batch_size, 1, 50,
                         50))  #sim_layer.activation.shape)
        projected_layer = basicLayer(conv_pool_layer.activation,
                                     input_shape=1152)
        rav_cost = T.nnet.binary_crossentropy(projected_layer.activation,
                                              self.label)
        cost = T.mean(rav_cost)
        acc = T.eq(projected_layer.activation > 0.5, self.label)
        log.info('cost calculated.....')

        self.params = [
            self.E,
        ]
        self.params += recurent_x.params
        self.params += recurent_y.params
        self.params += conv_pool_layer.params
        self.params += projected_layer.params

        lr = T.scalar('lr')
        gparams = [T.clip(T.grad(cost, p), -3, 3) for p in self.params]
        #gparams = [T.grad(cost, p) for p in self.params]

        if self.optimizer == 'sgd':
            updates = sgd(self.params, gparams, lr)
        elif self.optimizer == 'adam':
            updates = adam(self.params, gparams, lr)
        elif self.optimizer == 'rmsprop':
            updates = rmsprop(self.params, gparams, lr)

        log.info('gradient calculated.....')

        self.train = theano.function(
            inputs=[self.x, self.xmask, self.y, self.ymask, self.label, lr],
            outputs=[cost, acc],
            updates=updates,
            givens={self.is_train: np.cast['int32'](1)})

        self.predict = theano.function(
            inputs=[self.x, self.xmask, self.y, self.ymask, self.label],
            outputs=[rav_cost, acc],
            givens={self.is_train: np.cast['int32'](0)})

        self.test = theano.function(
            inputs=[self.x, self.xmask, self.y, self.ymask],
            outputs=projected_layer.activation,
            givens={self.is_train: np.cast['int32'](0)})
Example no. 33
def get_gru(options):
    model = GRU(options)
    return model