예제 #1
0
    def __init__(self, vocab_size, tag2Idx, embedding_dim, hidden_dim):
        """Create the embedding, BiLSTM, tag projection and CRF transitions.

        :param vocab_size: number of rows of the word-embedding table.
        :param tag2Idx: tag lookup stored as ``self.tag2idx``; its length
            defines the tag-set size.
        :param embedding_dim: width of each word embedding.
        :param hidden_dim: combined hidden width; each LSTM direction gets
            ``hidden_dim // 2`` units.
        """
        super(BiLSTM_CRF, self).__init__()

        # Plain hyper-parameters (no child blocks are created here).
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.tag2idx = tag2Idx
        self.tagset_size = len(tag2Idx)

        with self.name_scope():
            self.word_embeds = nn.Embedding(vocab_size, embedding_dim)

            # Single bidirectional layer, hidden_dim // 2 units per direction.
            units_per_direction = hidden_dim // 2
            self.lstm = rnn.LSTM(units_per_direction,
                                 num_layers=1,
                                 bidirectional=True)

            # Projects the LSTM output into tag space.
            self.hidden2tag = nn.Dense(self.tagset_size)

            # Transition parameters: entry (i, j) is the score of moving
            # *to* tag i *from* tag j.
            n_tags = self.tagset_size
            self.transitions = self.params.get("crf_transition_matrix",
                                               shape=(n_tags, n_tags))
            self.hidden = self.init_hidden()
예제 #2
0
 def __init__(self, vocab, embed_size, num_hiddens, num_layers,
              dense_layers, **kwargs):
     """Build the BiLSTM encoder, layer norm, dense stack and output layer.

     :param vocab: accepted for interface compatibility; not read here.
     :param embed_size: input feature size handed to the LSTM.
     :param num_hiddens: hidden units per LSTM direction.
     :param num_layers: number of stacked LSTM layers.
     :param dense_layers: total dense-layer budget; one fifth of it
         (truncated to an int) is used for each of the five width stages.
     :param kwargs: forwarded to the parent block constructor.
     """
     super(mix_net, self).__init__(**kwargs)
     self.encoder = rnn.LSTM(num_hiddens,
                             num_layers=num_layers,
                             bidirectional=True,
                             input_size=embed_size)
     self.decoder = nn.Dense(1, activation='relu')
     self.bn = nn.LayerNorm(axis=0)

     # Dense stack: five stages of equal depth with widths 22 * multiplier.
     # A plain range() counts the layers; no NDArray is needed for that.
     base_width = 22
     layers_per_stage = int(dense_layers / 5)
     self.dnn = nn.HybridSequential()
     for multiplier in (2, 4, 8, 2, 1):
         for _ in range(layers_per_stage):
             self.dnn.add(nn.Dense(base_width * multiplier,
                                   activation='relu'))
     self.out_dense = nn.Dense(5)
예제 #3
0
    def __init__(self, rnn_type, hidden_size, emb_size, output_size, dropout, target_len, teaching_force, force_prob, ctx):
        """Set up the recurrent core, the projections and the attention modules.

        :rnn_type: 'LSTM', 'GRU' or 'DLSTM' (case-insensitive)
        :hidden_size: hidden units of the recurrent layer/cell
        :emb_size: output width of ``input_linear``
        :output_size: output width of ``output_layer``
        :dropout: dropout rate passed to the LSTM/GRU layer
        :target_len: stored as ``self.target_len``
        :teaching_force: stored as ``self.teaching_force``
        :force_prob: stored as ``self.force_prob``
        :ctx: stored as ``self.ctx``
        :raises ValueError: if ``rnn_type`` is not a supported kind

        """
        nn.Block.__init__(self)

        self.hidden_size = hidden_size
        self.emb_size = emb_size
        self.dropout = dropout
        self.target_len = target_len
        self.teaching_force = teaching_force
        self.force_prob = force_prob
        self.ctx = ctx

        # Normalise the type name once; the stored attribute and the
        # dispatch below use the same upper-cased value.
        rnn_type = rnn_type.upper()
        self.rnn_type = rnn_type

        if rnn_type == 'LSTM':
            self.rnn = rnn.LSTM(hidden_size, layout='NTC', dropout=dropout)
        elif rnn_type == 'GRU':
            self.rnn = rnn.GRU(hidden_size, layout='NTC', dropout=dropout)
        elif rnn_type == 'DLSTM':
            self.rnn = DLSTMCell(hidden_size)
        else:
            raise ValueError('Unsupported rnn type %s' % rnn_type)

        self.input_linear = nn.Dense(emb_size)
        self.output_layer = nn.Dense(output_size)

        self.attention = BahdanauAttention(64)
        self.head_attention = BahdanauAttention(64)
    def __init__(self,rnn_type, hidden_size, output_size, num_layers, dropout, bidirectional=True):
        """Create the recurrent layer selected by ``rnn_type``.

        :rnn_type: 'LSTM' or 'GRU' (case-insensitive)
        :hidden_size: hidden units per direction
        :output_size: stored as ``self._output_size``
        :num_layers: number of stacked recurrent layers
        :dropout: dropout rate passed to the recurrent layer
        :bidirectional: whether the layer runs in both directions
        :raises ValueError: if ``rnn_type`` is neither 'LSTM' nor 'GRU'

        """
        nn.Block.__init__(self)

        self._rnn_type = rnn_type.upper()
        self._hidden_size = hidden_size
        self._output_size = output_size
        self._num_layers = num_layers
        self._dropout = dropout
        self._bidirectional = bidirectional

        # Both layer types take the same arguments; keywords make the
        # mapping explicit instead of relying on positional order.
        layer_kwargs = dict(num_layers=self._num_layers,
                            layout='NTC',
                            dropout=self._dropout,
                            bidirectional=self._bidirectional)

        if self._rnn_type == 'LSTM':
            self.rnn = rnn.LSTM(self._hidden_size, **layer_kwargs)
        elif self._rnn_type == 'GRU':
            # The original read self.num_layers here, which is never set.
            self.rnn = rnn.GRU(self._hidden_size, **layer_kwargs)
        else:
            # Fail at construction time rather than leaving self.rnn unset.
            raise ValueError('Unsupported rnn type %s' % self._rnn_type)
예제 #5
0
 def __init__(self, embedding_dim, model_dim, dropout, head_count, vocab_size, extended_size,gpu):
     """Build the decoder LSTM, self-attention, residual block and projections.

     :param embedding_dim: input feature size of the decoder LSTM.
     :param model_dim: base width; most layers use ``2 * model_dim`` units.
     :param dropout: dropout rate handed to ``Resblock``.
     :param head_count: number of attention heads.
     :param vocab_size: output width of ``V2``.
     :param extended_size: stored as ``self.extended_size``.
     :param gpu: stored as ``self.ctx``.
     """
     super(Decoder,self).__init__()
     self.ctx = gpu
     # Must be stored: the LSTM below reads self.embedding_dim.
     self.embedding_dim = embedding_dim
     self.model_dim = model_dim
     self.dropout = dropout
     self.head_count = head_count
     self.vocab_size = vocab_size
     # Was `self.extended.size = ...`, which raises AttributeError because
     # no `extended` attribute exists.
     self.extended_size = extended_size

     doubled = 2 * self.model_dim
     self.decoder_ltsm = rnn.LSTM(
         doubled, layout='NTC',
         input_size=self.embedding_dim,
         i2h_weight_initializer='Orthogonal',
         h2h_weight_initializer='Orthogonal')
     # NOTE(review): `base_cell` is not defined inside this method; it has
     # to resolve from an enclosing/module scope — confirm it exists.
     self.self_attn = MultiHeadAttentionCell(base_cell=base_cell,
                                             query_units=doubled,
                                             use_bias=True,
                                             key_units=doubled,
                                             value_units=doubled,
                                             num_heads=self.head_count,
                                             weight_initializer='Xavier')
     self.fnn = Resblock(doubled, self.dropout)
     self.V1 = nn.Dense(doubled, in_units=3*self.model_dim)
     self.V2 = nn.Dense(self.vocab_size, in_units=doubled)
     self.W_c = nn.Dense(1)
     self.W_s = nn.Dense(1)
     self.W_x = nn.Dense(1)
예제 #6
0
def net_define_eu():
    """Build and initialize the sequential text classifier.

    The stack is: embedding -> bidirectional LSTM -> transpose ->
    global max pooling -> extendDim -> PrimeConvCap -> CapFullyNGBlock ->
    dropout -> 6-unit sigmoid dense layer.

    Returns:
        The ``nn.Sequential`` network, initialized with Xavier.
    """
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Embedding(config.MAX_WORDS, config.EMBEDDING_DIM))
        net.add(
            rnn.LSTM(128,
                     layout='NTC',
                     bidirectional=True,
                     num_layers=1,
                     dropout=0.2))
        # Swap the last two axes before pooling.
        net.add(transpose(axes=(0, 2, 1)))
        net.add(nn.GlobalMaxPool1D())
        net.add(extendDim(axes=3))
        net.add(PrimeConvCap(16, 32, kernel_size=(1, 1), padding=(0, 0)))
        net.add(
            CapFullyNGBlock(16,
                            num_cap=32,
                            input_units=32,
                            units=16,
                            route_num=3))
        net.add(nn.Dropout(0.2))
        net.add(nn.Dense(6, activation='sigmoid'))
    net.initialize(init=init.Xavier())
    return net
예제 #7
0
    def __init__(self, dropout=0.0, **kwargs):
        """Build the recurrent classifier configured by the global ``opt``.

        :param dropout: rate for ``self.drop`` and for the recurrent layer.
        :param kwargs: forwarded to the parent block constructor.
        :raises ValueError: if ``opt.mode`` is not one of ``rnn_relu``,
            ``rnn_tanh``, ``lstm`` or ``gru``.
        """
        super(RNNClsModel, self).__init__(**kwargs)
        with self.name_scope():
            self.drop = nn.Dropout(dropout)

            # Arguments shared by every recurrent layer variant.
            shared = dict(num_layers=opt.num_layers,
                          layout='NTC',
                          dropout=dropout,
                          input_size=opt.num_inputs)

            if opt.mode == 'rnn_relu':
                self.rnn = rnn.RNN(opt.num_hidden, activation='relu',
                                   **shared)
            elif opt.mode == 'rnn_tanh':
                # gluon's rnn.RNN defaults to activation='relu', so tanh has
                # to be requested explicitly for this mode.
                self.rnn = rnn.RNN(opt.num_hidden, activation='tanh',
                                   **shared)
            elif opt.mode == 'lstm':
                self.rnn = rnn.LSTM(opt.num_hidden, **shared)
            elif opt.mode == 'gru':
                self.rnn = rnn.GRU(opt.num_hidden, **shared)
            else:
                raise ValueError("Invalid mode %s. Options are rnn_relu, "
                                 "rnn_tanh, lstm, and gru" % opt.mode)

            # The classifier input width is num_hidden * seq_len.
            self.fc = nn.Dense(opt.num_actions,
                               in_units=opt.num_hidden * opt.seq_len)
            self.num_hidden = opt.num_hidden
            self.seq_len = opt.seq_len
 def __init__(self,
              vocab_size=VOCAB_SIZE,
              embedding_size=32,
              rnn_size=128,
              num_layers=2,
              drop_rate=0.0,
              **kwargs):
     """Create the embedding -> dropout -> LSTM -> dense model.

     :param vocab_size: embedding rows and decoder output width.
     :param embedding_size: embedding width and LSTM input size.
     :param rnn_size: LSTM hidden units and decoder input width.
     :param num_layers: number of stacked LSTM layers.
     :param drop_rate: rate for the dropout layer and the LSTM.
     :param kwargs: forwarded to the parent block constructor.
     """
     super(Model, self).__init__(**kwargs)

     # Keep the constructor arguments available as a plain dict.
     self.args = dict(vocab_size=vocab_size,
                      embedding_size=embedding_size,
                      rnn_size=rnn_size,
                      num_layers=num_layers,
                      drop_rate=drop_rate)

     with self.name_scope():
         self.encoder = nn.Embedding(vocab_size, embedding_size)
         self.dropout = nn.Dropout(drop_rate)
         self.rnn = rnn.LSTM(rnn_size,
                             num_layers=num_layers,
                             dropout=drop_rate,
                             input_size=embedding_size)
         self.decoder = nn.Dense(vocab_size, in_units=rnn_size)
    def __init__(self, vocab, embed_size, num_hiddens, num_layers, **kwargs):
        """
            1. In this model every word first goes through the embedding
               layer to obtain a feature vector;
            2. a bidirectional recurrent network further encodes the feature
               sequence to obtain sequence information;
            3. a fully connected layer turns the encoded sequence information
               into the output.

            The hidden states of the bidirectional LSTM at the first and the
            last time step are concatenated and used as the encoding of the
            feature sequence that is passed to the output layer for
            classification.
        :param vocab: vocabulary; its length sets the embedding row count
        :param embed_size: embedding width and LSTM input size
        :param num_hiddens: hidden units per LSTM direction
        :param num_layers: number of stacked LSTM layers
        :param kwargs: forwarded to the parent block constructor
        """
        super(BiRNN, self).__init__(**kwargs)

        vocab_len = len(vocab)
        self.embedding = nn.Embedding(vocab_len, embed_size)

        # bidirectional=True yields a bidirectional recurrent network.
        self.encoder = rnn.LSTM(num_hiddens,
                                num_layers,
                                bidirectional=True,
                                input_size=embed_size)
        self.decoder = nn.Dense(units=2)
예제 #10
0
    def __init__(self,
                 n_hidden,
                 in_seq_len,
                 out_seq_len,
                 vocab_size,
                 enc_layer,
                 dec_layer=1,
                 **kwargs):
        """Create the LSTM encoder, two decoder cells, batch norm and output layer.

        :param n_hidden: hidden units of the encoder and both decoder cells.
        :param in_seq_len: stored as ``self.in_seq_len``.
        :param out_seq_len: stored as ``self.out_seq_len``.
        :param vocab_size: output width of the final dense layer.
        :param enc_layer: number of stacked encoder layers.
        :param dec_layer: accepted but not read by this constructor.
        :param kwargs: forwarded to the parent block constructor.
        """
        super(calculator, self).__init__(**kwargs)

        self.n_hidden = n_hidden
        self.vocab_size = vocab_size
        self.enc_layer = enc_layer
        self.in_seq_len, self.out_seq_len = in_seq_len, out_seq_len

        with self.name_scope():
            self.encoder = rnn.LSTM(n_hidden,
                                    num_layers=enc_layer,
                                    layout='NTC')
            # Two separate single-step cells for the decoder side.
            self.decoder_0 = rnn.LSTMCell(n_hidden)
            self.decoder_1 = rnn.LSTMCell(n_hidden)
            self.batchnorm = nn.BatchNorm(axis=2)
            # flatten=False: the dense layer is applied along the last axis.
            self.dense = nn.Dense(vocab_size, flatten=False)
예제 #11
0
    def __init__(self, n_inputs, n_hidden, n_layers=1, dropout=0.5):
        """Create the bidirectional LSTM with its state and output layers.

        :param n_inputs: input feature size of the LSTM.
        :param n_hidden: hidden units per direction; the state projections
            produce ``2 * n_hidden`` units.
        :param n_layers: number of stacked LSTM layers.
        :param dropout: dropout rate passed to the LSTM.
        """
        super(BILSTM, self).__init__()

        # Shared settings of the two tanh projections (c_init, h_init).
        state_width = 2 * n_hidden
        projection = dict(flatten=False, activation='tanh')

        with self.name_scope():
            self.r = rnn.LSTM(n_hidden,
                              num_layers=n_layers,
                              dropout=dropout,
                              input_size=n_inputs,
                              bidirectional=True)
            self.c_init = nn.Dense(state_width, **projection)
            self.h_init = nn.Dense(state_width, **projection)
            self.fc = nn.Dense(1, flatten=False)
            # NOTE(review): fixed rate, independent of the `dropout` argument.
            self.d = nn.Dropout(0.5)
예제 #12
0
 def __init__(self, mode, vocab_size, embed_dim, hidden_dim,
             num_layers, dropout=0.5, **kwargs):
     """Create the dropout, embedding, recurrent layer and decoder.

     :param mode: one of ``rnn_relu``, ``rnn_tanh``, ``lstm``, ``gru``.
     :param vocab_size: embedding rows and decoder output width.
     :param embed_dim: embedding width and recurrent input size.
     :param hidden_dim: recurrent hidden units and decoder input width.
     :param num_layers: number of stacked recurrent layers.
     :param dropout: rate for ``self.drop`` and for the recurrent layer.
     :param kwargs: forwarded to the parent block constructor.
     :raises ValueError: if ``mode`` is not one of the listed options.
     """
     super(RNNModel, self).__init__(**kwargs)
     with self.name_scope():
         self.drop = nn.Dropout(dropout)
         self.encoder = nn.Embedding(
             vocab_size, embed_dim,
             weight_initializer=mx.init.Uniform(0.1))

         # Options common to every recurrent layer type.
         common = dict(dropout=dropout, input_size=embed_dim)
         if mode == 'lstm':
             self.rnn = rnn.LSTM(hidden_dim, num_layers, **common)
         elif mode == 'gru':
             self.rnn = rnn.GRU(hidden_dim, num_layers, **common)
         elif mode == 'rnn_relu' or mode == 'rnn_tanh':
             nonlinearity = 'relu' if mode == 'rnn_relu' else 'tanh'
             self.rnn = rnn.RNN(hidden_dim, num_layers,
                                activation=nonlinearity, **common)
         else:
             raise ValueError("Invalid mode %s. Options are rnn_relu, "
                              "rnn_tanh, lstm, and gru" % mode)

         self.decoder = nn.Dense(vocab_size, in_units=hidden_dim)
         self.hidden_dim = hidden_dim
예제 #13
0
 def __init__(self, prefix=None, params=None):
     """Create the BiLSTM, pooling, two convolutions, two dense layers and output head.

     :param prefix: forwarded to the parent block constructor.
     :param params: forwarded to the parent block constructor.
     """
     super().__init__(prefix, params)
     with self.name_scope():
         self.lstm = rnn.LSTM(64,
                              num_layers=1,
                              bidirectional=True,
                              dropout=0.2,
                              layout='NTC')
         self.lstm_out = nn.MaxPool2D(pool_size=(FIXED_WORD_LENGTH, 1))

         # Both convolutions are configured identically.
         conv_settings = dict(kernel_size=(INFOBOX_VALUE_LENGTH, DIMENSION),
                              strides=(1, 1),
                              dilation=(1, 1),
                              use_bias=False,
                              in_channels=1,
                              activation='relu')
         self.conv1 = MyConv2D(INFOBOX_LENGTH, **conv_settings)
         self.conv2 = MyConv2D(INFOBOX_LENGTH, **conv_settings)

         self.dense1 = nn.Dense(384, activation="sigmoid")
         self.dense2 = nn.Dense(384, activation="sigmoid")

         # Output head: flatten -> relu -> dropout -> 7-unit dense layer.
         self.output = nn.Sequential()
         head_stages = (nn.Flatten(),
                        nn.Activation('relu'),
                        nn.Dropout(0.5),
                        nn.Dense(7))
         for stage in head_stages:
             self.output.add(stage)
예제 #14
0
 def __init__(self, hidden_size, num_layers, num_classes):
     """Create an LSTM followed by a dense layer with ``num_classes`` units.

     :param hidden_size: LSTM hidden units.
     :param num_layers: number of stacked LSTM layers.
     :param num_classes: output width of the dense layer.
     """
     super(RNN, self).__init__()
     self.hidden_size, self.num_layers = hidden_size, num_layers
     self.lstm = rnn.LSTM(hidden_size, num_layers=num_layers)
     self.dense = nn.Dense(units=num_classes)
예제 #15
0
    return inputs, targets


if __name__ == '__main__':
    # Settings of the toy run.
    nb_epochs, n_steps, n_features = 200, 3, 1
    # NOTE(review): learning_rate and n_features are not used below; the
    # trainer is configured with a literal 0.03.
    learning_rate = 0.01

    # Evenly spaced series, cut into (input window, target) pairs.
    sequence = np.linspace(10, 300, num=30, dtype='float32')
    xnp, ynp = split_sequence(sequence, n_steps)
    x, y = (mx.ndarray.from_numpy(arr, zero_copy=False)
            for arr in (xnp, ynp))

    # LSTM(50 hidden units, 1 layer) followed by a single-output dense layer.
    network = nn.Sequential()
    network.add(rnn.LSTM(50, 1))
    network.add(nn.Dense(1))
    network.initialize(mx.init.Xavier())

    criterion = mx.gluon.loss.L2Loss()
    trainer = mx.gluon.Trainer(network.collect_params(),
                               optimizer='adam',
                               optimizer_params=dict(learning_rate=0.03))

    for epoch in range(nb_epochs):
        # Record the forward pass so gradients can be computed.
        with mx.autograd.record():
            out = network(x)
            loss = criterion(out, y)
        loss.backward()
        # NOTE(review): the step is normalised by x.shape[1] — confirm that
        # axis 1 is really the batch axis of x.
        trainer.step(batch_size=x.shape[1])
예제 #16
0
            y = nd.dot(h, self.w_hq) + self.b_q
            outputs.append(y)

        y_hat = nd.concat(*outputs, dim=0)
        return y_hat, (h, c)


if __name__ == '__main__':
    # Corpus and vocabulary lookups.
    (_corpus_indices, _idx_to_char,
     _char_to_idx, _vocab_size) = load_jaychou_lyrics(
         "../data/jaychou_lyrics.txt.zip")

    # Hyper-parameters.
    _num_hidden, _num_steps, _batch_size, _lr = 256, 35, 32, 1e2

    # Switch between the from-scratch LSTM and the Gluon-backed one.
    use_gluon = False
    if not use_gluon:
        model = LSTMScratch(_vocab_size, _idx_to_char, _char_to_idx,
                            _num_hidden)
    else:
        _rnn_layer = rnn.LSTM(_num_hidden)
        model = BaseRNNGluon(_vocab_size, _idx_to_char, _char_to_idx,
                             _num_hidden, _rnn_layer)

    model.fit(_corpus_indices,
              _num_steps,
              dict(lr=_lr, batch_size=_batch_size),
              epochs=250)
예제 #17
0
 def __init__(self, num_embed, num_hidden, num_layers, bidirectional=False, sequence_length=sequence_length, **kwargs):
     """Create a 'TNC'-layout LSTM and a single-unit dense decoder.

     :param num_embed: input feature size of the LSTM.
     :param num_hidden: LSTM hidden units and decoder ``in_units``.
     :param num_layers: number of stacked LSTM layers.
     :param bidirectional: whether the LSTM runs in both directions.
     :param sequence_length: accepted but not read by this constructor.
     :param kwargs: forwarded to the parent block constructor.
     """
     super(RNNModel, self).__init__(**kwargs)
     self.num_hidden = num_hidden
     with self.name_scope():
         self.rnn = rnn.LSTM(num_hidden,
                             num_layers,
                             layout='TNC',
                             bidirectional=bidirectional,
                             input_size=num_embed)
         # NOTE(review): in_units stays num_hidden even when bidirectional
         # is True — confirm against how forward feeds the decoder.
         self.decoder = nn.Dense(units=1, in_units=num_hidden)
예제 #18
0
import d2lzh as d2l
import load_data as ld
import math
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn, rnn
import time

# Load the data
(corpus_indices, char_to_idx, idx_to_char,
 vocab_size) = ld.load_data_jay_lyrics()

# NOTE(review): num_inputs and num_outputs are not used in the visible
# statements below; only num_hiddens is.
num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
ctx = d2l.try_gpu()

# Define the model
lstm_layer = rnn.LSTM(num_hiddens)
model = d2l.RNNModel(lstm_layer, vocab_size)

# Train the model and generate lyrics
num_epochs, num_steps, batch_size, lr, clipping_theta = 160, 35, 32, 1e2, 1e-2
pred_period, pred_len, prefixes = 40, 50, ['分开', '不分开']
d2l.train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes)
예제 #19
0
파일: model.py 프로젝트: zhould1990/d2l-en
 def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
              dropout=0, **kwargs):
     """Create the embedding layer and the LSTM of the encoder.

     :param vocab_size: number of embedding rows.
     :param embed_size: embedding width.
     :param num_hiddens: LSTM hidden units.
     :param num_layers: number of stacked LSTM layers.
     :param dropout: dropout rate passed to the LSTM.
     :param kwargs: forwarded to the parent block constructor.
     """
     super(Seq2SeqEncoder, self).__init__(**kwargs)
     self.embedding = nn.Embedding(input_dim=vocab_size,
                                   output_dim=embed_size)
     self.rnn = rnn.LSTM(num_hiddens,
                         num_layers=num_layers,
                         dropout=dropout)
예제 #20
0
# Show the label arrays prepared earlier in the script.
print("Y_train: " + str(Y_train))
print("Y_test: " + str(Y_test))

## define network
# Hyper-parameters; learning_rate, epochs and batch_size are only defined
# here, not consumed in the visible statements.
num_classes = 1
num_hidden = 25
learning_rate = .01
epochs = 10
batch_size = 100

model = nn.Sequential()
with model.name_scope():
    # NOTE(review): the embedding is assigned as an attribute instead of
    # being passed to model.add() like the other layers — confirm that it
    # takes part in the forward pass as intended.
    model.embed = nn.Embedding(voca_size, num_embed)
    model.add(
        rnn.LSTM(num_hidden, layout='NTC', dropout=0.5, bidirectional=False))
    model.add(nn.Dense(num_classes))


def eval_accuracy(x, y, batch_size):
    """Run the global ``model`` over ``x`` in batches and threshold the outputs.

    Only full batches are processed (``x.shape[0] // batch_size`` of them).

    NOTE(review): the visible body stops after building ``predictions``;
    ``accuracy`` is never updated and nothing is returned here. The snippet
    looks truncated — confirm against the full source.
    """
    accuracy = mx.metric.Accuracy()

    for i in range(x.shape[0] // batch_size):
        # Slice the i-th batch out of inputs and targets.
        data = x[i * batch_size:(i * batch_size + batch_size), ]
        target = y[i * batch_size:(i * batch_size + batch_size), ]

        output = model(data)
        # Binarise each output at 0.5; `context` comes from module scope.
        predictions = nd.array([(1 if out >= 0.5 else 0) for out in output],
                               context)

        # predictions = nd.argmax(output, axis=1)
예제 #21
0
    def __init__(self,
                 hidden_size: int,
                 embeddings: StackedEmbeddings,
                 tag_dictionary: Dictionary,
                 tag_type: str,
                 use_crf: bool = True,
                 use_rnn: bool = True,
                 rnn_layers: int = 1):
        """Create the tagger: dropout, embedding re-projection, BiLSTM, tag layer, transitions.

        :param hidden_size: hidden units per LSTM direction.
        :param embeddings: embedding object; its ``embedding_length`` sets
            the input width of the network.
        :param tag_dictionary: tag dictionary; its length is the tag-set size.
        :param tag_type: stored as ``self.tag_type``.
        :param use_crf: if true, ``transitions`` is a parameter initialised
            from random scores; otherwise it is a plain zero array.
        :param use_rnn: selects the input width of the final linear layer.
        :param rnn_layers: number of stacked LSTM layers.
        """
        super(SequenceTagger, self).__init__()
        self.embeddings = embeddings

        with self.name_scope():
            # Flags and bookkeeping values.
            self.use_rnn = use_rnn
            self.hidden_size = hidden_size
            self.use_crf = use_crf
            self.use_viterbi = False
            self.rnn_layers = rnn_layers
            self.nlayers = rnn_layers
            self.trained_epochs = 0
            self.hidden_word = None
            self.rnn_type = 'LSTM'

            # Tag vocabulary.
            self.tag_dictionary = tag_dictionary
            self.tag_type = tag_type
            self.tagset_size = len(tag_dictionary)
            n_tags = self.tagset_size

            self.dropout = nn.Dropout(0.5, axes=[0])

            rnn_input_dim = self.embeddings.embedding_length

            # Width-preserving dense layer applied to the embeddings.
            self.relearn_embeddings = True
            if self.relearn_embeddings:
                self.embedding2nn = nn.Dense(units=rnn_input_dim,
                                             in_units=rnn_input_dim,
                                             flatten=False)

            # Bidirectional LSTM on top of the embedding layer.
            self.rnn = rnn.LSTM(hidden_size=hidden_size,
                                num_layers=self.nlayers,
                                bidirectional=True,
                                input_size=rnn_input_dim)

            # Final linear map to tag space; its input is either the
            # concatenated LSTM directions or the raw embedding width.
            linear_in = (hidden_size * 2 if self.use_rnn
                         else self.embeddings.embedding_length)
            self.linear = nn.Dense(units=len(tag_dictionary),
                                   in_units=linear_in,
                                   flatten=False)

            if not self.use_crf:
                # This transition matrix is updated through statistics,
                # not through gradient descent.
                self.transitions = nd.zeros((n_tags, n_tags))
            else:
                scores = nd.random.normal(0, 1, (n_tags, n_tags))
                # The START row and the STOP column get a large negative score.
                scores[self.tag_dictionary.get_idx_for_item(START_TAG), :] = -10000
                scores[:, self.tag_dictionary.get_idx_for_item(STOP_TAG)] = -10000
                self.transitions = self.params.get(
                    'transitions',
                    shape=(n_tags, n_tags),
                    init=mx.init.Constant(scores))
예제 #22
0
 def __init__(self, bert, prefix=None, params=None, \
                 n_rnn_layers=0, rnn_hidden_size=600, num_rnn_layers=1, n_dense_layers=0, units_dense=600, \
                 add_query=False, \
                 apply_coattention=False, bert_out_dim=768,\
                 apply_self_attention=False, self_attention_dimension=None, n_attention_heads=4,
                 apply_transformer=False,
                 qanet_style_out=False,
                 bidaf_style_out=False,
                 remove_special_token=False):
     """Assemble the optional heads that sit on top of the ``bert`` block.

     :param bert: stored as ``self.bert``.
     :param prefix: forwarded to the parent block constructor.
     :param params: forwarded to the parent block constructor.
     :param n_rnn_layers: number of bidirectional LSTM blocks added to
         ``span_classifier``.
     :param rnn_hidden_size: hidden size of each of those LSTM blocks.
     :param num_rnn_layers: ``num_layers`` of each of those LSTM blocks.
     :param n_dense_layers: number of relu dense layers added to
         ``span_classifier``.
     :param units_dense: width of each of those dense layers.
     :param add_query: stored as ``self.add_query``.
     :param apply_coattention: build the co-attention block, the optional
         QANet-/BiDAF-style output layers and ``cls_mapping``.
     :param bert_out_dim: width used to size the attention/output layers.
     :param apply_self_attention: build a multi-head attention cell.
     :param self_attention_dimension: units of that cell; defaults to
         ``bert_out_dim`` when ``None``.
     :param n_attention_heads: number of heads of that cell.
     :param apply_transformer: build a ``TransformerEncoder``.
     :param qanet_style_out: with co-attention, build the QANet-style head.
     :param bidaf_style_out: with co-attention and without
         ``qanet_style_out``, build the BiDAF-style head.
     :param remove_special_token: stored as ``self.remove_special_token``.
     """
     super(BertForQA, self).__init__(prefix=prefix, params=params)
     self.add_query=add_query
     self.apply_coattention = apply_coattention
     self.apply_self_attention = apply_self_attention
     self.apply_transformer = apply_transformer
     self.qanet_style_out = qanet_style_out
     self.bidaf_style_out = bidaf_style_out
     self.remove_special_token = remove_special_token
     self.bert = bert
     if self.apply_coattention:
         with self.name_scope():
             #self.co_attention_ = CoAttention("co-attention_", bert_out_dim) # try multiple layers
             self.co_attention = CoAttention("co-attention", bert_out_dim)
             if self.qanet_style_out:
                 # QANet-style head: projection, dropout, encoder and two
                 # single-unit predictors (begin / end).
                 self.project = gluon.nn.Dense(
                     units=bert_out_dim,
                     flatten=False,
                     use_bias=False,
                     weight_initializer=Xavier(),
                     prefix='projection_'
                 )
                 self.dropout = gluon.nn.Dropout(0.1)
                 self.model_encoder = TransformerEncoder(units=bert_out_dim)
                 self.predict_begin = gluon.nn.Dense(
                     units=1,
                     use_bias=True,
                     flatten=False,
                     weight_initializer=Xavier(
                         rnd_type='uniform', factor_type='in', magnitude=1),
                     bias_initializer=Uniform(1.0/bert_out_dim),
                     prefix='predict_start_'
                 )
                 self.predict_end = gluon.nn.Dense(
                     units=1,
                     use_bias=True,
                     flatten=False,
                     weight_initializer=Xavier(
                         rnd_type='uniform', factor_type='in', magnitude=1),
                     bias_initializer=Uniform(1.0/bert_out_dim),
                     prefix='predict_end_'
                 )
                 self.flatten = gluon.nn.Flatten()
             elif self.bidaf_style_out:
                 # BiDAF mode
                 # Two-layer bidirectional LSTM whose input is 4 * bert_out_dim
                 # wide and whose hidden size is bert_out_dim / 2 per direction.
                 self.modeling_layer = rnn.LSTM( hidden_size=int(bert_out_dim / 2), 
                                                 num_layers=2, 
                                                 dropout=0.0, 
                                                 bidirectional=True,
                                                 input_size=int(bert_out_dim * 4))
                 self.output_layer = BiDAFOutputLayer(span_start_input_dim=int(bert_out_dim / 2),
                                                     nlayers=1,
                                                     dropout=0.2)
             # '''
             # for the cls's encoding
             # used in version 2.0
             # NOTE: created only inside the apply_coattention branch.
             self.cls_mapping = nn.Dense(
                 units=2,
                 flatten=False,
                 weight_initializer=Xavier(),
                 prefix='cls_mapping_'
             )
             # '''
     if self.apply_self_attention:
         # Fall back to the BERT output width when no dimension is given.
         if self_attention_dimension is None:
             self_attention_dimension = bert_out_dim
         with self.name_scope():
             self.multi_head_attention = MultiHeadAttentionCell(DotProductAttentionCell(), \
                     self_attention_dimension, self_attention_dimension, self_attention_dimension, n_attention_heads)
     if self.apply_transformer:
         with self.name_scope():
             self.transformer = TransformerEncoder(units=bert_out_dim)
     # The generic span classifier is skipped when one of the co-attention
     # output styles is configured.
     if self.apply_coattention and (self.qanet_style_out or self.bidaf_style_out):
         self.span_classifier = None
     else:
         self.span_classifier = nn.HybridSequential()
         with self.span_classifier.name_scope():
             # Optional bidirectional LSTM blocks, then optional relu dense
             # layers, then the final 2-unit layer.
             for i in range(n_rnn_layers):
                 self.span_classifier.add(rnn.LSTM( hidden_size=rnn_hidden_size, 
                                                     num_layers=num_rnn_layers, 
                                                     dropout=0.0, 
                                                     bidirectional=True))
             for i in range(n_dense_layers):
                 self.span_classifier.add(nn.Dense(units=units_dense, flatten=False, activation='relu'))
             self.span_classifier.add(nn.Dense(units=2, flatten=False))
예제 #23
0
    if not max(args.filter_list) <= args.q:
        raise AssertionError("no filter can be larger than q")
    if not args.q >= math.ceil(args.seasonal_period / args.time_interval):
        raise AssertionError("size of skip connections cannot exceed q")

    # Build data iterators
    train_iter, val_iter, test_iter = build_iters(args.data_dir,
                                                  args.max_records, args.q,
                                                  args.horizon, args.splits,
                                                  args.batch_size)
    input_feature_shape = train_iter.provide_data[0][1]

    # Choose cells for recurrent layers: each cell will take the output of the previous cell in the list
    rcells = [rnn.GRU(hidden_size=args.recurrent_state_size, layout='NTC')]
    skiprcells = [
        rnn.LSTM(hidden_size=args.recurrent_state_size, layout='NTC')
    ]

    # Define net
    p = int(args.seasonal_period / args.time_interval)
    net = TCN(input_feature_shape, args.q, args.filter_list, args.num_filters,
              args.dropout, rcells, skiprcells, p)
    ctx = mx.cpu() if args.gpus is None or args.gpus is '' else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    net.initialize(mx.initializer.Uniform(0.1), ctx=ctx)
    loss = gluon.loss.HuberLoss(rho=0.1)
    #print("Loss weight: %e"%float(1.0/label_scale))
    #loss = gluon.loss.L2Loss(weight=float(1.0/label_scale)) # won't help!
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.005})
예제 #24
0
    def __init__(self,
                 dictionary: Dictionary,
                 is_forward_lm: bool,
                 hidden_size: int,
                 nlayers: int,
                 embedding_size: int = 100,
                 nout=None,
                 dropout=0.5,
                 init_params: Dict = None):
        """Create the dropout, embedding, LSTM and decoder (plus optional projection).

        :param dictionary: dictionary; its length sets the embedding rows
            and the decoder output width.
        :param is_forward_lm: stored as ``self.is_forward_lm``.
        :param hidden_size: LSTM hidden units.
        :param nlayers: number of stacked LSTM layers.
        :param embedding_size: embedding width and LSTM input size.
        :param nout: if not ``None``, width of an extra projection layer
            placed before the decoder.
        :param dropout: rate for ``self.drop`` and (except for a single
            layer without ``init_params``) for the LSTM.
        :param init_params: optional mapping of tensors used as constant
            initializers. The ``rnn.*`` entries are read only when
            ``nlayers == 1``; the ``decoder.*`` entries only when ``nout``
            is ``None``.
        """
        super(ContextualStringModel, self).__init__()

        self.dictionary = dictionary
        self.is_forward_lm = is_forward_lm

        self.dropout = dropout
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.nlayers = nlayers

        def preset(key):
            """Return a constant initializer for the tensor stored under *key*."""
            return mx.initializer.Constant(init_params[key])

        with self.name_scope():
            self.drop = nn.Dropout(dropout)

            if init_params:
                embed_init = preset('encoder.weight')
            else:
                embed_init = mx.initializer.Uniform(0.1)
            self.encoder = nn.Embedding(len(dictionary),
                                        embedding_size,
                                        weight_initializer=embed_init)

            # Assemble the LSTM keyword arguments for the three cases:
            # single layer with presets, single layer without, multi-layer.
            single_layer = nlayers == 1
            lstm_kwargs = {'input_size': embedding_size}
            if init_params or not single_layer:
                lstm_kwargs['dropout'] = dropout
            if single_layer and init_params:
                lstm_kwargs.update(
                    i2h_weight_initializer=preset('rnn.weight_ih_l0'),
                    h2h_weight_initializer=preset('rnn.weight_hh_l0'),
                    i2h_bias_initializer=preset('rnn.bias_ih_l0'),
                    h2h_bias_initializer=preset('rnn.bias_hh_l0'))
            self.rnn = rnn.LSTM(hidden_size, nlayers, **lstm_kwargs)

            self.hidden = None

            self.nout = nout
            if nout is None:
                # No projection: the decoder reads the LSTM output directly.
                self.proj = None
                if init_params:
                    weight_init = preset('decoder.weight')
                    bias_init = preset('decoder.bias')
                else:
                    weight_init = mx.initializer.Uniform(0.1)
                    bias_init = 'zero'
                self.decoder = nn.Dense(len(dictionary),
                                        weight_initializer=weight_init,
                                        bias_initializer=bias_init,
                                        in_units=hidden_size)
            else:
                # Projection to nout units, followed by the decoder.
                self.proj = nn.Dense(nout,
                                     weight_initializer='Xavier',
                                     in_units=hidden_size)
                self.decoder = nn.Dense(
                    len(dictionary),
                    weight_initializer=mx.initializer.Uniform(0.1),
                    bias_initializer='zero',
                    in_units=nout)
예제 #25
0
# Training loader over the current X / y; order is kept (shuffle=False).
train_iter = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y),
                                   batch_size=batch_size,
                                   shuffle=False)
print("train_data shape: ", X.shape, y.shape)

# Rebind X / y to the tail of the data (from index m on) for evaluation.
X = _data[m:]
y = _label[m:]
# Evaluation loader yields one sample per batch, in order.
eval_iter = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y),
                                  batch_size=1,
                                  shuffle=False)
print("eval_data shape: ", X.shape, y.shape)

net = gluon.nn.Sequential()
#net = gluon.nn.HybridSequential() # doesn't work since LSTM is not hybrid
with net.name_scope():
    net.add(rnn.LSTM(num_hidden))  # note, check also: LSTMCell
    #net.add(nn.Dense(3))  # do not add this, worse accuracy
    net.add(nn.Dense(
        1))  # output dimension is 1, "in_units" is skipped and inferred

# Initialise all parameters from a normal distribution with sigma=0.1.
net.collect_params().initialize(mx.init.Normal(sigma=0.1))
#softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
square_loss = gluon.loss.L2Loss()
# SGD with momentum.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': 0.005,
    'momentum': 0.9
})
#metric = mx.metric.MSE()

epochs = 20
loss_sequence = []
예제 #26
0
    """"Network"""
    def __init__(self):
        """Initialise the parent block; no child layers are created yet."""
        super().__init__()
        # Placeholder: a ``self.body`` sub-network was planned but never defined.

    def hybrid_forward(self, F, x, *args, **kwargs):
        """Placeholder forward pass.

        No computation is implemented: every argument is ignored and
        ``None`` is returned.
        """
        return None


def run(opts):
    """Load the first validation volume into a zero-filled batch buffer.

    Args:
        opts: Parsed options. Must expose ``batch_size`` and is also handed
            to ``set_context`` and ``get_subject_list``.

    Returns:
        None. The buffer is filled and then discarded.
    """
    # Read everything from the ``opts`` parameter instead of the module-level
    # ``args`` so the function also works when it is imported rather than run
    # through the ``__main__`` guard.
    ctx = set_context(opts)
    data = nd.zeros((opts.batch_size, 60, 20, 240, 240), ctx=ctx)
    # Only the validation list is used; the training list is ignored here.
    _, val_list = get_subject_list(opts)
    # Fill batch slot 0 with the "<prefix>IMG.npy" array of the first subject.
    data[0] = np.load("%sIMG.npy" % val_list[0])


if __name__ == "__main__":
    args = parse_args()
    # run(args)  # data-loading path is disabled

    # Smoke test: push a (2, 3, 5) tensor of ones through
    # Flatten -> Embedding(30, 10) -> LSTM(20) and print the result.
    model = nn.Sequential()
    with model.name_scope():
        # The layers are instantiated inside the name scope, in the same
        # order in which they are appended to the container.
        for layer in (nn.Flatten(), nn.Embedding(30, 10), rnn.LSTM(20)):
            model.add(layer)
        # model.add(nn.Dense(5, flatten=False))
    model.initialize()
    print(model(nd.ones((2, 3, 5))))
예제 #27
0
    def __init__(self,
                 hidden_size: int,
                 embeddings: TokenEmbeddings,
                 tag_dictionary: Dictionary,
                 tag_type: str,
                 use_crf: bool = True,
                 use_rnn: bool = True,
                 rnn_layers: int = 1,
                 attention=False):
        """Assemble the layers of the sequence tagger.

        Args:
            hidden_size: Hidden units of the LSTM per direction (the LSTM is
                bidirectional, so the linear layer reads ``2 * hidden_size``).
            embeddings: Token embeddings; ``embedding_length`` sizes the
                re-projection layer and the LSTM input.
            tag_dictionary: Tag vocabulary; its length is the output size.
            tag_type: Name of the tag type, stored unchanged.
            use_crf: When true a CRF transition parameter is created,
                otherwise a softmax cross-entropy loss is stored instead.
            use_rnn: Selects the input width of the final linear layer
                (``2 * hidden_size`` vs. the embedding length). The LSTM
                itself is constructed either way.
            rnn_layers: Number of stacked LSTM layers.
            attention: When truthy a dot-product attention cell is added;
                otherwise ``self.attention`` is ``None``.
        """
        super().__init__()

        # Plain configuration values.
        self.use_rnn = use_rnn
        self.hidden_size = hidden_size
        self.use_crf = use_crf
        self.rnn_layers = rnn_layers

        self.trained_epochs = 0

        self.embeddings = embeddings

        # Tag vocabulary and its size.
        self.tag_dictionary = tag_dictionary
        self.tag_type = tag_type
        self.tagset_size = len(tag_dictionary)

        # Network architecture.
        self.nlayers = rnn_layers
        self.hidden_word = None

        self.dropout = nn.Dropout(0.5, axes=[0])

        rnn_input_dim = self.embeddings.embedding_length

        # Same-width dense layer applied to the embeddings; always enabled
        # because the flag is hard-wired to True.
        self.relearn_embeddings = True
        if self.relearn_embeddings:
            self.embedding2nn = nn.Dense(units=rnn_input_dim,
                                         in_units=rnn_input_dim,
                                         flatten=False)

        # Bidirectional LSTM on top of the embedding layer.
        self.rnn_type = 'LSTM'
        self.rnn = rnn.LSTM(hidden_size=hidden_size,
                            num_layers=self.nlayers,
                            bidirectional=True,
                            input_size=rnn_input_dim)

        self.attention = (nlp.model.DotProductAttentionCell(units=512,
                                                            dropout=0.5)
                          if attention else None)

        # Final linear map into tag space.
        linear_in_units = (hidden_size * 2 if self.use_rnn else
                           self.embeddings.embedding_length)
        self.linear = nn.Dense(units=len(tag_dictionary),
                               in_units=linear_in_units,
                               flatten=False)

        if not self.use_crf:
            self.softmax = SoftmaxCrossEntropyLoss(axis=-1, batch_axis=0)
            return

        # CRF transition scores: random normal start values, with the START
        # row and the STOP column pinned to -10000 (presumably to rule out
        # those transitions - confirm against the CRF scoring code).
        transition_shape = (self.tagset_size, self.tagset_size)
        transitions = nd.random.normal(0, 1, transition_shape)
        start_idx = self.tag_dictionary.get_idx_for_item(START_TAG)
        transitions[start_idx, :] = -10000
        stop_idx = self.tag_dictionary.get_idx_for_item(STOP_TAG)
        transitions[:, stop_idx] = -10000
        self.transitions = self.params.get(
            'transitions',
            shape=transition_shape,
            init=mx.init.Constant(transitions))
예제 #28
0
    def __init__(self, prefix=None, params=None):
        """Create the recurrent encoder and the 6-unit output layer.

        Args:
            prefix: Forwarded unchanged to the parent constructor.
            params: Forwarded unchanged to the parent constructor.
        """
        super().__init__(prefix, params)

        # Despite the attribute name, this is a bidirectional LSTM.
        self.gru = rnn.LSTM(
            hidden_size=64,
            num_layers=1,
            bidirectional=True,
            dropout=0.2,
        )
        self.output = nn.Dense(units=6)
예제 #29
0
    def forward(self, x):
        """Mix attention-weighted features with the plain ones and project.

        ``self.attn`` is applied to ``x`` and then again to its own output;
        a softmax over axis 1 of the second result gives the weights. The
        weighted and unweighted features are blended as
        ``c * weighted + (1 - c) * plain`` and passed through ``self.out``.

        Args:
            x: Input batch (the original author's notes give it as
                ``(n, w)`` - not verifiable from this method alone).

        Returns:
            The result of ``self.out`` on the blended features.
        """
        X_ = self.attn(x)  # author's note: (n, w) -> (n, num_hidden)
        # Author's note: this should be dot(X_, W); the same layer is reused.
        E = self.attn(X_)  # author's note: (n, hidden) -> (n, hidden)
        attn_weights = F.softmax(E, axis=1)
        # Compute the weighted features once and reuse them below (the
        # product used to be computed twice, with one result discarded).
        attn_applied = F.elemwise_mul(attn_weights, X_)
        output = self.c * attn_applied + (1 - self.c) * X_
        return self.out(output)


# Model: LSTM -> BatchNorm -> Dense -> Attn.
net = nn.Sequential()
with net.name_scope():
    # layout='NTC': N = batch_size, T = sequence_length, C = feature_dimension
    net.add(rnn.LSTM(num_hidden, num_layers, layout='NTC'))
    net.add(nn.BatchNorm())
    net.add(nn.Dense(sequence_length))  # author's note: converts (nwc) to (nw)
    net.add(Attn(sequence_length, num_hidden))  # last layer: attention, (nw) in, (nw) out

net.collect_params().initialize(mx.init.Normal(sigma=0.1), ctx=ctx)
# Call collect_params() so the parameter dictionary is printed; without the
# parentheses only the bound-method object would be shown.
print(net.collect_params())
#params = net.collect_params()
#params.load('try3.params', ctx=ctx)
# NOTE: despite the variable name, this is an L1 loss.
square_loss = gluon.loss.L1Loss()
learning_settings = {'learning_rate': 0.001, 'momentum': 0.9}
trainer = gluon.Trainer(net.collect_params(), 'sgd', learning_settings)
#metric = mx.metric.MSE()

epochs = 20
loss_sequence = []
예제 #30
0

    def begin_state(self, *args, **kwargs):
        """Return the initial state produced by the wrapped ``self.rnn``.

        All positional and keyword arguments are forwarded unchanged to
        ``self.rnn.begin_state``.
        """
        initial_state = self.rnn.begin_state(*args, **kwargs)
        return initial_state
# Script entry point: builds a toy integer "corpus", wraps an LSTM layer in
# RNNModel, runs the training/prediction helper and exports the model.
if __name__ == "__main__":
    #vocab_size = 60
    # Toy corpus: the integers 0..59, each occurring exactly once.
    my_seq = list(range(60))
    time_machine = my_seq
    character_list = list(set(time_machine))
    vocab_size = len(character_list)
    # Map every distinct corpus value to its position in character_list.
    character_dict = {}
    for e, char in enumerate(character_list):
        character_dict[char] = e
    # Index-encoded corpus; not referenced again within this block.
    time_numerical = [character_dict[char] for char in time_machine]
    corpus_indices = my_seq
    idx_to_char = time_machine
    char_to_idx = character_dict
    num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
    num_epochs, num_steps, batch_size, lr, clipping_theta = 20, 5, 2, 1e2, 1e-2
    # NOTE(review): the prefixes are strings while the keys of char_to_idx are
    # ints - confirm how train_and_predict_rnn_gluon looks them up.
    pred_period, pred_len, prefixes = 8, 5, ['9','21']


    # NOTE(review): 256 duplicates num_hiddens above; keep the two in sync.
    lstm_layer = rnn.LSTM(256)

    model = RNNModel(lstm_layer,vocab_size=vocab_size)

    # ``ctx`` is not defined in this block; it must come from elsewhere in
    # the module.
    train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes)
    model.export('gluon11')