def __init__(self, vocab_size, tag2Idx, embedding_dim, hidden_dim):
    """Create the embedding, BiLSTM, tag projection and CRF transition table.

    :param vocab_size: number of rows of the word-embedding table
    :param tag2Idx: tag lookup; only its ``len()`` is used here, to size the tag set
    :param embedding_dim: width of each word embedding
    :param hidden_dim: combined BiLSTM width; each direction gets ``hidden_dim // 2``
    """
    super(BiLSTM_CRF, self).__init__()
    with self.name_scope():
        # Hyper-parameters are kept on the instance for later use.
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.tag2idx = tag2Idx
        self.tagset_size = len(tag2Idx)

        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)

        units_per_direction = hidden_dim // 2
        self.lstm = rnn.LSTM(units_per_direction,
                             num_layers=1,
                             bidirectional=True)

        # Projection from LSTM output to tag space.
        self.hidden2tag = nn.Dense(self.tagset_size)

        # Transition parameters: entry (i, j) is the score of
        # transitioning *to* i *from* j.
        transition_shape = (self.tagset_size, self.tagset_size)
        self.transitions = self.params.get("crf_transition_matrix",
                                           shape=transition_shape)

        self.hidden = self.init_hidden()
def __init__(self, vocab, embed_size, num_hiddens, num_layers, dense_layers,
             **kwargs):
    """Build the BiLSTM encoder, the staged dense stack and the output heads.

    :param vocab: accepted for signature compatibility; not used in this constructor
    :param embed_size: ``input_size`` of the LSTM encoder
    :param num_hiddens: hidden units of the LSTM encoder
    :param num_layers: number of stacked LSTM layers
    :param dense_layers: total dense-layer budget; each of the five width
        stages receives ``int(dense_layers / 5)`` layers
    :param kwargs: forwarded to the parent block constructor
    """
    super(mix_net, self).__init__(**kwargs)
    self.encoder = rnn.LSTM(num_hiddens,
                            num_layers=num_layers,
                            bidirectional=True,
                            input_size=embed_size)
    self.decoder = nn.Dense(1, activation='relu')
    self.bn = nn.LayerNorm(axis=0)

    self.dnn = nn.HybridSequential()
    # The original iterated over ``nd.arange(k)`` NDArrays just to count to k;
    # a plain ``range`` needs no array allocation and is well defined for k == 0.
    layers_per_stage = int(dense_layers * 1 / 5)
    # Stage widths are multiples of 22: 44 -> 88 -> 176 -> 44 -> 22 units.
    for width_factor in (2, 4, 8, 2, 1):
        for _ in range(layers_per_stage):
            self.dnn.add(nn.Dense(22 * width_factor, activation='relu'))

    self.out_dense = nn.Dense(5)
def __init__(self, rnn_type, hidden_size, emb_size, output_size, dropout,
             target_len, teaching_force, force_prob, ctx):
    """Set up the recurrent core, the linear projections and the attention cells.

    :rnn_type: 'LSTM', 'GRU' or 'DLSTM' (case-insensitive)
    :hidden_size: hidden units of the recurrent layer / cell
    :emb_size: output units of ``input_linear``
    :output_size: output units of ``output_layer``
    :dropout: dropout rate for the LSTM/GRU layer (not passed to ``DLSTMCell``)
    :target_len: stored on the instance; not used in this constructor
    :teaching_force: stored on the instance; not used in this constructor
    :force_prob: stored on the instance; not used in this constructor
    :ctx: stored on the instance; not used in this constructor
    :raises ValueError: if ``rnn_type`` is not one of the supported kinds
    """
    nn.Block.__init__(self)
    self.hidden_size = hidden_size
    self.emb_size = emb_size
    self.dropout = dropout
    self.target_len = target_len
    self.teaching_force = teaching_force
    self.force_prob = force_prob
    self.ctx = ctx

    # Normalise once (the original upper-cased the same string twice).
    rnn_type = rnn_type.upper()
    self.rnn_type = rnn_type
    if rnn_type == 'LSTM':
        self.rnn = rnn.LSTM(hidden_size, layout='NTC', dropout=dropout)
    elif rnn_type == 'GRU':
        self.rnn = rnn.GRU(hidden_size, layout='NTC', dropout=dropout)
    elif rnn_type == 'DLSTM':
        self.rnn = DLSTMCell(hidden_size)
    else:
        raise ValueError('Unsupported rnn type %s' % rnn_type)

    self.input_linear = nn.Dense(emb_size)
    self.output_layer = nn.Dense(output_size)
    # Both attention cells use a fixed size of 64.
    self.attention = BahdanauAttention(64)
    self.head_attention = BahdanauAttention(64)
def __init__(self, rnn_type, hidden_size, output_size, num_layers, dropout,
             bidirectional=True):
    """Create the recurrent layer selected by ``rnn_type``.

    :rnn_type: 'LSTM' or 'GRU' (case-insensitive)
    :hidden_size: hidden units of the recurrent layer
    :output_size: stored on the instance; not used in this constructor
    :num_layers: number of stacked recurrent layers
    :dropout: dropout rate passed to the recurrent layer
    :bidirectional: whether the recurrent layer is bidirectional
    """
    nn.Block.__init__(self)
    self._rnn_type = rnn_type.upper()
    self._hidden_size = hidden_size
    self._output_size = output_size
    self._num_layers = num_layers
    self._dropout = dropout
    self._bidirectional = bidirectional

    # Both layer types take the same arguments, so build them through one
    # code path. The original GRU branch read ``self.num_layers`` (no leading
    # underscore), which is never assigned here and raised AttributeError.
    layer_types = {'LSTM': rnn.LSTM, 'GRU': rnn.GRU}
    layer_cls = layer_types.get(self._rnn_type)
    # NOTE(review): as in the original, an unrecognised rnn_type leaves
    # ``self.rnn`` unset instead of raising -- confirm this is intended.
    if layer_cls is not None:
        self.rnn = layer_cls(self._hidden_size,
                             num_layers=self._num_layers,
                             layout='NTC',
                             dropout=self._dropout,
                             bidirectional=self._bidirectional)
def __init__(self, embedding_dim, model_dim, dropout, head_count, vocab_size,
             extended_size, gpu):
    """Build the decoder LSTM, self-attention, residual block and projections.

    :param embedding_dim: ``input_size`` of the decoder LSTM
    :param model_dim: base width; most sub-layers use ``2 * model_dim`` units
    :param dropout: dropout rate handed to ``Resblock``
    :param head_count: number of attention heads
    :param vocab_size: output units of ``V2``
    :param extended_size: stored as ``self.extended_size``
    :param gpu: stored as ``self.ctx``
    """
    super(Decoder, self).__init__()
    self.ctx = gpu
    # The original never stored embedding_dim yet read self.embedding_dim
    # below, which raised AttributeError.
    self.embedding_dim = embedding_dim
    self.model_dim = model_dim
    self.dropout = dropout
    self.head_count = head_count
    self.vocab_size = vocab_size
    # The original wrote ``self.extended.size``; ``self.extended`` does not
    # exist, so that assignment raised AttributeError.
    self.extended_size = extended_size

    wide = 2 * self.model_dim
    self.decoder_ltsm = rnn.LSTM(wide,
                                 layout='NTC',
                                 input_size=self.embedding_dim,
                                 i2h_weight_initializer='Orthogonal',
                                 h2h_weight_initializer='Orthogonal')
    # NOTE(review): ``base_cell`` is not defined in this constructor; it must
    # be available at module scope -- confirm.
    self.self_attn = MultiHeadAttentionCell(base_cell=base_cell,
                                            query_units=wide,
                                            use_bias=True,
                                            key_units=wide,
                                            value_units=wide,
                                            num_heads=self.head_count,
                                            weight_initializer='Xavier')
    self.fnn = Resblock(wide, self.dropout)
    self.V1 = nn.Dense(wide, in_units=3 * self.model_dim)
    self.V2 = nn.Dense(self.vocab_size, in_units=wide)
    self.W_c = nn.Dense(1)
    self.W_s = nn.Dense(1)
    self.W_x = nn.Dense(1)
def net_define_eu():
    """Assemble and initialise the embedding -> BiLSTM -> capsule classifier.

    Returns the ``nn.Sequential`` network after Xavier initialisation.
    """
    net = nn.Sequential()
    with net.name_scope():
        # Two alternatives were disabled in the original source: a deeper
        # stack of two bidirectional GRUs with max-pooling in between, and a
        # single bidirectional GRU in place of the LSTM below.
        pipeline = [
            nn.Embedding(config.MAX_WORDS, config.EMBEDDING_DIM),
            rnn.LSTM(128,
                     layout='NTC',
                     bidirectional=True,
                     num_layers=1,
                     dropout=0.2),
            transpose(axes=(0, 2, 1)),
            nn.GlobalMaxPool1D(),
            extendDim(axes=3),
            PrimeConvCap(16, 32, kernel_size=(1, 1), padding=(0, 0)),
            CapFullyNGBlock(16,
                            num_cap=32,
                            input_units=32,
                            units=16,
                            route_num=3),
            nn.Dropout(0.2),
            nn.Dense(6, activation='sigmoid'),
        ]
        for layer in pipeline:
            net.add(layer)
    net.initialize(init=init.Xavier())
    return net
def __init__(self, dropout=0.0, **kwargs):
    """Build the recurrent classifier configured by the module-level ``opt``.

    :param dropout: rate for ``self.drop`` and for the recurrent layer
    :param kwargs: forwarded to the parent block constructor
    :raises ValueError: if ``opt.mode`` is not rnn_relu, rnn_tanh, lstm or gru
    """
    super(RNNClsModel, self).__init__(**kwargs)
    with self.name_scope():
        self.drop = nn.Dropout(dropout)

        # Arguments shared by every recurrent layer variant.
        shared = dict(num_layers=opt.num_layers,
                      layout='NTC',
                      dropout=dropout,
                      input_size=opt.num_inputs)
        if opt.mode == 'rnn_relu':
            self.rnn = rnn.RNN(opt.num_hidden, activation='relu', **shared)
        elif opt.mode == 'rnn_tanh':
            # Gluon's rnn.RNN defaults to activation='relu'; the original
            # omitted the argument, so 'rnn_tanh' silently built a ReLU RNN.
            self.rnn = rnn.RNN(opt.num_hidden, activation='tanh', **shared)
        elif opt.mode == 'lstm':
            self.rnn = rnn.LSTM(opt.num_hidden, **shared)
        elif opt.mode == 'gru':
            self.rnn = rnn.GRU(opt.num_hidden, **shared)
        else:
            raise ValueError("Invalid mode %s. Options are rnn_relu, "
                             "rnn_tanh, lstm, and gru" % opt.mode)

        # The classifier is sized for num_hidden * seq_len input units.
        self.fc = nn.Dense(opt.num_actions,
                           in_units=opt.num_hidden * opt.seq_len)
        self.num_hidden = opt.num_hidden
        self.seq_len = opt.seq_len
def __init__(self, vocab_size=VOCAB_SIZE, embedding_size=32, rnn_size=128,
             num_layers=2, drop_rate=0.0, **kwargs):
    """Create the embedding -> dropout -> LSTM -> dense language model.

    The constructor arguments are also recorded in ``self.args``.

    :param vocab_size: embedding rows and decoder output units
    :param embedding_size: embedding width and LSTM ``input_size``
    :param rnn_size: LSTM hidden units and decoder ``in_units``
    :param num_layers: number of stacked LSTM layers
    :param drop_rate: rate for the dropout layer and the LSTM
    :param kwargs: forwarded to the parent block constructor
    """
    super(Model, self).__init__(**kwargs)
    self.args = dict(
        vocab_size=vocab_size,
        embedding_size=embedding_size,
        rnn_size=rnn_size,
        num_layers=num_layers,
        drop_rate=drop_rate,
    )
    with self.name_scope():
        self.encoder = nn.Embedding(vocab_size, embedding_size)
        self.dropout = nn.Dropout(drop_rate)
        lstm_options = {'dropout': drop_rate, 'input_size': embedding_size}
        self.rnn = rnn.LSTM(rnn_size, num_layers, **lstm_options)
        self.decoder = nn.Dense(vocab_size, in_units=rnn_size)
def __init__(self, vocab, embed_size, num_hiddens, num_layers, **kwargs):
    """Build the embedding, bidirectional LSTM encoder and output layer.

    1. Each word is first mapped to a feature vector by the embedding layer.
    2. A bidirectional recurrent network further encodes the feature
       sequence to obtain sequence information.
    3. The encoded sequence information is transformed into the output by a
       fully connected layer.

    The hidden states of the bidirectional LSTM at the first and the last
    time step are concatenated as the encoding of the feature sequence and
    passed to the output layer for classification.

    :param vocab: vocabulary; its length sizes the embedding table
    :param embed_size: embedding width and LSTM ``input_size``
    :param num_hiddens: hidden units of the LSTM
    :param num_layers: number of stacked LSTM layers
    :param kwargs: forwarded to the parent block constructor
    """
    super(BiRNN, self).__init__(**kwargs)
    vocab_len = len(vocab)
    self.embedding = nn.Embedding(vocab_len, embed_size)
    # bidirectional=True yields a bidirectional recurrent network.
    encoder_options = dict(num_layers=num_layers,
                           bidirectional=True,
                           input_size=embed_size)
    self.encoder = rnn.LSTM(num_hiddens, **encoder_options)
    self.decoder = nn.Dense(2)
def __init__(self, n_hidden, in_seq_len, out_seq_len, vocab_size, enc_layer,
             dec_layer=1, **kwargs):
    """Create the LSTM encoder, two decoder cells, batch norm and output layer.

    :param n_hidden: hidden units of the encoder and of both decoder cells
    :param in_seq_len: stored on the instance
    :param out_seq_len: stored on the instance
    :param vocab_size: output units of the final dense layer
    :param enc_layer: number of stacked encoder LSTM layers
    :param dec_layer: accepted but not used in this constructor
    :param kwargs: forwarded to the parent block constructor
    """
    super(calculator, self).__init__(**kwargs)
    self.n_hidden = n_hidden
    self.vocab_size = vocab_size
    self.enc_layer = enc_layer
    self.in_seq_len = in_seq_len
    self.out_seq_len = out_seq_len
    with self.name_scope():
        self.encoder = rnn.LSTM(n_hidden, num_layers=enc_layer, layout='NTC')
        self.decoder_0 = rnn.LSTMCell(n_hidden)
        self.decoder_1 = rnn.LSTMCell(n_hidden)
        self.batchnorm = nn.BatchNorm(axis=2)
        self.dense = nn.Dense(vocab_size, flatten=False)
def __init__(self, n_inputs, n_hidden, n_layers=1, dropout=0.5):
    """Create the bidirectional LSTM, state-initialiser layers and output head.

    :param n_inputs: ``input_size`` of the LSTM
    :param n_hidden: hidden units per LSTM direction
    :param n_layers: number of stacked LSTM layers
    :param dropout: dropout rate of the LSTM (``self.d`` uses a fixed 0.5)
    """
    super(BILSTM, self).__init__()
    with self.name_scope():
        self.r = rnn.LSTM(n_hidden,
                          n_layers,
                          dropout=dropout,
                          input_size=n_inputs,
                          bidirectional=True)
        # Both state-initialiser layers are 2 * n_hidden wide with tanh.
        state_width = 2 * n_hidden
        state_layer_options = {'flatten': False, 'activation': 'tanh'}
        self.c_init = nn.Dense(state_width, **state_layer_options)
        self.h_init = nn.Dense(state_width, **state_layer_options)
        self.fc = nn.Dense(1, flatten=False)
        self.d = nn.Dropout(.5)
def __init__(self, mode, vocab_size, embed_dim, hidden_dim, num_layers,
             dropout=0.5, **kwargs):
    """Create the embedding, the recurrent layer chosen by ``mode`` and the decoder.

    :param mode: one of 'rnn_relu', 'rnn_tanh', 'lstm', 'gru'
    :param vocab_size: embedding rows and decoder output units
    :param embed_dim: embedding width and recurrent ``input_size``
    :param hidden_dim: hidden units of the recurrent layer
    :param num_layers: number of stacked recurrent layers
    :param dropout: rate for ``self.drop`` and the recurrent layer
    :param kwargs: forwarded to the parent block constructor
    :raises ValueError: if ``mode`` is not a supported value
    """
    super(RNNModel, self).__init__(**kwargs)
    with self.name_scope():
        self.drop = nn.Dropout(dropout)
        embed_init = mx.init.Uniform(0.1)
        self.encoder = nn.Embedding(vocab_size, embed_dim,
                                    weight_initializer=embed_init)

        # Keyword arguments common to every recurrent variant.
        recurrent_options = dict(dropout=dropout, input_size=embed_dim)
        if mode == 'lstm':
            self.rnn = rnn.LSTM(hidden_dim, num_layers, **recurrent_options)
        elif mode == 'gru':
            self.rnn = rnn.GRU(hidden_dim, num_layers, **recurrent_options)
        elif mode == 'rnn_tanh':
            self.rnn = rnn.RNN(hidden_dim, num_layers, activation='tanh',
                               **recurrent_options)
        elif mode == 'rnn_relu':
            self.rnn = rnn.RNN(hidden_dim, num_layers, activation='relu',
                               **recurrent_options)
        else:
            raise ValueError("Invalid mode %s. Options are rnn_relu, "
                             "rnn_tanh, lstm, and gru"%mode)

        self.decoder = nn.Dense(vocab_size, in_units=hidden_dim)
        self.hidden_dim = hidden_dim
def __init__(self, prefix=None, params=None):
    """Create the BiLSTM branch, two convolution branches and the output head.

    :param prefix: forwarded to the parent block constructor
    :param params: forwarded to the parent block constructor
    """
    super().__init__(prefix, params)
    with self.name_scope():
        self.lstm = rnn.LSTM(64,
                             num_layers=1,
                             bidirectional=True,
                             dropout=0.2,
                             layout='NTC')
        self.lstm_out = nn.MaxPool2D(pool_size=(FIXED_WORD_LENGTH, 1))

        # conv1 and conv2 share exactly the same configuration.
        conv_options = dict(kernel_size=(INFOBOX_VALUE_LENGTH, DIMENSION),
                            strides=(1, 1),
                            dilation=(1, 1),
                            use_bias=False,
                            in_channels=1,
                            activation='relu')
        self.conv1 = MyConv2D(INFOBOX_LENGTH, **conv_options)
        self.conv2 = MyConv2D(INFOBOX_LENGTH, **conv_options)

        self.dense1 = nn.Dense(384, activation="sigmoid")
        self.dense2 = nn.Dense(384, activation="sigmoid")

        # Output head: flatten -> relu -> dropout -> 7 output units.
        self.output = nn.Sequential()
        head_layers = (
            nn.Flatten(),
            nn.Activation(activation='relu'),
            nn.Dropout(0.5),
            nn.Dense(7),
        )
        for layer in head_layers:
            self.output.add(layer)
def __init__(self, hidden_size, num_layers, num_classes):
    """Create an LSTM followed by a dense classification layer.

    :param hidden_size: hidden units of the LSTM (also stored on the instance)
    :param num_layers: number of stacked LSTM layers (also stored on the instance)
    :param num_classes: output units of the dense layer
    """
    super(RNN, self).__init__()
    self.hidden_size, self.num_layers = hidden_size, num_layers
    self.lstm = rnn.LSTM(hidden_size, num_layers=num_layers)
    self.dense = nn.Dense(units=num_classes)
# NOTE(review): the `return` below is the tail of a function that starts before
# this chunk; it is reproduced unchanged and its original indentation is not
# recoverable from here.
return inputs, targets


# Script entry point: fit a one-layer LSTM + dense regressor on a linear sequence.
if __name__ == '__main__':
    nb_epochs = 200
    # 30 evenly spaced float32 values from 10 to 300.
    sequence = np.linspace(10, 300, num=30, dtype='float32')
    n_steps = 3
    n_features = 1  # not referenced in the visible part of the script
    # NOTE(review): learning_rate is not used below; the Trainer hard-codes 0.03.
    learning_rate = 0.01
    xnp, ynp = split_sequence(sequence, n_steps)
    # Copy the NumPy arrays into MXNet NDArrays.
    x = mx.ndarray.from_numpy(xnp, zero_copy=False)
    y = mx.ndarray.from_numpy(ynp, zero_copy=False)
    network = nn.Sequential()
    network.add(rnn.LSTM(50, 1))
    network.add(nn.Dense(1))
    network.initialize(mx.init.Xavier())
    criterion = mx.gluon.loss.L2Loss()
    trainer = mx.gluon.Trainer(network.collect_params(), optimizer='adam', optimizer_params={'learning_rate': 0.03})
    # Full-batch training: every epoch runs the whole data set once.
    for epoch in range(nb_epochs):
        with mx.autograd.record():
            out = network(x)
            loss = criterion(out, y)
        loss.backward()
        # NOTE(review): batch_size is taken from x.shape[1]; confirm that axis 1
        # (not axis 0) is the batch axis of what split_sequence returns.
        trainer.step(batch_size=x.shape[1])
# NOTE(review): the four statements below are the tail of a method that starts
# before this chunk; they are reproduced unchanged and their original
# indentation (presumably a loop body followed by the method's return) is not
# recoverable from here.
y = nd.dot(h, self.w_hq) + self.b_q
outputs.append(y)
y_hat = nd.concat(*outputs, dim=0)
return y_hat, (h, c)


# Script entry point: train a character-level LSTM on the Jay Chou lyrics corpus.
if __name__ == '__main__':
    _corpus_indices, _idx_to_char, _char_to_idx, _vocab_size = \
        load_jaychou_lyrics("../data/jaychou_lyrics.txt.zip")
    _num_hidden = 256
    _num_steps = 35
    _batch_size = 32
    _lr = 1e2
    # Switch between the Gluon-layer model and the from-scratch implementation.
    use_gluon = False
    if use_gluon:
        _rnn_layer = rnn.LSTM(_num_hidden)
        model = BaseRNNGluon(_vocab_size, _idx_to_char, _char_to_idx, _num_hidden, _rnn_layer)
    else:
        model = LSTMScratch(_vocab_size, _idx_to_char, _char_to_idx, _num_hidden)
    model.fit(_corpus_indices, _num_steps, {
        "lr": _lr,
        "batch_size": _batch_size
    }, epochs=250)
def __init__(self, num_embed, num_hidden, num_layers, bidirectional=False,
             sequence_length=sequence_length, **kwargs):
    """Create a 'TNC'-layout LSTM followed by a single-unit dense decoder.

    :param num_embed: ``input_size`` of the LSTM
    :param num_hidden: hidden units of the LSTM and ``in_units`` of the decoder
    :param num_layers: number of stacked LSTM layers
    :param bidirectional: forwarded to the LSTM
    :param sequence_length: accepted but not used in this constructor
    :param kwargs: forwarded to the parent block constructor
    """
    super(RNNModel, self).__init__(**kwargs)
    self.num_hidden = num_hidden
    with self.name_scope():
        lstm_options = {
            'input_size': num_embed,
            'bidirectional': bidirectional,
            'layout': 'TNC',
        }
        self.rnn = rnn.LSTM(num_hidden, num_layers, **lstm_options)
        # NOTE(review): in_units stays num_hidden even when bidirectional=True;
        # confirm the forward pass feeds the decoder a matching width.
        self.decoder = nn.Dense(units=1, in_units=num_hidden)
import d2lzh as d2l
import load_data as ld
import math
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn, rnn
import time

# Load the data.
(corpus_indices, char_to_idx, idx_to_char,
 vocab_size) = ld.load_data_jay_lyrics()
num_inputs = vocab_size
num_hiddens = 256
num_outputs = vocab_size
ctx = d2l.try_gpu()

# Define the model.
lstm_layer = rnn.LSTM(num_hiddens)
model = d2l.RNNModel(lstm_layer, vocab_size)

# Train the model and generate lyrics.
num_epochs = 160
num_steps = 35
batch_size = 32
lr = 1e2
clipping_theta = 1e-2
pred_period = 40
pred_len = 50
prefixes = ['分开', '不分开']
d2l.train_and_predict_rnn_gluon(
    model, num_hiddens, vocab_size, ctx,
    corpus_indices, idx_to_char, char_to_idx,
    num_epochs, num_steps, lr, clipping_theta,
    batch_size, pred_period, pred_len, prefixes)
def __init__(self, vocab_size, embed_size, num_hiddens, num_layers, dropout=0,
             **kwargs):
    """Create the token embedding and the stacked LSTM of the encoder.

    :param vocab_size: number of rows of the embedding table
    :param embed_size: embedding width
    :param num_hiddens: hidden units of the LSTM
    :param num_layers: number of stacked LSTM layers
    :param dropout: dropout rate passed to the LSTM
    :param kwargs: forwarded to the parent block constructor
    """
    super(Seq2SeqEncoder, self).__init__(**kwargs)
    embedding_layer = nn.Embedding(vocab_size, embed_size)
    self.embedding = embedding_layer
    recurrent_layer = rnn.LSTM(num_hiddens, num_layers, dropout=dropout)
    self.rnn = recurrent_layer
# Fragment of a training script; it begins and ends outside this chunk.
print("Y_train: " + str(Y_train))
print("Y_test: " + str(Y_test))

## define network
num_classes = 1
num_hidden = 25
learning_rate = .01
epochs = 10
batch_size = 100
model = nn.Sequential()
with model.name_scope():
    # NOTE(review): the embedding is attached as an attribute while the other
    # layers go through model.add(); confirm this is intentional.
    model.embed = nn.Embedding(voca_size, num_embed)
    model.add(
        rnn.LSTM(num_hidden, layout='NTC', dropout=0.5, bidirectional=False))
    model.add(nn.Dense(num_classes))


# NOTE(review): this function continues past the end of the visible chunk;
# only its visible part is reproduced here.
def eval_accuracy(x, y, batch_size):
    accuracy = mx.metric.Accuracy()
    # Walk over the data in full batches; a trailing partial batch is skipped.
    for i in range(x.shape[0] // batch_size):
        data = x[i * batch_size:(i * batch_size + batch_size), ]
        target = y[i * batch_size:(i * batch_size + batch_size), ]
        output = model(data)
        # Threshold each output at 0.5 to get a 0/1 prediction.
        predictions = nd.array([(1 if out >= 0.5 else 0) for out in output], context)
        # predictions = nd.argmax(output, axis=1)
def __init__(self,
             hidden_size: int,
             embeddings: StackedEmbeddings,
             tag_dictionary: Dictionary,
             tag_type: str,
             use_crf: bool = True,
             use_rnn: bool = True,
             rnn_layers: int = 1):
    """Build the tagger: embedding re-projection, BiLSTM, tag projection, transitions.

    :param hidden_size: hidden units per LSTM direction
    :param embeddings: embedding stack; its ``embedding_length`` sizes the inputs
    :param tag_dictionary: tag dictionary; its length is the tag-set size
    :param tag_type: stored on the instance
    :param use_crf: if true, ``transitions`` is a trainable parameter; otherwise
        it is a plain zero NDArray
    :param use_rnn: selects the input width of the final linear layer
    :param rnn_layers: number of stacked LSTM layers
    """
    super(SequenceTagger, self).__init__()
    self.embeddings = embeddings
    with self.name_scope():
        self.use_rnn = use_rnn
        self.hidden_size = hidden_size
        self.use_crf = use_crf
        self.use_viterbi = False
        self.rnn_layers = rnn_layers
        self.trained_epochs = 0

        # Tag dictionary and derived sizes.
        self.tag_dictionary = tag_dictionary
        self.tag_type = tag_type
        n_tags = len(tag_dictionary)
        self.tagset_size = n_tags

        # Network architecture.
        self.nlayers = rnn_layers
        self.hidden_word = None
        self.dropout = nn.Dropout(0.5, axes=[0])

        rnn_input_dim = self.embeddings.embedding_length
        self.relearn_embeddings = True
        if self.relearn_embeddings:
            self.embedding2nn = nn.Dense(in_units=rnn_input_dim,
                                         units=rnn_input_dim,
                                         flatten=False)

        # Bidirectional LSTM on top of the embedding layer.
        self.rnn_type = 'LSTM'
        self.rnn = rnn.LSTM(input_size=rnn_input_dim,
                            hidden_size=hidden_size,
                            num_layers=self.nlayers,
                            bidirectional=True)

        # Final linear map to tag space; its input width depends on whether
        # the BiLSTM output or the raw embedding is fed in.
        if use_rnn:
            linear_in = hidden_size * 2
        else:
            linear_in = self.embeddings.embedding_length
        self.linear = nn.Dense(in_units=linear_in, units=n_tags, flatten=False)

        transition_shape = (n_tags, n_tags)
        if use_crf:
            initial = nd.random.normal(0, 1, transition_shape)
            start_idx = self.tag_dictionary.get_idx_for_item(START_TAG)
            initial[start_idx, :] = -10000
            stop_idx = self.tag_dictionary.get_idx_for_item(STOP_TAG)
            initial[:, stop_idx] = -10000
            self.transitions = self.params.get(
                'transitions',
                shape=transition_shape,
                init=mx.init.Constant(initial))
        else:
            # This transition matrix is updated through statistics, not
            # gradient descent.
            self.transitions = nd.zeros(transition_shape)
def __init__(self, bert, prefix=None, params=None, \
                n_rnn_layers=0, rnn_hidden_size=600, num_rnn_layers=1, n_dense_layers=0, units_dense=600, \
                add_query=False, \
                apply_coattention=False, bert_out_dim=768,\
                apply_self_attention=False, self_attention_dimension=None, n_attention_heads=4,
                apply_transformer=False,
                qanet_style_out=False,
                bidaf_style_out=False,
                remove_special_token=False):
    """Assemble the optional layers that sit on top of a BERT encoder for QA.

    The boolean flags are stored on the instance and select which optional
    sub-blocks are created: co-attention (with a QANet-style or BiDAF-style
    output), multi-head self-attention, a transformer encoder, and the
    default LSTM/dense span classifier.

    NOTE(review): SOURCE was whitespace-collapsed, so the nesting of the
    ``with``/``if`` bodies below is reconstructed -- verify it against the
    original file.
    """
    super(BertForQA, self).__init__(prefix=prefix, params=params)
    self.add_query=add_query
    self.apply_coattention = apply_coattention
    self.apply_self_attention = apply_self_attention
    self.apply_transformer = apply_transformer
    self.qanet_style_out = qanet_style_out
    self.bidaf_style_out = bidaf_style_out
    self.remove_special_token = remove_special_token
    self.bert = bert
    if self.apply_coattention:
        with self.name_scope():
            #self.co_attention_ = CoAttention("co-attention_", bert_out_dim) # try multiple layers
            self.co_attention = CoAttention("co-attention", bert_out_dim)
            if self.qanet_style_out:
                # QANet-style output: projection, model encoder, and separate
                # start/end predictors.
                self.project = gluon.nn.Dense(
                    units=bert_out_dim,
                    flatten=False,
                    use_bias=False,
                    weight_initializer=Xavier(),
                    prefix='projection_'
                )
                self.dropout = gluon.nn.Dropout(0.1)
                self.model_encoder = TransformerEncoder(units=bert_out_dim)
                self.predict_begin = gluon.nn.Dense(
                    units=1,
                    use_bias=True,
                    flatten=False,
                    weight_initializer=Xavier(
                        rnd_type='uniform', factor_type='in', magnitude=1),
                    bias_initializer=Uniform(1.0/bert_out_dim),
                    prefix='predict_start_'
                )
                self.predict_end = gluon.nn.Dense(
                    units=1,
                    use_bias=True,
                    flatten=False,
                    weight_initializer=Xavier(
                        rnd_type='uniform', factor_type='in', magnitude=1),
                    bias_initializer=Uniform(1.0/bert_out_dim),
                    prefix='predict_end_'
                )
                self.flatten = gluon.nn.Flatten()
            elif self.bidaf_style_out:
                # BiDAF mode
                self.modeling_layer = rnn.LSTM(
                    hidden_size=int(bert_out_dim / 2),
                    num_layers=2,
                    dropout=0.0,
                    bidirectional=True,
                    input_size=int(bert_out_dim * 4))
                self.output_layer = BiDAFOutputLayer(span_start_input_dim=int(bert_out_dim / 2),
                                                     nlayers=1,
                                                     dropout=0.2)
            # '''
            # Mapping for the cls token's encoding (used in version 2.0).
            # NOTE(review): the nesting level of this assignment is a guess.
            self.cls_mapping = nn.Dense(
                units=2,
                flatten=False,
                weight_initializer=Xavier(),
                prefix='cls_mapping_'
            )
            # '''
    if self.apply_self_attention:
        # Default the attention width to the BERT output width.
        if self_attention_dimension is None:
            self_attention_dimension = bert_out_dim
        with self.name_scope():
            self.multi_head_attention = MultiHeadAttentionCell(DotProductAttentionCell(), \
                self_attention_dimension, self_attention_dimension, self_attention_dimension, n_attention_heads)
    if self.apply_transformer:
        with self.name_scope():
            self.transformer = TransformerEncoder(units=bert_out_dim)
    # The co-attention output styles bring their own predictors, so the
    # generic span classifier is only built for the remaining configurations.
    if self.apply_coattention and (self.qanet_style_out or self.bidaf_style_out):
        self.span_classifier = None
    else:
        self.span_classifier = nn.HybridSequential()
        with self.span_classifier.name_scope():
            for i in range(n_rnn_layers):
                self.span_classifier.add(rnn.LSTM(
                    hidden_size=rnn_hidden_size,
                    num_layers=num_rnn_layers,
                    dropout=0.0,
                    bidirectional=True))
            for i in range(n_dense_layers):
                self.span_classifier.add(nn.Dense(units=units_dense, flatten=False, activation='relu'))
            # Final layer has two output units.
            self.span_classifier.add(nn.Dense(units=2, flatten=False))
# Fragment of the training driver: validate arguments, build the data
# iterators, the network, the loss and the trainer.
if not max(args.filter_list) <= args.q:
    raise AssertionError("no filter can be larger than q")
if not args.q >= math.ceil(args.seasonal_period / args.time_interval):
    raise AssertionError("size of skip connections cannot exceed q")

# Build data iterators
train_iter, val_iter, test_iter = build_iters(args.data_dir, args.max_records,
                                              args.q, args.horizon,
                                              args.splits, args.batch_size)
input_feature_shape = train_iter.provide_data[0][1]

# Choose cells for recurrent layers: each cell will take the output of the
# previous cell in the list
rcells = [rnn.GRU(hidden_size=args.recurrent_state_size, layout='NTC')]
skiprcells = [
    rnn.LSTM(hidden_size=args.recurrent_state_size, layout='NTC')
]

# Define net
p = int(args.seasonal_period / args.time_interval)
net = TCN(input_feature_shape, args.q, args.filter_list, args.num_filters,
          args.dropout, rcells, skiprcells, p)

# Use the CPU when no GPU list was given, otherwise one context per GPU id.
# The original tested `args.gpus is ''`; identity comparison against a string
# literal is unreliable (and a SyntaxWarning on Python 3.8+), so compare by
# value instead.
if args.gpus is None or args.gpus == '':
    ctx = mx.cpu()
else:
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
net.initialize(mx.initializer.Uniform(0.1), ctx=ctx)

loss = gluon.loss.HuberLoss(rho=0.1)
#print("Loss weight: %e"%float(1.0/label_scale))
#loss = gluon.loss.L2Loss(weight=float(1.0/label_scale)) # won't help!
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.005})
def __init__(self,
             dictionary: Dictionary,
             is_forward_lm: bool,
             hidden_size: int,
             nlayers: int,
             embedding_size: int = 100,
             nout=None,
             dropout=0.5,
             init_params: Dict = None):
    """Build the character language model: embedding, LSTM, projection, decoder.

    :param dictionary: character dictionary; its length sizes embedding and decoder
    :param is_forward_lm: stored on the instance
    :param hidden_size: hidden units of the LSTM
    :param nlayers: number of stacked LSTM layers
    :param embedding_size: embedding width and LSTM ``input_size``
    :param nout: if given, a projection of this width is inserted before the decoder
    :param dropout: rate for ``self.drop`` and, in most configurations, the LSTM
    :param init_params: optional dict of pre-trained arrays used as constant
        initialisers (keys such as ``'encoder.weight'``, ``'rnn.weight_ih_l0'``)
    """
    super(ContextualStringModel, self).__init__()
    self.dictionary = dictionary
    self.is_forward_lm = is_forward_lm
    self.dropout = dropout
    self.hidden_size = hidden_size
    self.embedding_size = embedding_size
    self.nlayers = nlayers

    vocab = len(dictionary)
    with self.name_scope():
        self.drop = nn.Dropout(dropout)

        if init_params:
            embed_init = mx.initializer.Constant(init_params['encoder.weight'])
        else:
            embed_init = mx.initializer.Uniform(0.1)
        self.encoder = nn.Embedding(vocab, embedding_size,
                                    weight_initializer=embed_init)

        if nlayers == 1 and init_params:
            # Single layer with pre-trained weights: every LSTM parameter
            # starts from the supplied array.
            self.rnn = rnn.LSTM(
                hidden_size,
                nlayers,
                dropout=dropout,
                input_size=embedding_size,
                i2h_weight_initializer=mx.initializer.Constant(
                    init_params['rnn.weight_ih_l0']),
                h2h_weight_initializer=mx.initializer.Constant(
                    init_params['rnn.weight_hh_l0']),
                i2h_bias_initializer=mx.initializer.Constant(
                    init_params['rnn.bias_ih_l0']),
                h2h_bias_initializer=mx.initializer.Constant(
                    init_params['rnn.bias_hh_l0']))
        elif nlayers == 1:
            # Single layer without pre-trained weights: no dropout argument.
            self.rnn = rnn.LSTM(hidden_size, nlayers,
                                input_size=embedding_size)
        else:
            self.rnn = rnn.LSTM(hidden_size, nlayers,
                                dropout=dropout,
                                input_size=embedding_size)

        self.hidden = None
        self.nout = nout
        if nout is None:
            self.proj = None
            if init_params:
                dec_weight_init = mx.initializer.Constant(
                    init_params['decoder.weight'])
                dec_bias_init = mx.initializer.Constant(
                    init_params['decoder.bias'])
            else:
                dec_weight_init = mx.initializer.Uniform(0.1)
                dec_bias_init = 'zero'
            self.decoder = nn.Dense(vocab,
                                    weight_initializer=dec_weight_init,
                                    bias_initializer=dec_bias_init,
                                    in_units=hidden_size)
        else:
            self.proj = nn.Dense(nout,
                                 weight_initializer='Xavier',
                                 in_units=hidden_size)
            self.decoder = nn.Dense(
                vocab,
                weight_initializer=mx.initializer.Uniform(0.1),
                bias_initializer='zero',
                in_units=nout)
# Fragment of a training script; `X`, `y`, `_data`, `_label`, `m`,
# `batch_size` and `num_hidden` are defined before this chunk.
train_iter = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y), batch_size=batch_size, shuffle=False)
print("train_data shape: ", X.shape, y.shape)
# Everything from index m onwards is held out for evaluation.
X = _data[m:]
y = _label[m:]
eval_iter = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y), batch_size=1, shuffle=False)
print("eval_data shape: ", X.shape, y.shape)
net = gluon.nn.Sequential()
#net = gluon.nn.HybridSequential() # doesn't work since LSTM is not hybrid
with net.name_scope():
    net.add(rnn.LSTM(num_hidden))  # note, check also: LSTMCell
    #net.add(nn.Dense(3)) # do not add this, worse accuracy
    net.add(nn.Dense(
        1))  # output dimension is 1; "in_units" is omitted and inferred
net.collect_params().initialize(mx.init.Normal(sigma=0.1))
#softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
square_loss = gluon.loss.L2Loss()
# Plain SGD with momentum.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': 0.005,
    'momentum': 0.9
})
#metric = mx.metric.MSE()
epochs = 20
loss_sequence = []
# NOTE(review): the docstring and the two methods below belong to the body of
# the `Network` class, whose header lies before this chunk; the original
# indentation is not recoverable from here.
"""Network"""


def __init__(self):
    super(Network, self).__init__()
    # self.body = nn.


def hybrid_forward(self, F, x, *args, **kwargs):
    """Forward"""


def run(opts):
    """Run.

    Allocates the data buffer on the context selected by ``opts`` and loads
    the first validation subject into slot 0.

    :param opts: parsed options; ``opts.batch_size`` sizes the buffer
    """
    # The original read the module-level ``args`` instead of its own
    # ``opts`` parameter, so it only worked when called from the __main__
    # block below.
    ctx = set_context(opts)
    data = nd.zeros((opts.batch_size, 60, 20, 240, 240), ctx=ctx)
    train_list, val_list = get_subject_list(opts)
    j = 0
    jj = 0
    data[jj] = np.load("%sIMG.npy" % val_list[int(j)])


if __name__ == "__main__":
    args = parse_args()
    # run(args)
    # Smoke test: flatten -> embedding -> LSTM on a block of ones.
    model = nn.Sequential()
    with model.name_scope():
        model.add(nn.Flatten())
        model.add(nn.Embedding(30, 10))
        model.add(rnn.LSTM(20))
        # model.add(nn.Dense(5, flatten=False))
    model.initialize()
    print(model(nd.ones((2, 3, 5))))
def __init__(self,
             hidden_size: int,
             embeddings: TokenEmbeddings,
             tag_dictionary: Dictionary,
             tag_type: str,
             use_crf: bool = True,
             use_rnn: bool = True,
             rnn_layers: int = 1,
             attention=False):
    """Build the tagger: re-projection, BiLSTM, optional attention, tag projection.

    :param hidden_size: hidden units per LSTM direction
    :param embeddings: token embeddings; ``embedding_length`` sizes the inputs
    :param tag_dictionary: tag dictionary; its length is the tag-set size
    :param tag_type: stored on the instance
    :param use_crf: if true, create the trainable ``transitions`` parameter;
        otherwise create a softmax cross-entropy loss
    :param use_rnn: selects the input width of the final linear layer
    :param rnn_layers: number of stacked LSTM layers
    :param attention: if true, create a dot-product attention cell
    """
    super(SequenceTagger, self).__init__()
    self.use_rnn = use_rnn
    self.hidden_size = hidden_size
    self.use_crf = use_crf
    self.rnn_layers = rnn_layers
    self.trained_epochs = 0
    self.embeddings = embeddings

    # Tag dictionary and derived sizes.
    self.tag_dictionary = tag_dictionary
    self.tag_type = tag_type
    n_tags = len(tag_dictionary)
    self.tagset_size = n_tags

    # Network architecture.
    self.nlayers = rnn_layers
    self.hidden_word = None
    self.dropout = nn.Dropout(0.5, axes=[0])

    rnn_input_dim = self.embeddings.embedding_length
    self.relearn_embeddings = True
    if self.relearn_embeddings:
        self.embedding2nn = nn.Dense(in_units=rnn_input_dim,
                                     units=rnn_input_dim,
                                     flatten=False)

    # Bidirectional LSTM on top of the embedding layer.
    self.rnn_type = 'LSTM'
    self.rnn = rnn.LSTM(input_size=rnn_input_dim,
                        hidden_size=hidden_size,
                        num_layers=self.nlayers,
                        bidirectional=True)

    self.attention = (nlp.model.DotProductAttentionCell(units=512,
                                                        dropout=0.5)
                      if attention else None)

    # Final linear map to tag space.
    if use_rnn:
        linear_in = hidden_size * 2
    else:
        linear_in = self.embeddings.embedding_length
    self.linear = nn.Dense(in_units=linear_in, units=n_tags, flatten=False)

    if not use_crf:
        self.softmax = SoftmaxCrossEntropyLoss(axis=-1, batch_axis=0)
    else:
        transition_shape = (n_tags, n_tags)
        initial = nd.random.normal(0, 1, transition_shape)
        start_idx = self.tag_dictionary.get_idx_for_item(START_TAG)
        initial[start_idx, :] = -10000
        stop_idx = self.tag_dictionary.get_idx_for_item(STOP_TAG)
        initial[:, stop_idx] = -10000
        self.transitions = self.params.get(
            'transitions',
            shape=transition_shape,
            init=mx.init.Constant(initial))
def __init__(self, prefix=None, params=None):
    """Create the bidirectional recurrent layer and the 6-unit output layer.

    :param prefix: forwarded to the parent block constructor
    :param params: forwarded to the parent block constructor
    """
    super().__init__(prefix, params)
    # Despite the attribute name, this layer is an LSTM.
    recurrent_options = {'num_layers': 1, 'bidirectional': True, 'dropout': 0.2}
    self.gru = rnn.LSTM(64, **recurrent_options)
    self.output = nn.Dense(units=6)
# NOTE(review): `forward` is a method of a class whose header lies before this
# chunk; the original indentation is not recoverable from here.
def forward(self, x):
    """Apply the attention gate and the output layer.

    The gated signal blends the attention-weighted features with the
    unweighted ones using the mixing coefficient ``self.c``.

    :param x: input batch
    :return: result of ``self.out`` applied to the blended features
    """
    X_ = self.attn(x)  # (n, w) -> (n, num_hidden)
    # NOTE(review): the original author remarked this should be dot(X_, W);
    # the same ``self.attn`` layer is applied a second time here.
    E = self.attn(X_)  # (n, hidden) -> (n, hidden)
    attn_weights = F.softmax(E, axis=1)  # (n, hidden)
    # The original also computed an unused `attn_applied` with the same
    # product; that dead computation has been removed.
    output = self.c * (F.elemwise_mul(X_, attn_weights)) + (1 - self.c) * X_
    output = self.out(output)  # (n, hidden) -> (n, output_size)
    return output


net = nn.Sequential()
with net.name_scope():
    # T: sequence_length, N: batch_size, C: feature_dimension
    net.add(rnn.LSTM(num_hidden, num_layers, layout='NTC'))
    net.add(nn.BatchNorm())
    net.add(nn.Dense(sequence_length))  # this is to convert (nwc) to (nw)
    net.add(Attn(sequence_length, num_hidden))  # last layer attn, in (nw) o (nw)
net.collect_params().initialize(mx.init.Normal(sigma=0.1), ctx=ctx)
# The original printed the bound method object (`net.collect_params` without
# parentheses) instead of the parameter dictionary.
print(net.collect_params())
#params = net.collect_params()
#params.load('try3.params', ctx=ctx)
square_loss = gluon.loss.L1Loss()
learning_settings = {'learning_rate': 0.001, 'momentum':0.9}
trainer = gluon.Trainer(net.collect_params(), 'sgd', learning_settings)
#metric = mx.metric.MSE()
epochs = 20
loss_sequence = []
# NOTE(review): `begin_state` is a method of a class whose header lies before
# this chunk; the original indentation is not recoverable from here.
def begin_state(self, *args, **kwargs):
    """Delegate to the wrapped recurrent layer's ``begin_state``."""
    return self.rnn.begin_state(*args, **kwargs)


# Script entry point: train the model on a toy integer sequence and export it.
if __name__ == "__main__":
    #vocab_size = 60
    my_seq = list(range(60))
    time_machine = my_seq
    character_list = list(set(time_machine))
    vocab_size = len(character_list)
    # Map every distinct symbol to its position in character_list.
    character_dict = {}
    for e, char in enumerate(character_list):
        character_dict[char] = e
    time_numerical = [character_dict[char] for char in time_machine]  # not used below
    corpus_indices = my_seq
    idx_to_char = time_machine
    char_to_idx = character_dict
    num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
    num_epochs, num_steps, batch_size, lr, clipping_theta = 20, 5, 2, 1e2, 1e-2
    # NOTE(review): the prefixes are strings while the keys of char_to_idx are
    # ints; confirm the prediction code handles this.
    pred_period, pred_len, prefixes = 8, 5, ['9','21']
    lstm_layer = rnn.LSTM(256)
    model = RNNModel(lstm_layer,vocab_size=vocab_size)
    # NOTE(review): `ctx` is not defined in the visible part of the script.
    train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx, corpus_indices, idx_to_char, char_to_idx, num_epochs, num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes)
    model.export('gluon11')