import math
import time

from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss


def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    """Train a Gluon RNN model with CTC loss and predict the next items in the sequence."""
    # CTC loss replaces the usual gloss.SoftmaxCrossEntropyLoss() here.
    loss = gloss.CTCLoss(layout='NTC', label_layout='NT')
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})
    model.hybridize()
    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        # Random sampling; swap in data_iter_consecutive for adjacent batches.
        data_iter = data_iter_random(corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            # Detach the state so gradients do not propagate across batches.
            state = [s.detach() for s in state]
            with autograd.record():
                output, state = model(X, state)
                y = Y.T.reshape((-1,))
                # CTCLoss with layout 'NTC' expects (N, T, C) predictions and
                # (N, L) labels; adding a length-1 axis treats every time step
                # as its own one-element sequence.
                output = nd.expand_dims(output, axis=1)
                y = nd.expand_dims(y, axis=1)
                l = loss(output, y).mean()
            l.backward()
            # Clip gradients to curb exploding gradients in the RNN.
            params = [p.data() for p in model.collect_params().values()]
            grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)
            l_sum += l.asscalar() * y.size
            n += y.size
        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_gluon(
                    prefix, pred_len, model, vocab_size, ctx, idx_to_char,
                    char_to_idx))
    # Export the hybridized symbol and parameters (gluon-symbol.json /
    # gluon-0000.params); model.save_params("model_lstm.params") also works.
    model.export("gluon")
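# A hedged usage sketch for the trainer above. It assumes the d2l-style
# helpers it calls (data_iter_random, grad_clipping, predict_rnn_gluon) are
# in scope, a corpus already loaded as corpus_indices / idx_to_char /
# char_to_idx, and an RNNModel wrapper that one-hot encodes indices
# internally; every hyperparameter value below is illustrative, not
# prescribed.
import mxnet as mx
from mxnet.gluon import rnn

ctx = mx.cpu()
num_hiddens, vocab_size = 256, len(idx_to_char)
model = RNNModel(rnn.GRU(num_hiddens), vocab_size)  # RNNModel: assumed wrapper
train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                            corpus_indices, idx_to_char, char_to_idx,
                            num_epochs=250, num_steps=35, lr=100,
                            clipping_theta=0.01, batch_size=32,
                            pred_period=50, pred_len=50, prefixes=['the '])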
from mxnet.gluon import loss


def ctc_loss(net, train_features, train_labels):
    """Return the CTC loss of the network's predictions against the labels."""
    preds = net(train_features)
    return loss.CTCLoss()(preds, train_labels)
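# A standalone sketch of what gluon's CTCLoss expects (the toy sizes are
# arbitrary): with the default layout='NTC', predictions are (N, T, C)
# where the last class index C-1 is reserved for the blank, and labels are
# (N, L) class indices; the result is one loss value per sequence.
from mxnet import nd
from mxnet.gluon import loss

ctc = loss.CTCLoss()  # defaults: layout='NTC', label_layout='NT'
preds = nd.random.uniform(shape=(2, 20, 5))      # batch=2, T=20, 4 classes + blank
labels = nd.array([[1, 2, 3, 0], [0, 2, 2, 3]])  # batch=2, label length 4
print(ctc(preds, labels))                        # shape (2,): loss per sequence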
import os
import sys

import mxnet as mx
from mxnet import autograd, gluon
from mxnet.gluon import loss


class ShowProcess():
    """Console progress bar for training.

    The class head and the first lines of show_process were cut off in the
    original snippet and are reconstructed here; the bar width is assumed.
    """

    def __init__(self, max_steps, infoDone='Done'):
        self.max_steps = max_steps
        self.max_arrow = 50  # assumed bar width
        self.infoDone = infoDone
        self.i = 0

    def show_process(self, loss):
        self.i += 1
        num_arrow = int(self.i * self.max_arrow / self.max_steps)
        num_line = self.max_arrow - num_arrow
        percent = self.i * 100.0 / self.max_steps
        process_bar = ('[' + '>' * num_arrow + '-' * num_line + ']'
                       + '%.2f' % percent + '%, loss:' + str(loss) + '\r')
        sys.stdout.write(process_bar)
        sys.stdout.flush()
        if self.i >= self.max_steps:
            self.close()

    def close(self):
        print('')
        print(self.infoDone)
        self.i = 0


my_ctcloss = loss.CTCLoss()


def train(net, num_epochs, lr, batch_size):
    with mx.Context(mx.cpu(0)):
        train_ls = []
        trainer = gluon.Trainer(net.collect_params(), 'adam',
                                {'learning_rate': lr})
        for epoch in range(num_epochs):
            # The original code assumes three files per sample under ./data.
            max_steps = len(os.listdir('./data')) // 3 // batch_size
            process_bar = ShowProcess(max_steps, 'OK')
            train_iter = get_iter(batch_size)
            for x, y in train_iter:
                with autograd.record():
                    # One loss per sample; backward() on the vector sums them.
                    l = my_ctcloss(net(x), y)
                # The snippet ended here; the standard update step is filled in.
                l.backward()
                trainer.step(batch_size)
                train_ls.append(l.mean().asscalar())
                process_bar.show_process(train_ls[-1])
        return train_ls
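# The loop above assumes a helper get_iter(batch_size) yielding (features,
# labels) batches. A hypothetical stand-in with random data, only so the
# loop can be exercised end to end (all shapes are made up: 32x256
# single-channel images and length-4 label sequences over 10 classes):
from mxnet import nd


def get_iter(batch_size, num_batches=10):
    for _ in range(num_batches):
        x = nd.random.uniform(shape=(batch_size, 1, 32, 256))
        y = nd.random.randint(0, 10, shape=(batch_size, 4)).astype('float32')
        yield x, y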