Example #1
import math
import time

from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss

# data_iter_random, grad_clipping and predict_rnn_gluon are assumed to be
# available from the surrounding codebase (they match the d2lzh utilities).


def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    """Train a Gluon RNN model and predict the next item in the sequence."""
    # CTCLoss is used in place of the usual SoftmaxCrossEntropyLoss; the
    # outputs and labels are reshaped below to match the 'NTC'/'NT' layouts.
    loss = gloss.CTCLoss(layout='NTC', label_layout='NT')
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        # Use random sampling; the consecutive iterator is the alternative:
        # data_iter = data_iter_consecutive(
        #     corpus_indices, batch_size, num_steps, ctx)
        data_iter_fn = data_iter_random
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        model.hybridize()
        for X, Y in data_iter:
            # Detach the state from the graph to truncate backpropagation.
            # NDArray.detach() is not in-place, so rebind the result.
            state = [s.detach() for s in state]
            with autograd.record():
                # output: (num_steps * batch_size, vocab_size)
                (output, state) = model(X, state)
                y = Y.T.reshape((-1,))
                # Reshape for CTCLoss: treat each output row as a length-1
                # sequence, i.e. pred (N, 1, C) and label (N, 1).
                output = nd.expand_dims(output, axis=1)
                y = nd.expand_dims(y, axis=1)
                l = loss(output, y).mean()
            l.backward()
            # Clip gradients to mitigate exploding gradients in the RNN.
            params = [p.data() for p in model.collect_params().values()]
            grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)  # the loss is already averaged, so step with 1
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_gluon(
                    prefix, pred_len, model, vocab_size, ctx, idx_to_char,
                    char_to_idx))
    # Export the hybridized model to gluon-symbol.json / gluon-0000.params.
    model.export("gluon")
Example #2
from mxnet.gluon import loss


def ctc_loss(net, train_features, train_labels):
    """Compute the CTC loss of `net` on a batch of features and labels."""
    preds = net(train_features)
    return loss.CTCLoss()(preds, train_labels)
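For reference, a generic sketch of what the default CTCLoss layout expects (this is not part of the example above): with layout='NTC' the predictions are (batch, time, classes) and the labels are (batch, max_label_len); shorter labels can be padded with -1, or explicit label_lengths can be passed instead.

import mxnet as mx
from mxnet.gluon import loss

ctc = loss.CTCLoss()                             # defaults: layout='NTC', label_layout='NT'
preds = mx.nd.random.uniform(shape=(2, 20, 11))  # 2 sequences, 20 steps, 11 classes
labels = mx.nd.array([[1, 2, 3, -1],
                      [4, 5, 6, 7]])             # -1 pads the shorter label
print(ctc(preds, labels).shape)                  # one loss value per sequence: (2,)

Note that Example #3 below creates a single CTCLoss instance up front and reuses it, which avoids re-instantiating the block on every call as Example #2 does.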
Example #3
import os
import sys

import mxnet as mx
from mxnet import autograd, gluon
from mxnet.gluon import loss


class ShowProcess():
    """Console progress bar. The excerpt started mid-method, so the class
    header, __init__ and the first lines of show_process are reconstructed
    from how the attributes are used below."""

    def __init__(self, max_steps, infoDone='Done'):
        self.max_steps = max_steps
        self.max_arrow = 50  # bar width; an assumed default
        self.i = 0
        self.infoDone = infoDone

    def show_process(self, loss):
        self.i += 1
        num_arrow = self.i * self.max_arrow // self.max_steps
        num_line = self.max_arrow - num_arrow
        percent = self.i * 100.0 / self.max_steps
        process_bar = '[' + '>' * num_arrow + '-' * num_line + ']'\
                      + '%.2f' % percent + '%, loss:' + str(loss) + '\r'
        sys.stdout.write(process_bar)
        sys.stdout.flush()
        if self.i >= self.max_steps:
            self.close()

    def close(self):
        print('')
        print(self.infoDone)
        self.i = 0
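A quick, hypothetical driver for the bar, assuming the reconstructed show_process above; the step count and loss value are placeholders:

if __name__ == '__main__':
    bar = ShowProcess(5, 'OK')
    for step in range(5):
        bar.show_process(0.123)  # prints e.g. [>>>>>-----]50.00%, loss:0.123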


my_ctcloss = loss.CTCLoss()  # one reusable loss instance for training


def train(net, num_epochs, lr, batch_size):
    with mx.Context(mx.cpu(0)):
        train_ls = []
        trainer = gluon.Trainer(net.collect_params(), 'adam', {
            'learning_rate': lr,
        })
        for epoch in range(num_epochs):
            # './data' apparently holds three files per training sample.
            max_steps = len(os.listdir('./data')) // 3 // batch_size
            process_bar = ShowProcess(max_steps, 'OK')
            # get_iter is assumed to be defined elsewhere in this script.
            train_iter = get_iter(batch_size)
            for x, y in train_iter:
                with autograd.record():
                    l = my_ctcloss(net(x), y)  # .mean()