Example #1
def train_and_predit_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                               corpus_indices, idx_to_char, char_to_idx,
                               num_epochs, num_steps, lr, clipping_theta,
                               batch_size, pred_period, pred_len, prefixes):
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(force_reinit=True, ctx=ctx, init=init.Normal(sigma=0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd', {
        'learning_rate': lr,
        'momentum': 0,
        'wd': 0
    })

    for epoch in range(num_epochs):
        l_sum, n, startTime = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(corpus_indices, batch_size,
                                              num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            for s in state:
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1, ))
                l = loss(output, y).mean()
            l.backward()

            params = [p.data() for p in model.collect_params().values()]
            d2l.grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print('epochs %d, perplexity %f, time %0.2f' %
                  (epoch + 1, math.exp(l_sum / n), time.time() - startTime))
            for prefix in prefixes:
                print('-', predict_rnn_gluon(prefix, pred_len, model, vocab_size,
                                             ctx, idx_to_char, char_to_idx))
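
These snippets all lean on the standard Dive into Deep Learning (d2l) MXNet setup. A minimal sketch of the imports they appear to assume (the package alias d2l is an assumption based on the names used above; predict_rnn_gluon is not part of these imports, a sketch of it is given after Example #6):

import math
import time
from mxnet import autograd, gluon, init
from mxnet.gluon import loss as gloss
import d2lzh as d2l  # assumption: the book's companion package, aliased to match the snippets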
Example #2
def train_and_predit_rnn_gluon(model, num_hiddens, vocab_size, ctx, corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes):
    # Define the loss function, initialize the model, and set up the optimizer
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(force_reinit=True, ctx=ctx, init=init.Normal(sigma=0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd', {'learning_rate':lr, 'momentum':0, 'wd':0})

    for epoch in range(num_epochs):
        l_sum, n, startTime = 0.0, 0, time.time()
        # get consecutive batches of training data and initialize the hidden state
        data_iter = d2l.data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)

        for X, Y in data_iter:
            for s in state:     # detach the state from the computation graph so gradient computation does not keep growing across iterations
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1, ))                 # Y has shape (batch_size, num_steps); transpose, then flatten into a vector
                l = loss(output, y).mean()              # mean classification loss over the batch
            l.backward()

            # clip the gradients
            params = [p.data() for p in model.collect_params().values()]
            d2l.grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)                             # take one optimization step
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch+1) % pred_period == 0:  # report once every pred_period epochs
            # math.exp(l_sum/n) is the perplexity
            print('epochs %d, perplexity %f, time %0.2f' % (epoch+1, math.exp(l_sum/n), time.time()-startTime))
            for prefix in prefixes:
                print('-', predict_rnn_gluon(prefix, pred_len, model, vocab_size, ctx, idx_to_char, char_to_idx))
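
A hedged usage sketch for the function in Example #2. The data loader, the RNNModel wrapper, and every hyperparameter value below are illustrative assumptions (loosely following the book's Jay Chou lyrics setup), not something taken from the original example:

from mxnet.gluon import rnn

ctx = d2l.try_gpu()  # assumption: helper that returns gpu(0) if available, else cpu()
# assumption: loader returning the lyrics corpus and the char/index mappings
corpus_indices, char_to_idx, idx_to_char, vocab_size = d2l.load_data_jay_lyrics()
num_hiddens = 256
model = d2l.RNNModel(rnn.RNN(num_hiddens), vocab_size)  # assumption: wrapper exposing forward/begin_state
train_and_predit_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                           corpus_indices, idx_to_char, char_to_idx,
                           num_epochs=250, num_steps=35, lr=100, clipping_theta=0.01,
                           batch_size=32, pred_period=50, pred_len=50,
                           prefixes=['分开', '不分开'])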
Example #5
model.initialize(force_reinit=True, ctx=ctx)
print(predict_rnn_gluon('分开', 10, model, vocab_size, ctx, idx_to_char, char_to_idx))



def train_and_predit_rnn_gluon(model, num_hiddens, vocab_size, ctx, corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes):
    # Define the loss function, initialize the model, and set up the optimizer
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(force_reinit=True, ctx=ctx, init=init.Normal(sigma=0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd', {'learning_rate':lr, 'momentum':0, 'wd':0})

    for epoch in range(num_epochs):
        l_sum, n, startTime = 0.0, 0, time.time()
        # get consecutive batches of training data and initialize the hidden state
        data_iter = d2l.data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            for s in state:     # detach the state from the computation graph so gradient computation does not keep growing across iterations
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1, ))                 # Y has shape (batch_size, num_steps); transpose, then flatten into a vector
                l = loss(output, y).mean()              # mean classification loss over the batch
            l.backward()

            # clip the gradients
            params = [p.data() for p in model.collect_params().values()]
            d2l.grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch+1) % pred_period == 0:  # report once every pred_period epochs
            print('epochs %d, perplexity %f, time %0.2f' % (epoch+1, math.exp(l_sum/n), time.time()-startTime))
            for prefix in prefixes:
                print('-', predict_rnn_gluon(prefix, pred_len, model, vocab_size, ctx, idx_to_char, char_to_idx))
Example #6
def train_and_predict_rnn_gluon(
    model,
    num_hiddens,
    vocab_size,
    ctx,
    corpus_indices,
    idx_to_char,
    char_to_idx,
    num_epochs,
    num_steps,
    lr,
    clipping_theta,
    batch_size,
    pred_period,
    pred_len,
    prefixes,
):
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(
        model.collect_params(), "sgd", {"learning_rate": lr, "momentum": 0, "wd": 0}
    )

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(
            corpus_indices, batch_size, num_steps, ctx
        )
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            for s in state:
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1,))
                l = loss(output, y).mean()
            l.backward()
            # gradient clipping
            params = [p.data() for p in model.collect_params().values()]
            d2l.grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)  # the loss was already averaged, so the gradients need no further averaging
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print(
                "epoch %d, perplexity %f, time %.2f sec"
                % (epoch + 1, math.exp(l_sum / n), time.time() - start)
            )
            for prefix in prefixes:
                print(
                    " -",
                    predict_rnn_gluon(
                        prefix,
                        pred_len,
                        model,
                        vocab_size,
                        ctx,
                        idx_to_char,
                        char_to_idx,
                    ),
                )
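
Every example above calls predict_rnn_gluon without defining it. A sketch of that companion helper as it appears in the d2l Gluon RNN chapter (reconstructed from memory, so treat the details as an approximation rather than the authoritative definition):

from mxnet import nd

def predict_rnn_gluon(prefix, num_chars, model, vocab_size, ctx, idx_to_char, char_to_idx):
    # Hidden state for a single sequence (batch size 1).
    state = model.begin_state(batch_size=1, ctx=ctx)
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars + len(prefix) - 1):
        # Feed the last emitted character back in as the next input.
        X = nd.array([output[-1]], ctx=ctx).reshape((1, 1))
        (Y, state) = model(X, state)
        if t < len(prefix) - 1:
            # Still consuming the prefix: append the known next character.
            output.append(char_to_idx[prefix[t + 1]])
        else:
            # Past the prefix: greedily take the most likely next character.
            output.append(int(Y.argmax(axis=1).asscalar()))
    return ''.join([idx_to_char[i] for i in output])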