Example #1
def train_cpu(net,
              train_iter,
              test_iter,
              loss,
              num_epochs,
              batch_size,
              params=None,
              lr=None,
              trainer=None):
    for epoch in range(1, num_epochs + 1):
        train_l_sum = 0
        train_acc_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            train_l_sum += l.mean().asscalar()
            train_acc_sum += accuracy(y_hat, y)
        test_acc = evaluate_accuracy(test_iter, net)
        print("epoch %d, loss %.4f, train acc %.3f, test acc %.3f" %
              (epoch, train_l_sum / len(train_iter),
               train_acc_sum / len(train_iter), test_acc))
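Example #1 leans on three helpers that are not shown: accuracy, evaluate_accuracy, and gb.sgd. A minimal sketch of what they might look like, assuming the usual gluonbook/d2l definitions:

from mxnet import nd

def accuracy(y_hat, y):
    # Fraction of the batch whose highest-scoring class matches the label.
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()

def evaluate_accuracy(data_iter, net):
    # Average of the per-batch accuracy over an entire iterator.
    acc = 0.0
    for X, y in data_iter:
        acc += accuracy(net(X), y)
    return acc / len(data_iter)

def sgd(params, lr, batch_size):
    # Vanilla minibatch SGD; gradients are summed over the batch, hence the
    # division by batch_size. gb.sgd behaves like this.
    for param in params:
        param[:] = param - lr * param.grad / batch_size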
Example #2
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        # With consecutive (adjacent) sampling, the hidden state only needs to
        # be initialized once, at the start of each epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # With random sampling, the hidden state must be reinitialized
            # before reading each random minibatch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            # With consecutive sampling, detach the hidden state from the
            # computation graph to truncate backpropagation through time.
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                # outputs: num_steps arrays, each of shape
                # (batch_size, vocab_size).
                if is_lstm:
                    outputs, state_h, state_c = rnn(
                        get_inputs(X, vocab_size), state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(
                        get_inputs(X, vocab_size), state_h, *params)
                # Let t_ib_j denote element j of the minibatch at time step i.
                # y has shape (batch_size * num_steps,):
                # y = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ...].
                y = Y.T.reshape((-1,))
                # Concatenate outputs along dim 0 into shape
                # (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            # Clip gradients to keep their norm below clipping_theta.
            grad_clipping(params, state_h, Y, clipping_theta, ctx)
            gb.sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            print('\nepoch %d, perplexity %f'
                  % (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(
                    rnn, prefix, pred_len, params, num_hiddens, vocab_size,
                    ctx, idx_to_char, char_to_idx, get_inputs, is_lstm))
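get_inputs is not defined in the snippet; judging from the call get_inputs(X, vocab_size), it most likely one-hot encodes each time step of the minibatch, roughly as follows (hypothetical sketch):

from mxnet import nd

def get_inputs(X, vocab_size):
    # X has shape (batch_size, num_steps). Return num_steps arrays of shape
    # (batch_size, vocab_size), one per time step.
    return [nd.one_hot(x, vocab_size) for x in X.T]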
Example #3
def train_ch3(net,
              train_iter,
              loss,
              num_epochs,
              batch_size,
              params=None,
              lr=None,
              trainer=None):
    for epoch in range(num_epochs):

        train_l_sum = 0
        train_acc_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)

            train_l_sum += l.mean().asscalar()
            train_acc_sum += accuracy(y_hat, y)

        print('epoch %d, loss %.4f, train acc %.3f' %
              (epoch + 1, train_l_sum / len(train_iter),
               train_acc_sum / len(train_iter)))
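For context, this is roughly how train_ch3 is typically driven with the Gluon API; the dataset, network, and hyperparameters below are illustrative, and gb is assumed to be the gluonbook package:

from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn
import gluonbook as gb

batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)

net = nn.Sequential()
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

train_ch3(net, train_iter, loss, num_epochs=5, batch_size=batch_size,
          trainer=trainer)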
Example #4
def train_ch3(net,
              train_iter,
              loss,
              num_epochs,
              batch_size,
              params=None,
              lr=None,
              trainer=None):

    for epoch in range(num_epochs):

        # Halve the learning rate every 100 epochs (this branch assumes a
        # Gluon trainer was passed in; note that it also fires at epoch 0).
        if epoch % 100 == 0:
            trainer.set_learning_rate(trainer.learning_rate * 0.5)
            print(trainer.learning_rate)

        train_acc_sum = 0
        train_acc_sum1 = 0
        train_acc_sum2 = 0
        train_acc_sum3 = 0

        flag = 0

        train_l_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)

            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)

            train_l_sum += l.mean().asscalar()
            # On the final epoch, copy targets and predictions into the
            # module-level buffers labels and labels_test.
            if epoch == (num_epochs - 1):
                labels[flag:flag + len(y)] = y
                labels_test[flag:flag + len(y)] = y_hat
                flag = flag + len(y)

            train_acc_sum += accuracy(y_hat, y, 0.51)
            train_acc_sum1 += accuracy_sum(y_hat, y, 1.5)
            train_acc_sum2 += accuracy_sum(y_hat, y, 3.0)
            train_acc_sum3 += accuracy_sum(y_hat, y, 4.5)

        print(
            'epoch %d, loss %.4f, train_acc %f, train_acc1 %f, train_acc2 %f, train_acc3 %f'
            % (epoch + 1, train_l_sum / len(train_iter),
               train_acc_sum / data_num, train_acc_sum1 / data_num,
               train_acc_sum2 / data_num, train_acc_sum3 / data_num))
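The threshold-taking accuracy and accuracy_sum helpers are project-specific and not shown; they appear to treat the task as regression and count predictions that land within a tolerance of the target. A purely hypothetical reconstruction, only to make the reading self-contained:

def accuracy_sum(y_hat, y, threshold):
    # Hypothetical: number of predictions within `threshold` of the target;
    # the training loop divides the running sum by data_num (total examples).
    return ((y_hat.reshape(y.shape) - y).abs() < threshold).sum().asscalar()

accuracy(y_hat, y, threshold) is presumably defined along the same lines.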
Example #5
def train_batch(X, y, gpu_params, ctx, lr):
    # Split the minibatch across devices.
    gpu_Xs = split_and_load(X, ctx)
    gpu_ys = split_and_load(y, ctx)
    with autograd.record():
        # Compute the loss on each GPU with that GPU's copy of the parameters.
        ls = [
            loss(lenet(gpu_X, gpu_W), gpu_y)
            for gpu_X, gpu_y, gpu_W in zip(gpu_Xs, gpu_ys, gpu_params)
        ]
    for l in ls:
        l.backward()
    # Sum each parameter's gradient over all devices and broadcast it back.
    for i in range(len(gpu_params[0])):
        allreduce([gpu_params[c][i].grad for c in range(len(ctx))])
    # Take an SGD step on every device with the full batch size.
    for params in gpu_params:
        gb.sgd(params, lr, X.shape[0])
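split_and_load and allreduce are the data-parallel helpers from the multi-GPU chapter of gluonbook/d2l; a sketch of the standard definitions, in case they are not already in scope:

from mxnet import nd

def split_and_load(data, ctx):
    # Split a batch evenly along axis 0 and copy each slice to one device.
    n, k = data.shape[0], len(ctx)
    m = n // k
    assert m * k == n, 'batch size must be divisible by the number of devices'
    return [data[i * m:(i + 1) * m].as_in_context(ctx[i]) for i in range(k)]

def allreduce(data):
    # Sum the arrays from every device on the first device, then copy the
    # result back so all devices hold identical summed gradients.
    for i in range(1, len(data)):
        data[0][:] += data[i].copyto(data[0].context)
    for i in range(1, len(data)):
        data[0].copyto(data[i])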
Example #6
File: test6.py Project: cszfz/mxnet
def train_ch3(net,
              train_iter,
              loss,
              num_epochs,
              batch_size,
              params=None,
              lr=None,
              trainer=None):

    for epoch in range(num_epochs):
        train_acc_sum = 0
        train_acc_sum1 = 0
        train_acc_sum2 = 0
        train_acc_sum3 = 0

        flag = 0
        train_l_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)

            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)

            train_l_sum += l.mean().asscalar()
            # Every 10 epochs, dump this batch's predictions and targets to
            # text files indexed by the batch counter flag.
            if epoch % 10 == 0:
                np.savetxt('y_hat' + str(flag) + '.txt',
                           y_hat.asnumpy(),
                           fmt='%f')
                np.savetxt('y' + str(flag) + '.txt', y.asnumpy(), fmt='%f')
                flag = flag + 1
            train_acc_sum += accuracy(y_hat, y, 0.51)
            train_acc_sum1 += accuracy_sum(y_hat, y, 0.51)
            train_acc_sum2 += accuracy_sum(y_hat, y, 1.01)
            train_acc_sum3 += accuracy_sum(y_hat, y, 1.51)

        print(
            'epoch %d, loss %.4f, train_acc %f, train_acc1 %f, train_acc2 %f, train_acc3 %f'
            % (epoch + 1, train_l_sum / len(train_iter),
               train_acc_sum / data_num, train_acc_sum1 / data_num,
               train_acc_sum2 / data_num, train_acc_sum3 / data_num))
Example #7
def fit_and_plot(lambd):
    # lambd is the weight of the L2 penalty added to the loss below.
    w, b = params = init_params()
    train_ls = []
    test_ls = []
    for _ in range(num_epoch):
        for X, y in train_iter:
            with autograd.record():
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            gb.sgd(params, lr, batch_size)
        train_ls.append(
            loss(net(train_feature, w, b), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_feature, w, b), test_labels).mean().asscalar())
    gb.semilogy(range(1, num_epoch + 1), train_ls, 'epoch', 'loss',
                range(1, num_epoch + 1), test_ls, ['train', 'test'])
    return w[:10].T, b
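fit_and_plot assumes a linear model net(X, w, b), an init_params helper, and an L2 penalty. Minimal sketches in the style of the weight-decay chapter (the project's own definitions may differ; num_inputs is assumed to be defined elsewhere):

from mxnet import nd

def init_params():
    w = nd.random.normal(scale=1, shape=(num_inputs, 1))
    b = nd.zeros(shape=(1,))
    w.attach_grad()
    b.attach_grad()
    return [w, b]

def net(X, w, b):
    # Plain linear regression.
    return nd.dot(X, w) + b

def l2_penalty(w):
    # Half the squared L2 norm of the weights.
    return (w ** 2).sum() / 2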
Example #8
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          vocab_size, ctx, corpus_indices, idx_to_char,
                          char_to_idx, is_random_iter, num_epochs, num_steps,
                          lr, clipping_theta, batch_size, pred_period,
                          pred_len, prefixes):
    if is_random_iter:
        data_iter_fn = data_iter_random
    else:
        data_iter_fn = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        if not is_random_iter:
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        loss_sum = 0.0
        start = time.time()
        for t, (X, Y) in enumerate(data_iter):
            if is_random_iter:
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:
                # detach() returns a new NDArray, so reassign the state to
                # actually cut the graph link to earlier time steps.
                state = tuple(s.detach() for s in state)
            with autograd.record():
                inputs = to_onehot(X, vocab_size)
                outputs, state = rnn(inputs, state, params)
                outputs = nd.concat(*outputs, dim=0)
                y = Y.T.reshape((-1, ))
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            gb.sgd(params, lr, 1)
            loss_sum += l.asscalar()

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' %
                  (epoch + 1, math.exp(loss_sum /
                                       (t + 1)), time.time() - start))
            for prefix in prefixes:
                print(
                    '-',
                    predict_rnn(prefix, pred_len, rnn, params, init_rnn_state,
                                num_hiddens, vocab_size, ctx, idx_to_char,
                                char_to_idx))
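This version relies on to_onehot, init_rnn_state, and grad_clipping. Assuming the standard gluonbook/d2l definitions, they look roughly like this:

from mxnet import nd

def to_onehot(X, size):
    # One array of shape (batch_size, size) per time step.
    return [nd.one_hot(x, size) for x in X.T]

def init_rnn_state(batch_size, num_hiddens, ctx):
    # A tuple, so LSTM variants can return (H, C) through the same interface.
    return (nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx), )

def grad_clipping(params, theta, ctx):
    # Rescale all gradients together so their global L2 norm is at most theta.
    norm = nd.array([0], ctx)
    for param in params:
        norm += (param.grad ** 2).sum()
    norm = norm.sqrt().asscalar()
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm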
Example #9
        # Tail of the network's forward pass: with verbose=True it prints the
        # shape of each intermediate activation before returning the output.
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear


for data, _ in train_data:
    net(data, verbose=True)
    break
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
learning_rate = .2
epochs = 5
for epoch in range(epochs):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            Y_hat = net(data)
            l = softmax_cross_entropy(Y_hat, label)
        l.backward()
        gb.sgd(params, learning_rate, batch_size)
        train_loss += nd.mean(l).asscalar()
        train_acc += accuracy(Y_hat, label)
    test_acc = evaluate_accuracy(test_data, net, ctx)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" %
          (epoch, train_loss / len(train_data), train_acc / len(train_data),
           test_acc))
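evaluate_accuracy here takes a device argument; a sketch consistent with how the loop uses it (accuracy itself can be the per-batch helper sketched after Example #1):

def evaluate_accuracy(data_iter, net, ctx):
    # Accuracy over a whole dataset, moving each batch to the target device.
    acc_sum, n = 0.0, 0
    for data, label in data_iter:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx).astype('float32')
        acc_sum += (net(data).argmax(axis=1) == label).sum().asscalar()
        n += label.size
    return acc_sum / n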