def train_ch3(net,
              train_iter,
              test_iter,
              loss,
              num_epochs,
              batch_size,
              params=None,
              lr=None,
              trainer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)  # used in the "concise implementation of softmax regression" section
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = d2l.evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' %
              (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
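For reference, a minimal call of train_ch3 (mirroring the call in Example #9 below; cross_entropy, W, b, train_iter, test_iter and batch_size are assumed to be defined as in that example):

# hypothetical usage sketch -- assumes cross_entropy, W, b, train_iter,
# test_iter and batch_size are already defined as in Example #9
num_epochs, lr = 5, 0.1
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size,
          params=[W, b], lr=lr)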
def train(num_gpus, batch_size, lr):
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print('running on:', ctx)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(4):
        start = time.time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [
                    loss(net(gpu_X), gpu_y)
                    for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)
                ]
            for l in ls:
                l.backward()
            trainer.step(batch_size)
        nd.waitall()
        train_time = time.time() - start
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
        print('epoch %d, time %.1f sec, test_acc %.2f' %
              (epoch + 1, train_time, test_acc))
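The split_and_load helper used above comes from mxnet.gluon.utils; it slices a mini-batch along the batch axis and copies one slice to each context in the given list. A minimal sketch (cpu contexts stand in for the GPUs):

import mxnet as mx
from mxnet import nd
from mxnet.gluon import utils as gutils

X = nd.arange(8).reshape((4, 2))        # toy batch of 4 samples
ctx_list = [mx.cpu(0), mx.cpu(0)]       # stand-ins for mx.gpu(0), mx.gpu(1)
parts = gutils.split_and_load(X, ctx_list)
print([p.shape for p in parts])         # [(2, 2), (2, 2)], one slice per context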
Example #3
def train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs):
    print('training on', ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    best_test_acc = 0.0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx)

        if test_acc > best_test_acc:
            best_test_acc = test_acc
            net.save_parameters('./model/MNIST/mlp.params')
        else:
            # lr_scheduler is assumed to be an LRScheduler shared with the
            # trainer and defined in the enclosing scope
            lr_scheduler.learning_rate *= 0.9

        print(
            'epoch %d, lr %.2f, loss %.4f, train acc %.3f, test acc %.3f, best test acc %.3f, '
            'time %.1f sec' %
            (epoch + 1, lr_scheduler.learning_rate, train_l_sum / n,
             train_acc_sum / n, test_acc, best_test_acc, time.time() - start))
Example #4
def train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': lr,
        'momentum': 0.9,
        'wd': wd
    })
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for X, y in train_iter:
            y = y.astype('float32').as_in_context(ctx)
            with autograd.record():
                y_hat = net(X.as_in_context(ctx))
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        time_s = "time %.2f sec" % (time.time() - start)
        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)
            epoch_s = (
                "epoch %d, loss %f, train acc %f, valid acc %f, " %
                (epoch + 1, train_l_sum / n, train_acc_sum / n, valid_acc))
        else:
            epoch_s = ("epoch %d, loss %f, train acc %f, " %
                       (epoch + 1, train_l_sum / n, train_acc_sum / n))
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))
def train(num_gpus, batch_size, lr):
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print('running on:', ctx)
    # copy the model parameters to the memory of each of the num_gpus GPUs
    gpu_params = [get_params(params, c) for c in ctx]
    for epoch in range(4):
        start = time.time()
        for X, y in train_iter:
            # multi-GPU training on a single mini-batch
            train_batch(X, y, gpu_params, ctx, lr)
            nd.waitall()
        train_time = time.time() - start

        def net(x):  # evaluate the model on gpu(0)
            return lenet(x, gpu_params[0])

        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
        print('epoch %d, time %.1f sec, test_acc %.2f' %
              (epoch + 1, train_time, test_acc))
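The get_params and train_batch helpers used above are not shown in this snippet. A sketch of the per-batch step they imply, following the d2l "multiple GPUs from scratch" pattern (lenet, loss and d2l.sgd are assumed to be defined elsewhere):

def allreduce(data):
    # sum the gradients from all GPUs on the first device ...
    for i in range(1, len(data)):
        data[0][:] += data[i].copyto(data[0].context)
    # ... then broadcast the summed gradient back to every device
    for i in range(1, len(data)):
        data[0].copyto(data[i])

def train_batch(X, y, gpu_params, ctx, lr):
    gpu_Xs = gutils.split_and_load(X, ctx)   # split the batch across GPUs
    gpu_ys = gutils.split_and_load(y, ctx)
    with autograd.record():                  # forward pass on each GPU
        ls = [loss(lenet(gpu_X, gpu_W), gpu_y)
              for gpu_X, gpu_y, gpu_W in zip(gpu_Xs, gpu_ys, gpu_params)]
    for l in ls:                             # backward pass on each GPU
        l.backward()
    for i in range(len(gpu_params[0])):      # aggregate gradients per parameter
        allreduce([gpu_params[c][i].grad for c in range(len(ctx))])
    for param in gpu_params:                 # update each GPU's parameter copy
        d2l.sgd(param, lr, X.shape[0])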
Example #6
def train(net, train_iter, valid_iter, lr, wd, lr_decay, lr_period, epochs, loss, ctx):
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr, 'wd': wd, 'momentum': 0.9})
    for epoch in range(epochs):
        l_sum, acc_sum, n, start = 0.0, 0.0, 0, time.time()
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for X, y in train_iter:
            y = y.astype('float32').as_in_context(ctx)
            with autograd.record():
                y_hat = net(X.as_in_context(ctx))
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)  # batch_size is assumed to be defined in the enclosing scope
            l_sum += l.asscalar()
            acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        train_time = time.time() - start
        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)
            print("epoch %d, valid_acc %f" % (epoch, valid_acc))
        print("epoch %d, cost %f sec, loss %f, train_acc %f, lr %f" %
              (epoch, train_time, l_sum / n, acc_sum / n, trainer.learning_rate))
Example #7
def train(num_gpus, batch_size, lr):

    comm = MPI.COMM_WORLD
    comm_rank = comm.Get_rank()
    comm_size = comm.Get_size()

    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    # ctx = [mx.gpu(i) for i in range(num_gpus)]
    # each MPI rank trains on its own GPU; keep ctx as a one-element list so
    # the split_and_load calls below still work
    if comm_rank == 0:
        ctx = [mx.gpu(0)]
    else:
        ctx = [mx.gpu(1)]
    print('running on:', ctx)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)
    # SSP_FLAG and thre are not arguments of the standard Gluon Trainer; this
    # presumably relies on a modified Trainer implementing stale synchronous
    # parallel (SSP) updates
    trainer = gluon.Trainer(net.collect_params(),
                            'sgd', {'learning_rate': lr},
                            SSP_FLAG=True,
                            thre=2)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(400000):
        start = time.time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [
                    loss(net(gpu_X), gpu_y)
                    for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)
                ]
            for l in ls:
                l.backward()
            trainer.step(epoch, batch_size)  # the modified SSP Trainer presumably also takes the epoch index
        train_time = time.time() - start
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
        print('epoch %d, time %.1f sec, test acc %.2f, process %d' %
              (epoch + 1, train_time, test_acc, comm_rank))
Example #8
batch_size = 64
# build the data iterators, resizing the original 28x28 images to 224x224
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

ctx = d2l.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())
print('training on', ctx)

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

for epoch in range(3):
    train_loss_sum = 0
    train_acc_sum = 0
    n = 0
    start = time.time()
    for X, y in train_iter:
        X, y = X.as_in_context(ctx), y.as_in_context(ctx)
        with autograd.record():
            y_hat = net(X)
            loss = softmax_cross_entropy(y_hat, y).sum()
        loss.backward()
        trainer.step(batch_size)
        y = y.astype('float32')
        train_loss_sum += loss.asscalar()
        train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
        n += y.size
    test_acc = d2l.evaluate_accuracy(test_iter, net, ctx)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch + 1, train_loss_sum / n, train_acc_sum / n, test_acc, time.time() - start))
Example #9
def accuracy(y_hat, y):
    # the labels are ints, so cast y to float32 before the equality comparison
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()


def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y = y.astype('float32')
        acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
    return acc_sum / n


# quick test of the accuracy function
print('accuracy test:', accuracy(y_hat, y))
print('accuracy,d2l_test:', d2l.evaluate_accuracy(data_iter=test_iter,
                                                  net=net))

# 6 # Train the model =========================================================================================
num_epoch, lr = 5, 0.1
d2l.train_ch3(net, train_iter, test_iter, cross_entropy, num_epoch, batch_size,
              [W, b], lr)

# 7 # Classification predictions ========================================================
for X, y in test_iter:
    break
true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

d2l.show_fashion_mnist(X[0:9], titles[0:9])
Example #10
def softmax(x):
    x_exp = x.exp()
    partition = x_exp.sum(axis=1, keepdims=True)
    return x_exp / partition

def net(x):
    return softmax(nd.dot(x.reshape((-1, num_inputs)), w) + b)

def cross_entropy(y_hat, y):
    return -nd.pick(y_hat, y).log()

def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()

print(d2l.evaluate_accuracy(test_iter, net))

num_epochs, lr = 5, 0.1

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0 
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)  # used in the "concise implementation of softmax regression" section
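As a quick sanity check, cross_entropy uses nd.pick to select the predicted probability of the true class for each example and takes its negative log; for instance (toy values):

# toy check of cross_entropy: two samples, three classes
y_hat_toy = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y_toy = nd.array([2, 0], dtype='int32')
print(cross_entropy(y_hat_toy, y_toy))  # roughly [-log(0.6), -log(0.3)] = [0.51, 1.20]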
Example #11
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for X, y in train_iter:
            y = y.astype('float32').as_in_context(ctx)
            with autograd.record():
                y_hat = net(X.as_in_context(ctx))
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        time_s = "time %.2f sec" % (time.time() - start)
        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)
            epoch_s = ("epoch %d, loss %f, train acc %f, valid acc %f, "
                       % (epoch + 1, train_l_sum / n, train_acc_sum / n,
                          valid_acc))
        else:
            epoch_s = ("epoch %d, loss %f, train acc %f, " %
                       (epoch + 1, train_l_sum / n, train_acc_sum / n))
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))


# train and validate the model
ctx, num_epochs, lr, wd = d2l.try_gpu(), 1, 0.1, 5e-4
lr_period, lr_decay, net = 80, 0.1, get_net(ctx)
Example #12
        with autograd.record():  # tell mxnet to record
            y_hat = net(X.as_in_context(ctx))
            l = loss(y_hat, y).sum()
        l.backward()  # backward
        trainer.step(batch_size)  # update params for a batch
        train_losses += l.asscalar()
        # running sum of correct predictions; divided by num_samples below
        train_accuracy += (y_hat.argmax(axis=1) == y).sum().asscalar()
        num_samples += y.size

    epoch_time = "time %.2f sec" % (time.time() - start_time
                                    )  # cal time for train a epoch

    # validate once per epoch
    epoch_valid_acc = None
    if valid_iter is not None:
        epoch_valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)

    # printing
    epoch_loss = train_losses / num_samples
    epoch_train_acc = train_accuracy / num_samples
    print("epoch: {}, training loss: {}, training acc: {}, validation acc: {}".
          format(epoch, epoch_loss, epoch_train_acc, epoch_valid_acc))

    # test every 5 epochs and save params
    if epoch % 5 == 0 and test_iter is not None:
        epoch_test_acc = d2l.evaluate_accuracy(test_iter, net, ctx)
        print("test at epoch {}, test acc: {}".format(epoch, epoch_test_acc))
        net.save_parameters(
            'trained_models/resnet18-epoch{}-loss{}.params'.format(
                epoch, epoch_loss))