Beispiel #1
0
# -*- coding:utf-8 -*-
import torch
from IPython import display
from d2l import torch as d2l

# Mini-batch size handed to the Fashion-MNIST loader below.
batch_size = 256
# The d2l helper returns a pair of iterators: one for training, one for testing.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# 1. Initialize the model parameters.
# The weight matrix W is drawn from a normal distribution (mean 0, std 0.01);
# the bias b is initialized to zero. Both track gradients.
# NOTE(review): 784 is presumably the flattened 28x28 image size and 10 the
# number of classes -- confirm against the dataset.
# NOTE(review): `num_imputs` looks like a typo for `num_inputs`; the name is
# kept unchanged here because other code may reference it.
num_imputs = 784
num_outputs = 10
W = torch.normal(0, 0.01, size=(num_imputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)


# 2. Define the softmax operation
# X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
# X.sum(0, keepdim=True), X.sum(1, keepdim=True)
def softmax(X):
    """Apply a numerically stable softmax along dimension 1.

    Args:
        X: Tensor with at least two dimensions; the entries along
            dimension 1 are the scores that get normalized together.

    Returns:
        Tensor of the same shape as ``X``. For finite inputs every slice
        along dimension 1 is non-negative and sums to 1.
    """
    # Softmax is invariant to adding a constant to every score in a row, so
    # shift each row by its maximum before exponentiating. This keeps exp()
    # from overflowing to inf (large scores) or underflowing to an all-zero
    # row (very negative scores), both of which would yield nan.
    # The shift is detached: it is a constant offset and contributes nothing
    # to the gradient.
    row_max = X.max(dim=1, keepdim=True).values.detach()
    X_exp = torch.exp(X - row_max)
    partition = X_exp.sum(1, keepdim=True)
    # Broadcasting divides every row by its own normalizer.
    return X_exp / partition


# 3. Define the model
def net(X):
    """Run the linear softmax model on a batch.

    The input is reshaped so that its last dimension matches the number of
    rows of the module-level weight matrix ``W``, multiplied by ``W``,
    offset by the module-level bias ``b``, and passed through ``softmax``.

    Args:
        X: Input tensor whose total element count is a multiple of
            ``W.shape[0]``.

    Returns:
        The output of ``softmax`` applied to the affine scores.
    """
    num_features = W.shape[0]
    flattened = X.reshape((-1, num_features))
    scores = torch.matmul(flattened, W) + b
    return softmax(scores)


# 4. Define the loss function
Beispiel #2
0
            timer.stop()
            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % 50 == 0:
                print(f"epoch: {epoch} --- iter: {i} --- of {len(train_iter)}")
                print(f"train loss: {train_loss} --- train acc: {train_acc}")
        test_acc = evaluate_accuracy_gpu(net, test_iter)
    print(f'loss {train_loss:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')


# Driver script: load the data, set the hyperparameters and train `net`.
print("train")
batch_size = 256
# Rebuild the train/test iterators with the batch size chosen above.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
# Learning rate and number of epochs passed to train_func.
lr, num_epochs = 0.9, 2
train_func(net, train_iter, test_iter, num_epochs, lr)

# Visual separator in the console output before the next experiment, which
# rebinds `net` to a new nn.Sequential model.
print("-------------------------------------------")
print("alex net modifications")

net = nn.Sequential(nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1),
                    nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2),
                    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
                    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
                    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=3, stride=2), nn.Flatten(),
                    nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(p=0.5),