def main():
    # 3.7.1 Load the data
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    # 3.7.2 Define and initialize the model
    num_inputs = 784
    num_outputs = 10

    class LinearNet(nn.Module):
        def __init__(self, num_inputs, num_outputs):
            super(LinearNet, self).__init__()
            self.linear = nn.Linear(num_inputs, num_outputs)

        def forward(self, x):
            # Flatten each image to (batch_size, 784) before the linear layer
            y = self.linear(x.view(x.shape[0], -1))
            return y

    net = LinearNet(num_inputs, num_outputs)
    nn.init.normal_(net.linear.weight, mean=0, std=0.01)
    nn.init.constant_(net.linear.bias, val=0)

    # 3.7.3 The softmax and cross-entropy loss function
    # PyTorch provides a single function that combines softmax and the
    # cross-entropy loss; it is more numerically stable than computing
    # the two separately.
    loss = nn.CrossEntropyLoss()

    # 3.7.4 Define the optimization algorithm
    optimizer = optim.SGD(net.parameters(), lr=0.1)

    # 3.7.5 Train the model
    num_epochs = 5
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  None, None, optimizer)
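# The numerical-stability claim above is easy to see with extreme logits.
# A minimal sketch (assuming only torch is available): the naive
# softmax-then-log route overflows, while the fused loss works in log space
# via log-sum-exp and stays finite.
def _stability_check():
    import torch
    import torch.nn.functional as F

    def naive_softmax(X):
        # No max subtraction, just like the from-scratch softmax in 3.6 below
        X_exp = X.exp()
        return X_exp / X_exp.sum(dim=1, keepdim=True)

    logits = torch.tensor([[1000.0, -1000.0]])  # extreme but valid logits
    target = torch.tensor([0])
    # exp(1000) overflows to inf, so the naive route yields nan:
    print(-torch.log(naive_softmax(logits).gather(1, target.view(-1, 1))))
    # The fused implementation stays finite: prints tensor(0.)
    print(F.cross_entropy(logits, target))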
def __manual():
    # Load the data
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    # Define the model parameters
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)),
                      dtype=torch.float)
    b1 = torch.zeros(num_hiddens, dtype=torch.float)
    W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)),
                      dtype=torch.float)
    b2 = torch.zeros(num_outputs, dtype=torch.float)
    params = [W1, b1, W2, b2]
    for param in params:
        param.requires_grad_(requires_grad=True)

    # Define the model (relu is the module-level helper defined below)
    def net(X):
        X = X.view(-1, num_inputs)
        H = relu(torch.matmul(X, W1) + b1)
        return torch.matmul(H, W2) + b2

    # Cross-entropy loss function
    loss = torch.nn.CrossEntropyLoss()

    # Train the model
    num_epochs, lr = 5, 100.0
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  params, lr)
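# d2l.train_ch3 accepts either raw parameters plus a learning rate (as in
# __manual above) or a ready-made optimizer (as in the concise versions).
# The sketch below shows roughly what the helper in d2lzh_pytorch does; it
# is an approximation for reference, not a drop-in replacement.
def _train_ch3_sketch(net, train_iter, test_iter, loss, num_epochs,
                      batch_size, params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # Zero the gradients, either via the optimizer or by hand
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)  # manual SGD step
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = d2l.evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))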
def f1():
    print("load_data_fashion_mnist start")
    # Load the data
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    print("load_data_fashion_mnist over")

    num_inputs = 784
    num_outputs = 10

    # Define and initialize the model; the Sequential below replaces the
    # plain LinearNet so the layers can be addressed by name.
    net = LinearNet(num_inputs, num_outputs)
    net = nn.Sequential(
        # FlattenLayer(),
        # nn.Linear(num_inputs, num_outputs)
        OrderedDict([('flatten', FlattenLayer()),
                     ('linear', nn.Linear(num_inputs, num_outputs))]))
    init.normal_(net.linear.weight, mean=0, std=0.01)
    init.constant_(net.linear.bias, val=0)

    loss = nn.CrossEntropyLoss()

    # Define the optimization algorithm:
    # mini-batch stochastic gradient descent with a learning rate of 0.1.
    optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

    # Train the model with the training function defined in the previous section.
    num_epochs = 5
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  None, None, optimizer)
def main():
    # 3.6.1 Load the data
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    # 3.6.2 Initialize the model parameters
    num_inputs = 784
    num_outputs = 10
    W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)),
                     dtype=torch.float32)
    b = torch.zeros(num_outputs, dtype=torch.float32)
    W.requires_grad_(True)
    b.requires_grad_(True)

    # 3.6.3 Implement the softmax operation
    def softmax(X):
        X_exp = X.exp()
        partition = X_exp.sum(dim=1, keepdim=True)
        return X_exp / partition  # broadcasting divides each row by its sum

    # 3.6.4 Define the model
    def net(X):
        return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)

    # 3.6.5 Define the loss function
    def cross_entropy(y_hat, y):
        # Pick out the predicted probability of each example's true label
        return -torch.log(y_hat.gather(1, y.view(-1, 1)))

    # 3.6.6 Classification accuracy (also implemented in the d2lzh package)
    def accuracy(y_hat, y):
        return (y_hat.argmax(dim=1) == y).float().mean().item()

    # 3.6.7 Train the model
    num_epochs, lr = 5, 0.1
    d2l.train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs,
                  batch_size, [W, b], lr)

    # 3.6.8 Predict
    X, y = next(iter(test_iter))  # iter().next() is Python 2 syntax
    true_labels = d2l.get_fashion_mnist_labels(y.numpy())
    pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
    titles = [
        true + '\n' + pred for true, pred in zip(true_labels, pred_labels)
    ]
    d2l.show_fashion_mnist(X[0:9], titles[0:9])
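# A quick sanity check of the pieces above on a toy batch (the numbers
# mirror the book's running example); the cross-entropy and accuracy steps
# are inlined here because the versions above are local to main.
def _toy_check():
    import torch
    y_hat = torch.tensor([[0.1, 0.3, 0.6],
                          [0.3, 0.2, 0.5]])  # predicted distributions
    y = torch.tensor([0, 2])                 # true labels
    # gather picks y_hat[0, 0] = 0.1 and y_hat[1, 2] = 0.5:
    print(-torch.log(y_hat.gather(1, y.view(-1, 1))))  # [[2.3026], [0.6931]]
    # argmax along dim=1 gives [2, 2]; only the second prediction is right:
    print((y_hat.argmax(dim=1) == y).float().mean().item())  # 0.5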
def __simplified():
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    net = nn.Sequential(
        d2l.FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs),
    )
    for params in net.parameters():
        init.normal_(params, mean=0, std=0.01)

    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

    # Train the model
    num_epochs = 5
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  None, None, optimizer)
def main():
    batch_size = 256
    root = '../Datasets'
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, root=root)

    num_inputs = 784
    num_outputs = 10

    # The Sequential below replaces the plain LinearNet so the layers
    # can be addressed by name (net.linear).
    net = LinearNet(num_inputs, num_outputs)
    net = nn.Sequential(
        OrderedDict([('flatten', FlattenLayer()),
                     ('linear', nn.Linear(num_inputs, num_outputs))]))
    init.normal_(net.linear.weight, mean=0, std=0.01)
    init.constant_(net.linear.bias, val=0)

    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

    num_epochs = 5
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  None, None, optimizer)
def main():
    # 3.9.1 Load the data
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    # 3.9.2 Define the model parameters
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)),
                      dtype=torch.float32)
    b1 = torch.zeros(num_hiddens, dtype=torch.float32)
    W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)),
                      dtype=torch.float32)
    b2 = torch.zeros(num_outputs, dtype=torch.float32)
    params = [W1, b1, W2, b2]
    for param in params:
        param.requires_grad_(True)

    # 3.9.3 Define the activation function
    def relu(X):
        return torch.max(input=X, other=torch.tensor(0.0))

    # 3.9.4 Define the model
    def net(X):
        X = X.view((-1, num_inputs))
        H = relu(torch.mm(X, W1) + b1)
        return torch.mm(H, W2) + b2

    # 3.9.5 Define the loss function
    loss = torch.nn.CrossEntropyLoss()

    # 3.9.6 Train the model
    num_epochs, lr = 5, 0.5
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  params, lr)
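# torch.max(input, other) broadcasts its scalar `other` argument, which is
# what makes the one-liner above a ReLU; a quick check:
def _relu_check():
    import torch
    x = torch.tensor([[-1.0, 2.0], [0.5, -3.0]])
    print(torch.max(input=x, other=torch.tensor(0.0)))
    # tensor([[0.0000, 2.0000],
    #         [0.5000, 0.0000]])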
def relu(X):
    return torch.max(input=X, other=torch.tensor(0.0))


def net(X):
    X = X.view((-1, num_inputs))
    H = relu(torch.matmul(X, W1) + b1)
    return torch.matmul(H, W2) + b2


loss = torch.nn.CrossEntropyLoss()

num_epochs, lr = 5, 100.0
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params, lr)

'''
3.10 Concise implementation of multilayer perceptrons
https://tangshusen.me/Dive-into-DL-PyTorch/#/chapter03_DL-basics/3.10_mlp-pytorch
'''
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys

sys.path.append("..")
import d2lzh_pytorch as d2l

num_inputs, num_outputs, num_hiddens = 784, 10, 256
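# The learning rate of 100.0 above looks alarming, but nn.CrossEntropyLoss
# already averages over the batch while the book's hand-written update
# divides the gradient by batch_size again, so the effective step is about
# lr / batch_size (100 / 256 ~ 0.39). For reference, d2l.sgd is essentially
# the sketch below.
def _sgd_sketch(params, lr, batch_size):
    # In-place mini-batch SGD update; note the extra division by batch_size
    for param in params:
        param.data -= lr * param.grad / batch_size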
# Define the cross-entropy loss function
def cross_entropy(y_hat, y):
    # view(-1, 1): the -1 lets that dimension be inferred from the others.
    # gather(dim, index) acts like a lookup: index (here y) is a tensor with
    # the same number of dimensions as y_hat, and each value of y selects the
    # corresponding entry of y_hat along dim.
    # For example, torch.gather(t, 1, torch.tensor([[0, 0], [1, 0]])) applied
    # to t = [[1, 2], [3, 4]] yields tensor([[1, 1], [4, 3]]).
    return -torch.log(y_hat.gather(1, y.view(-1, 1))).sum() / y_hat.shape[0]


# Set the hyperparameters
batch_size, num_epochs, lr = 256, 3, 0.1

# Load the data
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Train the model
d2l.train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs,
              batch_size, [W, b], lr)

# Predict
X, y = next(iter(test_iter))  # iter().next() is Python 2 syntax
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())

# Plot; remember that in a slice like X[a:b] the element at index b
# (right of the colon) is not included.
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
for i in range(2):
    d2l.show_fashion_mnist(X[(5 * i):(5 * (i + 1))],
                           titles[(5 * i):(5 * (i + 1))])
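# The gather example in the comment above is easy to verify directly:
def _gather_demo():
    import torch
    t = torch.tensor([[1, 2],
                      [3, 4]])
    print(torch.gather(t, 1, torch.tensor([[0, 0], [1, 0]])))
    # tensor([[1, 1],
    #         [4, 3]])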
def net(X, is_training=True):
    X = X.view(-1, num_inputs)
    H1 = (torch.matmul(X, W1) + b1).relu()
    if is_training:
        # Add a dropout layer after the first fully connected layer
        # (dropout is applied after the activation)
        H1 = d2l.dropout(H1, drop_prob1)
    H2 = (torch.mm(H1, W2) + b2).relu()
    if is_training:
        H2 = d2l.dropout(H2, drop_prob2)
    return torch.matmul(H2, W3) + b3


num_epochs, lr, batch_size = 4, 100.0, 256
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Concise version
net_pytorch = nn.Sequential(d2l.FlattenLayer(),
                            nn.Linear(num_inputs, num_hiddens1),
                            nn.ReLU(),
                            nn.Dropout(drop_prob1),
                            nn.Linear(num_hiddens1, num_hiddens2),
                            nn.ReLU(),
                            nn.Dropout(drop_prob2),
                            nn.Linear(num_hiddens2, num_outputs))
# Initialize the parameters of the network that is actually trained
# (the original looped over the from-scratch params here)
for p in net_pytorch.parameters():
    nn.init.normal_(p, mean=0, std=0.01)

# d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
optimizer = torch.optim.SGD(net_pytorch.parameters(), lr=0.5)
d2l.train_ch3(net_pytorch, train_iter, test_iter, loss, num_epochs,
              batch_size, None, None, optimizer)
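# d2l.dropout is the book's from-scratch inverted-dropout helper: it zeroes
# each element with probability drop_prob and rescales the survivors so the
# expected value is unchanged. Roughly:
def _dropout_sketch(X, drop_prob):
    import torch
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return torch.zeros_like(X)  # everything is dropped
    mask = (torch.rand(X.shape) < keep_prob).float()
    # Dividing by keep_prob keeps each element's expectation unchanged
    return mask * X / keep_prob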
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

num_inputs, num_outputs = 784, 10


class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)


from collections import OrderedDict

net = nn.Sequential(
    OrderedDict([('flattenlayer', FlattenLayer()),
                 ('linear', nn.Linear(num_inputs, num_outputs))]))
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              optimizer=optimizer)
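# A quick check that FlattenLayer turns a batch of 1x28x28 images into the
# (batch_size, 784) matrix the linear layer expects:
def _flatten_check():
    x = torch.rand(2, 1, 28, 28)    # a fake mini-batch of two images
    print(FlattenLayer()(x).shape)  # torch.Size([2, 784])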
# Define the dimensions
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Define the network
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)

# Re-initialize the model parameters (the layers were already initialized
# with PyTorch's defaults when the network was constructed)
for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)

# Define the loss function
loss = torch.nn.CrossEntropyLoss()

# Define the optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

# Load the data
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Train the model; lr is already carried by the optimizer,
# so params and lr are passed as None
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, optimizer)
w1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)),
                  dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
w2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)),
                  dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)

params = [w1, b1, w2, b2]
for param in params:
    param.requires_grad_(requires_grad=True)


def relu(x):
    return torch.max(input=x, other=torch.tensor(0.0))


# Define the model
def net(x):
    x = x.view((-1, num_inputs))
    h = relu(torch.matmul(x, w1) + b1)
    return torch.matmul(h, w2) + b2


# Define the loss function
loss = torch.nn.CrossEntropyLoss()

# Train the model
num_epoch, lr = 5, 0.01
d2l.train_ch3(net, train_iter, test_iter, loss, num_epoch, batch_size,
              params, lr)