def ResNet():
    net = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=1))

    # Stack residual blocks
    def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
        if first_block:
            assert in_channels == out_channels
        blk = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                blk.append(
                    Residual(in_channels, out_channels, use_1x1conv=True,
                             stride=2))
            else:
                blk.append(Residual(out_channels, out_channels))
        return nn.Sequential(*blk)

    net.add_module('resnet_block1', resnet_block(64, 64, 2, first_block=True))
    net.add_module('resnet_block2', resnet_block(64, 128, 2))  # channels double, spatial size halves
    net.add_module('resnet_block3', resnet_block(128, 256, 2))
    net.add_module('resnet_block4', resnet_block(256, 512, 2))
    net.add_module('global_avg_pool', d2l.GlobalAvgPool2d())  # output: (batch, 512, 1, 1)
    net.add_module('fc', nn.Sequential(d2l.FlattenLayer(), nn.Linear(512, 10)))
    return net
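# The ResNet() builder above depends on a `Residual` block defined elsewhere in
# the project. The following is only a minimal sketch, assuming the d2l-style
# interface Residual(in_channels, out_channels, use_1x1conv=False, stride=1);
# the original class may differ.
import torch
import torch.nn.functional as F
from torch import nn

class Residual(nn.Module):
    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               padding=1)
        # Optional 1x1 convolution so the shortcut matches the main path's
        # shape when channels or stride change
        self.conv3 = (nn.Conv2d(in_channels, out_channels, kernel_size=1,
                                stride=stride) if use_1x1conv else None)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return F.relu(Y + X)  # add the shortcut before the final activation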
def DenseNet():
    # Initial block
    net = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    num_channels, growth_rate = 64, 32
    num_convs_in_dense_blocks = [4, 4, 4, 4]

    # Dense blocks + transition blocks
    for i, num_convs in enumerate(num_convs_in_dense_blocks):
        DenseBlk = DenseBlock(num_convs, num_channels, growth_rate)
        net.add_module('DenseBlock_%d' % i, DenseBlk)
        # The dense block's output channels become the transition layer's input channels
        num_channels = DenseBlk.out_channels
        # Between dense blocks (which increase the channel count), insert a
        # transition layer that halves both the spatial size and the channels
        if i != len(num_convs_in_dense_blocks) - 1:
            TransBlk = transition_block(num_channels, num_channels // 2)
            net.add_module('transition_block_%d' % i, TransBlk)
            num_channels = num_channels // 2

    net.add_module('BN', nn.BatchNorm2d(num_channels))
    net.add_module('relu', nn.ReLU())
    # GlobalAvgPool2d output: (batch, num_channels, 1, 1)
    net.add_module('global_avg_pool', d2l.GlobalAvgPool2d())
    net.add_module('fc',
                   nn.Sequential(d2l.FlattenLayer(), nn.Linear(num_channels, 10)))
    return net
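# DenseNet() above assumes `DenseBlock` and `transition_block` with the d2l-style
# interface (a DenseBlock exposes `out_channels`). A minimal sketch under that
# assumption; the original definitions may differ.
import torch
from torch import nn

def conv_block(in_channels, out_channels):
    # BN -> ReLU -> 3x3 conv, the basic unit inside a dense block
    return nn.Sequential(nn.BatchNorm2d(in_channels), nn.ReLU(),
                         nn.Conv2d(in_channels, out_channels, kernel_size=3,
                                   padding=1))

class DenseBlock(nn.Module):
    def __init__(self, num_convs, in_channels, out_channels):
        super(DenseBlock, self).__init__()
        layers = []
        for i in range(num_convs):
            layers.append(conv_block(in_channels + i * out_channels,
                                     out_channels))
        self.net = nn.ModuleList(layers)
        # Channel count seen by the next module
        self.out_channels = in_channels + num_convs * out_channels

    def forward(self, X):
        for blk in self.net:
            Y = blk(X)
            X = torch.cat((X, Y), dim=1)  # concatenate along the channel axis
        return X

def transition_block(in_channels, out_channels):
    # 1x1 conv reduces the channel count; average pooling halves height/width
    return nn.Sequential(nn.BatchNorm2d(in_channels), nn.ReLU(),
                         nn.Conv2d(in_channels, out_channels, kernel_size=1),
                         nn.AvgPool2d(kernel_size=2, stride=2))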
def get_net():
    # Build the network (ResNet model)
    model_path = r"F:\PyCharm\Practice\hand_wrtten\logs\Epoch100-Loss0.0000-train_acc1.0000-test_acc0.9930.pth"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    net = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    net.add_module("resnet_block1", resnet_block(64, 64, 2, first_block=True))
    net.add_module("resnet_block2", resnet_block(64, 128, 2))
    net.add_module("resnet_block3", resnet_block(128, 256, 2))
    # GlobalAvgPool2d output: (Batch, 256, 1, 1)
    net.add_module("global_avg_pool", d2l.GlobalAvgPool2d())
    net.add_module("fc", nn.Sequential(d2l.FlattenLayer(), nn.Linear(256, 10)))

    # Sanity-check the network
    # X = torch.rand((1, 1, 28, 28))
    # for name, layer in net.named_children():
    #     X = layer(X)
    #     print(name, ' output shape:\t', X.shape)

    # Load the trained weights
    print("Load weight into state dict...")
    state_dict = torch.load(model_path, map_location=device)
    net.load_state_dict(state_dict)
    net.to(device)
    net.eval()
    print("Load finish!")
    return net
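# A hypothetical usage sketch for get_net(): run one 1x28x28 input through the
# loaded model and take the argmax as the predicted class. The random tensor
# below only stands in for a preprocessed grayscale image; it is not part of
# the original script.
if __name__ == '__main__':
    net = get_net()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    X = torch.rand(1, 1, 28, 28, device=device)  # placeholder input
    with torch.no_grad():
        pred = net(X).argmax(dim=1).item()
    print('predicted class:', pred)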
def vgg(conv_arch, fc_features, fc_hidden_units=4096):
    net = nn.Sequential()
    # Convolutional layers
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):
        net.add_module("vgg_block_" + str(i + 1),
                       vgg_block(num_convs, in_channels, out_channels))
    # Fully connected layers
    net.add_module(
        "fc",
        nn.Sequential(d2l.FlattenLayer(),
                      nn.Linear(fc_features, fc_hidden_units), nn.ReLU(),
                      nn.Dropout(0.5),
                      nn.Linear(fc_hidden_units, fc_hidden_units), nn.ReLU(),
                      nn.Dropout(0.5), nn.Linear(fc_hidden_units, 10)))
    return net
def vgg(conv_arch, fc_features, fc_hidden_units=4096):
    net = nn.Sequential()
    # Convolutional part
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):
        # enumerate() wraps an iterable (list, tuple, string, ...) so that it
        # yields (index, element) pairs
        # Each vgg_block halves the height and width
        net.add_module("vgg_block_" + str(i + 1),
                       vgg_block(num_convs, in_channels, out_channels))
        # add_module attaches a child module to the current module; the child
        # can later be retrieved by its name
    # Fully connected part
    net.add_module("fc", nn.Sequential(
        d2l.FlattenLayer(),
        nn.Linear(fc_features, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, 10)
    ))
    return net
def vgg(conv_arch, fc_features, fc_hidden_units=4096):
    net = nn.Sequential()
    # Convolutional part
    for i, (num_conv, in_channels, out_channels) in enumerate(conv_arch):
        # Each vgg_block halves the height and width
        net.add_module("vgg_block_" + str(i + 1),
                       vgg_block(num_conv, in_channels, out_channels))
    # Fully connected layers
    net.add_module(
        "fc",
        nn.Sequential(d2l.FlattenLayer(),
                      nn.Linear(fc_features, fc_hidden_units), nn.ReLU(),
                      nn.Dropout(0.5),
                      nn.Linear(fc_hidden_units, fc_hidden_units), nn.ReLU(),
                      nn.Dropout(0.5), nn.Linear(fc_hidden_units, 10)))
    return net
def vgg(conv_arch, fc_features, fc_hidden_units):
    net = nn.Sequential()
    # Convolutional part
    for i, (num_conv, in_channels, out_channels) in enumerate(conv_arch):
        net.add_module(
            'vgg_block_' + str(i + 1),
            vgg_block(num_conv=num_conv,
                      in_channels=in_channels,
                      out_channels=out_channels))
    # Fully connected part
    net.add_module(
        'fc',
        nn.Sequential(d2l.FlattenLayer(),
                      nn.Linear(fc_features, fc_hidden_units), nn.ReLU(),
                      nn.Dropout(0.5),
                      nn.Linear(fc_hidden_units, fc_hidden_units), nn.ReLU(),
                      nn.Dropout(0.5), nn.Linear(fc_hidden_units, 10)))
    return net
def vgg(conv_arch, fc_features, fc_hidden_unit=4096):
    net = nn.Sequential()
    # Convolutional part
    # enumerate() wraps an iterable (list, tuple, string, ...) so that it
    # yields (index, element) pairs
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):
        net.add_module("vgg_blk_" + str(i + 1),
                       vgg_block(num_convs, in_channels, out_channels))
    # Fully connected part
    net.add_module(
        "fc",
        nn.Sequential(
            d2l.FlattenLayer(),  # 4-D to 2-D
            nn.Linear(fc_features, fc_hidden_unit), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden_unit, fc_hidden_unit), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden_unit, 10)))
    return net
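# Every vgg() variant above calls a `vgg_block` helper that is defined elsewhere.
# A minimal sketch, assuming the d2l-style signature
# vgg_block(num_convs, in_channels, out_channels); the original helper may differ.
from torch import nn

def vgg_block(num_convs, in_channels, out_channels):
    blk = []
    for i in range(num_convs):
        if i == 0:
            blk.append(nn.Conv2d(in_channels, out_channels, kernel_size=3,
                                 padding=1))
        else:
            blk.append(nn.Conv2d(out_channels, out_channels, kernel_size=3,
                                 padding=1))
        blk.append(nn.ReLU())
    blk.append(nn.MaxPool2d(kernel_size=2, stride=2))  # halves height and width
    return nn.Sequential(*blk)

# Example configuration: 5 blocks, so a 224x224 input ends up as 512 x 7 x 7
# conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256, 512), (2, 512, 512))
# net = vgg(conv_arch, 512 * 7 * 7)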
def __simplified():
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    net = nn.Sequential(
        d2l.FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs),
    )
    for params in net.parameters():
        init.normal_(params, mean=0, std=0.01)

    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

    # Train the model
    num_epochs = 5
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  None, None, optimizer)
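# d2l.FlattenLayer, used throughout these snippets, is the book's small helper
# that flattens a (batch, ...) tensor into (batch, features) before the linear
# layers. A minimal sketch of an equivalent module:
from torch import nn

class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        # e.g. (batch, 1, 28, 28) -> (batch, 784)
        return x.view(x.shape[0], -1)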
    def __init__(self):
        super(VGG, self).__init__()
        # conv_arch has 5 vgg blocks; each tuple is (num convolutions, in channels, out channels)
        self.conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256),
                          (2, 256, 512), (2, 512, 512))
        # After the 5 conv stages the width/height has been halved 5 times,
        # so a 224x224 image becomes 224 / 32 = 7
        self.fc_features = 512 * 7 * 7
        self.fc_hidden_unit = 4096
        self.net = nn.Sequential()
        # Convolutional part
        for i, (num_convs, in_channels, out_channels) in enumerate(self.conv_arch):
            self.net.add_module(
                'vgg_block_' + str(i + 1),
                self.vgg_block(num_convs, in_channels, out_channels))
        # Fully connected part
        self.net.add_module(
            'fc',
            nn.Sequential(d2l.FlattenLayer(),
                          nn.Linear(self.fc_features, self.fc_hidden_unit),
                          nn.ReLU(), nn.Dropout(0.5),
                          nn.Linear(self.fc_hidden_unit, self.fc_hidden_unit),
                          nn.ReLU(), nn.Dropout(0.5),
                          nn.Linear(self.fc_hidden_unit, 10)))
b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
                   Inception(256, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
                   Inception(512, 160, (112, 224), (24, 64), 64),
                   Inception(512, 128, (128, 256), (24, 64), 64),
                   Inception(512, 112, (144, 288), (32, 64), 64),
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
                   Inception(832, 384, (192, 384), (48, 128), 128),
                   d2l.GlobalAvgPool2d())

net = nn.Sequential(b1, b2, b3, b4, b5, d2l.FlattenLayer(),
                    nn.Linear(1024, 10))

batch_size = 128
# If you hit an "out of memory" error, reduce batch_size or resize
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
              num_epochs)
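# The blocks b2-b5 above assume an `Inception` module (and a first stage b1)
# defined earlier in the file. A minimal sketch of the d2l-style Inception block
# with four parallel paths concatenated along the channel dimension; the b1
# shown here is likewise an assumption based on the book's GoogLeNet.
import torch
import torch.nn.functional as F
from torch import nn

class Inception(nn.Module):
    # c1 - c4 are the output channel counts of the four paths
    def __init__(self, in_c, c1, c2, c3, c4):
        super(Inception, self).__init__()
        self.p1_1 = nn.Conv2d(in_c, c1, kernel_size=1)       # path 1: 1x1 conv
        self.p2_1 = nn.Conv2d(in_c, c2[0], kernel_size=1)    # path 2: 1x1 then 3x3 conv
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        self.p3_1 = nn.Conv2d(in_c, c3[0], kernel_size=1)    # path 3: 1x1 then 5x5 conv
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)  # path 4: 3x3 pool then 1x1 conv
        self.p4_2 = nn.Conv2d(in_c, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        return torch.cat((p1, p2, p3, p4), dim=1)

# b1 (assumed): initial 7x7 convolution plus max pooling
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))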
        feature = self.conv(X)
        output = self.fc(feature)
        return output


net = torch.nn.Sequential(  # LeNet
    Reshape(),
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),  # b*1*28*28 => b*6*28*28
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),  # b*6*28*28 => b*6*14*14
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),  # b*6*14*14 => b*16*10*10
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),  # b*16*10*10 => b*16*5*5
    d2l.FlattenLayer(),  # b*16*5*5 => b*400
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    nn.Sigmoid(),
    nn.Linear(84, 10))

LeNet = LeNet()
print(LeNet)

####################### AlexNet ##################
'''
LeNet: its performance on large real-world datasets was underwhelming.
1. The network is computationally expensive.
2. Parameter initialization, non-convex optimization and many related areas
   had not yet been studied in depth.
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    DB = DenseBlock(num_convs, num_channels, growth_rate)
    net.add_module("DenseBlock_%d" % i, DB)
    # Output channels of the previous dense block
    num_channels = DB.out_channels
    # Between dense blocks, insert a transition layer that halves the channel count
    if i != len(num_convs_in_dense_blocks) - 1:
        net.add_module("transition_block_%d" % i,
                       transition_block(num_channels, num_channels // 2))
        num_channels = num_channels // 2

# Finally attach a global pooling layer and a fully connected layer for the output.
net.add_module("BN", nn.BatchNorm2d(num_channels))
net.add_module("relu", nn.ReLU())
net.add_module("global_avg_pool", d2l.GlobalAvgPool2d())  # output: (Batch, num_channels, 1, 1)
net.add_module("fc",
               nn.Sequential(d2l.FlattenLayer(), nn.Linear(num_channels, 10)))

'''
# Print every child module's output shape to make sure the network is correct:
X = torch.rand((1, 1, 96, 96))
for name, layer in net.named_children():
    X = layer(X)
    print(name, ' output shape:\t', X.shape)
'''

batch_size = 1
# If you hit an "out of memory" error, reduce batch_size or resize
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    blk = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU())
    return blk


net = nn.Sequential(nin_block(1, 96, kernel_size=11, stride=4, padding=0),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nin_block(96, 256, kernel_size=5, stride=1, padding=2),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nin_block(256, 384, kernel_size=3, stride=1, padding=1),
                    nn.MaxPool2d(kernel_size=3, stride=2), nn.Dropout(0.5),
                    nin_block(384, 10, kernel_size=3, stride=1, padding=1),
                    nn.AvgPool2d(kernel_size=5), d2l.FlattenLayer())

X = torch.rand(1, 1, 224, 224)
for name, blk in net.named_children():
    X = blk(X)
    print(name, 'output shape:', X.shape)

batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
lr, num_epochs = 0.002, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
              num_epochs)
            self.moving_mean = self.moving_mean.to(device)
            self.moving_var = self.moving_var.to(device)
        Y, self.moving_mean, self.moving_var = batch_norm(
            self.training, X, self.gamma, self.beta, self.moving_mean,
            self.moving_var, eps=1e-5, momentum=0.9)
        return Y


net = nn.Sequential(nn.Conv2d(1, 6, 5), nn.Sigmoid(), nn.MaxPool2d(2, 2),
                    nn.Conv2d(6, 16, 5), BatchNorm(16, num_dims=4),
                    nn.Sigmoid(), nn.MaxPool2d(2, 2), d2l.FlattenLayer(),
                    nn.Linear(16 * 4 * 4, 120), BatchNorm(120, num_dims=2),
                    nn.Sigmoid(), nn.Linear(120, 84), BatchNorm(84, num_dims=2),
                    nn.Sigmoid(), nn.Linear(84, 10))

batch_size = 64
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
lr, num_epochs = 0.001, 5
optimizer = optim.Adam(net.parameters(), lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
              num_epochs)
# print(net[1].gamma.view((-1,)), net[1].beta.view((-1,)))
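# The BatchNorm module above delegates the actual computation to a `batch_norm`
# function defined elsewhere. A minimal sketch, assuming the d2l-style signature
# batch_norm(is_training, X, gamma, beta, moving_mean, moving_var, eps, momentum):
import torch

def batch_norm(is_training, X, gamma, beta, moving_mean, moving_var, eps, momentum):
    if not is_training:
        # Evaluation mode: normalize with the running statistics
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected case: per-feature mean/variance over the batch
            mean = X.mean(dim=0)
            var = ((X - mean) ** 2).mean(dim=0)
        else:
            # Convolutional case: per-channel statistics of shape (1, C, 1, 1)
            mean = X.mean(dim=0, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
            var = ((X - mean) ** 2).mean(dim=0, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
        X_hat = (X - mean) / torch.sqrt(var + eps)
        # Update the running statistics
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean, moving_var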
def net(X, is_training=True):
    X = X.view(-1, num_inputs)
    H1 = (torch.matmul(X, W1) + b1).relu()
    if is_training:
        H1 = d2l.dropout(H1, drop_prob1)  # apply dropout after the first hidden layer's activation
    H2 = (torch.mm(H1, W2) + b2).relu()
    if is_training:
        H2 = d2l.dropout(H2, drop_prob2)
    return torch.matmul(H2, W3) + b3


num_epochs, lr, batch_size = 4, 100.0, 256
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Concise version
net_pytorch = nn.Sequential(d2l.FlattenLayer(),
                            nn.Linear(num_inputs, num_hiddens1), nn.ReLU(),
                            nn.Dropout(drop_prob1),
                            nn.Linear(num_hiddens1, num_hiddens2), nn.ReLU(),
                            nn.Dropout(drop_prob2),
                            nn.Linear(num_hiddens2, num_outputs))
for p in net_pytorch.parameters():
    nn.init.normal_(p, mean=0, std=0.01)

# d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
optimizer = torch.optim.SGD(net_pytorch.parameters(), lr=0.5)
d2l.train_ch3(net_pytorch, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, optimizer)
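# d2l.dropout above is the book's from-scratch inverted dropout. A minimal
# sketch, assuming the signature dropout(X, drop_prob):
import torch

def dropout(X, drop_prob):
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        # Drop every element
        return torch.zeros_like(X)
    mask = (torch.rand(X.shape) < keep_prob).float()
    # Scale by 1/keep_prob so the expected value is unchanged (inverted dropout)
    return mask * X / keep_prob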
    else:
        num_workers = 4  # load data quickly
    train_iter = torch.utils.data.DataLoader(mnist_train,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test,
                                            batch_size=batch_size,
                                            shuffle=False,
                                            num_workers=num_workers)
    return train_iter, test_iter


if __name__ == '__main__':
    # dataset
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    # model
    net = nn.Sequential(
        d2l.FlattenLayer(),  # change x shape
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs),
    )
    for params in net.parameters():
        nn.init.normal_(params, mean=0, std=0.01)

    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

    # training
    num_epochs = 5
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  None, None, optimizer)

n_train, n_test, true_w, true_b = 100, 100, [1.2, -3.4, 5.6], 5
features = torch.randn((n_train + n_test, 1))
poly_features = torch.cat((features, torch.pow(features, 2),
                           torch.pow(features, 3)), 1)
labels = (true_w[0] * poly_features[:, 0] + true_w[1] * poly_features[:, 1]
drop_prob1, drop_prob2 = 0.2, 0.5

# is_training indicates training mode; dropout is not applied in evaluation mode
# def net(X, is_training=True):
#     X = X.view(-1, num_inputs)
#     H1 = (torch.matmul(X, W1) + b1).relu()
#     if is_training:
#         H1 = dropout(H1, drop_prob1)
#     H2 = (torch.matmul(H1, W2) + b2).relu()
#     if is_training:
#         H2 = dropout(H2, drop_prob2)
#     return torch.matmul(H2, W3) + b3

num_epochs, lr, batch_size = 5, 100.0, 256
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

# Concise implementation
net = nn.Sequential(d2l.FlattenLayer(), nn.Linear(num_inputs, num_hiddens1),
                    nn.ReLU(), nn.Dropout(drop_prob1),
                    nn.Linear(num_hiddens1, num_hiddens2), nn.ReLU(),
                    nn.Dropout(drop_prob2),
                    nn.Linear(num_hiddens2, num_outputs))
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,
              None, optimizer)
            self.moving_var = self.moving_var.to(X.device)
        Y, self.moving_mean, self.moving_var = batch_norm(
            self.training, X, self.gamma, self.beta, self.moving_mean,
            self.moving_var, eps=1e-5, momentum=0.9)
        return Y


net = nn.Sequential(nn.Conv2d(1, 6, 5), BatchNorm(6, num_dims=4),
                    nn.Sigmoid(), nn.MaxPool2d(2, 2), nn.Conv2d(6, 16, 5),
                    BatchNorm(16, num_dims=4), nn.Sigmoid(),
                    nn.MaxPool2d(2, 2), d2l.FlattenLayer(),
                    nn.Linear(16 * 4 * 4, 120), BatchNorm(120, num_dims=2),
                    nn.Sigmoid(), nn.Linear(120, 84),
                    BatchNorm(84, num_dims=2), nn.Sigmoid(),
                    nn.Linear(84, 10))

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
              num_epochs)

net[1].gamma.view((-1,)), net[1].beta.view((-1,))

net = nn.Sequential(nn.Conv2d(1, 6, 5), nn.BatchNorm2d(6), nn.Sigmoid(),
                    nn.MaxPool2d(2, 2), nn.Conv2d(6, 16, 5),
    def __init__(self, num_inputs, num_hiddens, num_outputs):
        super(mlp, self).__init__()
        self.FlattenLayer = d2l.FlattenLayer()
        self.Linear1 = nn.Linear(num_inputs, num_hiddens)
        self.ReLU = nn.ReLU()
        self.Linear2 = nn.Linear(num_hiddens, num_outputs)
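    # The snippet above only shows __init__; a forward pass consistent with these
    # layers (assumed, not taken from the original file) would simply chain them:
    def forward(self, x):
        x = self.FlattenLayer(x)
        x = self.Linear1(x)
        x = self.ReLU(x)
        return self.Linear2(x)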
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(
                Residual(in_channels, out_channels, use_1x1conv=True,
                         stride=2))
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.Sequential(*blk)


# The first module is special: its input and output channel counts are equal;
# each later module doubles the channel count
net.add_module("resnet_block1", resnet_block(64, 64, 2, first_block=True))
net.add_module("resnet_block2", resnet_block(64, 128, 2))
net.add_module("resnet_block3", resnet_block(128, 256, 2))
net.add_module("resnet_block4", resnet_block(256, 512, 2))
# Global average pooling followed by a fully connected layer
net.add_module("global_avg_pool", d2l.GlobalAvgPool2d())  # output: (Batch, 512, 1, 1)
net.add_module("fc", nn.Sequential(d2l.FlattenLayer(), nn.Linear(512, 10)))

batch_size = 2
# If you hit an "out of memory" error, reduce batch_size or resize
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
              num_epochs)
# Define the NiN model
# No fully connected layers: a convolutional layer makes the number of output
# channels equal to the number of class labels.
# This shrinks the parameter size and mitigates overfitting, but can increase training time.
net = nn.Sequential(
    nin_block(1, 96, kernel_size=11, stride=4, padding=0),  # (224 - 11 + 4 - 1) / 4 = 54
    nn.MaxPool2d(kernel_size=3, stride=2),  # (54 - 1) / 2 = 26
    nin_block(96, 256, kernel_size=5, stride=1, padding=2),  # size unchanged
    nn.MaxPool2d(kernel_size=3, stride=2),  # (26 - 1) / 2 = 12
    nin_block(256, 384, kernel_size=3, stride=1, padding=1),  # size unchanged
    nn.MaxPool2d(kernel_size=3, stride=2),  # (12 - 1) / 2 = 5
    nn.Dropout(0.5),  # dropout
    nin_block(384, 10, kernel_size=3, stride=1, padding=1),  # 10 class labels
    d2l.GlobalAvgPool2d(),  # global average pooling: (batch_size, 10, 1, 1)
    d2l.FlattenLayer()  # reshape to (batch_size, 10)
)

# Test
# x = torch.rand(1, 1, 224, 224)
# for name, blk in net.named_children():  # named_children() yields each child module's name and object
#     x = blk(x)
#     print(name, 'output shape: ', x.shape)

# Hyperparameters
lr, num_epochs, batch_size = 0.002, 5, 64
# Data
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
# Optimizer
optimizer = optim.Adam(net.parameters(), lr=lr)
# Training
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
print(dropout(X, 0.5))
print(dropout(X, 1.0))
'''

num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256
W1 = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1)),
                  dtype=torch.float, requires_grad=True)
b1 = torch.zeros(num_hiddens1, requires_grad=True)
W2 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2)),
                  dtype=torch.float, requires_grad=True)
b2 = torch.zeros(num_hiddens2, requires_grad=True)
W3 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs)),
                  dtype=torch.float, requires_grad=True)
b3 = torch.zeros(num_outputs, requires_grad=True)
params = [W1, b1, W2, b2, W3, b3]

drop_prob1, drop_prob2 = 0.2, 0.5

net = nn.Sequential(
    d2l.FlattenLayer(),  # reshape X
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, num_outputs)
)
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

num_epochs, lr, batch_size = 5, 100.0, 256
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,
              None, optimizer)
class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        return F.avg_pool2d(x, kernel_size=x.size()[2:])


net = nn.Sequential(nin_block(1, 96, kernel_size=11, stride=4, padding=0),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nin_block(96, 256, kernel_size=5, stride=1, padding=2),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nin_block(256, 384, kernel_size=3, stride=1, padding=1),
                    nn.MaxPool2d(kernel_size=3, stride=2), nn.Dropout(0.5),
                    nin_block(384, 10, kernel_size=3, stride=1, padding=1),
                    GlobalAvgPool2d(), d2l.FlattenLayer())

X = t.rand(1, 1, 224, 224)
for name, blk in net.named_children():
    X = blk(X)
    print(name, "output shape : ", X.shape)

batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
lr, num_epochs = 0.002, 5
optimizer = t.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
              num_epochs)
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

# In[7]:

b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
                   Inception(832, 384, (192, 384), (48, 128), 128),
                   d2l.GlobalAvgPool2d())

# In[8]:

net = nn.Sequential(b1, b2, b3, b4, b5, d2l.FlattenLayer(),
                    nn.Linear(1024, 10))

X = torch.rand(1, 1, 96, 96)
for blk in net.children():
    X = blk(X)
    print('output shape: ', X.shape)

# ## 5.9.3 Obtain the data and train the model

# In[9]:

batch_size = 128
# If you hit an "out of memory" error, reduce batch_size or resize
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
net.add_module("DenseBlock_%d" % i, DB) # 上一个稠密块的输出通道数 num_channels = DB.out_channels # 在稠密块之间加入通道减半的过渡层 if i != len(num_convs_in_dense_blocks) - 1: net.add_module("transition_block_%d" % i, transition_block(num_channels, num_channels // 2)) num_channels = num_channels // 2 # 同ResNet一样,最后接上全局池化层和全连接层来输出。 net.add_module("BN", nn.BatchNorm2d(num_channels)) net.add_module("relu", nn.ReLU()) # GlobalAvgPool2d的输出: (Batch, num_channels, 1, 1) net.add_module("global_avg_pool", d2l.GlobalAvgPool2d()) net.add_module("fc", nn.Sequential(d2l.FlattenLayer(), nn.Linear(num_channels, 10))) # 我们尝试打印每个子模块的输出维度确保网络无误: X = torch.rand((1, 1, 96, 96)) for name, layer in net.named_children(): X = layer(X) print(name, ' output shape:\t', X.shape) blk = DenseBlock(2, 3, 10) X = torch.rand(4, 3, 8, 8) Y = blk(X) print(Y.shape) # summary(blk, (3, 8, 8)) blk = transition_block(23, 10) print(blk(Y).shape)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')

## Load data.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

num_inputs = 28 * 28
num_outputs = 10

## Define model.
net = nn.Sequential(
    # FlattenLayer()
    # nn.Linear(num_inputs, num_outputs)
    OrderedDict([('flatten', d2l.FlattenLayer()),
                 ('linear', nn.Linear(num_inputs, num_outputs))])).to(device)

## Init paras.
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

## Define loss function.
loss = nn.CrossEntropyLoss().to(device)

## Define optimization function.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

## Train model.
num_epoches = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epoches, batch_size, None,
3.10 Concise implementation of multilayer perceptrons
https://tangshusen.me/Dive-into-DL-PyTorch/#/chapter03_DL-basics/3.10_mlp-pytorch
'''
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

num_inputs, num_outputs, num_hiddens = 784, 10, 256

net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)
for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

num_epochs = 5
""" import torch import torchvision import numpy as np import sys from torch import nn from collections import OrderedDict from torch.nn import init import d2lzh_pytorch as d2l # 定义模型 # 神经网络,用来求导,计算梯度 num_inputs, num_outputs = 784, 10 net = nn.Sequential() net.add_module('flatten', d2l.FlattenLayer()) net.add_module('linear', nn.Linear(num_inputs, num_outputs)) # 初始化参数(但其实初始化网络时,参数已经初始化) init.normal_(net.linear.weight, mean=0, std=0.01) init.constant_(net.linear.bias, val=0) # 定义损失函数 # PyTorch提供了一个包括softmax运算的交叉熵损失函数 loss = nn.CrossEntropyLoss() # 定义最优化算法,用来更新参数 optimizer = torch.optim.SGD(net.parameters(), lr=0.1) # 初始化参数 batch_size, num_epochs = 256, 5