def train(self):
    """Instantiate the network and train it on Fashion-MNIST via utils.train_ch3.

    Optimization is delegated to a torch SGD optimizer (lr=0.5), so the
    `lr` slot in the tuple assignment below is unused by train_ch3.
    """
    net = self.net()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
    n_epochs, lr, batch_size = 5, 100, 256
    loss = nn.CrossEntropyLoss()
    train_iter, test_iter = utils.load_data_fashion_mnist(batch_size)
    # params/lr are passed as None because the external optimizer drives updates.
    utils.train_ch3(net, train_iter, test_iter, loss, n_epochs, batch_size,
                    None, None, optimizer=optimizer)
def main():
    """Build a VGG-style Gluon network and train it on resized Fashion-MNIST.

    `architecture` lists (num_convs, channels) per VGG stage; images are
    resized to 96x96 so the stack of 5 pooling stages still leaves a
    non-empty feature map.
    """
    num_outputs = 10
    architecture = ((2, 64), (2, 128), (4, 256), (4, 512), (4, 512))

    net = nn.Sequential()
    with net.name_scope():
        net.add(
            vgg_stack(architecture),
            nn.Flatten(),
            nn.Dense(4096, activation="relu"),
            nn.Dropout(.5),
            nn.Dense(4096, activation="relu"),
            nn.Dropout(.5),
            nn.Dense(num_outputs),
        )

    train_data, test_data = utils.load_data_fashion_mnist(batch_size=64,
                                                          resize=96)
    ctx = utils.try_gpu()
    net.initialize(ctx=ctx, init=init.Xavier())

    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.05})
    utils.train(train_data, test_data, net, loss, trainer, ctx,
                num_epochs=100)
def train():
    """Run 5 epochs of manual SGD training on Fashion-MNIST.

    NOTE(review): `ctx`, `net`, and `params` are free variables resolved from
    the enclosing module — this function assumes they are defined elsewhere.
    The label is moved to `ctx` but `data` is not; presumably `ctx` is the CPU
    here, otherwise `net(data, ...)` would mix contexts — TODO confirm.
    """
    batch_size = 256
    train_data, test_data = utils.load_data_fashion_mnist(batch_size)
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    learning_rate = 0.2

    for epoch in range(5):
        train_loss = 0.
        train_acc = 0.
        for data, label in train_data:
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data, is_training=True)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # Divide lr by batch size because gradients are summed over the batch.
            utils.SGD(params, learning_rate / batch_size)
            train_loss += nd.mean(loss).asscalar()
            train_acc += utils.accuracy(output, label)

        test_acc = utils.evaluate_accuracy(test_data, net, ctx)
        print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
            epoch, train_loss / len(train_data),
            train_acc / len(train_data), test_acc))
def __init__(self):
    """Set up data iterators and layer sizes for a one-hidden-layer MLP
    on Fashion-MNIST, then initialize the weights.
    """
    self.batch_size = 256
    # NOTE(review): batch_size is not forwarded to the loader — same as the
    # sibling class in this file, so left unchanged; verify the default matches.
    self.train_iter, self.test_iter = utils.load_data_fashion_mnist()
    self.input_num = 784   # 28x28 image, flattened
    self.hide_num = 256    # hidden-layer width
    # BUG FIX: the original set output_num = 256 (a copy-paste of the hidden
    # width). Fashion-MNIST has 10 classes, consistent with the other models
    # in this file that use num_outputs = 10.
    self.output_num = 10
    self.params = self.init_weight()
def __init__(self):
    """Configure a softmax-regression setup on Fashion-MNIST.

    Stores the data iterators, the problem dimensions, and a LinearNet
    model mapping 784 flattened pixels to 10 class scores.
    """
    self.num_inputs = 784    # 28x28 image, flattened
    self.num_outputs = 10
    self.batch_size = 256
    self.train_iter, self.test_iter = utils.load_data_fashion_mnist()
    self.num_epoch = 3
    self.net = LinearNet(self.num_inputs, self.num_outputs)
def test(self):
    """Visualize predictions: show the first 9 test images titled with
    their true and predicted Fashion-MNIST labels.
    """
    _, test_iter = utils.load_data_fashion_mnist()
    # BUG FIX: the original used `iter(test_iter).next()`, which is
    # Python-2-only; Python 3 iterators expose __next__ via next().
    X, y = next(iter(test_iter))
    true_labels = get_fashion_mnist_labels(y)
    pre_labels = get_fashion_mnist_labels(self.net(X).argmax(dim=1).numpy())
    titles = [true + '\n' + pre for true, pre in zip(true_labels, pre_labels)]
    show_fashion_mnist(X[0:9], titles[0:9])
def test_softmax_org():
    """Smoke-test SoftmaxOrg: print a random input and evaluate the
    untrained net's accuracy on the Fashion-MNIST test split.

    The commented-out calls are earlier manual checks of softmax,
    cross-entropy, and accuracy kept for reference.
    """
    obj = SoftmaxOrg()
    x = torch.rand((2, 5))
    print(x)
    # print(obj.softmax(x))
    y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
    y = torch.LongTensor([0, 2])
    # print(obj.cross_entropy(y_hat,y))
    # print(obj.accuracy(y_hat,y))
    _, test_iter = utils.load_data_fashion_mnist()
    print(utils.evaluate_accuracy(test_iter, obj.net))
def train(num_gpus, batch_size, lr):
    """Data-parallel training of LeNet across `num_gpus` GPUs.

    Each epoch runs `train_batch` over the data with per-device parameter
    copies, then validates on GPU 0 using the first device's parameters.
    NOTE(review): `params`, `get_params`, `train_batch`, and `lenet` are
    free names resolved from the enclosing module.
    """
    train_data, test_data = utils.load_data_fashion_mnist(batch_size)
    # BUG FIX: the original wrote `gpu[i]`, indexing the mx.gpu context
    # factory instead of calling it; `gpu(i)` builds the i-th GPU context.
    ctx = [gpu(i) for i in range(num_gpus)]
    print('running on', ctx)
    # One copy of the parameters per device.
    dev_params = [get_params(params, c) for c in ctx]
    for epoch in range(5):
        start = time()
        for data, label in train_data:
            train_batch(data, label, dev_params, ctx, lr)
        # Block until all async device work finishes so the timing is real.
        nd.waitall()
        print('Epoch: %d, training time = %.1f sec'%(epoch, time() - start))
        # validating on GPU 0
        net = lambda data : lenet(data, dev_params[0])
        test_acc = utils.evaluate_accuracy(test_data, net, ctx[0])
        print('Validataion Accuracy = %.4f'%(test_acc))
def train(self):
    """Train this softmax-regression model on Fashion-MNIST.

    Delegates the training loop to utils.train_ch3, passing the model's
    own weight/bias parameters and cross-entropy loss.
    """
    num_epoch, lr = 5, 0.1
    batch_size = 256
    train_iter, test_iter = utils.load_data_fashion_mnist()
    utils.train_ch3(self.net, train_iter, test_iter, self.cross_entropy,
                    num_epoch, batch_size, [self.W, self.b], lr)
# Softmax-regression setup on Fashion-MNIST: load data and initialize
# the weight matrix W (784x10) and bias b (10,) with gradients enabled.
import torch
import torchvision
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import utils as d2l

print(torch.__version__)
print(torchvision.__version__)

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(
    batch_size, root='input/FashionMNIST2065')

num_inputs = 784
print(28 * 28)
num_outputs = 10

W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)),
                 dtype=torch.float)  # [784, 10]
b = torch.zeros(num_outputs, dtype=torch.float)  # [10,]

# BUG FIX: the original repeated the `num_inputs = 784 / print(28 * 28) /
# num_outputs = 10` statements verbatim a second time here — an accidental
# duplication that re-printed 784 and rebound the same values; removed.

W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)

X = torch.tensor([[1, 2, 3], [4, 5, 6]])
def load_data_set(batch_size, re_size):
    """Return (train, test) Fashion-MNIST iterators, resized to `re_size`."""
    return utils.load_data_fashion_mnist(batch_size, resize=re_size)
def __init__(self):
    """Prepare Fashion-MNIST iterators (batch size 256) and initialize the
    softmax-regression parameters W and b.
    """
    self.train_iter, self.test_iter = utils.load_data_fashion_mnist(256)
    self.input_num = 28 * 28  # flattened image size
    self.output_num = 10      # Fashion-MNIST class count
    self.W, self.b = self.init_model_param()
if self.verbose: print('Block %d output: %s' % (i + 1, out.shape)) return out net = GoogleNet(10, verbose=True) net.initialize() x = nd.random.uniform(shape=(4, 3, 96, 96)) y = net(x) ctx = get_ctx() net = GoogleNet(10, verbose=False) net.initialize(ctx=ctx, init=init.Xavier()) batch_size = 64 train_data, test_data = load_data_fashion_mnist(batch_size=batch_size, transform=transform_resize) softmax_xentropy = gluon.loss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(params=net.collect_params(), optimizer='sgd', optimizer_params={'learning_rate': 0.01}) train(train_data, test_data, net, softmax_xentropy, trainer, batch_size, epochs=1)
x = self.flatten(x) x = self.dense0(x) out = self.dense1(x) return out # 实例化网络、gpu、损失函数、优化器 net = CNN() ctx = utils.try_gpu() net.initialize(ctx=ctx) cross_loss = gluon.loss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), "adam", {'learning_rate': 0.1}) # 准备数据 batch_size = 256 train_data, test_data = load_data_fashion_mnist(batch_size, root='./fashion-mnist') # 训练 epoches = 10 for e in range(epoches): train_loss = 0 train_acc = 0 for data, label in train_data: data = data.as_in_context(ctx) label = label.as_in_context(ctx) with autograd.record(): output = net(data) loss = cross_loss(output, label) loss.backward() trainer.step(batch_size)
# Parameter setup for a one-hidden-layer MLP (784 -> 256 -> 10) on
# Fashion-MNIST, with gradients enabled on all weights and biases.
DOWNLOAD = False
transform1 = transforms.ToTensor()  # convert downloaded data to tensors
# transforms.Compose() chains several data transforms
transform2 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])  # normalize into [-1, 1]

# TODO for yxt
# have bug, result from mxnet framework
# it may be a dataset problem
# mT_trainset = dsets.MNIST(root="E:\py_cache\dataset",train=True,transform=transform1,download=DOWNLOAD)
# mT_testset = dsets.MNIST(root="E:\py_cache\dataset",train=False,transform=transform1,download=DOWNLOAD)

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, root='E:\py_cache\dataset')

num_inputs, num_outputs, num_hiddens = 784, 10, 256

W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)),
                  dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)),
                  dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)

params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_(requires_grad=True)
# -*- coding: utf-8 -*-
# Softmax regression with Gluon on Fashion-MNIST: a Flatten + Dense(10)
# model trained with SGD (lr=0.2) and softmax cross-entropy loss.
import sys
sys.path.append('..')
import utils
from mxnet import gluon
from mxnet import ndarray as nd
from mxnet import autograd

batch_size = 256
# CONSISTENCY FIX: the original passed the literal 256 here, silently
# ignoring the batch_size variable defined just above; changing batch_size
# would not have taken effect.
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

# model
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(10))
net.initialize()

# loss function
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# trainer
trainer = gluon.Trainer(net.collect_params(), 'SGD', {'learning_rate': 0.2})

# training (loop body continues beyond this chunk)
epoch = 20
for e in range(epoch):
    train_loss = 0
activation='relu')) out.add(nn.MaxPool2D(pool_size=2, strides=2)) return out def vgg_stack(arch): out = nn.Sequential() for (num_convs, channels) in arch: out.add(vgg_block(num_convs, channels)) return out block = vgg_block(2, 128) block.initialize() x = nd.random.uniform(shape=(2, 3, 16, 16)) y = block(x) num_outputs = 10 arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512)) net = nn.Sequential() with net.name_scope(): net.add(vgg_stack(arch), nn.Flatten(), nn.Dense(4096, activation='relu'), nn.Dropout(0.5), nn.Dense(4096, activation='relu'), nn.Dropout(0.5), nn.Dense(num_outputs)) net.initialize(ctx=ctx, init=init.Xavier()) train_data, test_data = utils.load_data_fashion_mnist(batch_size=10, resize=96) loss = gluon.loss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.05}) utils.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=1)
def train(self):
    """Train the model on Fashion-MNIST through utils.train_ch3.

    NOTE(review): lr is set to 100 here; train_ch3 presumably divides by the
    batch size when applying updates — confirm against utils.
    """
    n_epochs, lr, batch_size = 5, 100, 256
    loss = nn.CrossEntropyLoss()
    train_iter, test_iter = utils.load_data_fashion_mnist(batch_size)
    utils.train_ch3(self.net, train_iter, test_iter, loss, n_epochs,
                    batch_size, self.params, lr)
from mxnet import ndarray as nd
from mxnet import autograd
import random
from mxnet import gluon
import matplotlib.pyplot as plt  # plotting
import sys
sys.path.append('..')
import utils  # shared helper functions: downloading, loading datasets, etc.

##########################################################
#### Prepare input data ###
# A slightly harder dataset, very similar to MNIST, but with clothing
# categories instead of digits.

## Training and test datasets
batch_size = 256  # number of images fed in per training step
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

###########################################################
### Define the model ##################
# @@@@ Initialize model parameters: weights and biases @@@@
num_inputs = 28*28  # input is a 28x28 image
num_outputs = 10    # 10 output labels
num_hidden = 256    # single hidden layer with 256 units
weight_scale = .01  # scale of the normal distribution used for init

# input -> hidden: weight + bias
W1 = nd.random_normal(shape=(num_inputs, num_hidden), scale=weight_scale)
b1 = nd.zeros(num_hidden)
# hidden -> output: weight + bias (chunk is truncated after W2)
W2 = nd.random_normal(shape=(num_hidden, num_outputs), scale=weight_scale)
# coding=utf-8
# A four-hidden-layer MLP (4 x Dense(256, relu) -> Dense(10)) built with
# Gluon for Fashion-MNIST; parameters use the default initializer.
import sys
sys.path.append('..')
import utils

batch_size = 256
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

from mxnet import gluon

net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(256, activation="relu"))
    net.add(gluon.nn.Dense(256, activation="relu"))
    net.add(gluon.nn.Dense(256, activation="relu"))
    net.add(gluon.nn.Dense(256, activation="relu"))
    net.add(gluon.nn.Dense(10))
net.initialize()

# (Redundant re-imports kept as in the original source.)
import sys
sys.path.append('..')
from mxnet import ndarray as nd
from mxnet import autograd
import utils

batch_size = 256
Inception(512, 112, (144, 288), (32, 64), 64), Inception(528, 256, (160, 320), (32, 128), 128), nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128), Inception(832, 384, (192, 384), (48, 128), 128), utils.GlobalAvgPool2d()) print('查看网络结构') net = nn.Sequential(b1, b2, b3, b4, b5, utils.FlattenLayer(), nn.Linear(1024, 10)) X = torch.rand(1, 1, 96, 96) for blk in net.children(): X = blk(X) print('output shape: ', X.shape) print('获取和读取数据,这里缩减尺寸为 96') batch_size = 256 train_iter, test_iter = utils.load_data_fashion_mnist(batch_size, resize=96) print('训练模型,只 1 轮') lr, num_epochs = 0.002, 1 optimizer = torch.optim.Adam(net.parameters(), lr=lr) utils.train_cnn(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs) ''' 训练模型,只 1 轮 training on cpu epoch 1, loss 0.0058, train acc 0.414, test acc 0.754, time 2282.1 sec '''
nn.MaxPool2D(pool_size=3, strides=2), # 第二阶段 nn.Conv2D(channels=256, kernel_size=5, padding=2, activation='relu'), nn.MaxPool2D(pool_size=3, strides=2), # 第三阶段 nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'), nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'), nn.Conv2D(channels=256, kernel_size=3, padding=1, activation='relu'), nn.MaxPool2D(pool_size=3, strides=2), # 第四阶段 nn.Flatten(), nn.Dense(4096, activation="relu"), nn.Dropout(.5), # 第五阶段 nn.Dense(4096, activation="relu"), nn.Dropout(.5), # 第六阶段 nn.Dense(1)) train_data, test_data = utils.load_data_fashion_mnist(batch_size=64, resize=224) net.initialize(ctx=ctx, init=init.Xavier()) ############### 그래프 ############### import gluoncv gluoncv.utils.viz.plot_network(net) ##################################### loss = gluon.loss.SigmoidBinaryCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01}) utils.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=1)
nin_block(1, 96, kernel_size=11, stride=4, padding=0), nn.MaxPool2d(kernel_size=3, stride=2), nin_block(96, 256, kernel_size=5, stride=1, padding=2), nn.MaxPool2d(kernel_size=3, stride=2), nin_block(256, 384, kernel_size=3, stride=1, padding=1), nn.MaxPool2d(kernel_size=3, stride=2), nn.Dropout(0.5), # 标签类别数是10 nin_block(384, 10, kernel_size=3, stride=1, padding=1), GlobalAvgPool2d(), # 将四维的输出转成二维的输出,其形状为(批量大小, 10) d2l.FlattenLayer()) X = torch.rand(1, 1, 224, 224) for name, blk in net.named_children(): X = blk(X) print(name, 'output shape: ', X.shape) batch_size = 128 # 如出现“out of memory”的报错信息,可减小batch_size或resize train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224) lr, num_epochs = 0.002, 5 optimizer = torch.optim.Adam(net.parameters(), lr=lr) d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs) # NiN重复使⽤由卷积层和代替全连接层的1×1卷积层构成的NiN块来构建深层⽹络。 # NiN去除了容易造成过拟合的全连接输出层,而是将其替换成输出通道数等于标签类别数 的NiN块和全局平均池化层。 # NiN的以上设计思想影响了后⾯⼀系列卷积神经⽹络的设计。
# Convolutional network (2 conv/pool stages + 3 dense layers) for
# Fashion-MNIST, placed on GPU when available.
import sys
sys.path.append('..')
import utils
from utils import load_data_fashion_mnist

# environment: probe for a usable GPU, fall back to CPU
try:
    ctx = mx.gpu()
    _ = nd.zeros((1, ), ctx=ctx)
except Exception:
    # ROBUSTNESS FIX: the original bare `except:` also swallowed
    # KeyboardInterrupt/SystemExit; mx.gpu allocation failures raise
    # ordinary exceptions, so `except Exception` suffices.
    ctx = mx.cpu()
ctx

# data
batch_size = 256
train_data, test_data = load_data_fashion_mnist(batch_size)
learning_rate = 0.5

# model
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Conv2D(channels=50, kernel_size=3, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(256, activation='relu'))
    net.add(gluon.nn.Dense(64, activation='relu'))
    net.add(gluon.nn.Dense(10))
net.initialize()
nn.AvgPool2d(2, 2), nn.Conv2d(6, 16, 5), nn.Sigmoid(), nn.AvgPool2d(2, 2)) self.fc = nn.Sequential(nn.Linear(16 * 4 * 4, 120), nn.Sigmoid(), nn.Linear(120, 84), nn.Sigmoid(), nn.Linear(84, 10)) def forward(self, img): feature = self.conv(img) output = self.fc(feature.view(feature.shape[0], -1)) return output if __name__ == '__main__': net = LeNet() batch_size = 256 train_iter, test_iter = load_data_fashion_mnist( batch_size, root='./dataset/FashionMNIST') learning_rate = 1e-3 num_epochs = 20 optimizer = optim.Adam(net.parameters(), lr=learning_rate) net = net.to(device) print('training on ', device) loss = nn.CrossEntropyLoss() batch_count = 0 for epoch in range(num_epochs): train_l_sum, train_acc_sum, n, start = 0., 0., 0, time.time() for X, y in train_iter: X = X.to(device) y = y.to(device) y_pred = net(X) l = loss(y_pred, y) optimizer.zero_grad()