def main():
    """Reproduce overfitting on a reduced MNIST set, with optional dropout."""
    (train_x, train_label), (test_x, test_label) = load_mnist()

    # Shrink the training set to 300 samples so overfitting is easy to see.
    train_x = train_x[:300]
    train_label = train_label[:300]

    # Toggle dropout and its drop ratio here.
    use_dropout = False
    # use_dropout = True
    dropout_ratio = 0.2

    network = MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10,
        use_dropout=use_dropout,
        dropout_ratio=dropout_ratio)

    trainer = Trainer(network, train_x, train_label, test_x, test_label,
                      epochs=301, mini_batch_size=100,
                      optimizer='sgd', optimizer_param={'lr': 0.01},
                      verbose=True)
    trainer.train()

    # Plot the train/test accuracy curves collected during training.
    draw(trainer.train_acc_list, trainer.test_acc_list)
def main():
    """Train a DeepConvNet on MNIST, persist its parameters, plot accuracy."""
    # flatten=False keeps images as (N, 1, 28, 28) for the conv layers.
    (train_x, train_label), (test_x, test_label) = load_mnist(flatten=False)

    net = DeepConvNet()  # build the deep CNN

    trainer = Trainer(net, train_x, train_label, test_x, test_label,
                      epochs=20, mini_batch_size=100,
                      optimizer='Adam', optimizer_param={'lr': 0.001},
                      evaluate_sample_num_per_epoch=1000, verbose=True)
    trainer.train()

    # Save the trained parameters to disk once training is done.
    net.save_params()
    print("Saved Network Parameters!")

    # Plot the per-epoch train/test accuracy recorded by the trainer.
    draw(trainer.train_acc_list, trainer.test_acc_list)
def get_test_data():
    """Return the MNIST test set (normalized, flattened, integer labels)."""
    _, test_pair = load_mnist(normalize=True, flatten=True,
                              one_hot_label=False)
    return test_pair
def train():
    """Train a two-layer net on MNIST with mini-batch SGD.

    To sample a random batch each step while still visiting every sample
    exactly once per epoch, an index array is shuffled once and then walked
    sequentially with a cursor (plain random choice per step could miss
    samples within an epoch).

    Returns (train_acc_list, test_acc_list, train_loss_list).
    """
    (train_x, train_label), (test_x, test_label) = \
        load_mnist(normalize=True, one_hot_label=True)

    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    iterations = 10000
    batch_size = 100
    learning_rate = 0.1

    order = np.arange(train_x.shape[0])
    np.random.shuffle(order)

    loss_history = []
    train_acc_history = []
    test_acc_history = []

    cursor = 0      # position inside the shuffled index array
    epoch_num = 0

    # Round the iteration count so all samples are trained the same number
    # of times (whole epochs only, as in the original experiment).
    for _ in range(int(iterations / batch_size) * batch_size):
        picked = order[cursor:cursor + batch_size]
        batch_x = train_x[picked]
        batch_label = train_label[picked]

        # Backprop gradients: the numerical version is far too slow
        # (hours for ~1000 iterations vs seconds for 10000 with backprop).
        # grad = network.numerical_gradient(batch_x, batch_label)
        grad = network.gradient(batch_x, batch_label)

        # Vanilla SGD parameter update.
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss_history.append(network.loss(batch_x, batch_label))

        cursor += batch_size  # advance the cursor after each iteration
        # One full epoch finished: evaluate accuracy and rewind the cursor.
        if cursor >= train_x.shape[0]:
            cursor = 0
            epoch_num += 1
            train_acc = network.accuracy(train_x, train_label)
            test_acc = network.accuracy(test_x, test_label)
            train_acc_history.append(train_acc)
            test_acc_history.append(test_acc)
            print("No.%d epoch:" % epoch_num)
            print("train acc: %f\ttest acc: %f" % (train_acc, test_acc))

    return train_acc_history, test_acc_history, loss_history
def test():
    """Evaluate a pretrained DeepConvNet on the full MNIST test set."""
    _, (test_x, test_label) = load_mnist(flatten=False)

    net = DeepConvNet()
    net.load_params()  # restore previously trained parameters

    print("test acc:", net.accuracy(test_x, test_label))
def main():
    """Train a two-layer net built from per-layer classes on MNIST.

    Same experiment as chapter4's train_neural_network, but the network is
    assembled from the layer classes in two_layer_net, which hides layer
    internals and makes both construction and gradient computation simpler.

    Returns (train_acc_list, test_acc_list, train_loss_list).
    """
    (train_x, train_y), (test_x, test_y) = load_mnist(one_hot_label=True)
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    iterations = 100000
    train_size = train_x.shape[0]
    batch_size = 100
    learning_rate = 0.1

    loss_history = []
    train_acc_history = []
    test_acc_history = []

    # Shuffle an index array once, then walk it with a cursor so every
    # sample is visited exactly once per epoch.
    order = np.arange(train_size)
    np.random.shuffle(order)

    cursor = 0
    epoch_num = 0
    for _ in range(int(iterations / batch_size) * batch_size):
        picked = order[cursor:cursor + batch_size]
        batch_x, batch_y = train_x[picked], train_y[picked]

        grad = network.gradient(batch_x, batch_y)
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss_history.append(network.loss(batch_x, batch_y))

        cursor += batch_size
        if cursor >= train_size:  # completed one epoch
            cursor = 0
            epoch_num += 1
            train_acc = network.accuracy(train_x, train_y)
            test_acc = network.accuracy(test_x, test_y)
            train_acc_history.append(train_acc)
            test_acc_history.append(test_acc)
            print("No.%d epoch:" % epoch_num)
            print("train acc: %f\ttest acc: %f" % (train_acc, test_acc))

    return train_acc_history, test_acc_history, loss_history
def main():
    """Show which MNIST test images a pretrained DeepConvNet misclassifies."""
    _, (test_x, test_label) = load_mnist(flatten=False)

    # Optionally sub-sample for speed:
    # sampled = 1000
    # test_x = test_x[:sampled]
    # test_label = test_label[:sampled]

    net = DeepConvNet()  # build the network
    net.load_params()    # restore trained parameters

    # Predicted class id for every test sample, flattened to a 1-D array.
    predictions = get_predict_result(net, test_x, test_label)
    predictions = np.array(predictions).flatten()

    # Render the wrongly classified images.
    draw(predictions, test_x, test_label)
def main():
    """Compare training with and without batch-norm on a tiny MNIST subset."""
    (train_x, train_label), (test_x, test_label) = load_mnist()

    # Keep only 300 training samples so overfitting appears quickly.
    train_x, train_label = train_x[:300], train_label[:300]

    max_epoch = 201
    batch_size = 100
    learning_rate = 0.01
    epochs_axis = np.arange(max_epoch)  # x axis for the plot

    acc_list, bn_acc_list = train(train_x, train_label, test_x, test_label,
                                  learning_rate, max_epoch, batch_size)
    draw(acc_list, bn_acc_list, epochs_axis)
def main():
    """Compare DeepConvNet accuracy computed in float64 vs float16.

    Demonstrates that half-precision inference loses essentially no accuracy.
    """
    _, (test_x, test_label) = load_mnist(flatten=False)

    # Use only 1000 samples to keep the evaluation fast.
    sampled = 1000
    test_x = test_x[:sampled]
    test_label = test_label[:sampled]

    network = DeepConvNet()  # build the network
    network.load_params()    # load pretrained parameters

    # FIX: corrected the misspelled "caluculate" in both progress messages.
    print("calculate accuracy (float64) ... ")
    print(network.accuracy(test_x, test_label))

    # Downcast the inputs and every parameter tensor to half precision.
    test_x = test_x.astype(np.float16)
    for key, val in network.params.items():
        network.params[key] = val.astype(np.float16)

    print("calculate accuracy (float16) ... ")
    print(network.accuracy(test_x, test_label))
def main():
    """Gradient check: compare numerical vs backprop gradients.

    Numerical differentiation is slow but simple and hard to get wrong, so
    in practice it is used to validate the backprop implementation.
    """
    (train_x, train_y), _ = load_mnist(one_hot_label=True)
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    # Three samples are plenty for a gradient check.
    sample_x, sample_y = train_x[:3], train_y[:3]

    grad_numerical = network.numerical_gradient(sample_x, sample_y)
    grad_backprop = network.gradient(sample_x, sample_y)

    # Mean absolute difference per parameter; because of floating-point
    # rounding, a small (not exactly zero) diff still means the two methods
    # agree.
    for key in grad_numerical.keys():
        diff = np.average(np.abs(grad_numerical[key] - grad_backprop[key]))
        print(key + ": " + str(diff))
def main():
    """Sweep weight-initialization scales and compare batch-norm vs plain nets.

    Trains one pair of networks per scale and plots each comparison.
    """
    (train_x, train_label), _ = load_mnist()
    train_x = train_x[:1000]
    train_label = train_label[:1000]

    max_epoch = 20
    batch_size = 100
    learning_rate = 0.01

    # Weight scales logarithmically spaced from 1e0 down to 1e-4.
    weight_scale_list = np.logspace(0, -4, num=16)
    # FIX: progress banner used a hard-coded "16"; derive it from the list so
    # changing num= above keeps the output correct. Printed text is unchanged.
    total = len(weight_scale_list)
    x = np.arange(max_epoch)  # x axis for the plots

    for i, w in enumerate(weight_scale_list):
        print("============== " + str(i + 1) + "/" + str(total)
              + " ==============")
        train_acc_list, bn_train_acc_list = train(train_x, train_label, w,
                                                  learning_rate, max_epoch,
                                                  batch_size)
        draw(train_acc_list, bn_train_acc_list, i, w, x)

    plt.tight_layout()
    plt.show()
def main():
    """Gradient check for a batch-norm network: backprop vs numerical."""
    (train_x, train_label), _ = load_mnist(one_hot_label=True)

    # Two hidden layers with batch normalization enabled.
    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100],
                                  output_size=10, use_batchnorm=True)

    # A single training sample suffices for the check.
    batch_x, batch_label = train_x[:1], train_label[:1]

    # Compute gradients both ways.
    grad_backprop = network.gradient(batch_x, batch_label)
    grad_numerical = network.numerical_gradient(batch_x, batch_label)

    # Mean absolute difference per parameter; near-zero means they agree.
    for key in grad_numerical.keys():
        diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
        print(key + ":" + str(diff))
def main():
    """Train a SimpleConvNet on MNIST, save its parameters, plot accuracy."""
    # flatten=False keeps images as (N, 1, 28, 28) for the conv layer.
    (train_x, train_label), (test_x, test_label) = load_mnist(flatten=False)

    net = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5,
                                    'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)

    trainer = Trainer(net, train_x, train_label, test_x, test_label,
                      epochs=20, mini_batch_size=100,
                      optimizer='Adam', optimizer_param={'lr': 0.001},
                      evaluate_sample_num_per_epoch=1000, verbose=True)
    trainer.train()

    # Persist trained parameters after training.
    net.save_params()
    print("Saved Network Parameters!")

    # Plot the per-epoch train/test accuracy recorded by the trainer.
    draw(trainer.train_acc_list, trainer.test_acc_list)
def main():
    """Random-search lr and L2 weight decay on a small MNIST validation split."""
    # Only the first 500 training samples are used, to keep the search fast.
    (train_x, train_label), _ = load_mnist()
    train_x, train_label = train_x[:500], train_label[:500]

    # Shuffle first, then carve out 20% of the data as a validation set.
    validation_rate = 0.2
    validation_num = int(train_x.shape[0] * validation_rate)
    train_x, train_label = shuffle_dataset(train_x, train_label)
    val_x, val_label = train_x[:validation_num], train_label[:validation_num]
    train_x = train_x[validation_num:]
    train_label = train_label[validation_num:]

    # 100 random trials over the two hyper-parameters.
    optimization_trial = 100
    val_result = {}
    train_result = {}
    for _ in range(optimization_trial):
        # Sample both hyper-parameters log-uniformly in their search ranges.
        weight_decay = 10 ** np.random.uniform(-8, -4)
        lr = 10 ** np.random.uniform(-6, -2)

        # Train with this trial's hyper-parameters and collect accuracies.
        val_acc_list, train_acc_list = train(train_x, train_label,
                                             val_x, val_label,
                                             lr, weight_decay)
        print("val acc: " + str(val_acc_list[-1]) + " | lr: " + str(lr)
              + " | weight decay: " + str(weight_decay))

        # Store the full curves keyed by the hyper-parameters used.
        key = "lr: " + str(lr) + ", weight decay: " + str(weight_decay)
        val_result[key] = val_acc_list
        train_result[key] = train_acc_list

    print("\n========== Hyper-Parameter Optimization Result ===========")
    draw(val_result, train_result)
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Gradient check: compare backprop gradients against numerical ones.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# A tiny batch of three samples is enough for the check.
sample_x, sample_t = x_train[:3], t_train[:3]

grad_numerical = network.numerical_gradient(sample_x, sample_t)
grad_backprop = network.gradient(sample_x, sample_t)

# Mean absolute error per weight; a tiny value means the two agree.
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
def get_data():
    """Return the normalized, flattened MNIST test set with integer labels."""
    _, test_pair = load_mnist(normalize=True, flatten=True,
                              one_hot_label=False)
    return test_pair
# coding: utf-8
from common.mnist import load_mnist
from deep_convnet import DeepConvNet
from trainer import Trainer

# Train a deep CNN on MNIST; flatten=False keeps the (N, 1, 28, 28) shape.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

network = DeepConvNet()

# Training hyper-parameters, gathered in one place for readability.
hyper = {'epochs': 20, 'mini_batch_size': 100,
         'optimizer': 'Adam', 'optimizer_param': {'lr': 0.001},
         'evaluate_sample_num_per_epoch': 1000}
trainer = Trainer(network, x_train, t_train, x_test, t_test, **hyper)
trainer.train()

# Persist the trained parameters.
network.save_params("deep_convnet_params.pkl")
print("Saved Network Parameters!")
# -*- coding: utf-8 -*-
# @Time : 2018-08-02 16:13
# @Author : Jayce Wong
# @ProjectName : Deep_Learning_From_Scratch
# @FileName : mnist_show.py
# @Blog : http://blog.51cto.com/jayce1111
# @Github : https://github.com/SysuJayce

import numpy as np
from PIL import Image

from common.mnist import load_mnist


def img_show(arr):
    """Render a numpy image array in the system image viewer."""
    Image.fromarray(np.uint8(arr)).show()


# Load raw (unnormalized) MNIST with each image flattened to 784 pixels.
(train_X, train_y), (test_X, test_y) = load_mnist(normalize=False,
                                                  flatten=True,
                                                  one_hot_label=False)

# Inspect the first training sample and its label.
img = train_X[0]
label = train_y[0]
print(label)

print(img.shape)
img = img.reshape(28, 28)  # restore the 2-D 28x28 layout for display
print(img.shape)

img_show(img)
from common.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from optimizer import *
import numpy as np

# Overfitting experiment: a 6-hidden-layer net trained on only 300 samples.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
x_train = x_train[:300]
t_train = t_train[:300]

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10)
optimizer = SGD(lr=0.01)  # plain SGD with a small learning rate

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

# NOTE(review): this chunk appears truncated — iter_per_epoch, epoch_cnt and
# the accuracy/loss lists above are initialized but never used in the visible
# loop body, and there is no break after max_epochs, so as shown the loop runs
# 1,000,000 raw SGD steps. Code left unchanged pending the missing remainder.
for i in range(1000000):
    # Draw a random mini-batch (with possible repeats across iterations).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)
def main():
    """Compare weight initializations (std=0.01 / Xavier / He) on MNIST."""
    (train_x, train_label), _ = load_mnist()
    train_size = train_x.shape[0]
    batch_size = 128
    max_iterations = 2000

    # Shuffle the sample order once; batches are then read sequentially.
    order = np.arange(train_size)
    np.random.shuffle(order)

    optimizer = SGD(lr=0.01)  # the same optimizer drives every network

    # The three initialization schemes under comparison.
    weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}

    # One 5-layer fully-connected network per scheme, plus its loss history.
    networks = {}
    train_loss_list = {}
    for key, weight_init_type in weight_init_types.items():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10,
                                      weight_init_std=weight_init_type)
        train_loss_list[key] = []

    cursor = 0
    for i in range(max_iterations):
        # Fetch the next mini-batch via the shared helper.
        batch_x, batch_label, cursor = get_batch(train_x, train_label, order,
                                                 batch_size, cursor)

        # Update every network on the same batch and record its loss.
        for key in weight_init_types.keys():
            grads = networks[key].gradient(batch_x, batch_label)
            optimizer.update(networks[key].params, grads)
            train_loss_list[key].append(
                networks[key].loss(batch_x, batch_label))

        # Progress report every 100 iterations.
        if i % 100 == 0:
            print("=" * 15 + "iteration: " + str(i) + "=" * 15)
            for key in weight_init_types.keys():
                print(key + ": " + str(train_loss_list[key][-1]))

    # Plot the smoothed loss curve of each scheme.
    markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
    x = np.arange(max_iterations)
    for key in weight_init_types.keys():
        plt.plot(x, smooth_curve(train_loss_list[key]), marker=markers[key],
                 markevery=100, label=key)
    plt.xlabel("iterations")
    plt.ylabel("loss")
    plt.ylim(0, 2.5)
    plt.legend()
    plt.show()
def main():
    """Benchmark SGD / Momentum / AdaGrad / Adam / RMSProp on MNIST."""
    (train_x, train_label), _ = load_mnist()
    train_size = train_x.shape[0]
    batch_size = 128
    max_iterations = 2000

    # Shuffle the sample order once; batches are then read sequentially.
    order = np.arange(train_size)
    np.random.shuffle(order)

    # The optimizers under comparison (insertion order fixes iteration order).
    optimizers = {'SGD': SGD(), 'Momentum': Momentum(), 'AdaGrad': AdaGrad(),
                  'Adam': Adam(), 'RMSProp': RMSProp()}

    # One 5-layer fully-connected network per optimizer, plus its loss history.
    networks = {}
    train_loss_list = {}
    for key in optimizers.keys():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10)
        train_loss_list[key] = []

    cursor = 0
    for i in range(max_iterations):
        # Fetch the next mini-batch via the shared helper.
        batch_x, batch_label, cursor = get_batch(train_x, train_label, order,
                                                 batch_size, cursor)

        # Each optimizer trains its own network copy on the same batch.
        for key in optimizers.keys():
            grads = networks[key].gradient(batch_x, batch_label)
            optimizers[key].update(networks[key].params, grads)
            train_loss_list[key].append(
                networks[key].loss(batch_x, batch_label))

        # Progress report every 100 iterations.
        if i % 100 == 0:
            print("=" * 15 + "iteration: " + str(i) + "=" * 15)
            for key in optimizers.keys():
                print(key + ": " + str(train_loss_list[key][-1]))

    # Plot the smoothed loss curve of each optimizer.
    markers = {'SGD': 'o', 'Momentum': 'x', 'AdaGrad': 's', 'Adam': 'D',
               'RMSProp': 'v'}
    x = np.arange(max_iterations)
    for key in optimizers.keys():
        plt.plot(x, smooth_curve(train_loss_list[key]), marker=markers[key],
                 markevery=100, label=key)
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.ylim(0, 1)
    plt.legend()
    plt.show()