Code example #1
def main():
    (train_x, train_label), (test_x, test_label) = load_mnist()
    # Reduce the amount of training data in order to reproduce overfitting
    train_x = train_x[: 300]
    train_label = train_label[: 300]

    # Set whether to use Dropout, and the dropout ratio ========================
    use_dropout = False
    # use_dropout = True
    dropout_ratio = 0.2

    network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10, use_dropout=use_dropout, dropout_ratio=dropout_ratio)

    trainer = Trainer(network, train_x, train_label, test_x, test_label,
                      epochs=301, mini_batch_size=100, optimizer='sgd',
                      optimizer_param={'lr': 0.01}, verbose=True)

    trainer.train()

    train_acc_list = trainer.train_acc_list
    test_acc_list = trainer.test_acc_list

    draw(train_acc_list, test_acc_list)
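The dropout_ratio above only takes effect when use_dropout is True. For reference, below is a minimal sketch of a standard Dropout layer of the kind MultiLayerNetExtend typically inserts after each activation; the actual layer class used by this repository is not shown in this listing, so treat it as an assumption about the usual technique: during training each unit is dropped with probability dropout_ratio, and at test time activations are scaled by (1 - dropout_ratio).

import numpy as np

class Dropout:
    """Minimal sketch of a standard (non-inverted) dropout layer."""
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # Keep each unit with probability (1 - dropout_ratio).
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        # At test time, scale the activations instead of dropping units.
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # Gradients flow only through the units that were kept.
        return dout * self.mask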
Code example #2
def main():
    # Load the MNIST data
    (train_x, train_label), (test_x, test_label) = load_mnist(flatten=False)

    # Build the deep CNN
    network = DeepConvNet()

    # Create a trainer
    trainer = Trainer(network,
                      train_x,
                      train_label,
                      test_x,
                      test_label,
                      epochs=20,
                      mini_batch_size=100,
                      optimizer='Adam',
                      optimizer_param={'lr': 0.001},
                      evaluate_sample_num_per_epoch=1000,
                      verbose=True)

    trainer.train()  # train the network constructed above

    network.save_params()  # persist the parameters once training finishes
    print("Saved Network Parameters!")

    # Accuracy on the training and test sets recorded during training
    train_acc_list = trainer.train_acc_list
    test_acc_list = trainer.test_acc_list

    draw(train_acc_list, test_acc_list)  # plot how the accuracy changes
Code example #3
def get_test_data():
    """
    Fetch the test-set data
    :return:
    """
    _, (test_X, test_y) = load_mnist(normalize=True,
                                     flatten=True,
                                     one_hot_label=False)
    return test_X, test_y
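For context, a helper like this is normally consumed by an inference loop that runs a trained network over the returned test set and counts correct predictions. The sketch below is hypothetical usage: it assumes some trained `network` object with a predict() method that returns per-class scores, which is not part of this listing.

import numpy as np

test_X, test_y = get_test_data()
accuracy_cnt = 0
batch_size = 100
for i in range(0, len(test_X), batch_size):
    scores = network.predict(test_X[i:i + batch_size])  # `network` is assumed to exist
    pred = np.argmax(scores, axis=1)                     # predicted class per sample
    accuracy_cnt += int(np.sum(pred == test_y[i:i + batch_size]))
print("Accuracy:", float(accuracy_cnt) / len(test_X))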
Code example #4
def train():
    (train_x, train_label), (test_x, test_label) =\
        load_mnist(normalize=True, one_hot_label=True)
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
    iterations = 10000
    """
    为了实现每次随机选取一个batch_size的样本来训练,一般有2种做法
    1. 先打乱训练集的样本顺序,然后按顺序每次取batch_size个样本
    2. 不改变训练集的样本顺序,每次随机选取batch_size个样本。
    
    但是2的做法极有可能一个epoch不能遍历所有样本,因此我就想到先声明一个
    下标列表,然后打乱顺序,再按顺序访问即可
    """
    batch_mask = np.arange(train_x.shape[0])
    np.random.shuffle(batch_mask)

    batch_size = 100
    learning_rate = 0.1

    train_loss_list_ = []
    train_acc_list_ = []
    test_acc_list_ = []

    left = 0  # cursor used when taking samples in order
    epoch_num = 0
    # Rather than running exactly `iterations` updates, train for the largest
    # whole number of epochs that fits within `iterations`, so that every
    # sample is trained on the same number of times
    # (one epoch = one full pass over the training set).
    iter_per_epoch = train_x.shape[0] // batch_size
    for i in range(iterations // iter_per_epoch * iter_per_epoch):
        batch_x = train_x[batch_mask[left:left + batch_size]]
        batch_label = train_label[batch_mask[left:left + batch_size]]

        # Computing the gradient numerically is far too slow: a run of a dozen
        # hours only got through ~1000 iterations.
        # grad = network.numerical_gradient(batch_x, batch_label)
        # Backpropagation is much faster: 10000 iterations finish in tens of seconds.
        grad = network.gradient(batch_x, batch_label)

        # Update the network parameters
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss = network.loss(batch_x, batch_label)
        train_loss_list_.append(loss)

        left += batch_size  # advance the cursor at the end of this iteration

        # When an epoch finishes, evaluate the accuracy once
        if left >= train_x.shape[0]:
            left = 0
            epoch_num += 1
            train_acc = network.accuracy(train_x, train_label)
            test_acc = network.accuracy(test_x, test_label)
            train_acc_list_.append(train_acc)
            test_acc_list_.append(test_acc)
            print("No.%d epoch:" % epoch_num)
            print("train acc: %f\ttest acc: %f" % (train_acc, test_acc))

    return train_acc_list_, test_acc_list_, train_loss_list_
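The sketch below shows approach 2 from the docstring in train(): drawing a fresh random batch with np.random.choice on every iteration, as code example #19 also does. With this scheme one epoch's worth of iterations is not guaranteed to visit every sample.

# Approach 2 (sketch): draw a fresh random batch index set on every iteration.
import numpy as np

iterations = 10000
batch_size = 100
train_size = train_x.shape[0]  # train_x / train_label as loaded in train()
for i in range(iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    batch_x = train_x[batch_mask]
    batch_label = train_label[batch_mask]
    # ... compute the gradient and update the parameters exactly as in train() ...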
Code example #5
def test():
    # Load the MNIST data
    _, (test_x, test_label) = load_mnist(flatten=False)

    network = DeepConvNet()
    network.load_params()

    test_acc = network.accuracy(test_x, test_label)
    print("test acc:", test_acc)
Code example #6
def main():
    """
    和chapter4的train_neural_network一样,都是利用two_layer_net中的TwoLayer类
    构造神经网络,然后对MNIST的数据集进行预测并计算准确率

    但是比chapter4中的实现要方便巧妙得多,因为将每一层设计成一个类,然后在构造
    神经网络的时候就不用管层里面的具体细节,方便构造也方便求梯度
    :return:
    """
    (train_x, train_y), (test_x, test_y) = load_mnist(one_hot_label=True)
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    iterations = 100000
    train_size = train_x.shape[0]
    batch_size = 100
    learning_rate = 0.1

    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    left = 0
    epoch_num = 0
    iter_per_epoch = train_size // batch_size
    # As in the chapter 4 version, train for a whole number of epochs.
    for i in range(iterations // iter_per_epoch * iter_per_epoch):
        batch_x = train_x[batch_mask[left:left + batch_size]]
        batch_y = train_y[batch_mask[left:left + batch_size]]

        grad = network.gradient(batch_x, batch_y)

        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss = network.loss(batch_x, batch_y)
        train_loss_list.append(loss)

        left += batch_size
        if left >= train_size:
            left = 0
            epoch_num += 1
            train_acc = network.accuracy(train_x, train_y)
            test_acc = network.accuracy(test_x, test_y)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print("No.%d epoch:" % epoch_num)
            print("train acc: %f\ttest acc: %f" % (train_acc, test_acc))

    return train_acc_list, test_acc_list, train_loss_list
Code example #7
def main():
    _, (test_x, test_label) = load_mnist(flatten=False)
    # sampled = 1000
    # test_x = test_x[:sampled]
    # test_label = test_label[:sampled]

    network = DeepConvNet()  # build the network
    network.load_params()  # load the previously trained parameters

    # Predict on the test set to get a predicted class for every test sample
    classified_ids = get_predict_result(network, test_x, test_label)
    classified_ids = np.array(classified_ids).flatten()

    # Display the misclassified images
    draw(classified_ids, test_x, test_label)
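get_predict_result and draw are not part of this listing. Judging only from how classified_ids is used above (flattened into one array of predicted class ids and compared against test_label), a plausible sketch of get_predict_result could look like the following; the batch size and the exact predict() behavior are assumptions.

import numpy as np

def get_predict_result(network, test_x, test_label, batch_size=100):
    # Hypothetical helper: predict in mini-batches and collect the predicted
    # class id for every test sample. test_label is accepted to match the
    # call site but is not needed for prediction itself.
    classified_ids = []
    for i in range(0, test_x.shape[0], batch_size):
        scores = network.predict(test_x[i:i + batch_size])
        classified_ids.append(np.argmax(scores, axis=1))
    return classified_ids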
Code example #8
def main():
    (train_x, train_label), (test_x, test_label) = load_mnist()
    # Reduce the amount of training data in order to reproduce overfitting
    train_x = train_x[:300]
    train_label = train_label[:300]

    max_epoch = 201
    batch_size = 100
    learning_rate = 0.01

    x = np.arange(max_epoch)  # x-axis values for plotting

    train_acc_list, bn_train_acc_list = train(train_x, train_label, test_x,
                                              test_label, learning_rate,
                                              max_epoch, batch_size)
    draw(train_acc_list, bn_train_acc_list, x)
Code example #9
def main():
    _, (test_x, test_label) = load_mnist(flatten=False)

    sampled = 1000  # shrink the sample size to speed things up
    test_x = test_x[:sampled]
    test_label = test_label[:sampled]

    network = DeepConvNet()  # build the network
    network.load_params()  # load the previously trained parameters

    print("caluculate accuracy (float64) ... ")
    print(network.accuracy(test_x, test_label))

    # 转换为float16型
    test_x = test_x.astype(np.float16)
    for key, val in network.params.items():
        network.params[key] = val.astype(np.float16)

    print("caluculate accuracy (float16) ... ")
    print(network.accuracy(test_x, test_label))
Code example #10
def main():
    """
    对比数值方法求梯度和反向传播方法求梯度的准确性。
    由于数值方法计算速度慢,但是设计简单,编程不易出错,因此在实践中一般用来检验
    反向传播求梯度的代码有没写错
    :return:
    """
    (train_x, train_y), _ = load_mnist(one_hot_label=True)
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    x_batch = train_x[:3]
    y_batch = train_y[:3]

    grad_numerical = network.numerical_gradient(x_batch, y_batch)
    grad_backprop = network.gradient(x_batch, y_batch)

    for key in grad_numerical.keys():
        # Mean of the absolute differences between the two gradients.
        # Because of floating-point precision, a sufficiently small diff means
        # the two results can be considered identical.
        diff = np.average(np.abs(grad_numerical[key] - grad_backprop[key]))
        print(key + ": " + str(diff))
Code example #11
def main():
    (train_x, train_label), _ = load_mnist()
    train_x = train_x[: 1000]
    train_label = train_label[: 1000]

    max_epoch = 20
    batch_size = 100
    learning_rate = 0.01

    weight_scale_list = np.logspace(0, -4, num=16)  # 16 initialization scales, log-spaced from 1 down to 1e-4
    x = np.arange(max_epoch)  # x-axis values for plotting

    for i, w in enumerate(weight_scale_list):
        print("============== " + str(i + 1) + "/16" + " ==============")
        train_acc_list, bn_train_acc_list = train(train_x, train_label, w,
                                                  learning_rate, max_epoch,
                                                  batch_size)
        draw(train_acc_list, bn_train_acc_list, i, w, x)

    plt.tight_layout()
    plt.show()
Code example #12
def main():
    # Load the data
    (train_x, train_label), _ = load_mnist(one_hot_label=True)

    # Build the network
    network = MultiLayerNetExtend(input_size=784,
                                  hidden_size_list=[100, 100],
                                  output_size=10,
                                  use_batchnorm=True)

    # Use just a single training sample for the check
    batch_x = train_x[:1]
    batch_label = train_label[:1]

    # Compute the gradient with backpropagation and with the numerical method
    grad_backprop = network.gradient(batch_x, batch_label)
    grad_numerical = network.numerical_gradient(batch_x, batch_label)

    # Compare the results of the two methods
    for key in grad_numerical.keys():
        diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
        print(key + ":" + str(diff))
Code example #13
def main():
    # Load the MNIST data
    (train_x, train_label), (test_x, test_label) = load_mnist(flatten=False)

    # Build the CNN
    network = SimpleConvNet(input_dim=(1, 28, 28),
                            conv_param={
                                'filter_num': 30,
                                'filter_size': 5,
                                'pad': 0,
                                'stride': 1
                            },
                            hidden_size=100,
                            output_size=10,
                            weight_init_std=0.01)

    # Create a trainer
    trainer = Trainer(network,
                      train_x,
                      train_label,
                      test_x,
                      test_label,
                      epochs=20,
                      mini_batch_size=100,
                      optimizer='Adam',
                      optimizer_param={'lr': 0.001},
                      evaluate_sample_num_per_epoch=1000,
                      verbose=True)

    trainer.train()  # train the network constructed above

    network.save_params()  # persist the parameters once training finishes
    print("Saved Network Parameters!")

    # Accuracy on the training and test sets recorded during training
    train_acc_list = trainer.train_acc_list
    test_acc_list = trainer.test_acc_list

    draw(train_acc_list, test_acc_list)  # plot how the accuracy changes
Code example #14
def main():
    # Load MNIST; to speed up the search, use only the first 500 training samples
    (train_x, train_label), _ = load_mnist()
    train_x = train_x[: 500]
    train_label = train_label[: 500]

    # Split off part of the training set as a validation set
    validation_rate = 0.2
    validation_num = int(train_x.shape[0] * validation_rate)
    # Shuffle the training set before splitting
    train_x, train_label = shuffle_dataset(train_x, train_label)
    val_x = train_x[: validation_num]
    val_label = train_label[: validation_num]
    train_x = train_x[validation_num:]
    train_label = train_label[validation_num:]

    # Run 100 trials to search for the optimal hyperparameters
    optimization_trial = 100
    val_result = {}
    train_result = {}

    for _ in range(optimization_trial):
        # Randomly sample the L2-regularization strength and the learning rate
        # from the given (log-scale) search ranges
        weight_decay = 10 ** np.random.uniform(-8, -4)
        lr = 10 ** np.random.uniform(-6, -2)

        # Train with the hyperparameters sampled in this trial and collect the
        # accuracy on the validation and training sets
        val_acc_list, train_acc_list = train(train_x, train_label, val_x,
                                             val_label, lr, weight_decay)
        print("val acc: " + str(val_acc_list[-1]) + " | lr: " + str(lr) +
              " | weight decay: " + str(weight_decay))
        # Save this trial's results, keyed by the hyperparameters used
        key = "lr: " + str(lr) + ", weight decay: " + str(weight_decay)
        val_result[key] = val_acc_list
        train_result[key] = train_acc_list

    print("\n========== Hyper-Parameter Optimization Result ===========")
    draw(val_result, train_result)
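shuffle_dataset is not included in this listing. In the book's common utilities it is a small helper that applies one and the same random permutation to the samples and their labels; a sketch under that assumption:

import numpy as np

def shuffle_dataset(x, t):
    # Apply a single random permutation to both the samples and the labels
    # so that corresponding pairs stay aligned.
    permutation = np.random.permutation(x.shape[0])
    x = x[permutation] if x.ndim == 2 else x[permutation, :, :, :]
    t = t[permutation]
    return x, t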
Code example #15
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# Compute the mean absolute difference for each weight.
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
Code example #16
def get_data():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                      flatten=True,
                                                      one_hot_label=False)
    return x_test, t_test
Code example #17
# coding: utf-8
from common.mnist import load_mnist
from deep_convnet import DeepConvNet
from trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

network = DeepConvNet()
trainer = Trainer(network,
                  x_train,
                  t_train,
                  x_test,
                  t_test,
                  epochs=20,
                  mini_batch_size=100,
                  optimizer='Adam',
                  optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

# Save the parameters
network.save_params("deep_convnet_params.pkl")
print("Saved Network Parameters!")
Code example #18
# -*- coding: utf-8 -*-
# @Time         : 2018-08-02 16:13
# @Author       : Jayce Wong
# @ProjectName  : Deep_Learning_From_Scratch
# @FileName     : mnist_show.py
# @Blog         : http://blog.51cto.com/jayce1111
# @Github       : https://github.com/SysuJayce

import numpy as np
from PIL import Image
from common.mnist import load_mnist


def img_show(img_):
    pil_img = Image.fromarray(np.uint8(img_))
    pil_img.show()


(train_X, train_y), (test_X, test_y) = load_mnist(normalize=False,
                                                  flatten=True,
                                                  one_hot_label=False)

img = train_X[0]
label = train_y[0]
print(label)
print(img.shape)
img = img.reshape(28, 28)
print(img.shape)
img_show(img)
Code example #19
from common.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from optimizer import *
import numpy as np

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
x_train = x_train[:300]
t_train = t_train[:300]

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10)
optimizer = SGD(lr=0.01)
max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Evaluate accuracy once per epoch and stop after max_epochs epochs.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break
Code example #20
def main():
    (train_x, train_label), _ = load_mnist()
    train_size = train_x.shape[0]
    batch_size = 128
    max_iterations = 2000

    # Shuffle the training-set order; each iteration then takes one batch of
    # samples in that shuffled order
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    # Use the SGD optimizer
    optimizer = SGD(lr=0.01)

    # The weight-initialization schemes to compare: std=0.01, Xavier, and He
    weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}

    # Create a five-layer fully connected network for each initialization scheme
    networks = {}
    train_loss_list = {}
    for key, weight_init_type in weight_init_types.items():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10,
                                      weight_init_std=weight_init_type)
        train_loss_list[key] = []  # record the training loss for each initialization scheme

    left = 0
    for i in range(max_iterations):
        # Fetch one batch
        batch_x, batch_label, left = get_batch(train_x, train_label,
                                               batch_mask, batch_size, left)

        # Compute the gradient for each network and update its parameters
        # with SGD; record the loss after every update
        for key in weight_init_types.keys():
            grads = networks[key].gradient(batch_x, batch_label)
            optimizer.update(networks[key].params, grads)

            loss = networks[key].loss(batch_x, batch_label)
            train_loss_list[key].append(loss)

        # Every 100 iterations, print the current loss of each network
        if i % 100 == 0:
            print("=" * 15 + "iteration: " + str(i) + "=" * 15)
            for key in weight_init_types.keys():
                loss = train_loss_list[key][-1]
                print(key + ": " + str(loss))

    # Plot the loss against the number of iterations
    markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
    x = np.arange(max_iterations)
    for key in weight_init_types.keys():
        plt.plot(x,
                 smooth_curve(train_loss_list[key]),
                 marker=markers[key],
                 markevery=100,
                 label=key)
    plt.xlabel("iterations")
    plt.ylabel("loss")
    plt.ylim(0, 2.5)
    plt.legend()
    plt.show()
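In weight_init_types above, 'Xavier' is mapped to the string 'sigmoid' and 'He' to 'relu', which MultiLayerNet presumably interprets as the Xavier and He initialization rules. The sketch below shows the usual rules with a hypothetical helper name (the real selection logic lives inside MultiLayerNet and is not shown in this listing): Xavier uses std = 1/sqrt(n_prev) and He uses std = sqrt(2/n_prev), where n_prev is the number of units feeding the layer.

import numpy as np

def init_std(weight_init_std, n_prev):
    # Hypothetical helper mirroring the usual interpretation of the values
    # passed in weight_init_types:
    #   a number             -> use it directly as the std of the Gaussian init
    #   'sigmoid' / 'xavier' -> Xavier initialization, std = 1 / sqrt(n_prev)
    #   'relu' / 'he'        -> He initialization,     std = sqrt(2 / n_prev)
    if isinstance(weight_init_std, (int, float)):
        return float(weight_init_std)
    name = str(weight_init_std).lower()
    if name in ('sigmoid', 'xavier'):
        return 1.0 / np.sqrt(n_prev)
    if name in ('relu', 'he'):
        return np.sqrt(2.0 / n_prev)
    raise ValueError("unknown weight_init_std: " + str(weight_init_std))

# For the first hidden layer (784 inputs):
#   init_std(0.01, 784)      -> 0.01
#   init_std('sigmoid', 784) -> ~0.0357 (Xavier)
#   init_std('relu', 784)    -> ~0.0505 (He)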
def main():
    (train_x, train_label), _ = load_mnist()
    train_size = train_x.shape[0]
    batch_size = 128
    max_iterations = 2000

    # Shuffle the training-set order; each iteration then takes one batch of
    # samples in that shuffled order
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    # The optimizers to compare
    optimizers = {}
    optimizers['SGD'] = SGD()
    optimizers['Momentum'] = Momentum()
    optimizers['AdaGrad'] = AdaGrad()
    optimizers['Adam'] = Adam()
    optimizers['RMSProp'] = RMSProp()

    # Create a five-layer fully connected network for each optimizer
    networks = {}
    train_loss_list = {}
    for key in optimizers.keys():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10)
        train_loss_list[key] = []  # record the training loss for each optimizer

    left = 0
    for i in range(max_iterations):
        # Fetch one batch
        batch_x, batch_label, left = get_batch(train_x, train_label,
                                               batch_mask, batch_size, left)

        # Compute the gradient, update the parameters with each optimizer,
        # and record the loss after every update
        for key in optimizers.keys():
            grads = networks[key].gradient(batch_x, batch_label)
            optimizers[key].update(networks[key].params, grads)

            loss = networks[key].loss(batch_x, batch_label)
            train_loss_list[key].append(loss)

        # Every 100 iterations, print the current loss of each optimizer
        if i % 100 == 0:
            print("=" * 15 + "iteration: " + str(i) + "=" * 15)
            for key in optimizers.keys():
                loss = train_loss_list[key][-1]
                print(key + ": " + str(loss))

    # Plot the loss against the number of iterations
    markers = {
        'SGD': 'o',
        'Momentum': 'x',
        'AdaGrad': 's',
        'Adam': 'D',
        'RMSProp': 'v'
    }
    x = np.arange(max_iterations)
    for key in optimizers.keys():
        plt.plot(x,
                 smooth_curve(train_loss_list[key]),
                 marker=markers[key],
                 markevery=100,
                 label=key)

    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.ylim(0, 1)
    plt.legend()
    plt.show()
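Both main() functions in code example #20 call a get_batch helper that is not included in this listing. Judging only from the call site (it returns batch_x, batch_label, and the advanced cursor left, while batch_mask is a pre-shuffled index array), a plausible sketch is the following hypothetical implementation:

def get_batch(train_x, train_label, batch_mask, batch_size, left):
    # Hypothetical helper: take the next `batch_size` shuffled indices,
    # wrapping the cursor back to 0 when the index array is exhausted.
    if left + batch_size > len(batch_mask):
        left = 0
    idx = batch_mask[left:left + batch_size]
    return train_x[idx], train_label[idx], left + batch_size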