Example #1
def main():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                      one_hot_label=True)
    x_train = x_train[:300]
    t_train = t_train[:300]

    max_epochs = 201
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01
    # weight_decay_lambda = 0
    weight_decay_lambda = 0.1

    train_acc_list = []
    test_acc_list = []

    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay_lambda)
    optimizer = SGD(lr=learning_rate)

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)

            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) + \
                    ", test acc:" + str(test_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    markers = {'train': 'o', 'test': 's'}
    x = np.arange(max_epochs)
    plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
    plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()
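
The example above toggles weight_decay_lambda between 0 and 0.1 to show how L2 weight decay curbs overfitting on the reduced 300-sample training set. For reference, here is a minimal sketch of how such a penalty is typically folded into the loss and gradients; the helper names and the 'W1'/'b1' key layout are illustrative assumptions, not the network's actual code.

# Hypothetical sketch: adding an L2 weight-decay term to a loss and its gradients.
# `params` and `grads` are assumed to be dicts keyed 'W1', 'b1', 'W2', ... as above.
import numpy as np

def loss_with_weight_decay(data_loss, params, weight_decay_lambda):
    # data_loss: cross-entropy (or similar) from the forward pass
    decay = 0.0
    for key, value in params.items():
        if key.startswith('W'):  # decay the weights only, not the biases
            decay += 0.5 * weight_decay_lambda * np.sum(value ** 2)
    return data_loss + decay

def add_weight_decay_grads(grads, params, weight_decay_lambda):
    # d/dW of 0.5 * lambda * ||W||^2 is lambda * W
    for key, value in params.items():
        if key.startswith('W'):
            grads[key] = grads[key] + weight_decay_lambda * value
    return grads
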
Example #2
def __train(lr, weight_decay, epochs=50):
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10, weight_decay_lambda=weight_decay)
    trainer = Trainer(network, x_train, t_train, x_val, t_val, epochs=epochs, mini_batch_size=100,
                      optimizer="sgd", optimizer_param={"lr": lr}, verbose=False)

    trainer.train()

    return trainer.test_acc_list, trainer.train_acc_list
def train(train_x, train_label, test_x, test_label, learning_rate, max_epoch,
          batch_size):
    # weight decay (L2 regularization strength) setting =======================
    weight_decay_lambda = 0  # no weight decay
    # weight_decay_lambda = 0.1

    # build the neural network
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay_lambda)

    optimizer = SGD(learning_rate)

    train_acc_list = []
    test_acc_list = []

    train_size = train_x.shape[0]
    iter_per_epoch = max(train_size / batch_size, 1)

    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    epoch_cnt = 0
    left = 0
    iteration = int(iter_per_epoch * max_epoch)

    for i in range(iteration):
        # fetch one batch of data and update the left cursor
        batch_x, batch_label, left = get_batch(train_x, train_label,
                                               batch_mask, batch_size, left)

        grads = network.gradient(batch_x, batch_label)
        optimizer.update(network.params, grads)

        # record accuracy on the training and test sets once per epoch
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(train_x, train_label)
            test_acc = network.accuracy(test_x, test_label)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)

            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) +
                  ", test acc:" + str(test_acc))
            epoch_cnt += 1

    return train_acc_list, test_acc_list
def train(train_x, train_label, val_x, val_label, lr, weight_decay, epochs=50):
    # train a network with the given hyperparameters and return its accuracy on the validation and training sets
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10, weight_decay_lambda=weight_decay)
    trainer = Trainer(network, train_x, train_label, val_x, val_label,
                      epochs=epochs, mini_batch_size=100, optimizer='SGD',
                      optimizer_param={'lr': lr}, verbose=True)
    trainer.train()

    return trainer.test_acc_list, trainer.train_acc_list
def main():
    # 0: load the MNIST data
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

    train_size = x_train.shape[0]
    batch_size = 128
    max_iterations = 2000

    # 1: experiment setup
    optimizers = {}
    optimizers['SGD'] = SGD()
    optimizers['Momentum'] = Momentum()
    optimizers['AdaGrad'] = AdaGrad()
    optimizers['Adam'] = Adam()
    #optimizers['RMSprop'] = RMSprop()

    networks = {}
    train_loss = {}
    for key in optimizers.keys():
        networks[key] = MultiLayerNet(
                input_size=784, hidden_size_list=[100, 100, 100, 100],
                output_size=10)
        train_loss[key] = []

    # 2: start training
    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for key in optimizers.keys():
            grads = networks[key].gradient(x_batch, t_batch)
            optimizers[key].update(networks[key].params, grads)

            loss = networks[key].loss(x_batch, t_batch)
            train_loss[key].append(loss)

        if i % 100 == 0:
            print ("==========" + "iteration:" + str(i) + "==========")
            for key in optimizers.keys():
                loss = networks[key].loss(x_batch, t_batch)
                print (key + ":" + str(loss))

    # 3: draw the graph
    markers = {"SGD": "o", "Momentum": "x", "AdaGrad": "s", "Adam": "D"}
    x = np.arange(max_iterations)
    for key in optimizers.keys():
        plt.plot(x, smooth_curve(train_loss[key]), marker=markers[key], markevery=100, label=key)
    plt.xlabel("iterations")
    plt.ylabel("loss")
    plt.ylim(0, 1)
    plt.legend()
    plt.show()
Example #6
def main():
    x_train, t_train, x_test, t_test = get_data()

    # experiment setup
    train_size = x_train.shape[0]
    batch_size = 128
    max_iterations = 2000

    weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
    optimizer = SGD(lr=0.01)

    networks = {}
    train_loss = {}
    for key, weight_type in weight_init_types.items():
        networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                                      output_size=10, weight_init_std=weight_type)
        train_loss[key] = []

    # start training
    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # run the update for each weight-initialization setting
        for key in weight_init_types.keys():
            # compute the gradients
            grads = networks[key].gradient(x_batch, t_batch)
            # update the parameters
            optimizer.update(networks[key].params, grads)

            # recompute the loss with the updated parameters
            loss = networks[key].loss(x_batch, t_batch)
            train_loss[key].append(loss)

        # terminal check
        if i % 100 == 0:
            print(f'=========iteration:{str(i)}===========')
            for key in weight_init_types.keys():
                loss = networks[key].loss(x_batch, t_batch)
                print(f'{key}:{str(loss)}')

    # draw the graph
    fig = plt.figure()
    markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
    x = np.arange(max_iterations)
    for key in weight_init_types.keys():
        plt.plot(x, smooth_curve(train_loss[key]), marker=markers[key], markevery=100, label=key)
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.ylim(0, 2.5)
    plt.legend()
    fig.savefig('../images/weight_init_compare.png')
Example #7
def __train(lr, weight_decay, epochs=50):
    # create the network
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10, weight_decay_lambda=weight_decay)

    # set up the trainer
    trainer = Trainer(network, x_train, t_train, x_test, t_test, epochs=epochs, mini_batch_size=100,
                      optimizer='adam', optimizer_param={'lr': lr}, verbose=False)

    # start training
    trainer.train()

    return trainer.test_acc_list, trainer.train_acc_list
Example #8
def __train(epochs=50):
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=1.865405500969014e-05)
    trainer = Trainer(network,
                      x_train,
                      t_train,
                      x_test,
                      t_test,
                      epochs=epochs,
                      mini_batch_size=100,
                      optimizer='AdaGrad',
                      optimizer_param={'lr': 0.002737364082615975})
    trainer.train()

    return trainer.test_acc_list, trainer.train_acc_list
def __train(lr, weight_decay, epochs=50):

    net = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        weight_decay_lambda=weight_decay)
    trainer = Trainer(net,
                      x_train,
                      t_train,
                      x_valuation,
                      t_valuation,
                      epochs=epochs,
                      mini_batch_size=100,
                      optimizer='SGD',
                      optimizer_param={'lr': lr},
                      verbose=False)
    trainer.train()
    return trainer.train_acc_list, trainer.test_acc_list
Example #10
def __train(weight_init_std):
    bn_network = MultiLayerNet(input_size=784,
                               hidden_size_list=[100, 100, 100, 100, 100],
                               output_size=10,
                               weight_init_std=weight_init_std,
                               use_batchnorm=True)
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100],
                            output_size=10,
                            weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " +
                  str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
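
The function above trains one network with use_batchnorm=True and an otherwise identical one without it. As background, here is a minimal sketch of the training-time batch-normalization forward pass; gamma, beta, and eps are illustrative names and are not taken from the example's layers.

# Hypothetical sketch of batch normalization at training time.
import numpy as np

def batchnorm_forward_train(x, gamma, beta, eps=1e-7):
    # x: (batch_size, features); gamma/beta: learned per-feature scale and shift
    mu = x.mean(axis=0)                     # per-feature batch mean
    var = x.var(axis=0)                     # per-feature batch variance
    x_hat = (x - mu) / np.sqrt(var + eps)   # normalize to zero mean, unit variance
    return gamma * x_hat + beta             # scale and shift
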
Example #11
batch_size = 128
max_iterations = 2000

# 1: experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
#optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=10,
                                  hidden_size_list=[50, 50, 50, 50],
                                  output_size=10)
    train_loss[key] = []

# 2: start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
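
Each optimizer compared above exposes an update(params, grads) method. Below is a minimal sketch of two of the update rules (plain SGD and Momentum), written as standalone classes for illustration; they are not the exact classes imported by the example.

# Hypothetical sketches of the SGD and Momentum update rules.
import numpy as np

class SGDSketch:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params:
            params[key] -= self.lr * grads[key]  # W <- W - lr * dW

class MomentumSketch:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params:
            # v accumulates a velocity term; the parameter follows the velocity
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]
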
Example #12
(x_train, t_train), (x_test, t_test) = load_mnist(dataset_dir,
                                                  save_file,
                                                  normalize=True)

# use only a small subset of the data to induce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# Set Using Drop-out & Ratio
use_dropout = False
dropout_ratio = 0.2
# ====================================================

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        use_dropout=use_dropout,
                        dropout_ration=dropout_ratio)
trainer = Trainer(network,
                  x_train,
                  t_train,
                  x_test,
                  t_test,
                  epochs=301,
                  mini_batch_size=100,
                  optimizer='sgd',
                  optimizer_param={'lr': 0.01},
                  verbose=True)
trainer.train()

train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list
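
For context on the use_dropout / dropout_ratio switches above, here is a minimal sketch of a dropout layer with the usual train/test behavior; this is a hypothetical layer, not the one the example's network instantiates.

# Hypothetical dropout layer sketch.
import numpy as np

class DropoutSketch:
    def __init__(self, dropout_ratio=0.2):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # during training, drop each unit with probability dropout_ratio
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        # at test time, scale activations by the keep probability instead
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        return dout * self.mask  # gradients flow only through the kept units
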
Example #13
from common.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from optimizer import *
import numpy as np

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
x_train = x_train[:300]
t_train = t_train[:300]

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10)
optimizer = SGD(lr=0.01)
max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)
# 0: load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: experiment setup ==========
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
optimizer = SGD(lr=0.01)

networks = {}
train_loss = {}
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_type)
    train_loss[key] = []

# 2: start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in weight_init_types.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizer.update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
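
In these comparisons, weight_init_std is either a number (0.01) or a string ('sigmoid' / 'relu') selecting Xavier or He initialization. Below is a minimal sketch of how such a value could map to a per-layer standard deviation, assuming the usual sqrt(1/n) and sqrt(2/n) conventions; the helper is illustrative, not the network's actual initializer.

# Hypothetical sketch: choosing the weight-initialization scale per layer.
import numpy as np

def init_weight(n_in, n_out, weight_init_std):
    if isinstance(weight_init_std, str):
        if weight_init_std.lower() in ('sigmoid', 'xavier'):
            scale = np.sqrt(1.0 / n_in)   # Xavier: suited to sigmoid/tanh
        elif weight_init_std.lower() in ('relu', 'he'):
            scale = np.sqrt(2.0 / n_in)   # He: suited to ReLU
        else:
            raise ValueError('unknown weight_init_std: ' + weight_init_std)
    else:
        scale = float(weight_init_std)    # plain numeric std, e.g. 0.01
    return scale * np.random.randn(n_in, n_out)
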
Example #15
# 0: load the data ==========
(x_train, t_train), (x_test, t_test) = load_liner(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: experiment setup ==========
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
optimizer = SGD(lr=0.01)

networks = {}
train_loss = {}
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=10,
                                  hidden_size_list=[50, 50, 50, 50],
                                  output_size=10,
                                  weight_init_std=weight_type)
    train_loss[key] = []

# 2: start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in weight_init_types.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizer.update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
Example #16
x_train = x_train.astype(np.float32)
x_train = x_train / 255.0
x_test = x_test.astype(np.float32)
x_test = x_test / 255.0

y_train = preprocessing.LabelBinarizer().fit_transform(y_train_label)
y_test = preprocessing.LabelBinarizer().fit_transform(y_test_label)

from common.multi_layer_net import MultiLayerNet
from common.optimizer import *

weight_decay_lambda = 0
network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100],
                        output_size=10,
                        activation="sigmoid",
                        weight_decay_lambda=weight_decay_lambda)
optimizer = Adam()

max_epochs = 30
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0
def main():
    (train_x, train_label), _ = load_mnist()
    train_size = train_x.shape[0]
    batch_size = 128
    max_iterations = 2000

    # shuffle the training set; each iteration draws one batch-sized slice of samples
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    # optimizers to compare
    optimizers = {}
    optimizers['SGD'] = SGD()
    optimizers['Momentum'] = Momentum()
    optimizers['AdaGrad'] = AdaGrad()
    optimizers['Adam'] = Adam()
    optimizers['RMSProp'] = RMSProp()

    # build a five-layer fully connected network for each optimizer
    networks = {}
    train_loss_list = {}
    for key in optimizers.keys():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10)
        train_loss_list[key] = []  # record the training loss for each optimizer

    left = 0
    for i in range(max_iterations):
        # fetch one batch
        batch_x, batch_label, left = get_batch(train_x, train_label,
                                               batch_mask, batch_size, left)

        # compute the gradients, then update the parameters with each optimizer,
        # recording the loss after every update
        for key in optimizers.keys():
            grads = networks[key].gradient(batch_x, batch_label)
            optimizers[key].update(networks[key].params, grads)

            loss = networks[key].loss(batch_x, batch_label)
            train_loss_list[key].append(loss)

        # print the current loss of every optimizer every 100 iterations
        if i % 100 == 0:
            print("=" * 15 + "iteration: " + str(i) + "=" * 15)
            for key in optimizers.keys():
                loss = train_loss_list[key][-1]
                print(key + ": " + str(loss))

    # plot loss against the number of iterations
    markers = {
        'SGD': 'o',
        'Momentum': 'x',
        'AdaGrad': 's',
        'Adam': 'D',
        'RMSProp': 'v'
    }
    x = np.arange(max_iterations)
    for key in optimizers.keys():
        plt.plot(x,
                 smooth_curve(train_loss_list[key]),
                 marker=markers[key],
                 markevery=100,
                 label=key)

    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.ylim(0, 1)
    plt.legend()
    plt.show()
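
Several of these examples call a get_batch(train_x, train_label, batch_mask, batch_size, left) helper that is not shown. Below is a plausible sketch consistent with how it is called (a pre-shuffled index array and a running left cursor that wraps around); this is an assumption, not the original helper.

# Hypothetical get_batch sketch.
import numpy as np

def get_batch(train_x, train_label, batch_mask, batch_size, left):
    # batch_mask: a pre-shuffled array of indices into the training set
    # left: cursor marking where the previous batch ended
    right = left + batch_size
    if right > len(batch_mask):
        # wrap around: reshuffle and start again from the beginning
        np.random.shuffle(batch_mask)
        left, right = 0, batch_size
    idx = batch_mask[left:right]
    return train_x[idx], train_label[idx], right
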
Example #18
def main():
    # load the MNIST data
    x_train, t_train, x_test, t_test = get_data()

    # experiment setup
    train_size = x_train.shape[0]
    batch_size = 128
    max_iterations = 2000

    optimizers = {}
    optimizers['SGD'] = SGD()
    optimizers['Momentum'] = Momentum()
    optimizers['AdaGrad'] = AdaGrad()
    optimizers['Adam'] = Adam()
    optimizers['RMSprop'] = RMSprop()

    networks = {}
    train_loss = {}
    for key in optimizers.keys():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10)
        train_loss[key] = []

    # start training
    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for key in optimizers.keys():
            grads = networks[key].gradient(x_batch, t_batch)
            optimizers[key].update(networks[key].params,
                                   grads)  # pass the current parameters and their gradients

            loss = networks[key].loss(x_batch, t_batch)
            train_loss[key].append(loss)

        # terminal check
        if i % 100 == 0:
            print(f'=========iteration:{str(i)}===========')
            for key in optimizers.keys():
                loss = networks[key].loss(x_batch, t_batch)
                print(f'{key}:{str(loss)}')

    # draw the graph
    fig = plt.figure()
    markers = {
        'SGD': 'o',
        'Momentum': 'x',
        'AdaGrad': 's',
        'Adam': 'D',
        'RMSprop': '+'
    }
    x = np.arange(max_iterations)
    for key in optimizers.keys():
        plt.plot(x,
                 smooth_curve(train_loss[key]),
                 marker=markers[key],
                 markevery=100,
                 label=key)
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.ylim(0, 1)
    plt.legend()
    fig.savefig('../images/optimizer_compare.png')
Example #19
Overfitting: the model predicts the training data accurately, but its accuracy drops on data it was not trained on.
- When overfitting tends to occur:
    1) the amount of training data is small
    2) the model has so many parameters that its representational power is too high
- Ways to train so that overfitting does not occur:
    1) regularization: L1/L2 regularization (weight decay)
    2) dropout
"""
from common.multi_layer_net import MultiLayerNet
from dataset.mnist import load_mnist

(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

neural_net = MultiLayerNet(
    input_size=784,
    hidden_size_list=[100, 100, 100, 100, 100],
    output_size=10,
    weight_decay_lambda=0)  # weight_decay_lambda=0: weight decay is not used

# reduce the amount of data to induce overfitting
X_train = X_train[:300]
Y_train = Y_train[:300]

epochs = 200  # 1 epoch: every training sample has been seen once
mini_batch_size = 100  # number of samples sent through one forward pass

# record accuracy on the test data at every epoch during training
train_accuracies = []
test_accuracies = []

optimizer = Sgd(learning_rate=0.01)  # optimizer
batch_size = 128
max_iterations = 2000

# 1: experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
#optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(
        input_size=784, hidden_size_list=[100, 100, 100, 100],
        output_size=10)  # four 5-layer networks: four hidden layers of 100 neurons each, ReLU activations
    train_loss[key] = []

# 2: start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
Example #21
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet

dataset_dir = os.path.dirname(os.path.abspath('__file__'))
save_file = dataset_dir + "/mnist.pkl"
(x_train, t_train), (x_test, t_test) = load_mnist(dataset_dir,
                                                  save_file,
                                                  normalize=True,
                                                  one_hot_label=True)

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100],
                        output_size=10,
                        use_batchnorm=True)

x_batch = x_train[:1]
t_batch = t_train[:1]

grad_backprop = network.gradient(x_batch, t_batch)
grad_numerical = network.numerical_gradient(x_batch, t_batch)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
Example #22
def main():
    (train_x, train_label), _ = load_mnist()
    train_size = train_x.shape[0]
    batch_size = 128
    max_iterations = 2000

    # shuffle the training set; each iteration draws one batch-sized slice of samples
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    # use the SGD optimizer
    optimizer = SGD(lr=0.01)

    # weight initializations to compare: std=0.01, Xavier, He
    weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}

    # build a five-layer fully connected network for each initialization
    networks = {}
    train_loss_list = {}
    for key, weight_init_type in weight_init_types.items():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10,
                                      weight_init_std=weight_init_type)
        train_loss_list[key] = []  # record the training loss for each initialization

    left = 0
    for i in range(max_iterations):
        # fetch one batch
        batch_x, batch_label, left = get_batch(train_x, train_label,
                                               batch_mask, batch_size, left)

        # compute the gradients, then update the parameters of each network,
        # recording the loss after every update
        for key in weight_init_types.keys():
            grads = networks[key].gradient(batch_x, batch_label)
            optimizer.update(networks[key].params, grads)

            loss = networks[key].loss(batch_x, batch_label)
            train_loss_list[key].append(loss)

        # print the current loss of every setting every 100 iterations
        if i % 100 == 0:
            print("=" * 15 + "iteration: " + str(i) + "=" * 15)
            for key in weight_init_types.keys():
                loss = train_loss_list[key][-1]
                print(key + ": " + str(loss))

    # plot loss against the number of iterations
    markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
    x = np.arange(max_iterations)
    for key in weight_init_types.keys():
        plt.plot(x,
                 smooth_curve(train_loss_list[key]),
                 marker=markers[key],
                 markevery=100,
                 label=key)
    plt.xlabel("iterations")
    plt.ylabel("loss")
    plt.ylim(0, 2.5)
    plt.legend()
    plt.show()
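
The plotting code in these examples passes the raw loss history through smooth_curve to make the curves readable. A simple moving-average stand-in that serves the same purpose is sketched below; the original helper may use a different window, so treat this as an assumption.

# Hypothetical smooth_curve sketch: centered moving average with reflected padding.
import numpy as np

def smooth_curve(x, window=11):
    x = np.asarray(x, dtype=float)
    pad = window // 2
    # reflect the ends so the output keeps the same length as the input
    padded = np.concatenate([x[:pad][::-1], x, x[-pad:][::-1]])
    kernel = np.ones(window) / window
    return np.convolve(padded, kernel, mode='valid')
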
Example #23
iter_num = 2000
batch_size = 128
loss = {}
input_size = 784
hidden_size_list = [100, 100, 100, 100]
output_size = 10

# 1: experiment setup ==========
weight_type = {"std": 0.01, "Xavier": "sigmoid", "He": "relu"}
optimizer = SGD(lr=0.01)

# define the networks
network = {}
for key, weight in weight_type.items():
    network[key] = MultiLayerNet(input_size,
                                 hidden_size_list,
                                 output_size,
                                 weight_init_std=weight)

    loss[key] = []

# 2: start training ==========
for i in range(iter_num):
    # randomly pick 128 of the 60,000 samples
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in weight_type.keys():
        # compute the gradients and update the weights
        grads = network[key].gradient(x_batch, t_batch)
Example #24
# experiment settings
from dataset.mnist import load_mnist

weight_init_types = {
    'std=0.01': 0.01,
    'Xavier': 'sigmoid',  # initial weights: N(0, sqrt(1/n))
    'He': 'relu'  # initial weights: N(0, sqrt(2/n))
}

# create a network to test for each experimental setting
neural_nets = dict()
train_losses = dict()
for key, type in weight_init_types.items():
    neural_nets[key] = MultiLayerNet(input_size=784,
                                     hidden_size_list=[100, 100, 100, 100],
                                     output_size=10,
                                     weight_init_std=type)
    train_losses[key] = []  # empty list to store loss values during training

# load the MNIST train/test data
(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

iterations = 2_000  # number of training iterations
batch_size = 128  # number of samples per update (mini-batch)
optimizer = Sgd(learning_rate=0.01)  # parameter-optimization algorithm
# switch the optimizer to compare results
optimizer = Adam()

np.random.seed(109)
for i in range(iterations):  # repeat 2,000 times
    # draw a random mini-batch of samples
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# reduce the training data to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# weight decay setting =======================
#weight_decay_lambda = 0  # without weight decay
weight_decay_lambda = 0.1
# ====================================================

network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100], output_size=10,
                        weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
Example #26
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# reduce the training data to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# weight decay setting
# weight_decay_lambda = 0  # without weight decay
weight_decay_lambda = 0.1

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        weight_decay_lambda=weight_decay_lambda)

optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0
Example #27
        for i in range(1, len(self.hidden_size_list) + 2):
            grads['W%d' % i] = self.layers['Affine%d' % i].dW + \
                self.weight_decay_lambda * self.layers['Affine%d' % i].W
            grads['b%d' % i] = self.layers['Affine%d' % i].db
        return grads


if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from common.optimizer import SGD, Momentum, AdaGrad, Adam
    from common.multi_layer_net import MultiLayerNet
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
    multi_layer_net = MultiLayerNet(784,
                                    output_size=10,
                                    hidden_size_list=[100, 100, 100, 100])
    mul_layer_net = MulLayerNet(784,
                                output_size=10,
                                hidden_size_list=[100, 100, 100, 100])

    max_iterations = 2000
    train_size = x_train.shape[0]
    batch_size = 128
    optimizer_mul = AdaGrad()
    optimizer_multi = AdaGrad()

    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]
Example #28
batch_size = 128
max_iterations = 2000

# 1. experiment setup
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
#optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2. start training
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
Example #29
x_data_train = x[40:, :]
t_data_test = t[:40, :]
t_data_train = t[40:, :]

print("x_data_train.shape=", x_data_train.shape,
      "x_data_test.shape=", x_data_test.shape,
      "t_data_train.shape=", t_data_train.shape,
      "t_data_test.shape=", t_data_test.shape)


"""
ニューラルネットワークの環境構築
"""

weight_decay_lambda = 0.1
network = MultiLayerNet(input_size=day_ago+4, hidden_size_list=[100], output_size=len(t[0]),
                        weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)





# parameters
iters_num = 4000  # number of gradient-descent updates; is more always better?
train_size = len(t_data_train)  # number of training samples (remember the test data was split off)
batch_size = 300  # mini-batch size (value chosen somewhat arbitrarily)
learning_rate = 0.01

train_loss_list=[]
train_acc_list=[]
test_acc_list=[]