def main():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

    # Reproduce overfitting by training on only 300 samples
    x_train = x_train[:300]
    t_train = t_train[:300]

    max_epochs = 201
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01

    # weight_decay_lambda = 0  # no weight decay
    weight_decay_lambda = 0.1

    train_acc_list = []
    test_acc_list = []

    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay_lambda)
    optimizer = SGD(lr=learning_rate)

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)

            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) +
                  ", test acc:" + str(test_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    markers = {'train': 'o', 'test': 's'}
    x = np.arange(max_epochs)
    plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
    plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()
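# The SGD class used throughout these scripts is imported from a common
# optimizer module that is not shown here. A minimal sketch of the assumed
# update rule (W <- W - lr * dL/dW), not necessarily the exact implementation:
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        # params and grads are dicts keyed by parameter name ('W1', 'b1', ...)
        for key in params.keys():
            params[key] -= self.lr * grads[key]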
def __train(lr, weight_decay, epochs=50):
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay)
    trainer = Trainer(network, x_train, t_train, x_val, t_val,
                      epochs=epochs, mini_batch_size=100,
                      optimizer="sgd", optimizer_param={"lr": lr},
                      verbose=False)
    trainer.train()
    return trainer.test_acc_list, trainer.train_acc_list
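# The Trainer class these helpers rely on is defined elsewhere. A minimal
# sketch of the interface they assume (constructor arguments, train(), and
# per-epoch accuracy lists) -- illustrative only, not the actual class:
import numpy as np

class Trainer:
    def __init__(self, network, x_train, t_train, x_test, t_test,
                 epochs=20, mini_batch_size=100,
                 optimizer='SGD', optimizer_param={'lr': 0.01}, verbose=True):
        self.network = network
        self.x_train, self.t_train = x_train, t_train
        self.x_test, self.t_test = x_test, t_test
        self.epochs, self.batch_size = epochs, mini_batch_size
        self.verbose = verbose
        # Other optimizers (Momentum, AdaGrad, Adam, ...) would register here too
        optimizer_classes = {'sgd': SGD}
        self.optimizer = optimizer_classes[optimizer.lower()](**optimizer_param)
        self.train_acc_list, self.test_acc_list = [], []

    def train(self):
        train_size = self.x_train.shape[0]
        iter_per_epoch = max(train_size // self.batch_size, 1)
        for epoch in range(self.epochs):
            for _ in range(iter_per_epoch):
                mask = np.random.choice(train_size, self.batch_size)
                grads = self.network.gradient(self.x_train[mask], self.t_train[mask])
                self.optimizer.update(self.network.params, grads)
            # Record train/test accuracy once per epoch
            self.train_acc_list.append(self.network.accuracy(self.x_train, self.t_train))
            self.test_acc_list.append(self.network.accuracy(self.x_test, self.t_test))
            if self.verbose:
                print(f"epoch {epoch}: train acc {self.train_acc_list[-1]:.4f}, "
                      f"test acc {self.test_acc_list[-1]:.4f}")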
def train(train_x, train_label, test_x, test_label, learning_rate, max_epoch, batch_size):
    # Weight decay (strength of the L2 regularization term)
    weight_decay_lambda = 0    # no weight decay
    # weight_decay_lambda = 0.1

    # Build the network
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay_lambda)
    optimizer = SGD(learning_rate)

    train_acc_list = []
    test_acc_list = []

    train_size = train_x.shape[0]
    iter_per_epoch = max(train_size / batch_size, 1)
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    epoch_cnt = 0
    left = 0
    iteration = int(iter_per_epoch * max_epoch)
    for i in range(iteration):
        # Fetch one batch of data and update the left pointer
        batch_x, batch_label, left = get_batch(train_x, train_label,
                                               batch_mask, batch_size, left)
        grads = network.gradient(batch_x, batch_label)
        optimizer.update(network.params, grads)

        # Record the train/test accuracy once per epoch
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(train_x, train_label)
            test_acc = network.accuracy(test_x, test_label)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) +
                  ", test acc:" + str(test_acc))
            epoch_cnt += 1
    return train_acc_list, test_acc_list
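# `get_batch` is not shown in these snippets. Judging from its call sites
# (a shuffled index array `batch_mask`, a batch size, and a moving `left`
# pointer), a plausible sketch is the following -- an assumption about its
# semantics, not the original helper:
import numpy as np

def get_batch(train_x, train_label, batch_mask, batch_size, left):
    # Wrap around and reshuffle once an epoch's worth of data is consumed
    if left + batch_size > len(batch_mask):
        np.random.shuffle(batch_mask)
        left = 0
    idx = batch_mask[left:left + batch_size]
    return train_x[idx], train_label[idx], left + batch_size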
def train(train_x, train_label, val_x, val_label, lr, weight_decay, epochs=50):
    # Train a network with the given hyperparameters and return the
    # accuracies on the validation and training sets
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay)
    trainer = Trainer(network, train_x, train_label, val_x, val_label,
                      epochs=epochs, mini_batch_size=100,
                      optimizer='SGD', optimizer_param={'lr': lr},
                      verbose=True)
    trainer.train()
    return trainer.test_acc_list, trainer.train_acc_list
def main():
    # 0: Load the MNIST data
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

    train_size = x_train.shape[0]
    batch_size = 128
    max_iterations = 2000

    # 1: Experiment setup
    optimizers = {}
    optimizers['SGD'] = SGD()
    optimizers['Momentum'] = Momentum()
    optimizers['AdaGrad'] = AdaGrad()
    optimizers['Adam'] = Adam()
    # optimizers['RMSprop'] = RMSprop()

    networks = {}
    train_loss = {}
    for key in optimizers.keys():
        networks[key] = MultiLayerNet(
            input_size=784,
            hidden_size_list=[100, 100, 100, 100],
            output_size=10)
        train_loss[key] = []

    # 2: Start training
    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for key in optimizers.keys():
            grads = networks[key].gradient(x_batch, t_batch)
            optimizers[key].update(networks[key].params, grads)

            loss = networks[key].loss(x_batch, t_batch)
            train_loss[key].append(loss)

        if i % 100 == 0:
            print("==========" + "iteration:" + str(i) + "==========")
            for key in optimizers.keys():
                loss = networks[key].loss(x_batch, t_batch)
                print(key + ":" + str(loss))

    # 3: Draw the graph
    markers = {"SGD": "o", "Momentum": "x", "AdaGrad": "s", "Adam": "D"}
    x = np.arange(max_iterations)
    for key in optimizers.keys():
        plt.plot(x, smooth_curve(train_loss[key]),
                 marker=markers[key], markevery=100, label=key)
    plt.xlabel("iterations")
    plt.ylabel("loss")
    plt.ylim(0, 1)
    plt.legend()
    plt.show()
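# `smooth_curve` is imported from a common utility module that is not shown.
# For self-containment, a simple moving-average stand-in with the same shape
# contract (returns a sequence aligned with the input) could look like this --
# an illustrative assumption, not the original implementation:
import numpy as np

def smooth_curve(x, window=11):
    x = np.asarray(x, dtype=float)
    if len(x) < window:
        return x
    # Pad by reflection so the smoothed curve keeps the original length
    s = np.r_[x[window - 1:0:-1], x, x[-2:-window - 1:-1]]
    w = np.ones(window) / window
    return np.convolve(s, w, mode='valid')[:len(x)]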
def main():
    x_train, t_train, x_test, t_test = get_data()

    # Experiment setup
    train_size = x_train.shape[0]
    batch_size = 128
    max_iterations = 2000

    weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
    optimizer = SGD(lr=0.01)

    networks = {}
    train_loss = {}
    for key, weight_type in weight_init_types.items():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10,
                                      weight_init_std=weight_type)
        train_loss[key] = []

    # Start training
    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Run each experiment setting
        for key in weight_init_types.keys():
            # Compute the gradients
            grads = networks[key].gradient(x_batch, t_batch)
            # Update the parameters
            optimizer.update(networks[key].params, grads)
            # Recompute the loss with the updated parameters
            loss = networks[key].loss(x_batch, t_batch)
            train_loss[key].append(loss)

        # terminal check
        if i % 100 == 0:
            print(f'=========iteration:{str(i)}===========')
            for key in weight_init_types.keys():
                loss = networks[key].loss(x_batch, t_batch)
                print(f'{key}:{str(loss)}')

    # Draw the graph
    fig = plt.figure()
    markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
    x = np.arange(max_iterations)
    for key in weight_init_types.keys():
        plt.plot(x, smooth_curve(train_loss[key]),
                 marker=markers[key], markevery=100, label=key)
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.ylim(0, 2.5)
    plt.legend()
    fig.savefig('../images/weight_init_compare.png')
def __train(lr, weight_decay, epochs=50):
    # Build the network
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay)
    # Set up the trainer
    trainer = Trainer(network, x_train, t_train, x_test, t_test,
                      epochs=epochs, mini_batch_size=100,
                      optimizer='adam', optimizer_param={'lr': lr},
                      verbose=False)
    # Start training
    trainer.train()
    return trainer.test_acc_list, trainer.train_acc_list
def __train(epochs=50):
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=1.865405500969014e-05)
    trainer = Trainer(network, x_train, t_train, x_test, t_test,
                      epochs=epochs, mini_batch_size=100,
                      optimizer='AdaGrad',
                      optimizer_param={'lr': 0.002737364082615975})
    trainer.train()
    return trainer.test_acc_list, trainer.train_acc_list
def __train(lr, weight_decay, epochs=50):
    net = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        weight_decay_lambda=weight_decay)
    trainer = Trainer(net, x_train, t_train, x_valuation, t_valuation,
                      epochs=epochs, mini_batch_size=100,
                      optimizer='SGD', optimizer_param={'lr': lr},
                      verbose=False)
    trainer.train()
    return trainer.train_acc_list, trainer.test_acc_list
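# These __train helpers are typically driven by a random hyperparameter
# search. A usage sketch -- the log-uniform sampling ranges and trial count
# are illustrative assumptions, not values taken from these scripts:
import numpy as np

results_train = {}
results_val = {}
for _ in range(100):
    # Sample the learning rate and weight decay log-uniformly
    weight_decay = 10 ** np.random.uniform(-8, -4)
    lr = 10 ** np.random.uniform(-6, -2)

    train_acc_list, val_acc_list = __train(lr, weight_decay)
    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
    results_train[key] = train_acc_list
    results_val[key] = val_acc_list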
def __train(weight_init_std):
    bn_network = MultiLayerNet(input_size=784,
                               hidden_size_list=[100, 100, 100, 100, 100],
                               output_size=10,
                               weight_init_std=weight_init_std,
                               use_batchnorm=True)
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100],
                            output_size=10,
                            weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Update both networks (with and without batch normalization) on the same batch
        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) +
                  " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
# optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=10,
                                  hidden_size_list=[50, 50, 50, 50],
                                  output_size=10)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
(x_train, t_train), (x_test, t_test) = load_mnist(dataset_dir, save_file, normalize=True)

# Use only a small amount of data to induce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# Dropout flag and ratio
use_dropout = False
dropout_ratio = 0.2
# ====================================================

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        use_dropout=use_dropout,
                        dropout_ration=dropout_ratio)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=301, mini_batch_size=100,
                  optimizer='sgd', optimizer_param={'lr': 0.01},
                  verbose=True)
trainer.train()

train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list
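# The dropout behaviour behind `use_dropout` follows the standard layer used
# in this style of network: randomly silence units while training, scale by
# the keep probability at test time. A minimal sketch -- illustrative, not
# the exact class inside MultiLayerNet:
import numpy as np

class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # Randomly silence units during training
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        # At test time, scale by the keep probability instead
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # Gradients flow only through the units that were kept
        return dout * self.mask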
from common.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from optimizer import *
import numpy as np

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
x_train = x_train[:300]
t_train = t_train[:300]

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10)
optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)
# 0: Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
optimizer = SGD(lr=0.01)

networks = {}
train_loss = {}
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_type)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in weight_init_types.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizer.update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
# 0: Load the data ==========
(x_train, t_train), (x_test, t_test) = load_liner(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
optimizer = SGD(lr=0.01)

networks = {}
train_loss = {}
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=10,
                                  hidden_size_list=[50, 50, 50, 50],
                                  output_size=10,
                                  weight_init_std=weight_type)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in weight_init_types.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizer.update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
import numpy as np
from sklearn import preprocessing  # assuming scikit-learn for LabelBinarizer below

# Scale pixel values to [0, 1]
x_train = x_train.astype(np.float32)
x_train = x_train / 255.0
x_test = x_test.astype(np.float32)
x_test = x_test / 255.0

# One-hot encode the integer labels
y_train = preprocessing.LabelBinarizer().fit_transform(y_train_label)
y_test = preprocessing.LabelBinarizer().fit_transform(y_test_label)

from common.multi_layer_net import MultiLayerNet
from common.optimizer import *

weight_decay_lambda = 0
network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100],
                        output_size=10,
                        activation="sigmoid",
                        weight_decay_lambda=weight_decay_lambda)
optimizer = Adam()

max_epochs = 30
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0
def main():
    (train_x, train_label), _ = load_mnist()
    train_size = train_x.shape[0]
    batch_size = 128
    max_iterations = 2000

    # Shuffle the training set; each iteration draws one batch from it
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    # Optimizers to compare
    optimizers = {}
    optimizers['SGD'] = SGD()
    optimizers['Momentum'] = Momentum()
    optimizers['AdaGrad'] = AdaGrad()
    optimizers['Adam'] = Adam()
    optimizers['RMSProp'] = RMSProp()

    # Build one five-layer fully connected network per optimizer
    networks = {}
    train_loss_list = {}
    for key in optimizers.keys():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10)
        train_loss_list[key] = []  # training loss recorded per optimizer

    left = 0
    for i in range(max_iterations):
        # Fetch one batch
        batch_x, batch_label, left = get_batch(train_x, train_label,
                                               batch_mask, batch_size, left)

        # Compute gradients, update parameters with each optimizer,
        # and record the loss after each update
        for key in optimizers.keys():
            grads = networks[key].gradient(batch_x, batch_label)
            optimizers[key].update(networks[key].params, grads)

            loss = networks[key].loss(batch_x, batch_label)
            train_loss_list[key].append(loss)

        # Print each optimizer's current loss every 100 iterations
        if i % 100 == 0:
            print("=" * 15 + "iteration: " + str(i) + "=" * 15)
            for key in optimizers.keys():
                loss = train_loss_list[key][-1]
                print(key + ": " + str(loss))

    # Plot loss against iteration count
    markers = {
        'SGD': 'o',
        'Momentum': 'x',
        'AdaGrad': 's',
        'Adam': 'D',
        'RMSProp': 'v'
    }
    x = np.arange(max_iterations)
    for key in optimizers.keys():
        plt.plot(x, smooth_curve(train_loss_list[key]),
                 marker=markers[key], markevery=100, label=key)
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.ylim(0, 1)
    plt.legend()
    plt.show()
def main():
    # Load the MNIST data
    x_train, t_train, x_test, t_test = get_data()

    # Experiment setup
    train_size = x_train.shape[0]
    batch_size = 128
    max_iterations = 2000

    optimizers = {}
    optimizers['SGD'] = SGD()
    optimizers['Momentum'] = Momentum()
    optimizers['AdaGrad'] = AdaGrad()
    optimizers['Adam'] = Adam()
    optimizers['RMSprop'] = RMSprop()

    networks = {}
    train_loss = {}
    for key in optimizers.keys():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10)
        train_loss[key] = []

    # Start training
    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for key in optimizers.keys():
            grads = networks[key].gradient(x_batch, t_batch)
            # Pass the current parameters together with their gradients
            optimizers[key].update(networks[key].params, grads)

            loss = networks[key].loss(x_batch, t_batch)
            train_loss[key].append(loss)

        # terminal check
        if i % 100 == 0:
            print(f'=========iteration:{str(i)}===========')
            for key in optimizers.keys():
                loss = networks[key].loss(x_batch, t_batch)
                print(f'{key}:{str(loss)}')

    # Draw the graph
    fig = plt.figure()
    markers = {
        'SGD': 'o',
        'Momentum': 'x',
        'AdaGrad': 's',
        'Adam': 'D',
        'RMSprop': '+'
    }
    x = np.arange(max_iterations)
    for key in optimizers.keys():
        plt.plot(x, smooth_curve(train_loss[key]),
                 marker=markers[key], markevery=100, label=key)
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.ylim(0, 1)
    plt.legend()
    fig.savefig('../images/optimizer_compare.png')
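# The optimizers compared above differ only in how they turn a gradient into
# a parameter step. Minimal sketches of two of the update rules, in the same
# dict-of-parameters style as SGD -- illustrative, not necessarily the exact
# classes imported by these scripts:
import numpy as np

class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            # Accumulate a velocity term: v <- momentum * v - lr * grad
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]


class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            # Per-parameter step-size decay via accumulated squared gradients
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)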
"""
Overfitting: the model predicts the training data accurately, but its
accuracy drops on data it has not been trained on.

- When overfitting tends to occur:
  1) the training data set is small
  2) the model has too many parameters, giving it excessive
     representational power

- Ways to train without overfitting:
  1) regularization: L1/L2 regularization (weight decay)
  2) dropout
"""
from common.multi_layer_net import MultiLayerNet
from dataset.mnist import load_mnist

(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

neural_net = MultiLayerNet(input_size=784,
                           hidden_size_list=[100, 100, 100, 100, 100],
                           output_size=10,
                           weight_decay_lambda=0)  # weight_decay_lambda=0: no weight decay

# Reduce the amount of data to induce overfitting
X_train = X_train[:300]
Y_train = Y_train[:300]

epochs = 200           # 1 epoch: every training sample has been seen once
mini_batch_size = 100  # number of samples sent through one forward pass

# Record the train/test accuracy at every epoch during training
train_accuracies = []
test_accuracies = []

optimizer = Sgd(learning_rate=0.01)  # optimizer
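# How `weight_decay_lambda` enters the training objective: L2 regularization
# adds 0.5 * lambda * sum(W**2) to the loss, which contributes lambda * W to
# each weight gradient. A sketch under the assumption that `weights` is the
# list of the network's weight matrices (function names are illustrative):
import numpy as np

def l2_regularized_loss(base_loss, weights, weight_decay_lambda):
    # base_loss: data term (e.g. cross entropy); weights: list of W arrays
    penalty = 0.0
    for W in weights:
        penalty += 0.5 * weight_decay_lambda * np.sum(W ** 2)
    return base_loss + penalty

def l2_regularized_grad(dW, W, weight_decay_lambda):
    # Gradient of the penalty term with respect to W is simply lambda * W
    return dW + weight_decay_lambda * W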
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
# optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    # Build four 5-layer networks; each of the 4 hidden layers has
    # 100 neurons and uses the ReLU activation
    networks[key] = MultiLayerNet(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100],
        output_size=10)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet

dataset_dir = os.path.dirname(os.path.abspath('__file__'))
save_file = dataset_dir + "/mnist.pkl"
(x_train, t_train), (x_test, t_test) = load_mnist(dataset_dir, save_file,
                                                  normalize=True, one_hot_label=True)

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100],
                        output_size=10,
                        use_batchnorm=True)

# Gradient check: compare backprop gradients against numerical gradients
x_batch = x_train[:1]
t_batch = t_train[:1]

grad_backprop = network.gradient(x_batch, t_batch)
grad_numerical = network.numerical_gradient(x_batch, t_batch)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
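# The numerical gradient used for the check above is conventionally a central
# difference over every parameter element. A minimal sketch for a single
# array -- illustrative, not the network method itself:
import numpy as np

def numerical_gradient(f, x, h=1e-4):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp = x[idx]
        x[idx] = tmp + h
        fxh1 = f(x)           # f(x + h)
        x[idx] = tmp - h
        fxh2 = f(x)           # f(x - h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp          # restore the original value
        it.iternext()
    return grad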
def main():
    (train_x, train_label), _ = load_mnist()
    train_size = train_x.shape[0]
    batch_size = 128
    max_iterations = 2000

    # Shuffle the training set; each iteration draws one batch from it
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    # Use the SGD optimizer
    optimizer = SGD(lr=0.01)

    # Weight initialization schemes to compare: std 0.01, Xavier, He
    weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}

    # Build one five-layer fully connected network per initialization scheme
    networks = {}
    train_loss_list = {}
    for key, weight_init_type in weight_init_types.items():
        networks[key] = MultiLayerNet(input_size=784,
                                      hidden_size_list=[100, 100, 100, 100],
                                      output_size=10,
                                      weight_init_std=weight_init_type)
        train_loss_list[key] = []  # training loss recorded per scheme

    left = 0
    for i in range(max_iterations):
        # Fetch one batch
        batch_x, batch_label, left = get_batch(train_x, train_label,
                                               batch_mask, batch_size, left)

        # Compute gradients, update each network's parameters,
        # and record the loss after each update
        for key in weight_init_types.keys():
            grads = networks[key].gradient(batch_x, batch_label)
            optimizer.update(networks[key].params, grads)

            loss = networks[key].loss(batch_x, batch_label)
            train_loss_list[key].append(loss)

        # Print each scheme's current loss every 100 iterations
        if i % 100 == 0:
            print("=" * 15 + "iteration: " + str(i) + "=" * 15)
            for key in weight_init_types.keys():
                loss = train_loss_list[key][-1]
                print(key + ": " + str(loss))

    # Plot loss against iteration count
    markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
    x = np.arange(max_iterations)
    for key in weight_init_types.keys():
        plt.plot(x, smooth_curve(train_loss_list[key]),
                 marker=markers[key], markevery=100, label=key)
    plt.xlabel("iterations")
    plt.ylabel("loss")
    plt.ylim(0, 2.5)
    plt.legend()
    plt.show()
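# How the 'std=0.01' / 'Xavier' / 'He' flags translate into an actual standard
# deviation for a layer with n_in input units: Xavier uses sqrt(1/n_in) (suited
# to sigmoid/tanh), He uses sqrt(2/n_in) (suited to ReLU). A sketch of the
# convention -- the function name is illustrative, not from these scripts:
import numpy as np

def init_scale(weight_init_std, n_in):
    if weight_init_std == 'sigmoid':   # Xavier initialization
        return np.sqrt(1.0 / n_in)
    if weight_init_std == 'relu':      # He initialization
        return np.sqrt(2.0 / n_in)
    return float(weight_init_std)      # plain numeric std, e.g. 0.01

# e.g. W1 = init_scale('relu', 784) * np.random.randn(784, 100)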
iter_num = 2000
batch_size = 128
loss = {}

input_size = 784
hidden_size_list = [100, 100, 100, 100]
output_size = 10

# 1: Experiment setup ==========
weight_type = {"std": 0.01, "Xavier": "sigmoid", "He": "relu"}
optimizer = SGD(lr=0.01)

# Define the networks
network = {}
for key, weight in weight_type.items():
    network[key] = MultiLayerNet(input_size, hidden_size_list, output_size,
                                 weight_init_std=weight)
    loss[key] = []

# 2: Start training ==========
for i in range(iter_num):
    # Randomly pick 128 samples out of the 60000
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in weight_type.keys():
        # Compute the gradients and update the weights
        grads = network[key].gradient(x_batch, t_batch)
# Experiment setup
from dataset.mnist import load_mnist

weight_init_types = {
    'std=0.01': 0.01,
    'Xavier': 'sigmoid',  # initial weights: N(0, sqrt(1/n))
    'He': 'relu'          # initial weights: N(0, sqrt(2/n))
}

# Build one network to test per experiment setting
neural_nets = dict()
train_losses = dict()

for key, type in weight_init_types.items():
    neural_nets[key] = MultiLayerNet(input_size=784,
                                     hidden_size_list=[100, 100, 100, 100],
                                     output_size=10,
                                     weight_init_std=type)
    train_losses[key] = []  # empty list to store the losses during training

# Load the MNIST train/test data
(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

iterations = 2_000  # number of training steps
batch_size = 128    # number of samples per step (mini-batch)
optimizer = Sgd(learning_rate=0.01)  # parameter optimization algorithm
# Try a different optimizer as well
optimizer = Adam()

np.random.seed(109)
for i in range(iterations):  # repeat 2,000 times
    # Draw a random mini-batch sample
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Reduce the training data to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# Weight decay setting =======================
# weight_decay_lambda = 0  # no weight decay
weight_decay_lambda = 0.1
# ====================================================

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Reduce the training data to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# Weight decay setting
# weight_decay_lambda = 0  # no weight decay
weight_decay_lambda = 0.1

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0
        # (tail of a gradient method: add the weight-decay term to each dW)
        for i in range(1, len(self.hidden_size_list) + 2):
            grads['W%d' % i] = (self.layers['Affine%d' % i].dW +
                                self.weight_decay_lambda * self.layers['Affine%d' % i].W)
            grads['b%d' % i] = self.layers['Affine%d' % i].db
        return grads


if __name__ == '__main__':
    from dataset.mnist import load_mnist
    from common.optimizer import SGD, Momentum, AdaGrad, Adam
    from common.multi_layer_net import MultiLayerNet

    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

    multi_layer_net = MultiLayerNet(784, output_size=10,
                                    hidden_size_list=[100, 100, 100, 100])
    # MulLayerNet: a second implementation being compared, presumably defined elsewhere
    mul_layer_net = MulLayerNet(784, output_size=10,
                                hidden_size_list=[100, 100, 100, 100])

    max_iterations = 2000
    train_size = x_train.shape[0]
    batch_size = 128

    optimizer_mul = AdaGrad()
    optimizer_multi = AdaGrad()

    for i in range(max_iterations):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]
batch_size = 128
max_iterations = 2000

# 1. Experiment setup
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
# optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2. Start training
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
# Split off the first 40 rows as test data, the rest as training data
x_data_test = x[:40, :]
x_data_train = x[40:, :]
t_data_test = t[:40, :]
t_data_train = t[40:, :]

print("x_data_train.shape=", x_data_train.shape,
      "x_data_test.shape=", x_data_test.shape,
      "t_data_train.shape=", t_data_train.shape,
      "t_data_test.shape=", t_data_test.shape)

"""
Set up the neural network
"""
weight_decay_lambda = 0.1
network = MultiLayerNet(input_size=day_ago + 4,
                        hidden_size_list=[100],
                        output_size=len(t[0]),
                        weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)

# Parameters
iters_num = 4000                # number of gradient-descent updates (is more always better?)
train_size = len(t_data_train)  # number of training samples; remember the test data was split off
batch_size = 300                # mini-batch size (chosen somewhat arbitrarily)
learning_rate = 0.01

train_loss_list = []
train_acc_list = []
test_acc_list = []