def train(train_x, train_label, weight_init_std, learning_rate, max_epoch, batch_size):
    """
    Build one network with Batch Normalization layers and one without, to test the effect of BN.
    :param train_x:
    :param train_label:
    :param weight_init_std: weight initialization scheme
    :param learning_rate:
    :param max_epoch: number of epochs to run
    :param batch_size:
    :return:
    """
    bn_network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                     output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10, weight_init_std=weight_init_std, use_batchnorm=False)
    optimizer = SGD(learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    train_size = train_x.shape[0]
    iter_per_epoch = max(train_size / batch_size, 1)
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    epoch_cnt = 0
    left = 0
    iteration = int(iter_per_epoch * max_epoch)
    for i in range(iteration):
        # Fetch one batch of data and update the `left` cursor
        batch_x, batch_label, left = get_batch(train_x, train_label, batch_mask, batch_size, left)
        # Update both networks with the same batch
        for _network in (bn_network, network):
            grads = _network.gradient(batch_x, batch_label)
            optimizer.update(_network.params, grads)
        # Record the accuracy on the training set once per epoch
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(train_x, train_label)
            bn_train_acc = bn_network.accuracy(train_x, train_label)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))
            epoch_cnt += 1
    return train_acc_list, bn_train_acc_list
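# `get_batch` is called above but not defined in this snippet. A minimal sketch of what it
# presumably does (slice the shuffled index array sequentially, reshuffling and wrapping
# around once the data is exhausted); the exact helper in the original project may differ.
def get_batch(train_x, train_label, batch_mask, batch_size, left):
    """Return the next mini-batch and the updated cursor position."""
    if left + batch_size > len(batch_mask):
        # Wrap around: reshuffle the indices and start again from the beginning
        np.random.shuffle(batch_mask)
        left = 0
    idx = batch_mask[left:left + batch_size]
    return train_x[idx], train_label[idx], left + batch_size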
def __train(weight_init_std):
    bn_network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                     output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10, weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
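# `__train` above relies on module-level globals (x_train, t_train, train_size, batch_size,
# learning_rate, max_epochs). A sketch of the driver that usually accompanies it, sweeping a
# few weight scales and plotting both accuracy curves; the candidate values and plot labels
# are examples, and np / plt / load_mnist are assumed to be imported as elsewhere in this file.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
x_train, t_train = x_train[:1000], t_train[:1000]   # shrink the data to speed things up
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.01
max_epochs = 20

weight_scale_list = np.logspace(0, -4, num=4)        # candidate weight_init_std values
for w in weight_scale_list:
    acc_list, bn_acc_list = __train(w)
    x = np.arange(len(acc_list))
    plt.plot(x, bn_acc_list, label='with BN (std=%.4f)' % w)
    plt.plot(x, acc_list, '--', label='without BN (std=%.4f)' % w)
plt.xlabel('epochs')
plt.ylabel('train accuracy')
plt.legend(loc='lower right')
plt.show()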
def main():
    (train_x, train_label), (test_x, test_label) = load_mnist()
    # Reduce the amount of training data to reproduce overfitting
    train_x = train_x[: 300]
    train_label = train_label[: 300]

    # Whether to use Dropout, and the dropout ratio ========================
    use_dropout = False
    # use_dropout = True
    dropout_ratio = 0.2

    network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10, use_dropout=use_dropout, dropout_ratio=dropout_ratio)
    trainer = Trainer(network, train_x, train_label, test_x, test_label,
                      epochs=301, mini_batch_size=100,
                      optimizer='sgd', optimizer_param={'lr': 0.01}, verbose=True)
    trainer.train()

    train_acc_list = trainer.train_acc_list
    test_acc_list = trainer.test_acc_list
    draw(train_acc_list, test_acc_list)
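# `draw` is not defined in this snippet. A minimal sketch of the plotting helper it refers
# to, mirroring the accuracy plot used in the other scripts in this file (np and plt are
# assumed to be imported):
def draw(train_acc_list, test_acc_list):
    x = np.arange(len(train_acc_list))
    plt.plot(x, train_acc_list, marker='o', label='train', markevery=10)
    plt.plot(x, test_acc_list, marker='s', label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()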
def main():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
    x_train = x_train[:300]
    t_train = t_train[:300]

    max_epochs = 201
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01

    use_dropout = True
    dropout_ratio = 0.2

    train_acc_list = []
    test_acc_list = []

    network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10, use_dropout=use_dropout, dropout_ration=dropout_ratio)
    optimizer = SGD(lr=learning_rate)

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc)
                  + ", test acc:" + str(test_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    markers = {'train': 'o', 'test': 's'}
    x = np.arange(max_epochs)
    plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
    plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()
def __train(weight_init_std):
    bn_net = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100],
                                 output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    net = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100],
                              output_size=10, weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _net in (bn_net, net):
            grads = _net.gradient(x_batch, t_batch)
            optimizer.update(_net.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = net.accuracy(x_train, t_train)
            bn_train_acc = bn_net.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("EPOCH: {0} | NET_ACC({1}) - BN_NET_ACC({2})".format(
                epoch_cnt, train_acc, bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epoches:
                break

    return train_acc_list, bn_train_acc_list
def __train(lr, weight_decay, epochs=50):
    network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10, weight_decay_lambda=weight_decay)
    trainer = Trainer(network, x_train, t_train, x_val, t_val,
                      epochs=epochs, mini_batch_size=100,
                      optimizer='sgd', optimizer_param={'lr': lr}, verbose=False)
    trainer.train()
    return trainer.test_acc_list, trainer.train_acc_list
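# `__train` above is the per-trial routine of a hyper-parameter search: it evaluates one
# (lr, weight_decay) pair on the validation split (x_val, t_val). A sketch of the random
# search that typically drives it; the number of trials and the log-uniform sampling ranges
# are example values, and np is assumed to be imported.
optimization_trial = 100
results_val = {}
results_train = {}
for _ in range(optimization_trial):
    # Sample the hyper-parameters log-uniformly from the assumed search ranges
    weight_decay = 10 ** np.random.uniform(-8, -4)
    lr = 10 ** np.random.uniform(-6, -2)

    val_acc_list, train_acc_list = __train(lr, weight_decay)
    print("val acc:" + str(val_acc_list[-1]) + " | lr:" + str(lr)
          + ", weight decay:" + str(weight_decay))
    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
    results_val[key] = val_acc_list
    results_train[key] = train_acc_list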
def __init__(self, idx, x_train, t_train, x_test, t_test, optimizer, weight_decay_lambda=0.0):
    self.idx = idx
    # self.layer = MultiLayerNet(input_size=784, hidden_size_list=[100], output_size=10,
    #                            weight_decay_lambda=weight_decay_lambda)
    self.layer = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[50, 50, 50], output_size=10,
        weight_decay_lambda=weight_decay_lambda, use_dropout=False,
        dropout_ration=0.0, use_batchnorm=False)
    self.optimizer = optimizer
    self.rec_param = np.array([{} for i in range(self.n)])

    self.x_train = x_train
    self.t_train = t_train
    self.x_test = x_test
    self.t_test = t_test

    self.z_vec = np.zeros(self.n)
    self.z_vec[idx] = 1
    self.rec_z = np.zeros((self.n, self.n))

    self.AdjG = np.zeros(self.n)  # all entries are required to be 0 or 1
    self.WeiG = np.zeros(self.n)
    if np.all(self.WeiG_init == 0):
        self.makeWeiGraph(self.AdjG_init)
    else:
        self.WeiG = self.WeiG_init[self.idx]
        self.makeAdjGraph()

    self.train_loss = 0
    self.train_acc = 0
    self.test_acc = 0
def __train(lr, weight_decay_lambda, epoch_num=120):
    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100] * 5, output_size=10,
                                  activation='ReLu', weight_init_std='ReLu',
                                  weight_decay_lambda=weight_decay_lambda,
                                  use_BatchNormalization=False, use_weight_decay=True)
    # Note: the set passed below is the validation set, not the test set
    trainer = Trainer(network=network, x_train=x_train, t_train=t_train,
                      x_test=x_val, t_test=t_val, epochs=epoch_num,
                      mini_batch_num=100, optimizer='SGD', optimizer_params={'lr': lr})
    trainer.train()
    # Return the training-set and validation-set accuracies of this trial
    return trainer.train_acc_list, trainer.test_acc_list
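# The validation split (x_val, t_val) used above is assumed to be carved out of the training
# data before the hyper-parameter search. A sketch of how such a split is commonly prepared
# (shuffle, then take the first 20% as validation); np and load_mnist are assumed imports.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
validation_rate = 0.20
validation_num = int(x_train.shape[0] * validation_rate)

permutation = np.random.permutation(x_train.shape[0])
x_train, t_train = x_train[permutation], t_train[permutation]

x_val, t_val = x_train[:validation_num], t_train[:validation_num]
x_train, t_train = x_train[validation_num:], t_train[validation_num:]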
def main():
    # Load the data
    (train_x, train_label), _ = load_mnist(one_hot_label=True)

    # Build a network with Batch Normalization layers
    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100],
                                  output_size=10, use_batchnorm=True)

    # Use just one training sample for the check
    batch_x = train_x[:1]
    batch_label = train_label[:1]

    # Compute the gradients by backpropagation and by numerical differentiation
    grad_backprop = network.gradient(batch_x, batch_label)
    grad_numerical = network.numerical_gradient(batch_x, batch_label)

    # Compare the results of the two methods
    for key in grad_numerical.keys():
        diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
        print(key + ":" + str(diff))
def __trainning(weight_init_std, index):
    epoch_num = 0
    acc_train_list = []
    acc_train_BN_list = []

    input_size = 784
    hidden_size_list = [100] * 5
    output_size = 10
    activation = 'ReLu'
    weight_decay_lambda = 1

    network = MultiLayerNetExtend(input_size=input_size, hidden_size_list=hidden_size_list,
                                  output_size=output_size, activation=activation,
                                  weight_init_std=weight_init_std,
                                  weight_decay_lambda=weight_decay_lambda,
                                  use_BatchNormalization=False, use_weight_decay=False)
    network_BN = MultiLayerNetExtend(input_size=784, hidden_size_list=hidden_size_list,
                                     output_size=output_size, activation=activation,
                                     weight_init_std=weight_init_std,
                                     weight_decay_lambda=weight_decay_lambda,
                                     use_BatchNormalization=True, use_weight_decay=False)

    # Training proceeds in the following steps:
    # 1. Randomly pick batch_size samples from the training set
    # 2. Run forward and backward passes to obtain the gradients
    # 3. Update the parameters with the gradients
    # 4. Repeat steps 1-3 until the loop ends
    for i in range(iter_num):
        print('W' + str(index) + ': iteration ' + str(i))
        # Pick a mini-batch
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Compute the gradients on the mini-batch (not the full training set) and update
        for network_ in (network, network_BN):
            grads = network_.gradient(x_batch, t_batch, use_weight_decay=False)
            # optimizer_SGD.update(params=network_.params, grads=grads)
            if network_ == network:
                optimizer.update(params=network_.params, grads=grads)
            else:
                optimizer_BN.update(params=network_.params, grads=grads)

        # Once per epoch, record the accuracy of both networks
        if i % iter_num_per_epch == 0:
            network_acc = network.accuracy(x_batch, t_batch)
            network_BN_acc = network_BN.accuracy(x_batch, t_batch)
            acc_train_list.append(network_acc)
            acc_train_BN_list.append(network_BN_acc)
            epoch_num += 1
            if epoch_num >= epoch_cnt_max:
                break

    return acc_train_list, acc_train_BN_list
# Draw Figure 6-18 from p.213.
# Compare the learning speed of a network with Batch Normalization and one without it.
import numpy as np
import matplotlib.pyplot as plt

from ch06.e02_sgd import Sgd
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.optimizer import Momentum, AdaGrad, Adam
from dataset.mnist import load_mnist

# Network that uses batch normalization
bn_neural_net = MultiLayerNetExtend(input_size=784,
                                    hidden_size_list=[100, 100, 100, 100, 100],
                                    output_size=10,
                                    weight_init_std=0.01,
                                    use_batchnorm=True)

# Network that does not use batch normalization
neural_net = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100, 100, 100],
                                 output_size=10,
                                 weight_init_std=0.01,
                                 use_batchnorm=False)

# Train on mini-batches 20 times, recording the accuracy of both networks
# -> plot the result
(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

# Reduce the training data to shorten the training time
X_train = X_train[:1000]  # use only 1,000 samples
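# The snippet stops right after truncating X_train. A sketch of the comparison loop the
# comments above describe (20 mini-batch updates, recording the accuracy of both networks);
# Y_train is truncated here to match X_train, and the Sgd constructor argument name is an
# assumption.
Y_train = Y_train[:1000]

iterations = 20
batch_size = 128
optimizer = Sgd(learning_rate=0.01)

bn_acc_list = []
acc_list = []
for i in range(iterations):
    # Pick a random mini-batch
    mask = np.random.choice(X_train.shape[0], batch_size)
    x_batch = X_train[mask]
    y_batch = Y_train[mask]

    # Update both networks with the same batch
    for net in (bn_neural_net, neural_net):
        gradients = net.gradient(x_batch, y_batch)
        optimizer.update(net.params, gradients)

    # Record the accuracy on the (reduced) training set
    bn_acc_list.append(bn_neural_net.accuracy(X_train, Y_train))
    acc_list.append(neural_net.accuracy(X_train, Y_train))

# Plot the two learning curves
x = np.arange(iterations)
plt.plot(x, bn_acc_list, label='with Batch Normalization')
plt.plot(x, acc_list, label='without Batch Normalization')
plt.xlabel('iterations')
plt.ylabel('accuracy')
plt.legend()
plt.show()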
import numpy as np

from ch06.ex02_sgd import Sgd
from common.multi_layer_net import MultiLayerNet
from common.multi_layer_net_extend import MultiLayerNetExtend
from dataset.mnist import load_mnist

np.random.seed(110)

# Prepare the data
(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

# Create the network
wd_rate = 0
neural_net = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100, 100, 100],
                                 output_size=10,
                                 weight_decay_lambda=wd_rate,
                                 use_dropout=True,
                                 dropout_ration=0.15)
# weight_decay_lambda: constant used for weight decay

# Limit the training data to 300 samples -> to provoke overfitting
X_train = X_train[:300]
Y_train = Y_train[:300]
X_test = X_test[:300]  # to shorten the experiment
Y_test = Y_test[:300]

epochs = 200  # 1 epoch: every training sample has been seen once
mini_batch_size = 100  # number of samples sent through one forward pass
train_size = X_train.shape[0]
iter_per_epoch = int(max(train_size / mini_batch_size, 1))

# Record the train/test accuracy at each epoch while training
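# A sketch of the training loop implied by the comment above: run mini-batch SGD and record
# the train/test accuracy once per epoch. The Sgd constructor argument name is an assumption.
optimizer = Sgd(learning_rate=0.01)
train_accuracies = []
test_accuracies = []

for epoch in range(epochs):
    # One epoch = iter_per_epoch mini-batch updates
    for _ in range(iter_per_epoch):
        mask = np.random.choice(train_size, mini_batch_size)
        gradients = neural_net.gradient(X_train[mask], Y_train[mask])
        optimizer.update(neural_net.params, gradients)

    # Record the accuracies at the end of the epoch
    train_acc = neural_net.accuracy(X_train, Y_train)
    test_acc = neural_net.accuracy(X_test, Y_test)
    train_accuracies.append(train_acc)
    test_accuracies.append(test_acc)
    print("epoch:" + str(epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc))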
import numpy as np

from common.multi_layer_net_extend import MultiLayerNetExtend
from dataset.mnist import load_mnist

# Demo of masking with a boolean array (x is assumed to be a random array here;
# it is not defined in the original snippet)
x = np.random.rand(10)
print(x)
mask = x > 0.5
print(mask)
print(x * mask)  # False acts as 0 (x * False = 0); True acts as 1, so x keeps its original value

np.random.seed(110)

# Prepare the data
(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

# Create the network
dropout_ratio = 0.1
neural_net = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100, 100, 100],
                                 output_size=10,
                                 use_dropout=True,
                                 dropout_ration=dropout_ratio)

X_train = X_train[:500]
Y_train = Y_train[:500]
X_test = X_test[:500]
Y_test = Y_test[:500]

# Initial idea:
# implement dropout on the ReLU output (the layer order is Affine > Batch > ReLU > Dropout)

train_size = X_train.shape[0]  # the original used X_test.shape[0]; both are 500 after truncation
epochs = 200
mini_batch_size = 100
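# The "initial idea" comment above describes dropout as applying a random mask to the ReLU
# output, exactly like the masking demo at the top of this snippet. A minimal sketch of such
# a Dropout layer, following the common convention of scaling activations at test time:
class Dropout:
    def __init__(self, dropout_ratio=0.1):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # Keep a unit when its random draw exceeds the dropout ratio
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        # At test time, scale activations by the keep probability instead of masking
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # Gradients flow only through the units that were kept in the forward pass
        return dout * self.mask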
mask_train = np.random.choice(x_train.shape[0], 1000)
mask_test = np.random.choice(x_test.shape[0], 1000)
x_train = x_train[mask_train]
t_train = t_train[mask_train]
x_test = x_test[mask_test]
t_test = t_test[mask_test]

train_size = x_train.shape[0]
batch_size = 128
optimizer = SGD(lr=0.01)

network_st = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100], output_size=10)
network_bn = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100],
                                 output_size=10, use_batchnorm=True)

loss_list = {
    "network_st": [],
    "network_bn": [],
    "network_st_test": [],
    "network_bn_test": []
}

for i in range(iter_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    x_test_batch = x_test[batch_mask]
    t_test_batch = t_test[batch_mask]
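    # The loop body is cut off above; a sketch of the updates and bookkeeping that the
    # loss_list dict suggests. iter_num, the data arrays, SGD, MultiLayerNet and
    # MultiLayerNetExtend are assumed to be defined/imported upstream of this fragment.
    for net, key in ((network_st, "network_st"), (network_bn, "network_bn")):
        # Update each network on the same training batch
        grads = net.gradient(x_batch, t_batch)
        optimizer.update(net.params, grads)
        # Record the training-batch and test-batch losses
        loss_list[key].append(net.loss(x_batch, t_batch))
        loss_list[key + "_test"].append(net.loss(x_test_batch, t_test_batch))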
# Just in case, check the dimensions of the data
print("x_train, t_train dimensions")
print(x_train.shape, t_train.shape)
print("x_test, t_test dimensions")
print(x_test.shape, t_test.shape)

# Reduce the training data to speed things up
#x_train = x_train[:56678]
#t_train = t_train[:56678]

# Choose the optimizer
optimizers = Adam()

network = MultiLayerNetExtend(input_size=122, hidden_size_list=[1000, 1000],
                              output_size=2, activation='relu', weight_init_std=0.01,
                              weight_decay_lambda=0, use_dropout=True,
                              dropout_ration=0.5, use_batchnorm=False)

# Hyper-parameters
iters_num = 73554  # number of iterations
batch_size = 512
learning_rate = 0.00001  # learning rate
train_size = x_train.shape[0]  # number of rows in x_train

# Lists for storing the results
train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch_float = max(train_size / batch_size, 1)  # iterations needed for one epoch
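# A sketch of the training loop that the lists above are meant to be filled by. The
# loss/accuracy method names follow MultiLayerNetExtend; note that `learning_rate` defined
# above is not consumed here, since Adam() was constructed with its default learning rate.
iter_per_epoch = int(iter_per_epoch_float)
for i in range(iters_num):
    # Pick a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients and update the parameters
    grads = network.gradient(x_batch, t_batch)
    optimizers.update(network.params, grads)

    # Record the training loss every iteration
    train_loss_list.append(network.loss(x_batch, t_batch))

    # Record the train/test accuracy once per epoch
    if i % iter_per_epoch == 0:
        train_acc_list.append(network.accuracy(x_train, t_train))
        test_acc_list.append(network.accuracy(x_test, t_test))
        print("train acc: " + str(train_acc_list[-1]) + ", test acc: " + str(test_acc_list[-1]))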
def __init__(self, idx, x_train, t_train, x_test, t_test, optimizer, weight_decay_lambda=0.0): """各Agentの初期状態変数 Args: idx : Agentのインデックス layer : Agent内のニューラルネットワークの層 optimizer : 最適化を行うアルゴリズムの選択 rec_param : 隣接するエージェントから受け取るパラメータ z_vec : 左の固有ベクトル rec_z : 隣接するエージェントから受け取る左固有ベクトル AdjG : 隣接行列?? WeiG : 重み行列?? """ self.idx = idx # self.layer = MultiLayerNet(input_size=784, hidden_size_list=[100], output_size=10, # weight_decay_lambda=weight_decay_lambda) self.layer = MultiLayerNetExtend( input_size=784, hidden_size_list=[ 500, 400, 300, 300, 200, 200, 100, 100, 100, 100, 100, 50, 50 ], output_size=10, weight_decay_lambda=weight_decay_lambda, use_dropout=True, dropout_ration=0.3, use_batchnorm=True) #dropout_ratio=0.03, use_batchnorm=True, hidden_size_list=[500,400,300,300,200,200,100,100,100,50,50,50] weightdecay=0.01 → 0.9428 #hidden_size_list=[500,400,300,300,200,200,100,100,100,50,50,50], output_size=10,weight_decay_lambda=weight_decay_lambda,use_dropout=True, dropout_ration=0.05, use_batchnorm=True 一番いい #hidden_size_list=[100,100,100,100,100] weightdecay=0.3, dropout_ration=0.3 self.optimizer = optimizer self.rec_param = np.array([{} for i in range(self.n)]) self.send_param = np.array([{} for i in range(self.n)]) #Initialize self.rec_param[self.idx] = self.layer.params.copy() self.send_param[self.idx] = self.layer.params.copy() self.x_train = x_train self.t_train = t_train self.x_test = x_test self.t_test = t_test self.AdjG = np.zeros(self.n) #require to be {0 or 1} to all arguments self.WeiG = np.zeros(self.n) if np.all(self.WeiG_init == 0): self.makeWeiGraph(self.AdjG_init) else: self.WeiG = self.WeiG_init[self.idx] self.makeAdjGraph() self.train_loss = 0 self.train_acc = 0 self.test_acc = 0
from ch06.ex02_sgd import Sgd
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.optimizer import Momentum
from dataset.mnist import load_mnist
import numpy as np
import matplotlib.pyplot as plt

# Reproduce the figure on p.213:
# compare the learning speed of a network with Batch Normalization and one without it.
np.random.seed(110)

# Network that uses batch normalization
bn_neural_net = MultiLayerNetExtend(
    input_size=784,
    hidden_size_list=[100, 100, 100, 100, 100],  # five hidden layers of 100 neurons each
    output_size=10,
    weight_init_std=0.3,  # std used to initialize [W1, W2, W3, W4, W5]; the book's figure uses std = 0.01
    use_batchnorm=True)

# Network that does not use batch normalization
neural_net = MultiLayerNetExtend(
    input_size=784,
    hidden_size_list=[100, 100, 100, 100, 100],  # five hidden layers of 100 neurons each
    output_size=10,
    weight_init_std=0.3,  # std used to initialize [W1, W2, W3, W4, W5]; the book's figure uses std = 0.01
    use_batchnorm=False)

# Load the MNIST data
(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

# Reduce the amount of training data to shorten the training time
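# Continuation sketch: truncate the data as the comment above says, then run a short
# mini-batch comparison and record both accuracy curves. The iteration count, batch size
# and the Sgd constructor argument name are assumptions.
X_train, Y_train = X_train[:1000], Y_train[:1000]

sgd = Sgd(learning_rate=0.01)
iterations = 100
batch_size = 100
bn_accs, accs = [], []
for _ in range(iterations):
    mask = np.random.choice(X_train.shape[0], batch_size)
    # Update both networks with the same mini-batch
    for net in (bn_neural_net, neural_net):
        sgd.update(net.params, net.gradient(X_train[mask], Y_train[mask]))
    bn_accs.append(bn_neural_net.accuracy(X_train, Y_train))
    accs.append(neural_net.accuracy(X_train, Y_train))

plt.plot(bn_accs, label='with BN')
plt.plot(accs, label='without BN')
plt.xlabel('iterations')
plt.ylabel('accuracy')
plt.legend()
plt.show()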
use_dropout = True
dropout_ratio = np.linspace(0, 0.2, 5)
weight_decay = np.geomspace(0.001, 0.2, num=5)
learning_rate = np.geomspace(0.001, 0.2, num=5)
best_hp = []

# Training begins
with progressbar.ProgressBar(max_value=125) as bar:
    for i in range(0, len(dropout_ratio)):
        for j in range(0, len(weight_decay)):
            for k in range(0, len(learning_rate)):
                network = MultiLayerNetExtend(
                    input_size=784, hidden_size_list=[100, 100, 100, 100],
                    output_size=10, activation='sigmoid', weight_init_std='xavier',
                    weight_decay_lambda=weight_decay[j],
                    use_dropout=use_dropout, dropout_ration=dropout_ratio[i])
                trainer = Trainer(network, x_train, t_train, x_test, t_test,
                                  epochs=5, mini_batch_size=500,
                                  optimizer='adam', optimizer_param={'lr': learning_rate[k]},
                                  verbose=False)
                trainer.train()
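                # The grid search stops right after training above; a sketch of the
                # bookkeeping that best_hp and the progress bar suggest (storing the final
                # test accuracy together with the hyper-parameter triple is an assumption):
                final_acc = trainer.test_acc_list[-1]
                best_hp.append((final_acc, dropout_ratio[i], weight_decay[j], learning_rate[k]))
                bar.update(i * 25 + j * 5 + k + 1)

# After all 125 runs, sort by accuracy to find the best combination
best_hp.sort(reverse=True)
print("best: acc=%.4f, dropout=%.3f, weight_decay=%.4f, lr=%.4f" % best_hp[0])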
from common.trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Reduce the amount of training data to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# Whether to use Dropout, and the dropout ratio ========================
use_dropout = True  # set to False to disable Dropout
dropout_ratio = 0.2
# ====================================================

network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                              output_size=10, use_dropout=use_dropout, dropout_ration=dropout_ratio)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=301, mini_batch_size=100,
                  optimizer='sgd', optimizer_param={'lr': 0.01}, verbose=True)
trainer.train()

train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list
output_size = 10
activation = 'ReLu'  # activation function used between layers
weight_init_std = 'ReLu'  # initialize the weight distribution according to the activation function
weight_decay_lambda = 1
epoch_num = 0
train_acc_list = []
test_acc_list = []
train_acc_weight_decay_list = []
test_acc_weight_decay_list = []

# Build the network objects
network = MultiLayerNetExtend(input_size=input_size, hidden_size_list=hidden_size_list,
                              output_size=output_size, activation=activation,
                              weight_init_std=weight_init_std,
                              weight_decay_lambda=weight_decay_lambda,
                              use_BatchNormalization=False, use_weight_decay=False)
network_weight_decay = MultiLayerNetExtend(
    input_size=input_size, hidden_size_list=hidden_size_list,
    output_size=output_size, activation=activation,
    weight_init_std=weight_init_std, weight_decay_lambda=weight_decay_lambda,
    use_BatchNormalization=False, use_weight_decay=True)

# Training proceeds in the following steps:
# 1. Randomly pick batch_size samples from the training set
# 2. Run forward and backward passes to obtain the gradients
# 3. Update the parameters with the gradients
# 4. Repeat steps 1-3 until the loop ends
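# A sketch of the loop those steps describe, comparing the plain network with the
# weight-decay network. iter_num, iter_num_per_epoch, epoch_cnt_max, the optimizer and the
# data arrays are assumed to be defined elsewhere in this file, and the gradient() signature
# with a use_weight_decay flag mirrors the batch-normalization experiment above.
for i in range(iter_num):
    # 1. Pick a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # 2./3. Compute the gradients and update both networks
    for network_, use_wd in ((network, False), (network_weight_decay, True)):
        grads = network_.gradient(x_batch, t_batch, use_weight_decay=use_wd)
        optimizer.update(params=network_.params, grads=grads)

    # Record the accuracy of both networks once per epoch
    if i % iter_num_per_epoch == 0:
        train_acc_list.append(network.accuracy(x_train, t_train))
        test_acc_list.append(network.accuracy(x_test, t_test))
        train_acc_weight_decay_list.append(network_weight_decay.accuracy(x_train, t_train))
        test_acc_weight_decay_list.append(network_weight_decay.accuracy(x_test, t_test))
        epoch_num += 1
        if epoch_num >= epoch_cnt_max:
            break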