def __train(weight_init_std):
    # Train two otherwise identical networks, with and without batch
    # normalization, and record the training accuracy of each once per epoch.
    bn_network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                               output_size=10, weight_init_std=weight_init_std,
                               use_batchnorm=True)
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                            output_size=10, weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
def __train(lr, weight_decay, epochs=50):
    # One hyperparameter trial: train for a fixed number of epochs and return
    # the accuracy histories. Note that x_val/t_val are passed in Trainer's
    # test-set slot, so test_acc_list actually holds validation accuracy.
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10, weight_decay_lambda=weight_decay)
    trainer = Trainer(network, x_train, t_train, x_val, t_val,
                      epochs=epochs, mini_batch_size=100,
                      optimizer='sgd', optimizer_param={'lr': lr}, verbose=False)
    trainer.train()

    return trainer.test_acc_list, trainer.train_acc_list
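# A minimal random-search driver for __train above (a sketch, not part of the
# original snippet): lr and weight_decay are sampled log-uniformly. The trial
# count and sampling ranges are assumptions, and numpy is assumed imported as np.
optimization_trial = 100
results_val = {}
results_train = {}
for _ in range(optimization_trial):
    weight_decay = 10 ** np.random.uniform(-8, -4)
    lr = 10 ** np.random.uniform(-6, -2)

    val_acc_list, train_acc_list = __train(lr, weight_decay)
    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
    results_val[key] = val_acc_list
    results_train[key] = train_acc_list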
def __train(lr, weight_decay, epochs=50, verbose=False):  # keep the epoch count small
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10, weight_decay_lambda=weight_decay)
    optimizer = SGD(lr)

    iter_per_epoch = max(train_size / mini_batch_size, 1)
    current_iter = 0
    current_epoch = 0

    train_loss_list = []
    train_acc_list = []
    val_acc_list = []

    for i in range(int(epochs * iter_per_epoch)):
        batch_mask = np.random.choice(train_size, mini_batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)
        if verbose:
            print("train loss:" + str(loss))

        if current_iter % iter_per_epoch == 0:
            current_epoch += 1
            train_acc = network.accuracy(x_train, t_train)
            val_acc = network.accuracy(x_val, t_val)
            train_acc_list.append(train_acc)
            val_acc_list.append(val_acc)
            if verbose:
                print("=== epoch:" + str(current_epoch) + ", train acc:" + str(train_acc) +
                      ", validation acc:" + str(val_acc) + " ===")

        current_iter += 1

    return val_acc_list, train_acc_list
def train(weight_init_std, x_train, t_train, max_epochs):
    batch_norm_network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                       output_size=10, weight_init_std=weight_init_std,
                                       use_batchnorm=True)
    no_batch_norm_network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                          output_size=10, weight_init_std=weight_init_std)

    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01
    max_iters_times = 1000000000
    iter_per_epoch = max(int(train_size / batch_size), 1)
    optimizer = SGD(lr=learning_rate)

    bn_train_acc_list = []
    no_bn_train_acc_list = []
    epoch_cnt = 0

    for i in range(max_iters_times):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for network in (batch_norm_network, no_batch_norm_network):
            grads = network.gradient(x_batch, t_batch)
            optimizer.update(network.params, grads)

        if i % iter_per_epoch == 0:
            bn_train_acc = batch_norm_network.accuracy(x_train, t_train)
            no_bn_train_acc = no_batch_norm_network.accuracy(x_train, t_train)
            bn_train_acc_list.append(bn_train_acc)
            no_bn_train_acc_list.append(no_bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(no_bn_train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return no_bn_train_acc_list, bn_train_acc_list
import numpy as np

# Module paths assumed to match the other scripts in this collection.
from multi_layer_net import MultiLayerNet
from optimizer import SGD

name = input('Input the simulation conditions : ')

fname_train = 'img_100000_new.csv'
fname_test = 'rindex_100000_new.csv'
data_train = np.loadtxt(fname_train, delimiter=',')
data_test = np.loadtxt(fname_test, delimiter=',')

# Shuffle, then hold out the last `validation_size` rows as the test split.
validation_split = 0.2  # assumption: this value is not defined in the original snippet
indices = np.arange(data_train.shape[0])
np.random.shuffle(indices)
validation_size = int(data_train.shape[0] * validation_split)
x_train, x_test = (data_train[indices[:-validation_size], :],
                   data_train[indices[-validation_size:], :])
t_train, t_test = (data_test[indices[:-validation_size], :],
                   data_test[indices[-validation_size:], :])

hid_size = [200]
layer_num = len(hid_size) + 1
network = MultiLayerNet(input_size=x_train.shape[1], hidden_size_list=hid_size,
                        output_size=t_train.shape[1], weight_init_std=0.01)
optimizer = SGD(0.001)

iters_num = 100000
train_size = x_train.shape[0]
test_size = x_test.shape[0]
batch_size = 100

train_loss = []
#train_acc = []
test_loss = []
#test_acc = []

iter_per_epoch = max(train_size / batch_size, 1)

values = {}
for key in network.indexes:
    values[key] = []

for i in range(iters_num):
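    # Loop body: a minimal sketch, assuming the same mini-batch SGD step as the
    # other training scripts here. How `values` is updated from network.indexes
    # each iteration is not shown in the original, so it is left out.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    train_loss.append(network.loss(x_batch, t_batch))
    if i % iter_per_epoch == 0:
        test_loss.append(network.loss(x_test, t_test))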
# 0: Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
optimizer = SGD(lr=0.01)

networks = {}
train_loss = {}
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                                  output_size=10, weight_init_std=weight_type)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in weight_init_types.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizer.update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
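# 3: Plot the results ==========
# A sketch, not part of the original snippet: compare the loss curves of the
# three initializations. Assumes matplotlib.pyplot is imported as plt.
markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(max_iterations)
for key in weight_init_types.keys():
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 2.5)
plt.legend()
plt.show()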
from multi_layer_net import MultiLayerNet
from optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Shrink the training set to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# weight decay setting =======================
#weight_decay_lambda = 0  # when weight decay is not used
weight_decay_lambda = 0.1
# ====================================================

network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10, weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)  # update the parameters with SGD, learning rate 0.01

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
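    # Loop body: a sketch, assuming the same pattern as the batch-norm training
    # functions above — one SGD step per mini-batch, per-epoch accuracy
    # tracking, and a cutoff at max_epochs.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    train_loss_list.append(network.loss(x_batch, t_batch))

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break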
max_iterations = 2000

# 1: Experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
#optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in optimizers.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)  # record the loss so the optimizers can be compared
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from multi_layer_net import MultiLayerNet
from trainer import Trainer

if __name__ == "__main__":
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

    # Shrink the training set to induce overfitting
    x_train = x_train[:300]
    t_train = t_train[:300]

    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10, dropout_ration=0.2)
    trainer = Trainer(network, x_train, t_train, x_test, t_test,
                      epochs=301, mini_batch_size=100,
                      optimizer='sgd', optimizer_param={'lr': 0.01}, verbose=True)

    # Train once without dropout and once with it, on the same network object.
    for flag in [False, True]:
        network.set_dropout(flag)
        trainer.set_network(network)
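        # Assumed continuation (the original snippet ends here): run the
        # training for this dropout setting. trainer.train() matches the
        # Trainer usage in the weight-decay script above.
        trainer.train()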
import numpy as np

# Remaining module paths assumed to match the other scripts in this collection.
from mnist import load_mnist
from multi_layer_net import MultiLayerNet
from optimizers import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Shrink the training set to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# Dropout setting =======================
use_dropout = True
dropout_ratio = 0.2
# ====================================================

network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10, use_dropout=use_dropout, dropout_ratio=dropout_ratio)
optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0
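# Training loop: a sketch, not part of the original snippet, mirroring the
# weight-decay script above — one SGD step per mini-batch, accuracy once per
# epoch, stop after max_epochs.
for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    train_loss_list.append(network.loss(x_batch, t_batch))

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break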
""" Created on Sat Feb 20 10:16:33 2021 @author: leyuan """ import numpy as np from multi_layer_net import MultiLayerNet # 生成数据 x = np.random.randn(1, 784) t = np.random.randint(0, 10, (1, )) # 创建网络 network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100], output_size=10, use_batchnorm=True) grad_numerical = network.numerical_gradient(x, t) grad_backprop = network.gradient(x, t) for key in grad_numerical.keys(): diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key])) print(key + ':' + str(diff)) # break symmetry # x = np.random.randn(3, 2) # t = np.random.randint(0, 10, (3, )) # # 创建网络 # network = TwoLayerNet(input_size=2, hidden_size=3, output_size=10)
from common import layers
from data.mnist import load_mnist
import matplotlib.pyplot as plt
from multi_layer_net import MultiLayerNet
from common import optimizer

(x_train, d_train), (x_test, d_test) = load_mnist(normalize=True)

print("Finished loading the data")

# Shrink the training set to reproduce overfitting
x_train = x_train[:300]
d_train = d_train[:300]

network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10)

iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.01

train_loss_list = []
accuracies_train = []
accuracies_test = []

plot_interval = 10
hidden_layer_num = network.hidden_layer_num

# Regularization strength setting ======================================
import numpy as np
import matplotlib.pyplot as plt

from multi_layer_net import MultiLayerNet
from optimizer import SGD
from utils import load_mnist

weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

iters_num = 2000
train_size = x_train.shape[0]
batch_size = 100

train_loss = {}
for key, weight_type in weight_init_types.items():
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                            output_size=10, weight_init_std=weight_type)
    optimizer = SGD()
    train_loss[key] = []

    for i in range(iters_num):
        mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[mask]
        t_batch = t_train[mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        train_loss[key].append(network.loss(x_batch, t_batch))

markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(iters_num)
for key in weight_init_types.keys():
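    # Plot body: a sketch (the original snippet ends at the loop header),
    # drawing one loss curve per initialization.
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.legend()
plt.show()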
train_size = images_train.shape[0]
batch_size = 100
max_epoch = 201                           # accuracy is evaluated once per epoch, ~200 times in total
iter_per_epoch = train_size / batch_size  # iterations that make up one epoch
epoch_cnt = 0
# rate=0.1 is no longer used in the optimizers

# Network initialization
optimizer = dict()   # dict() is the Python built-in
train_loss = dict()
network = dict()     # placeholder initialization; reassigned below
optimizer['sgd'] = op.Sgd()
optimizer['adaGrad'] = op.AdaGrad()
optimizer['momentu'] = op.Momentu()

# 5-layer network; note the size constraint on the last hidden layer
network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 10],
                        output_size=10)

train_loss = []
# itetrain_accuracy = []
train_accuracy = []
test_accuracy = []

iters_num = int(max_epoch * iter_per_epoch)  # assumption: iters_num is not defined in the original snippet

for i in range(iters_num):
    # Randomly sample a mini-batch from the training set
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = images_train[batch_mask]
    t_batch = labels_train[batch_mask]

    grads = dict()
    grads['dW'], grads['db'] = network.gradient(x_batch, t_batch)
    # This multi-layer variant drops the params dict and works on W and b directly
    optimizer['sgd'].update(network.W, grads['dW'])
    optimizer['sgd'].update(network.b, grads['db'])

    loss_value = network.loss(x_batch, t_batch)
    train_loss.append(loss_value)
# Shrink the training set to reproduce overfitting
x_train = x_train[:300]
d_train = d_train[:300]

# Dropout setting ======================================
use_dropout = True
dropout_ratio = 0.15
# ====================================================

# Regularization strength setting ======================================
weight_decay_lambda = 0.005
# =================================================

network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10, weight_decay_lambda=weight_decay_lambda,
                        use_dropout=use_dropout, dropout_ratio=dropout_ratio)
optimizer = optimizer.SGD(learning_rate=0.01)
# optimizer = optimizer.Momentum(learning_rate=0.01, momentum=0.9)
# optimizer = optimizer.AdaGrad(learning_rate=0.01)
# optimizer = optimizer.Adam()

iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
accuracies_train = []
accuracies_test = []
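# Training loop: a sketch, not part of the original snippet. It follows the
# per-iteration SGD step used elsewhere here; evaluating accuracy every 10
# iterations is an assumption (matching plot_interval in the similar scripts).
for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    d_batch = d_train[batch_mask]

    grad = network.gradient(x_batch, d_batch)
    optimizer.update(network.params, grad)

    train_loss_list.append(network.loss(x_batch, d_batch))

    if (i + 1) % 10 == 0:
        accuracies_train.append(network.accuracy(x_train, d_train))
        accuracies_test.append(network.accuracy(x_test, d_test))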
fname_train = name_train + '.csv'
fname_test = name_test + '.csv'
fname_sim = name_sim + '.csv'
data_train = np.loadtxt(fname_train, delimiter=',')
data_test = np.loadtxt(fname_test, delimiter=',')
data_sim = np.loadtxt(fname_sim, delimiter=',')
data_sim = np.reshape(data_sim, (1, 200))

indices = np.arange(data_train.shape[0])
np.random.shuffle(indices)
validation_size = int(data_train.shape[0] * validation_split)
x_train, x_test = (data_train[indices[:-validation_size], :],
                   data_train[indices[-validation_size:], :])
t_train, t_test = (data_test[indices[:-validation_size], :],
                   data_test[indices[-validation_size:], :])
#(x_train, t_train), (x_test, t_test) = (data_train[0], data_train[1]), (data_test[0], data_test[1])

hid_size = 200
layer_num = 2
network = MultiLayerNet(input_size=x_train.shape[1], hidden_size_list=[hid_size],
                        output_size=t_train.shape[1], weight_init_std=0.01)
optimizer = Adam()

iters_num = 100000
train_size = x_train.shape[0]
test_size = x_test.shape[0]
batch_size = 100
learning_rate = 0.001

train_loss = []
#train_acc = []
test_loss = []
#test_acc = []

iter_per_epoch = max(train_size / batch_size, 1)

values = {}
w1_value = []
b1_value = []
w2_value = []
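# Training loop: a sketch, not part of the original snippet. The Adam step
# mirrors the sibling CSV script above; snapshotting W1/b1/W2 once per epoch
# is an assumption based on the tracking lists just defined.
for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    train_loss.append(network.loss(x_batch, t_batch))

    if i % iter_per_epoch == 0:
        test_loss.append(network.loss(x_test, t_test))
        w1_value.append(network.params['W1'].copy())
        b1_value.append(network.params['b1'].copy())
        w2_value.append(network.params['W2'].copy())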
import numpy as np

from common import layers
from data.mnist import load_mnist
import matplotlib.pyplot as plt
from multi_layer_net import MultiLayerNet

# Load the data
(x_train, d_train), (x_test, d_test) = load_mnist(normalize=True, one_hot_label=True)

print("Finished loading the data")

# batch_normalization setting ================================
# use_batchnorm = True
use_batchnorm = False
# ====================================================

network = MultiLayerNet(input_size=784, hidden_size_list=[40, 20], output_size=10,
                        activation='sigmoid', weight_init_std=0.01,
                        use_batchnorm=use_batchnorm)

iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.01
# Momentum coefficient
momentum = 0.9

train_loss_list = []
accuracies_train = []
accuracies_test = []
plot_interval = 10

for i in range(iters_num):
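    # Loop body: a sketch (the original snippet ends at the loop header). Since
    # no optimizer object is created above but a momentum coefficient is, the
    # momentum update is written inline; the velocity dict `v` is an assumption.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    d_batch = d_train[batch_mask]

    grad = network.gradient(x_batch, d_batch)
    if i == 0:
        v = {key: np.zeros_like(val) for key, val in network.params.items()}
    for key in network.params.keys():
        v[key] = momentum * v[key] - learning_rate * grad[key]
        network.params[key] += v[key]

    train_loss_list.append(network.loss(x_batch, d_batch))

    if (i + 1) % plot_interval == 0:
        accuracies_train.append(network.accuracy(x_train, d_train))
        accuracies_test.append(network.accuracy(x_test, d_test))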
import numpy as np

# from two_layer_net import TwoLayerNet
from multi_layer_net import MultiLayerNet
from cifar10 import load_cifar10
from optimizer import *

(x_train, t_train), (x_test, t_test) = load_cifar10(normalize=True, flatten=True,
                                                    one_hot_label=True, data_batch_number='1')

# network = TwoLayerNet(input_size=3072, hidden_size=200, output_size=10)
network = MultiLayerNet(input_size=3072, hidden_size_list=[100, 100, 100], output_size=10,
                        activation='relu', weight_init_std='relu', weight_decay_lambda=0.1,
                        use_dropout=True, dropout_ration=0.5, use_batchnorm=True)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
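# Training loop: a sketch, not part of the original snippet. No optimizer
# object is created above, so a plain SGD update is written inline; the
# per-epoch accuracy reporting mirrors the MNIST scripts in this collection.
for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    for key in grads.keys():
        network.params[key] -= learning_rate * grads[key]

    train_loss_list.append(network.loss(x_batch, t_batch))

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))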