# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD


def main():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

    # Use only 300 training samples to deliberately provoke overfitting
    x_train = x_train[:300]
    t_train = t_train[:300]

    max_epochs = 201
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01
    # weight_decay_lambda = 0    # no weight decay
    weight_decay_lambda = 0.1

    train_acc_list = []
    test_acc_list = []

    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay_lambda)
    optimizer = SGD(lr=learning_rate)

    iter_per_epoch = max(train_size // batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        # Evaluate accuracy once per epoch
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc)
                  + ", test acc:" + str(test_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    markers = {'train': 'o', 'test': 's'}
    x = np.arange(max_epochs)
    plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
    plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()


if __name__ == '__main__':
    main()
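# For reference: a minimal sketch of the SGD optimizer assumed above
# (the book's common/optimizer.py provides an equivalent class; this is
# an illustration, not the verbatim source).
class SGD:
    """Vanilla stochastic gradient descent: params[k] -= lr * grads[k]."""
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]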
def train(train_x, train_label, test_x, test_label,
          learning_rate, max_epoch, batch_size):
    # Weight decay (L2 regularization strength) setting =======================
    weight_decay_lambda = 0      # no weight decay
    # weight_decay_lambda = 0.1

    # Build the network
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay_lambda)
    optimizer = SGD(learning_rate)

    train_acc_list = []
    test_acc_list = []

    train_size = train_x.shape[0]
    iter_per_epoch = max(train_size // batch_size, 1)
    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)
    epoch_cnt = 0
    left = 0
    iteration = int(iter_per_epoch * max_epoch)

    for i in range(iteration):
        # Fetch one batch and advance the `left` cursor (see the
        # get_batch sketch below)
        batch_x, batch_label, left = get_batch(train_x, train_label,
                                               batch_mask, batch_size, left)
        grads = network.gradient(batch_x, batch_label)
        optimizer.update(network.params, grads)

        # Record accuracy on the train and test sets once per epoch
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(train_x, train_label)
            test_acc = network.accuracy(test_x, test_label)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc)
                  + ", test acc:" + str(test_acc))
            epoch_cnt += 1

    return train_acc_list, test_acc_list
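# get_batch() is called by train() above but is not defined in this file.
# A minimal sketch of its likely behavior, assuming `batch_mask` is a
# shuffled index array and `left` is the read cursor into it (hypothetical
# reconstruction; the original helper may differ):
def get_batch(train_x, train_label, batch_mask, batch_size, left):
    right = left + batch_size
    if right > len(batch_mask):
        # The epoch is exhausted: reshuffle the indices and wrap around
        np.random.shuffle(batch_mask)
        left, right = 0, batch_size
    idx = batch_mask[left:right]
    return train_x[idx], train_label[idx], right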
train_size = x_train.shape[0]
batch_size = 100
train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size // batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc)
              + ", test acc:" + str(test_acc))
        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break
epochs = 200             # 1 epoch = every training sample has been seen once
mini_batch_size = 100    # number of samples per forward pass

# Record accuracy on the training and test data at every epoch
train_accuracies = []
test_accuracies = []

optimizer = Sgd(learning_rate=0.01)  # optimizer

train_size = X_train.shape[0]
iter_per_epoch = train_size // mini_batch_size

for epoch in range(epochs):
    # indices = np.arange(train_size)
    # np.random.shuffle(indices)
    for i in range(iter_per_epoch):
        x_batch = X_train[(i * mini_batch_size):((i + 1) * mini_batch_size)]
        y_batch = Y_train[(i * mini_batch_size):((i + 1) * mini_batch_size)]
        gradients = neural_net.gradient(x_batch, y_batch)
        optimizer.update(neural_net.params, gradients)

    train_acc = neural_net.accuracy(X_train, Y_train)
    train_accuracies.append(train_acc)
    test_acc = neural_net.accuracy(X_test, Y_test)
    test_accuracies.append(test_acc)
    print(f'epoch #{epoch}: train={train_acc}, test={test_acc}')

x = np.arange(epochs)
plt.plot(x, train_accuracies, label='Train')
plt.plot(x, test_accuracies, label='Test')
plt.legend()
plt.title(f'Weight Decay (lambda={wd_rate})')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100],
                        output_size=10, use_batchnorm=True)

# Gradient check: compare backprop gradients with numerical gradients
# on a single training sample
x_batch = x_train[:1]
t_batch = t_train[:1]

grad_backprop = network.gradient(x_batch, t_batch)
grad_numerical = network.numerical_gradient(x_batch, t_batch)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
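# If backprop is implemented correctly, every printed difference should be
# vanishingly small (on the order of 1e-8 or below); a noticeably larger
# value for some parameter points to a bug in its analytic gradient.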
                        output_size=10, hidden_size_list=[100, 100, 100, 100])

max_iterations = 2000
train_size = x_train.shape[0]
batch_size = 128

optimizer_mul = AdaGrad()
optimizer_multi = AdaGrad()
# optimizer_function = Adam()  # alternative optimizer

for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Train both networks on the same mini-batch
    grads_mul = mul_layer_net.gradient(x_batch, t_batch)
    grads_multi = multi_layer_net.gradient(x_batch, t_batch)
    optimizer_mul.update(mul_layer_net.params, grads_mul)
    optimizer_multi.update(multi_layer_net.params, grads_multi)

    # Compute and report the losses every 100 iterations
    if i % 100 == 0:
        print("===========" + "iteration:" + str(i) + "===========")
        loss_mul = mul_layer_net.loss(x_batch, t_batch)
        loss_multi = multi_layer_net.loss(x_batch, t_batch)
        print('mul-loss' + ":" + str(loss_mul))
        print('multi-loss' + ":" + str(loss_multi))
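# Both loops above drive their networks with AdaGrad. A minimal sketch of
# that optimizer follows, implementing the update rule h += g*g and
# p -= lr * g / (sqrt(h) + eps); it mirrors the AdaGrad in the book's
# common/optimizer.py and is reproduced here as an assumption:
import numpy as np

class AdaGrad:
    """AdaGrad: per-parameter step sizes that shrink as squared
    gradients accumulate in h."""
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)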