def __train(weight_init_std):
    # Two identically configured networks; only bn_network uses Batch Normalization.
    bn_network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                               output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                            output_size=10, weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Train both networks on the same mini-batch
        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
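# These training routines only assume that the optimizer exposes
# update(params, grads) over dicts keyed by parameter name ('W1', 'b1', ...).
# A minimal SGD sketch of that interface (the actual class may differ):
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        # Plain gradient descent step: params <- params - lr * grads
        for key in params.keys():
            params[key] -= self.lr * grads[key]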
def __train(lr, weight_decay, epochs=50, verbose=False):  # reduce the number of epochs
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10, weight_decay_lambda=weight_decay)
    optimizer = SGD(lr)

    iter_per_epoch = max(train_size / mini_batch_size, 1)
    current_iter = 0
    current_epoch = 0

    train_loss_list = []
    train_acc_list = []
    val_acc_list = []

    for i in range(int(epochs * iter_per_epoch)):
        batch_mask = np.random.choice(train_size, mini_batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)
        if verbose:
            print("train loss:" + str(loss))

        if current_iter % iter_per_epoch == 0:
            current_epoch += 1
            train_acc = network.accuracy(x_train, t_train)
            val_acc = network.accuracy(x_val, t_val)
            train_acc_list.append(train_acc)
            val_acc_list.append(val_acc)
            if verbose:
                print("=== epoch:" + str(current_epoch) + ", train acc:" + str(train_acc) +
                      ", validation acc:" + str(val_acc) + " ===")

        current_iter += 1

    return val_acc_list, train_acc_list
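# A sketch of how __train can drive a random hyperparameter search; the trial
# count and the log-uniform sampling ranges below are illustrative assumptions,
# not values taken from this code.
optimization_trial = 100
results_val = {}
results_train = {}
for _ in range(optimization_trial):
    weight_decay = 10 ** np.random.uniform(-8, -4)  # assumed search range
    lr = 10 ** np.random.uniform(-6, -2)            # assumed search range

    val_acc_list, train_acc_list = __train(lr, weight_decay)
    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
    results_val[key] = val_acc_list
    results_train[key] = train_acc_list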
def train(weight_init_std, x_train, t_train, max_epochs):
    batch_norm_network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                       output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    no_batch_norm_network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                          output_size=10, weight_init_std=weight_init_std)

    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01
    max_iters_times = 1000000000
    iter_per_epoch = max(int(train_size / batch_size), 1)

    optimizer = SGD(lr=learning_rate)

    bn_train_acc_list = []
    no_bn_train_acc_list = []
    epoch_cnt = 0

    for i in range(max_iters_times):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Train both networks on the same mini-batch
        for network in (batch_norm_network, no_batch_norm_network):
            grads = network.gradient(x_batch, t_batch)
            optimizer.update(network.params, grads)

        if i % iter_per_epoch == 0:
            bn_train_acc = batch_norm_network.accuracy(x_train, t_train)
            no_bn_train_acc = no_batch_norm_network.accuracy(x_train, t_train)
            bn_train_acc_list.append(bn_train_acc)
            no_bn_train_acc_list.append(no_bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(no_bn_train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return no_bn_train_acc_list, bn_train_acc_list
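# Example driver for train(); the MNIST loader import path and the small
# training subset are assumptions based on the dataset package these
# networks are usually paired with.
from dataset.mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
x_train, t_train = x_train[:1000], t_train[:1000]  # small subset keeps the comparison fast
no_bn_acc, bn_acc = train(weight_init_std=0.1, x_train=x_train, t_train=t_train, max_epochs=20)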
for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    d_batch = d_train[batch_mask]

    # Gradient
    grad = network.gradient(x_batch, d_batch)
    if i == 0:
        v = {}
    for key in ('W1', 'W2', 'W3', 'b1', 'b2', 'b3'):
        if i == 0:
            v[key] = np.zeros_like(network.params[key])
        # Momentum update: decay the velocity, then add the scaled gradient
        v[key] = momentum * v[key] - learning_rate * grad[key]
        network.params[key] += v[key]

    loss = network.loss(x_batch, d_batch)
    train_loss_list.append(loss)

    if (i + 1) % plot_interval == 0:
        accr_test = network.accuracy(x_test, d_test)
        accuracies_test.append(accr_test)
        accr_train = network.accuracy(x_batch, d_batch)  # note: evaluated on the current batch
        accuracies_train.append(accr_train)

        print('Generation: ' + str(i + 1) + '. accuracy (training) = ' + str(round(100 * accr_train, 2)) + '%')
        print('          : ' + str(i + 1) + '. accuracy (test) = ' + str(round(100 * accr_test, 2)) + '%')

lists = range(0, iters_num, plot_interval)
plt.plot(lists, accuracies_train, label="training set")
plt.plot(lists, accuracies_test, label="test set")
plt.legend(loc="lower right")
plt.title("count - accuracy : Momentum")
plt.xlabel("count")
plt.ylabel("accuracy")
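# The inlined momentum update above can be packaged as an optimizer with the
# same update(params, grads) interface as SGD; a minimal sketch:
class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None  # per-parameter velocity, created lazily on first update

    def update(self, params, grads):
        if self.v is None:
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]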
plot_interval = 10

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    d_batch = d_train[batch_mask]

    grad = network.gradient(x_batch, d_batch)
    optimizer.update(network.params, grad)

    loss = network.loss(x_batch, d_batch)
    train_loss_list.append(loss)

    if (i + 1) % plot_interval == 0:
        accr_train = network.accuracy(x_train, d_train)
        accr_test = network.accuracy(x_test, d_test)
        accuracies_train.append(accr_train)
        accuracies_test.append(accr_test)

        print('Generation: ' + str(i + 1) + '. accuracy (training) = ' + str(round(100 * accr_train, 2)) + '%')
        print('          : ' + str(i + 1) + '. accuracy (test) = ' + str(round(100 * accr_test, 2)) + '%')

lists = range(0, iters_num, plot_interval)
plt.plot(lists, accuracies_train, label="training set")
plt.plot(lists, accuracies_test, label="test set")
plt.legend(loc="lower right")
plt.title("count - accuracy : Dropout dropout_ratio = 0.15")
plt.xlabel("count")
plt.ylabel("accuracy")
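# The plot title says this run used dropout_ratio = 0.15. A minimal sketch of
# the dropout layer such a network typically contains (the class here is an
# assumption, not code from this source):
class Dropout:
    def __init__(self, dropout_ratio=0.15):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # Drop each unit with probability dropout_ratio during training
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        # At inference time, scale activations by the keep probability instead
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # Gradients flow only through the units kept in the forward pass
        return dout * self.mask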
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc))

        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break

# Draw the graph ==========
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
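# A plausible completion of the figure; the axis labels, limits, and legend
# placement are assumptions, not code from this source.
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()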
network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 10], output_size=10)

train_loss = []
train_accuracy = []
test_accuracy = []
epoch_cnt = 0

for i in range(iters_num):
    # Randomly sample a mini-batch from the training set
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = images_train[batch_mask]
    t_batch = labels_train[batch_mask]

    # The multi-layer version drops the params dict and uses W and b directly
    grads = dict()
    grads['dW'], grads['db'] = network.gradient(x_batch, t_batch)
    optimizer['sgd'].update(network.W, grads['dW'])
    optimizer['sgd'].update(network.b, grads['db'])

    loss_value = network.loss(x_batch, t_batch)
    train_loss.append(loss_value)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(images_train, labels_train)
        test_acc = network.accuracy(images_test, labels_test)
        train_accuracy.append(train_acc)
        test_accuracy.append(test_acc)

        epoch_cnt += 1
        if epoch_cnt >= max_epoch:
            break

x = np.arange(len(train_accuracy))
plot.plot(x, train_accuracy, label='train')
plot.plot(x, test_accuracy, label='test')
plot.legend()
plot.show()