def __train(weight_init_std):
    bn_network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                               output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                            output_size=10, weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Update both networks on the same mini-batch
        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
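# --- A hypothetical driver for __train above (a sketch, not part of the
# original): sweep a few weight scales and compare accuracy curves with and
# without batch normalization. Assumes x_train, t_train, train_size,
# batch_size, learning_rate, max_epochs are already defined and
# matplotlib.pyplot is imported as plt.
weight_scale_list = np.logspace(0, -4, num=4)

for w in weight_scale_list:
    train_acc_list, bn_train_acc_list = __train(w)
    epochs = np.arange(len(train_acc_list))
    plt.plot(epochs, bn_train_acc_list, label='Batch Normalization')
    plt.plot(epochs, train_acc_list, linestyle='--', label='Normal (no BatchNorm)')
    plt.xlabel('epochs')
    plt.ylabel('accuracy')
    plt.legend(loc='lower right')
    plt.title('weight_init_std: ' + str(w))
    plt.show()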
def __train(lr, weight_decay, epochs=50, verbose=False):  # reduced number of epochs
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10, weight_decay_lambda=weight_decay)
    optimizer = SGD(lr)

    iter_per_epoch = max(train_size / mini_batch_size, 1)
    current_iter = 0
    current_epoch = 0

    train_loss_list = []
    train_acc_list = []
    val_acc_list = []

    for i in range(int(epochs * iter_per_epoch)):
        batch_mask = np.random.choice(train_size, mini_batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)
        if verbose:
            print("train loss:" + str(loss))

        if current_iter % iter_per_epoch == 0:
            current_epoch += 1
            train_acc = network.accuracy(x_train, t_train)
            val_acc = network.accuracy(x_val, t_val)
            train_acc_list.append(train_acc)
            val_acc_list.append(val_acc)
            if verbose:
                print("=== epoch:" + str(current_epoch) + ", train acc:" + str(train_acc) +
                      ", validation acc:" + str(val_acc) + " ===")

        current_iter += 1

    return val_acc_list, train_acc_list
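# --- A hypothetical random-search driver for __train above (a sketch, not
# part of the original): sample lr and weight_decay on log scales and keep
# the accuracy curves for later comparison. Assumes x_train, t_train,
# x_val, t_val are already defined.
optimization_trial = 100
results_val = {}
results_train = {}
for _ in range(optimization_trial):
    weight_decay = 10 ** np.random.uniform(-8, -4)
    lr = 10 ** np.random.uniform(-6, -2)

    val_acc_list, train_acc_list = __train(lr, weight_decay)
    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
    print("val acc:" + str(val_acc_list[-1]) + " | " + key)
    results_val[key] = val_acc_list
    results_train[key] = train_acc_list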
for epoch in range(max_epoch):
    for iter in range(max_iters):
        # Fetch a mini-batch: each row reads sequentially from its own offset
        batch_x = np.empty((batch_size, time_size), dtype='i')
        batch_t = np.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, t] = xs[(offset + time_idx) % data_size]
                batch_t[i, t] = ts[(offset + time_idx) % data_size]
            time_idx += 1

        # Training step
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        total_loss += loss
        loss_count += 1

    # Evaluate perplexity once per epoch
    ppl = np.exp(total_loss / loss_count)
    print('| epoch {0} | perplexity {1:.2f}'.format(epoch + 1, ppl))
    ppl_list.append(float(ppl))
    total_loss, loss_count = 0, 0

# Plot the learning curve
x = np.arange(len(ppl_list))
plt.plot(x, ppl_list, label='train')
plt.xlabel('epochs')
plt.ylabel('perplexity')
plt.savefig('chap5.png')
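# --- Setup the loop above assumes (a sketch, not part of the original):
# each mini-batch row starts reading the corpus at an evenly spaced offset,
# and the counters/lists are initialized before the first epoch.
jump = data_size // batch_size
offsets = [i * jump for i in range(batch_size)]
time_idx = 0
total_loss, loss_count = 0, 0
ppl_list = []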
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc))

        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break

# 3. Visualization ==========
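# A minimal visualization sketch for the accuracy lists collected above
# (assumes matplotlib.pyplot is imported as plt):
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, linestyle='--', label='test acc')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(loc='lower right')
plt.show()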
#max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

x, t = spiral.load_data()
model = TwoLayersNet(input_size=2, hidden_size=hidden_size, output_size=3)
opt = SGD(learning_rate)

it_per_epoch = -(-len(x) // batch_size)  # ceiling division

# Shuffle the data
idx = np.random.permutation(len(x))
x = x[idx]
t = t[idx]

for it in range(it_per_epoch):
    # Slice out one mini-batch
    x_batch = x[batch_size * it:batch_size * (it + 1)]
    t_batch = t[batch_size * it:batch_size * (it + 1)]

    loss = model.forward(x_batch, t_batch)
    print('loss:', loss)
    print_pg(model, 'forward', it, pflg)  # print_pg/pflg: debug-print helper and flag defined elsewhere

    model.backward(loss)
    print_pg(model, 'backward', it, pflg)

    opt.update(model.params, model.grads)
    print_pg(model, 'update', it, pflg)
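# --- To run more than one pass over the data (a sketch, not part of the
# original): re-enable max_epoch above and wrap the shuffle plus the
# mini-batch loop in an outer epoch loop, e.g.
#
# for epoch in range(max_epoch):
#     idx = np.random.permutation(len(x))
#     x, t = x[idx], t[idx]
#     for it in range(it_per_epoch):
#         ...  # same forward / backward / update steps as above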
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                                  output_size=10, weight_init_std=weight_type)
    train_loss[key] = []

# 2. Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in weight_init_types.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizer.update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)

    if i % 100 == 0:
        print("===========" + "iteration:" + str(i) + "===========")
        for key in weight_init_types.keys():
            loss = networks[key].loss(x_batch, t_batch)
            print(key + ":" + str(loss))

# 3. Visualization ==========
markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(max_iterations)
for key in weight_init_types.keys():
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.legend()
plt.show()
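# --- Setup the comparison above assumes (hypothetical values, not part of
# the original): three initialization schemes matching the marker keys,
# plus the shared containers and optimizer.
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
networks = {}
train_loss = {}
optimizer = SGD(lr=0.01)
max_iterations = 2000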