def __train(weight_init_std):
    normal_network = multi_layer_net_extend.MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10,
        weight_init_std=weight_init_std)
    batch_norm_network = multi_layer_net_extend.MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10,
        weight_init_std=weight_init_std,
        use_batchnorm=True)
    optimizer_ = optimizer.SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    # Iterations per epoch: iter_per_epoch updates make up one epoch
    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(100):  # originally: for i in range(1000000000), i.e. up to 1 billion iterations
        # Draw a random mini-batch from the training data
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Run one update step on both the batch-norm network and the plain network
        for network in (batch_norm_network, normal_network):
            # Compute the gradients
            grads = network.gradient(x_batch, t_batch)
            # Apply the optimizer update
            optimizer_.update(network.params, grads)

        # Once per epoch, measure training accuracy with the updated parameters
        if i % iter_per_epoch == 0:
            train_acc = normal_network.accuracy(x_train, t_train)
            bn_train_acc = batch_norm_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    # Return the accuracy histories
    return train_acc_list, bn_train_acc_list
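# Usage sketch (not in the original snippet): sweep several weight_init_std values
# with __train and plot the batch-norm vs. plain accuracy curves side by side.
# The sweep values and the 4x4 subplot grid are assumptions; max_epochs, the data
# variables, and the optimizer module are expected to exist as in the function above.
import numpy as np
import matplotlib.pyplot as plt

weight_scale_list = np.logspace(0, -4, num=16)  # hypothetical range of init scales

for i, w in enumerate(weight_scale_list):
    print("============== " + str(i + 1) + "/16 ==============")
    train_acc_list, bn_train_acc_list = __train(w)

    plt.subplot(4, 4, i + 1)
    plt.title("W: " + str(w))
    plt.plot(bn_train_acc_list, label='with BatchNorm')
    plt.plot(train_acc_list, linestyle='--', label='without BatchNorm')
    plt.ylim(0, 1.0)

plt.legend(loc='lower right')
plt.show()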
# Dropout settings ======================================
use_dropout = True
dropout_ratio = 0.15
# ====================================================

# Weight-decay (regularization) strength ======================================
weight_decay_lambda = 0.005
# =================================================

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        weight_decay_lambda=weight_decay_lambda,
                        use_dropout=use_dropout,
                        dropout_ratio=dropout_ratio)
optimizer_ = optimizer.SGD(learning_rate=0.01)  # separate name so the optimizer module stays accessible
# optimizer_ = optimizer.Momentum(learning_rate=0.01, momentum=0.9)
# optimizer_ = optimizer.AdaGrad(learning_rate=0.01)
# optimizer_ = optimizer.Adam()

iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
accuracies_train = []
accuracies_test = []

plot_interval = 10

for i in range(iters_num):
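    # The loop body is cut off in the snippet above; the lines below are a sketch of
    # a typical body following the same pattern as the other training loops in this
    # section (mini-batch sampling, gradient step, loss logging, periodic accuracy
    # checks). x_train/t_train/x_test/t_test are assumed to be loaded elsewhere.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # compute gradients (including weight decay) and update the parameters
    grads = network.gradient(x_batch, t_batch)
    optimizer_.update(network.params, grads)

    # record the mini-batch loss
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # every plot_interval iterations, record train/test accuracy
    if (i + 1) % plot_interval == 0:
        accuracies_train.append(network.accuracy(x_train, t_train))
        accuracies_test.append(network.accuracy(x_test, t_test))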
train_x, train_y = x[:train_num, :], t[:train_num, :]
test_x, test_y = x[train_num:, :], t[train_num:, :]
print('train_x.shape:', train_x.shape)
print('train_y.shape:', train_y.shape)

learning_rate = 0.01
train_acc_list = []
test_acc_list = []
train_loss_list = []
test_loss_list = []

network = TwoLayerNet(input_size=feature_count, hidden_size=10, output_size=2, weight_decay_lambda=0.0)
op = optimizer.SGD(lr=learning_rate)
epoch = 100  # evaluation interval: accuracy is measured every `epoch` iterations

# train & evaluate
for i in range(10000):
    sample_train_x, sample_train_y = get_one_batch(train_x, train_y, batch_size=5)
    grads = network.gradient(sample_train_x, sample_train_y)
    # update parameters: mini-batch gradient descent
    op.update(network.params, grads)
    if i % epoch == 0:
        # calculate accuracy on the current mini-batch and on the test set
        train_acc = network.accuracy(sample_train_x, sample_train_y)
        train_acc_list.append(train_acc)
        test_acc = network.accuracy(test_x, test_y)
        test_acc_list.append(test_acc)
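# Follow-up sketch (not in the original snippet): report the last recorded accuracies
# and plot the two curves collected in the loop above. Assumes matplotlib is available.
import matplotlib.pyplot as plt

print('final train accuracy:', train_acc_list[-1])
print('final test accuracy:', test_acc_list[-1])

plt.plot(train_acc_list, label='train acc')
plt.plot(test_acc_list, linestyle='--', label='test acc')
plt.xlabel('evaluation step (every %d iterations)' % epoch)
plt.ylabel('accuracy')
plt.legend(loc='lower right')
plt.show()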