# Compare three weight-initialization schemes (fixed std=0.01, Xavier, He)
# by training otherwise-identical 4-hidden-layer nets with plain SGD and
# recording the per-scheme training loss on each mini-batch.
# NOTE(review): SGD, MultiLayerNet, get_one_batch, train_x, train_y,
# batch_size and max_iterations are assumed to be defined elsewhere in
# this file — confirm against the full script.
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
optimizer = SGD(lr=0.01)

networks = {}    # one MultiLayerNet per initialization scheme
train_loss = {}  # per-scheme list of mini-batch losses
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_type)
    train_loss[key] = []

# 2: start training ==========
for i in range(max_iterations):
    x_batch, t_batch = get_one_batch(train_x, train_y, batch_size=batch_size)
    for key in weight_init_types.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizer.update(networks[key].params, grads)
        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
    if i % 100 == 0:
        print("===========" + "iteration:" + str(i) + "===========")
        for key in weight_init_types.keys():
            # Reuse the loss just recorded this iteration instead of
            # running a second, redundant forward pass on the same batch.
            loss = train_loss[key][-1]
            print(key + ":" + str(loss))
# NOTE(review): whitespace-mangled chunk — the tail of a plotting helper
# (tight_layout/show after commented-out histogram code) followed by the
# start of the script entry point: MNIST load (normalized, flattened,
# integer labels), a 5-image sample batch, TwoLayerNet(784->50->10) setup,
# and the head of a training loop whose body continues past this view.
# Left byte-identical because neither enclosing definition is fully
# visible here; reformatting would require guessing the missing parts.
# ax.hist(data_non_zero, bins=bins_range, range=ran) # if idx != 0: # plt.yticks([], []) plt.tight_layout() plt.show() if __name__ == '__main__': mnist = MNIST('datasets\\mnist') train_x, train_y, test_x, test_y = mnist.load(normalize=True, image_flat=True, label_one_hot=False) # show sample images sample_train_x, sample_train_y = get_one_batch(train_x, train_y, batch_size=5) # show_imgs(sample_train_x.reshape(-1, 28, 28), sample_train_y) learning_rate = 0.05 train_acc_list = [] test_acc_list = [] train_loss_list = [] test_loss_list = [] network = TwoLayerNet(input_size=28 * 28, hidden_size=50, output_size=10) # print(network.params["W1"][0]) # show(network.params) epoch = 200 # train & evaluate for i in range(2000):
# NOTE(review): whitespace-mangled chunk — the tail of a CNN constructor
# call whose opening lies before this view (second pooling params, two
# fully-connected sizes 120/84, 10 outputs — LeNet-like), a structure
# printout via show_structure on a 10-sample batch, then the head of an
# Adam(lr=0.001) training loop over 5000 iterations (batch_size=30) with
# a ZeroDivisionError guard around the parameter update. Left
# byte-identical because the call's opening and the loop's full body are
# not visible here.
}, pool_param_2={ 'pool_h': 2, 'pool_stride': 2 }, hidden_size_1=120, hidden_size_2=84, output_size=10, weight_init_std=0.01) # for layer in network.layers.values(): # print(layer) # print(network.lossLayer) # print('****** Print structure without values: OK ******') train_x_batch, train_y_batch = get_one_batch(train_x, train_y, batch_size=10) show_structure(network, train_x_batch, train_y_batch) op = optimizer.Adam(lr=0.001) epoch = 100 for i in range(5000): train_x_batch, train_y_batch = get_one_batch(train_x, train_y, batch_size=30) grads = network.gradient(train_x_batch, train_y_batch) try: op.update(network.params, grads) except ZeroDivisionError as e: print('Handling run-time error:', e)