# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # make the parent directory importable (dataset/, ch04/)
import numpy as np
from dataset.mnist import load_mnist
from ch04.two_layer_net import TwoLayerNet


def train(batch_size, iterate_num, learning_rate):
    """
    Fit the network parameters (weights, biases) by gradient descent.
    In each step a mini-batch is chosen at random from the training data
    (mini-batch gradient descent).

    batch_size: number of samples drawn from the training data in each step
    iterate_num: number of gradient-descent iterations
    learning_rate: learning rate for gradient descent
    """
    # get training data and test data (the test data are not used below)
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

    # initialize the TwoLayerNet
    network = TwoLayerNet(28 * 28, 50, 10)  # each image has 28*28 pixels

    # loss recorded at each step
    losses = []

    for i in range(iterate_num):
        # choose the training data for this step
        indices = np.random.choice(len(x_train), batch_size)
        x_train_batch = x_train[indices]
        t_train_batch = t_train[indices]

        # calculate the gradients
        grads = network.numerical_gradient(x_train_batch, t_train_batch)

        # update the network parameters
        network.params['W1'] -= learning_rate * grads['W1']
        network.params['b1'] -= learning_rate * grads['b1']
        network.params['W2'] -= learning_rate * grads['W2']
        network.params['b2'] -= learning_rate * grads['b2']

        # record the loss
        loss = network.loss(x_train_batch, t_train_batch)
        print('loss = {0}'.format(loss))
        losses.append(loss)

    return network, losses
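# Usage sketch (not part of the original script): a short run of train() with
# illustrative, untuned hyperparameter values; assumes matplotlib is installed.
if __name__ == '__main__':
    import matplotlib.pyplot as plt

    network, losses = train(batch_size=100, iterate_num=1000, learning_rate=0.1)

    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.show()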
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # make the parent directory importable
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from ch04.two_layer_net import TwoLayerNet
# from two_layer_net import TwoLayerNet

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # number of iterations; adjust as appropriate
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
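    # The snippet above breaks off here. The rest of the loop body below is a
    # hedged reconstruction based on the variables already defined above
    # (train_loss_list, train_acc_list, test_acc_list, iter_per_epoch), not the
    # author's original code.
    t_batch = t_train[batch_mask]

    # compute the gradient (numerical_gradient is slow; network.gradient, the
    # backpropagation version, is the usual faster substitute)
    grad = network.numerical_gradient(x_batch, t_batch)

    # update the parameters along the gradient direction
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # record the loss, and the train/test accuracy once per epoch
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print('train acc, test acc | ' + str(train_acc) + ', ' + str(test_acc))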
import sys, os
sys.path.append(os.pardir)  # make the parent directory importable
import numpy as np
from dataset.mnist import load_mnist
from ch04.two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Get a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Calculate the gradient
    grad = network.numerical_gradient(x_batch, t_batch)
    # grad = network.gradient(x_batch, t_batch)  # faster backpropagation version

    # Update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
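# Follow-up sketch (not part of the original script): plot the recorded loss
# curve after training; assumes matplotlib is installed.
import matplotlib.pyplot as plt

plt.plot(np.arange(len(train_loss_list)), train_loss_list)
plt.xlabel('iteration')
plt.ylabel('loss')
plt.show()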
# # compute the gradient
# grad = network.numerical_gradient(x_batch, t_batch)
# # update the parameters along the gradient direction
# for key in ('W1', 'b1', 'W2', 'b2'):
#     network.params[key] -= learning_rate * grad[key]
# # compute the loss function
# loss = network.loss(x_batch, t_batch)
# train_loss_list.append(loss)
# print(loss)
# print(train_loss_list)

# # Forward computation and the backward computation graph
# # Taking derivatives via backpropagation
# # On the computational graph the nodes are: A → (×2) → 2A → (×1.1) → 2.2A   (forward pass)
# # Backward: 2.2 ← (×2) ← 1.1 ← (×1.1) ← 1. Backpropagation starts by placing 1 at the
# # result node and working backwards; the 2.2 obtained at the input means that if A
# # increases by a tiny amount eta, the final result increases by 2.2*eta, which is
# # exactly the derivative. (A small numeric sketch of this follows at the end of this snippet.)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # number of iterations; adjust as appropriate
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
train_loss_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
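# Standalone numeric sketch of the A → (×2) → (×1.1) → 2.2A example from the comments
# above (separate from the training loop; MulLayer here is a local illustration of a
# multiply node with forward/backward passes, not an import from the book's code).
class MulLayer:
    def __init__(self):
        self.x = None
        self.y = None

    def forward(self, x, y):
        # remember the inputs; they are needed for the backward pass
        self.x = x
        self.y = y
        return x * y

    def backward(self, dout):
        # d(x*y)/dx = y and d(x*y)/dy = x, each scaled by the upstream gradient
        dx = dout * self.y
        dy = dout * self.x
        return dx, dy


A = 100.0
double_layer = MulLayer()   # the (×2) node
tax_layer = MulLayer()      # the (×1.1) node

# forward pass: A → 2A → 2.2A
doubled = double_layer.forward(A, 2.0)
total = tax_layer.forward(doubled, 1.1)       # 220.0

# backward pass: place 1 at the output and propagate back
d_doubled, d_rate = tax_layer.backward(1.0)   # 1.1, 200.0
dA, d_two = double_layer.backward(d_doubled)  # 2.2, 110.0

print(total)  # 220.0
print(dA)     # 2.2: increasing A by a tiny eta increases the total by 2.2*eta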