import numpy as np

# load_mnist and TwoLayerNet are assumed to be provided by the surrounding
# project (import paths may differ), e.g.:
# from dataset.mnist import load_mnist
# from two_layer_net import TwoLayerNet


def train(batch_size, iterate_num, learning_rate):
    """
    Get the appropriate network parameters (weights, biases) by gradient method.

    In the process of gradient method,
    training data are choosed randomly in each step(mini-batch gradient method).

    batch_size: data of this number are choosed from training data in each step
    iterate_num: the number of iteration for gradient method
    learning_rate: learning rate for gradient method
    """
    # get training data and test data (the test data are not used below)
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

    # initialize the TwoLayerNet
    network = TwoLayerNet(28 * 28, 50, 10)  # each image has 28*28 pixels

    # losses in each step
    losses = []

    for i in range(iterate_num):
        # choose the training data for this step
        indices = np.random.choice(len(x_train), batch_size)
        x_train_batch = x_train[indices]
        t_train_batch = t_train[indices]

        # calculate the grad
        grads = network.numerical_gradient(x_train_batch, t_train_batch)

        # update the network parameters
        network.params['W1'] -= learning_rate * grads['W1']
        network.params['b1'] -= learning_rate * grads['b1']
        network.params['W2'] -= learning_rate * grads['W2']
        network.params['b2'] -= learning_rate * grads['b2']

        # record loss
        loss = network.loss(x_train_batch, t_train_batch)
        print('loss = {0}'.format(loss))
        losses.append(loss)

    return network, losses
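
# A minimal usage sketch of train() above; the hyperparameter values are
# illustrative, and matplotlib is assumed only for the optional loss plot.
if __name__ == '__main__':
    network, losses = train(batch_size=100, iterate_num=1000, learning_rate=0.1)

    # optional: visualize the recorded losses
    import matplotlib.pyplot as plt
    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.show()
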
Example #2
import numpy as np
# load_mnist and TwoLayerNet are assumed to be provided by the surrounding
# project (import paths may differ)

(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):

    # Get mini batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Calculate the gradient
    grad = network.numerical_gradient(x_batch, t_batch)
    # grad = network.gradient(x_batch, t_batch) # Improved version

    # Update the network parameters (weights, biases)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the training loss for this mini-batch
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
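
# The recorded losses can then be visualized; a minimal sketch assuming
# matplotlib is available.
import matplotlib.pyplot as plt

plt.plot(np.arange(len(train_loss_list)), train_loss_list)
plt.xlabel('iteration')
plt.ylabel('loss')
plt.show()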