def train(batch_size, iterate_num, learning_rate):
    """
    Get the appropriate network parameters (weights, biases) by gradient method.

    In the process of gradient method,
    training data are choosed randomly in each step(mini-batch gradient method).

    batch_size: data of this number are choosed from training data in each step
    iterate_num: the number of iteration for gradient method
    learning_rate: learning rate for gradient method
    """
    # load the training and test data (the test data are not used below)
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

    # initialize the TwoLayerNet
    network = TwoLayerNet(28 * 28, 50, 10)  # each image has 28*28 pixels

    # losses in each step
    losses = []

    for i in range(iterate_num):
        # choose the training data for this step
        indices = np.random.choice(len(x_train), batch_size)
        x_train_batch = x_train[indices]
        t_train_batch = t_train[indices]

        # calculate the grad
        grads = network.numerical_gradient(x_train_batch, t_train_batch)

        # update the network parameters
        network.params['W1'] -= learning_rate * grads['W1']
        network.params['b1'] -= learning_rate * grads['b1']
        network.params['W2'] -= learning_rate * grads['W2']
        network.params['b2'] -= learning_rate * grads['b2']

        # record loss
        loss = network.loss(x_train_batch, t_train_batch)
        print('loss = {0}'.format(loss))
        losses.append(loss)

    return network, losses
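
# A minimal usage sketch: assumes numpy as np, load_mnist and TwoLayerNet are
# imported as in the examples below; the hyperparameter values are illustrative.
network, losses = train(batch_size=100, iterate_num=1000, learning_rate=0.1)
print('final loss = {0}'.format(losses[-1]))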
Example #2
# coding: utf-8
import sys, os

sys.path.append(os.pardir)  # setting to allow importing files from the parent directory
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from ch04.two_layer_net import TwoLayerNet
# from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # set the number of iterations as appropriate
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
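# With MNIST's 60,000 training images and batch_size = 100, iter_per_epoch is 600:
# 600 mini-batch updates correspond to one full pass (one epoch) over the training set.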

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient and update the parameters
    # (loop body reconstructed along the lines of Example #3 below; the
    # original snippet breaks off after x_batch)
    grad = network.numerical_gradient(x_batch, t_batch)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss at every step
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Record train/test accuracy once per epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
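
# For reference, network.numerical_gradient is built on a central-difference
# helper along these lines (an illustrative stand-alone sketch, not necessarily
# the exact implementation): f is a scalar loss function, x a parameter array.
def numerical_gradient(f, x, h=1e-4):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + h
        fxh1 = f(x)                          # f(x + h)
        x[idx] = orig - h
        fxh2 = f(x)                          # f(x - h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)  # central difference
        x[idx] = orig                        # restore the original value
        it.iternext()
    return grad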
Example #3
import numpy as np
from dataset.mnist import load_mnist
from ch04.two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):

    # Get a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Calculate the gradient
    grad = network.numerical_gradient(x_batch, t_batch)
    # grad = network.gradient(x_batch, t_batch)  # backprop version: same gradients, much faster

    # Update the parameters (weights and biases)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss in train_loss_list (declared above)
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
Example #4
#     # Compute the gradient
#     grad = network.numerical_gradient(x_batch, t_batch)
#     # Update along the gradient direction
#     for key in ('W1', 'b1', 'W2', 'b2'):
#         network.params[key] -= learn_num * grad[key]
#     # Compute the loss
#     loss = network.loss(x_batch, t_batch)
#     train_loss.append(loss)
#     print(loss)
# print(train_loss)
# # Forward and backward computation graphs
# # Differentiation by backpropagation
# # In the computation graph the nodes are: A → (×2) → 2A → (×1.1) → 2.2A  (forward pass)
# # Backward:                               2.2 ← (×2) ← 1.1 ← (×1.1) ← 1
# # Backpropagation sets the value 1 at the result node and works backward; the
# # resulting 2.2 means that if A increases by a tiny amount ETA, the total
# # increases by 2.2 * ETA, and that is exactly the derivative.
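
# A runnable sketch of the multiply-node forward/backward pass traced in the
# comments above (MulLayer is illustrative; it mirrors the book's multiply
# layer, whose backward pass swaps the cached inputs):
class MulLayer:
    def __init__(self):
        self.x = None
        self.y = None

    def forward(self, x, y):
        self.x = x
        self.y = y
        return x * y

    def backward(self, dout):
        dx = dout * self.y  # dL/dx: upstream gradient times the other factor
        dy = dout * self.x  # dL/dy
        return dx, dy

# Forward pass: A → (×2) → 2A → (×1.1) → 2.2A, here with A = 100
double_layer = MulLayer()
scale_layer = MulLayer()
A = 100
doubled = double_layer.forward(A, 2)       # 200
total = scale_layer.forward(doubled, 1.1)  # 220.0

# Backward pass: set 1 at the result and propagate back
d_doubled, _ = scale_layer.backward(1)     # 1.1
dA, _ = double_layer.backward(d_doubled)   # 2.2 == d(total)/dA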

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # set the number of iterations as appropriate
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]