Exemple #1
0
def __train(lr, weight_decay, epocs=50, verbose=False):  # 减少epoch的数量

    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay)

    optimizer = SGD(lr)

    iter_per_epoch = max(train_size / mini_batch_size, 1)
    current_iter = 0
    current_epoch = 0

    train_loss_list = []
    train_acc_list = []
    val_acc_list = []

    for i in range(int(epochs * iter_per_epoch)):
        batch_mask = np.random.choice(train_size, mini_batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        if verbose:
            print("train loss:" + str(loss))

        if current_iter % iter_per_epoch == 0:
            current_epoch += 1

            train_acc = network.accuracy(x_train, t_train)
            val_acc = network.accuracy(x_val, t_val)
            train_acc_list.append(train_acc)
            val_acc_list.append(val_acc)

            if verbose:
                print("=== epoch:" + str(current_epoch) + ", train acc:" +
                      str(train_acc) + ", validation acc:" + str(val_acc) +
                      " ===")
        current_iter += 1

    return val_acc_list, train_acc_list
# 慣性
momentum = 0.9

train_loss_list = []
accuracies_train = []
accuracies_test = []

plot_interval=10

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    d_batch = d_train[batch_mask]

    # 勾配
    grad = network.gradient(x_batch, d_batch)
    if i == 0:
        v = {}
    for key in ('W1', 'W2', 'W3', 'b1', 'b2', 'b3'):
        if i == 0:
            v[key] = np.zeros_like(network.params[key])
        v[key] = momentum * v[key] - learning_rate * grad[key]
        network.params[key] += v[key]

        loss = network.loss(x_batch, d_batch)
        train_loss_list.append(loss)
        
    if (i + 1) % plot_interval == 0:
        accr_test = network.accuracy(x_test, d_test)
        accuracies_test.append(accr_test)        
        accr_train = network.accuracy(x_batch, d_batch)
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) +
              ", test acc:" + str(test_acc))

        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break
Exemple #4
0
import numpy as np
from multi_layer_net import MultiLayerNet

# 生成数据
x = np.random.randn(1, 784)
t = np.random.randint(0, 10, (1, ))

# 创建网络
network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100],
                        output_size=10,
                        use_batchnorm=True)

grad_numerical = network.numerical_gradient(x, t)
grad_backprop = network.gradient(x, t)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ':' + str(diff))

# break symmetry
# x = np.random.randn(3, 2)
# t = np.random.randint(0, 10, (3, ))

# # 创建网络
# network = TwoLayerNet(input_size=2, hidden_size=3, output_size=10)
# grad = network.gradient(x, t)
# for key in ('W1', 'b1', 'W2', 'b2'):
#         network.params[key] -= 0.1 * grad[key]
    normalize=True, one_hot_label=True)
iters_num = 2000
train_size = x_train.shape[0]
batch_size = 100

train_loss = {}

for key, weight_type in weight_init_types.items():
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                            output_size=10, weight_init_std=weight_type)
    optimizer = SGD()
    train_loss[key] = []

    for i in range(iters_num):
        mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[mask]
        t_batch = t_train[mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)
        train_loss[key].append(network.loss(x_batch, t_batch))

markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(iters_num)
for key in weight_init_types.keys():
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 2.5)
plt.legend()
plt.show()
Exemple #6
0
optimizer['momentu'] = op.Momentu()
# 5层神经网络,对最后的一层神经元有个数要求
network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 10],
                        output_size=10)
train_loss = []  #
itetrain_accuracy = []
train_accuracy = []
test_accuracy = []

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = images_train[batch_mask]
    t_batch = labels_train[batch_mask]  #在样本中随机选择
    grads = dict()
    grads['dW'], grads['db'] = network.gradient(x_batch, t_batch)
    optimizer['sgd'].update(network.W, grads['dW'])  #多层过程直接拿掉了param,而直接使用W,b
    optimizer['sgd'].update(network.b, grads['db'])
    loss_value = network.loss(x_batch, t_batch)
    train_loss.append(loss_value)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(images_train, labels_train)
        test_acc = network.accuracy(images_test, labels_test)
        train_accuracy.append(train_acc)
        test_accuracy.append(test_acc)
        epoch_cnt += 1
        if epoch_cnt >= max_epoch:
            break
x = np.arange(len(train_accuracy))
plot.plot(x, train_accuracy, test_accuracy)