def train(train_x, train_label, weight_init_std, learning_rate, max_epoch,
          batch_size):
    """
    构造带有BN层的神经网络和不带BN层的神经网络。测试BN层的效果
    :param train_x:
    :param train_label:
    :param weight_init_std: 参数初始化方式
    :param learning_rate:
    :param max_epoch: 测试的epoch数
    :param batch_size:
    :return:
    """
    bn_network = MultiLayerNetExtend(input_size=784,
                                     hidden_size_list=[100, 100, 100, 100, 100],
                                     output_size=10,
                                     weight_init_std=weight_init_std,
                                     use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_init_std,
                                  use_batchnorm=False)

    optimizer = SGD(learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    train_size = train_x.shape[0]
    iter_per_epoch = max(train_size / batch_size, 1)

    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    epoch_cnt = 0
    left = 0
    iteration = int(iter_per_epoch * max_epoch)

    for i in range(iteration):
        # Fetch one batch of data and update the left pointer (a sketch of get_batch follows after this function)
        batch_x, batch_label, left = get_batch(train_x, train_label, batch_mask,
                                               batch_size, left)

        # Update both networks
        for _network in (bn_network, network):
            grads = _network.gradient(batch_x, batch_label)
            optimizer.update(_network.params, grads)

        # Record accuracy on the full training set once per epoch
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(train_x, train_label)
            bn_train_acc = bn_network.accuracy(train_x, train_label)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - "
                  + str(bn_train_acc))
            epoch_cnt += 1

    return train_acc_list, bn_train_acc_list
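# get_batch is not defined in this snippet. A minimal sketch of a helper with the
# behaviour the comment above describes (slice the pre-shuffled index array, advance
# the `left` pointer, wrap around at the end); the exact signature and wrap-around
# policy are assumptions:
def get_batch(train_x, train_label, batch_mask, batch_size, left):
    if left + batch_size > len(batch_mask):  # end of an epoch: reshuffle and restart
        np.random.shuffle(batch_mask)
        left = 0
    idx = batch_mask[left:left + batch_size]
    return train_x[idx], train_label[idx], left + batch_size
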
def __train(weight_init_std):
    bn_network = MultiLayerNetExtend(input_size=784,
                                     hidden_size_list=[100, 100, 100, 100, 100],
                                     output_size=10,
                                     weight_init_std=weight_init_std,
                                     use_batchnorm=True)

    network = MultiLayerNetExtend(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_init_std)

    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
Example #3
def main():
    (train_x, train_label), (test_x, test_label) = load_mnist()
    # To reproduce overfitting, reduce the amount of training data
    train_x = train_x[: 300]
    train_label = train_label[: 300]

    # Set whether to use Dropout, and the dropout ratio ========================
    use_dropout = False
    # use_dropout = True
    dropout_ratio = 0.2

    network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10, use_dropout=use_dropout, dropout_ratio=dropout_ratio)

    trainer = Trainer(network, train_x, train_label, test_x, test_label,
                      epochs=301, mini_batch_size=100, optimizer='sgd',
                      optimizer_param={'lr': 0.01}, verbose=True)

    trainer.train()

    train_acc_list = trainer.train_acc_list
    test_acc_list = trainer.test_acc_list

    draw(train_acc_list, test_acc_list)
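# draw() is referenced above but not defined in this snippet. A minimal sketch of a
# plotting helper with the assumed signature, in the same matplotlib style used by
# the other examples here:
def draw(train_acc_list, test_acc_list):
    import numpy as np
    import matplotlib.pyplot as plt
    x = np.arange(len(train_acc_list))
    plt.plot(x, train_acc_list, marker='o', label='train', markevery=10)
    plt.plot(x, test_acc_list, marker='s', label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()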
Example #4
def main():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                      one_hot_label=True)
    x_train = x_train[:300]
    t_train = t_train[:300]

    max_epochs = 201
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01

    use_dropout = True
    dropout_ratio = 0.2

    train_acc_list = []
    test_acc_list = []

    network = MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10,
        use_dropout=use_dropout,
        dropout_ration=dropout_ratio)
    optimizer = SGD(lr=learning_rate)

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)

            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) + \
                    ", test acc:" + str(test_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    markers = {'train': 'o', 'test': 's'}
    x = np.arange(max_epochs)
    plt.plot(x, train_acc_list, marker='o', label='train', markevery=10)
    plt.plot(x, test_acc_list, marker='s', label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()
def __train(weight_init_std):
    bn_net = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100, 100],
                                 output_size=10,
                                 weight_init_std=weight_init_std,
                                 use_batchnorm=True)
    net = MultiLayerNetExtend(input_size=784,
                              hidden_size_list=[100, 100, 100, 100],
                              output_size=10,
                              weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _net in (bn_net, net):
            grads = _net.gradient(x_batch, t_batch)
            optimizer.update(_net.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = net.accuracy(x_train, t_train)
            bn_train_acc = bn_net.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("EPOCH: {0} | NET_ACC({1}) - BN_NET_ACC({2})".format(
                epoch_cnt, train_acc, bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epoches:
                break
    return train_acc_list, bn_train_acc_list
Example #6
def __train(lr, weight_decay, epochs=50):
    network = MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10,
        weight_decay_lambda=weight_decay)
    trainer = Trainer(network,
                      x_train,
                      t_train,
                      x_val,
                      t_val,
                      epochs=epochs,
                      mini_batch_size=100,
                      optimizer='sgd',
                      optimizer_param={'lr': lr},
                      verbose=False)
    trainer.train()
    return trainer.test_acc_list, trainer.train_acc_list
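# A sketch of how __train might be driven for a random hyper-parameter search (the
# pattern used in the book's hyper-parameter optimization example); the trial count
# and sampling ranges below are illustrative assumptions:
results_val = {}
results_train = {}
for _ in range(100):
    weight_decay = 10 ** np.random.uniform(-8, -4)  # sample on a log scale
    lr = 10 ** np.random.uniform(-6, -2)
    val_acc_list, train_acc_list = __train(lr, weight_decay)
    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
    results_val[key] = val_acc_list
    results_train[key] = train_acc_list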
Example #7
    def __init__(self,
                 idx,
                 x_train,
                 t_train,
                 x_test,
                 t_test,
                 optimizer,
                 weight_decay_lambda=0.0):
        self.idx = idx
        # self.layer = MultiLayerNet(input_size=784, hidden_size_list=[100], output_size=10,
        #                             weight_decay_lambda=weight_decay_lambda)
        self.layer = MultiLayerNetExtend(
            input_size=784,
            hidden_size_list=[50, 50, 50],
            output_size=10,
            weight_decay_lambda=weight_decay_lambda,
            use_dropout=False,
            dropout_ration=0.0,
            use_batchnorm=False)
        self.optimizer = optimizer
        self.rec_param = np.array([{} for i in range(self.n)])

        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test

        self.z_vec = np.zeros(self.n)
        self.z_vec[idx] = 1
        self.rec_z = np.zeros((self.n, self.n))

        self.AdjG = np.zeros(self.n)  # every entry must be 0 or 1
        self.WeiG = np.zeros(self.n)
        if np.all(self.WeiG_init == 0):
            self.makeWeiGraph(self.AdjG_init)
        else:
            self.WeiG = self.WeiG_init[self.idx]
        self.makeAdjGraph()

        self.train_loss = 0
        self.train_acc = 0
        self.test_acc = 0
Example #8
def __train(lr, weight_decay_lambda, epoch_num=120):
    network = MultiLayerNetExtend(input_size=784,
                                  hidden_size_list=[100] * 5,
                                  output_size=10,
                                  activation='ReLu',
                                  weight_init_std='ReLu',
                                  weight_decay_lambda=weight_decay_lambda,
                                  use_BatchNormalization=False,
                                  use_weight_decay=True)
    # Note: the sets passed below are the validation set, not the test set
    trainer = Trainer(network=network,
                      x_train=x_train,
                      t_train=t_train,
                      x_test=x_val,
                      t_test=t_val,
                      epochs=epoch_num,
                      mini_batch_num=100,
                      optimizer='SGD',
                      optimizer_params={'lr': lr})
    trainer.train()
    return trainer.train_acc_list, trainer.test_acc_list  # return this run's training-set and validation-set accuracies
Example #9
def main():
    # Load the data
    (train_x, train_label), _ = load_mnist(one_hot_label=True)

    # Build the network
    network = MultiLayerNetExtend(input_size=784,
                                  hidden_size_list=[100, 100],
                                  output_size=10,
                                  use_batchnorm=True)

    # Use only a single training sample for the check
    batch_x = train_x[:1]
    batch_label = train_label[:1]

    # Compute gradients by backpropagation and by numerical differentiation
    grad_backprop = network.gradient(batch_x, batch_label)
    grad_numerical = network.numerical_gradient(batch_x, batch_label)

    # Compare the results of the two methods
    for key in grad_numerical.keys():
        diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
        print(key + ":" + str(diff))
def __trainning(weight_init_std, index):

    epoch_num = 0
    acc_train_list = []
    acc_train_BN_list = []

    input_size = 784
    hidden_size_list = [100] * 5
    output_size = 10
    activation = 'ReLu'
    weight_decay_lambda = 1

    network = MultiLayerNetExtend(input_size=input_size,
                                  hidden_size_list=hidden_size_list,
                                  output_size=output_size,
                                  activation=activation,
                                  weight_init_std=weight_init_std,
                                  weight_decay_lambda=weight_decay_lambda,
                                  use_BatchNormalization=False,
                                  use_weight_decay=False)
    network_BN = MultiLayerNetExtend(input_size=784,
                                     hidden_size_list=hidden_size_list,
                                     output_size=output_size,
                                     activation=activation,
                                     weight_init_std=weight_init_std,
                                     weight_decay_lambda=weight_decay_lambda,
                                     use_BatchNormalization=True,
                                     use_weight_decay=False)

    # Training proceeds in the following steps:
    # 1. Randomly pick batch_size samples from the data
    # 2. Forward pass, backward pass, and compute the gradients
    # 3. Update the parameters with the gradients
    # 4. Repeat steps 1-3 until the loop ends
    for i in range(iter_num):

        print('W' + str(index) + ': iteration ' + str(i))

        # Pick a mini-batch
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Compute the gradients and update the parameters
        for network_ in (network, network_BN):
            grads = network_.gradient(x_batch, t_batch, use_weight_decay=False)
            # optimizer_SGD.update(params = network_.params, grads = grads)
            if network_ == network:
                optimizer.update(params=network_.params, grads=grads)
            else:
                optimizer_BN.update(params=network_.params, grads=grads)

        # Once per epoch, compute both networks' accuracy and append it to the lists
        if i % iter_num_per_epch == 0:
            network_acc = network.accuracy(x_batch, t_batch)
            network_BN_acc = network_BN.accuracy(x_batch, t_batch)
            acc_train_list.append(network_acc)
            acc_train_BN_list.append(network_BN_acc)

            epoch_num += 1

            if epoch_num >= epoch_cnt_max:
                break

    return acc_train_list, acc_train_BN_list
Example #11
# Reproduce Figure 6-18 on p.213.
# Compare the training speed of a network with Batch Normalization and one without
import numpy as np
import matplotlib.pyplot as plt

from ch06.e02_sgd import Sgd
from common.multi_layer_net_extend import MultiLayerNetExtend

from common.optimizer import Momentum, AdaGrad, Adam
from dataset.mnist import load_mnist

# Network that uses batch normalization
bn_neural_net = MultiLayerNetExtend(input_size=784,
                                    hidden_size_list=[100, 100, 100, 100, 100],
                                    output_size=10,
                                    weight_init_std=0.01,
                                    use_batchnorm=True)
# Network that does not use batch normalization
neural_net = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100, 100, 100],
                                 output_size=10,
                                 weight_init_std=0.01,
                                 use_batchnorm=False)

# Train with mini-batches for 20 iterations, recording the accuracy of both networks
# -> plot the results (a sketch of this loop follows below)

(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)
# Reduce the number of training samples to shorten training time.
X_train = X_train[:1000]  # use only 1,000 samples
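# The training loop itself is not included in this snippet. A minimal sketch of the
# comparison described above (20 mini-batch updates, recording both networks'
# accuracy); the batch size and the Sgd constructor signature are assumptions:
batch_size = 100
sgd = Sgd(learning_rate=0.01)

bn_acc_list, acc_list = [], []
for i in range(20):
    mask = np.random.choice(X_train.shape[0], batch_size)
    x_batch, y_batch = X_train[mask], Y_train[mask]
    for net in (bn_neural_net, neural_net):
        grads = net.gradient(x_batch, y_batch)
        sgd.update(net.params, grads)
    bn_acc_list.append(bn_neural_net.accuracy(X_train, Y_train))
    acc_list.append(neural_net.accuracy(X_train, Y_train))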
Example #12
from ch06.ex02_sgd import Sgd
from common.multi_layer_net import MultiLayerNet
from common.multi_layer_net_extend import MultiLayerNetExtend
from dataset.mnist import load_mnist

np.random.seed(110)

# Prepare the data
(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

# Create the network
wd_rate = 0
neural_net = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100, 100, 100],
                                 output_size=10,
                                 weight_decay_lambda=wd_rate,
                                 use_dropout=True,
                                 dropout_ration=0.15)
# weight_decay_lambda: constant used for weight decay

# Limit the training data to 300 samples -> to induce overfitting
X_train = X_train[:300]
Y_train = Y_train[:300]
X_test = X_test[:300]  # to shorten experiment time
Y_test = Y_test[:300]

epochs = 200  # 1 epoch: every training sample has been used once
mini_batch_size = 100  # number of samples sent through one forward pass
train_size = X_train.shape[0]
iter_per_epoch = int(max(train_size / mini_batch_size, 1))
# Record the accuracy on the training/test data at every epoch during training (sketch below)
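# That loop is not included in the snippet. A minimal sketch, assuming plain
# mini-batch SGD with the Sgd class imported above (its constructor signature is an
# assumption):
sgd = Sgd(learning_rate=0.01)
train_accs, test_accs = [], []
for i in range(epochs * iter_per_epoch):
    mask = np.random.choice(train_size, mini_batch_size)
    grads = neural_net.gradient(X_train[mask], Y_train[mask])
    sgd.update(neural_net.params, grads)
    if i % iter_per_epoch == 0:  # epoch boundary
        train_accs.append(neural_net.accuracy(X_train, Y_train))
        test_accs.append(neural_net.accuracy(X_test, Y_test))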
Example #13
print(x)
mask = x > 0.5
print(mask)
print(x *
      mask)  # False acts as 0, so masked values become 0; True acts as 1, so values pass through

np.random.seed(110)

# Prepare the data
(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

# Create the network
dropout_ratio = 0.1
neural_net = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100, 100, 100],
                                 output_size=10,
                                 use_dropout=True,
                                 dropout_ration=dropout_ratio)

X_train = X_train[:500]
Y_train = Y_train[:500]
X_test = X_test[:500]
Y_test = Y_test[:500]

# Initial idea
# # Implementing dropout: since the layer order is Affine > BatchNorm > ReLU > Dropout,
# # apply the dropout mask to the ReLU output (see the sketch below)
#
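# A minimal sketch of that idea as a standalone Dropout layer; this mirrors the
# standard implementation in the book's common/layers.py and is shown here only as
# an illustration:
class Dropout:
    def __init__(self, dropout_ratio=0.1):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # keep each unit with probability (1 - dropout_ratio)
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        # at test time, scale activations instead of masking
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # gradients flow only through the units that were kept
        return dout * self.mask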

train_size = X_train.shape[0]
epochs = 200
mini_batch_size = 100
mask_train = np.random.choice(x_train.shape[0], 1000)
mask_test = np.random.choice(x_test.shape[0], 1000)
x_train = x_train[mask_train]
t_train = t_train[mask_train]
x_test = x_test[mask_test]
t_test = t_test[mask_test]
train_size = x_train.shape[0]
batch_size = 128

optimizer = SGD(lr=0.01)

network_st = MultiLayerNet(input_size=784,
                           hidden_size_list=[100, 100, 100],
                           output_size=10)
network_bn = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100],
                                 output_size=10,
                                 use_batchnorm=True)

loss_list = {
    "network_st": [],
    "network_bn": [],
    "network_st_test": [],
    "network_bn_test": []
}

for i in range(iter_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    x_test_batch = x_test[batch_mask]
    t_test_batch = t_test[batch_mask]
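    # The rest of the loop body is not included in this snippet. A minimal sketch of
    # what the loss_list bookkeeping above suggests: update both networks on the
    # training batch and record their train/test losses.
    for name, net in (("network_st", network_st), ("network_bn", network_bn)):
        grads = net.gradient(x_batch, t_batch)
        optimizer.update(net.params, grads)
        loss_list[name].append(net.loss(x_batch, t_batch))
        loss_list[name + "_test"].append(net.loss(x_test_batch, t_test_batch))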
Example #15
# Just in case, check the dimensionality of the data
print("x_train,t_train dimension_number")
print(x_train.shape,t_train.shape)
print("x_test,t_test dimension_number")
print(x_test.shape,t_test.shape)

# Reduce the training data to speed things up
#x_train = x_train[:56678]
#t_train = t_train[:56678]

# Choose the optimizer
optimizers = Adam()

network = MultiLayerNetExtend(input_size=122, hidden_size_list=[1000,1000], output_size=2,
                            activation='relu', weight_init_std=0.01,
                            weight_decay_lambda=0, use_dropout=True, dropout_ration=0.5,
                            use_batchnorm=False)

# Hyperparameters
iters_num = 73554  # number of iterations
batch_size = 512
learning_rate = 0.00001  # learning rate

train_size = x_train.shape[0]  # number of rows in x_train

# Create the lists that will store the results
train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch_float = max(train_size / batch_size, 1)  # how many iterations one epoch takes (see the sketch below)
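# The training loop itself is not part of this snippet. A minimal sketch of how the
# lists and iter_per_epoch_float above would typically be used, assuming the usual
# optimizer.update(params, grads) interface:
iter_per_epoch = int(iter_per_epoch_float)
for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizers.update(network.params, grads)
    train_loss_list.append(network.loss(x_batch, t_batch))

    if i % iter_per_epoch == 0:  # once per epoch
        train_acc_list.append(network.accuracy(x_train, t_train))
        test_acc_list.append(network.accuracy(x_test, t_test))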
Example #16
    def __init__(self,
                 idx,
                 x_train,
                 t_train,
                 x_test,
                 t_test,
                 optimizer,
                 weight_decay_lambda=0.0):
        """各Agentの初期状態変数
        Args:
            idx         : Agentのインデックス
            layer       : Agent内のニューラルネットワークの層
            optimizer   : 最適化を行うアルゴリズムの選択
            rec_param   : 隣接するエージェントから受け取るパラメータ
            z_vec       : 左の固有ベクトル
            rec_z       : 隣接するエージェントから受け取る左固有ベクトル
            AdjG        : 隣接行列??
            WeiG        : 重み行列??
        """

        self.idx = idx
        # self.layer = MultiLayerNet(input_size=784, hidden_size_list=[100], output_size=10,
        #                             weight_decay_lambda=weight_decay_lambda)
        self.layer = MultiLayerNetExtend(
            input_size=784,
            hidden_size_list=[
                500, 400, 300, 300, 200, 200, 100, 100, 100, 100, 100, 50, 50
            ],
            output_size=10,
            weight_decay_lambda=weight_decay_lambda,
            use_dropout=True,
            dropout_ration=0.3,
            use_batchnorm=True)
        #dropout_ratio=0.03, use_batchnorm=True, hidden_size_list=[500,400,300,300,200,200,100,100,100,50,50,50] weightdecay=0.01 → 0.9428
        #hidden_size_list=[500,400,300,300,200,200,100,100,100,50,50,50], output_size=10, weight_decay_lambda=weight_decay_lambda, use_dropout=True, dropout_ration=0.05, use_batchnorm=True -- best so far
        #hidden_size_list=[100,100,100,100,100] weightdecay=0.3, dropout_ration=0.3

        self.optimizer = optimizer
        self.rec_param = np.array([{} for i in range(self.n)])
        self.send_param = np.array([{} for i in range(self.n)])

        #Initialize
        self.rec_param[self.idx] = self.layer.params.copy()
        self.send_param[self.idx] = self.layer.params.copy()

        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test

        self.AdjG = np.zeros(self.n)  # every entry must be 0 or 1
        self.WeiG = np.zeros(self.n)
        if np.all(self.WeiG_init == 0):
            self.makeWeiGraph(self.AdjG_init)
        else:
            self.WeiG = self.WeiG_init[self.idx]
        self.makeAdjGraph()

        self.train_loss = 0
        self.train_acc = 0
        self.test_acc = 0
from ch06.ex02_sgd import Sgd
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.optimizer import Momentum
from dataset.mnist import load_mnist
import numpy as np
import matplotlib.pyplot as plt

# Reproduce the figure on p.213.
# Compare the training speed of a network with Batch Normalization and one without

np.random.seed(110)

# Network that uses batch normalization
bn_neural_net = MultiLayerNetExtend(
    input_size=784,
    hidden_size_list=[100, 100, 100, 100, 100],  # five layers of 100 neurons each
    output_size=10,
    weight_init_std=0.3,  # for [W1, W2, W3, W4, W5] ~~~~, std = 0.01
    use_batchnorm=True)

# Network that does not use batch normalization
neural_net = MultiLayerNetExtend(
    input_size=784,
    hidden_size_list=[100, 100, 100, 100, 100],  # five layers of 100 neurons each
    output_size=10,
    weight_init_std=0.3,  # for [W1, W2, W3, W4, W5] ~~~~, std = 0.01
    use_batchnorm=False)

# Load the MNIST data
(X_train, Y_train), (X_test, Y_test) = load_mnist(one_hot_label=True)

# Reduce the number of training samples to shorten training time
Example #18
use_dropout = True
dropout_ratio = np.linspace(0, 0.2, 5)
weight_decay = np.geomspace(0.001, 0.2, num=5)
learning_rate = np.geomspace(0.001, 0.2, num=5)
best_hp = []

# Grid search over dropout_ratio, weight_decay, and learning_rate (5 x 5 x 5 = 125 runs)
with progressbar.ProgressBar(max_value=125) as bar:
    for i in range(0, len(dropout_ratio)):
        for j in range(0, len(weight_decay)):
            for k in range(0, len(learning_rate)):
                network = MultiLayerNetExtend(
                    input_size=784,
                    hidden_size_list=[100, 100, 100, 100],
                    output_size=10,
                    activation='sigmoid',
                    weight_init_std='xavier',
                    weight_decay_lambda=weight_decay[j],
                    use_dropout=use_dropout,
                    dropout_ration=dropout_ratio[i])
                trainer = Trainer(network,
                                  x_train,
                                  t_train,
                                  x_test,
                                  t_test,
                                  epochs=5,
                                  mini_batch_size=500,
                                  optimizer='adam',
                                  optimizer_param={'lr': learning_rate[k]},
                                  verbose=False)
                trainer.train()
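                # What is done with each run's result is not shown in this snippet.
                # A minimal sketch, assuming the intent is to keep the best setting
                # by final test accuracy and to advance the progress bar:
                acc = trainer.test_acc_list[-1]
                if not best_hp or acc > best_hp[0]:
                    best_hp = [acc, dropout_ratio[i], weight_decay[j], learning_rate[k]]
                bar.update(i * 25 + j * 5 + k)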
Example #19
from common.trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# To reproduce overfitting, reduce the amount of training data
x_train = x_train[:300]
t_train = t_train[:300]

# Set whether to use Dropout, and the dropout ratio ========================
use_dropout = True  # set to False to disable Dropout
dropout_ratio = 0.2
# ====================================================

network = MultiLayerNetExtend(input_size=784,
                              hidden_size_list=[100, 100, 100, 100, 100, 100],
                              output_size=10,
                              use_dropout=use_dropout,
                              dropout_ration=dropout_ratio)
trainer = Trainer(network,
                  x_train,
                  t_train,
                  x_test,
                  t_test,
                  epochs=301,
                  mini_batch_size=100,
                  optimizer='sgd',
                  optimizer_param={'lr': 0.01},
                  verbose=True)
trainer.train()

train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list
Example #20
output_size = 10
activation = 'ReLu'  # activation function between layers
weight_init_std = 'ReLu'  # weight initialization chosen to match the activation function
weight_decay_lambda = 1
epoch_num = 0

train_acc_list = []
test_acc_list = []
train_acc_weight_decay_list = []
test_acc_weight_decay_list = []

# Build the two network objects
network = MultiLayerNetExtend(input_size=input_size,
                              hidden_size_list=hidden_size_list,
                              output_size=output_size,
                              activation=activation,
                              weight_init_std=weight_init_std,
                              weight_decay_lambda=weight_decay_lambda,
                              use_BatchNormalization=False,
                              use_weight_decay=False)
network_weight_decay = MultiLayerNetExtend(
    input_size=input_size,
    hidden_size_list=hidden_size_list,
    output_size=output_size,
    activation=activation,
    weight_init_std=weight_init_std,
    weight_decay_lambda=weight_decay_lambda,
    use_BatchNormalization=False,
    use_weight_decay=True)

# Training proceeds in the following steps:
# 1. Randomly pick batch_size samples from the data