예제 #1
0
import sys
sys.path.append('..')
from common.optimizer import SGD
from common.trainer import Trainer
import numpy as np
from dataset import spiral
import matplotlib.pyplot as plt
from two_layer_net import TwoLayerNet

# Hyperparameter settings
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# Load the data, then create the model and the optimizer
x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

# Delegate the training loop to Trainer, then call its plot() method.
# NOTE(review): eval_interval=10 presumably sets how often progress is
# reported -- confirm against common.trainer.Trainer.
trainer = Trainer(model, optimizer)
trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
trainer.plot()
예제 #2
0
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import *

# 0: Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
# One optimizer per update rule, each built with its default arguments.
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()

# One identically configured network and one loss history per optimizer,
# keyed by the optimizer's name.
networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    # Draw batch_size random row indices into the training set
    # (np.random.choice samples with replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
sys.path.append(os.pardir)  # make files in the parent directory importable
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
# Reduce the amount of training data in order to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10)
optimizer = SGD(lr=0.01)  # update the parameters with SGD, learning rate 0.01

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

# True division: this is a float (3.0 for 300 samples and batches of 100).
iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    # Draw batch_size random row indices (with replacement) and take those
    # rows as the input mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
def main():
    """Train a TwoLayerNet on the spiral dataset with SGD and plot the outcome.

    Runs 300 epochs of mini-batch training, printing and recording the mean
    loss of every 10 iterations, then shows two figures: the recorded loss
    curve, and the predicted class regions with the data points on top.
    """
    # (1) Hyperparameters
    max_epoch, batch_size = 300, 30
    hidden_size, learning_rate = 10, 1.0

    # (2) Load the data; build the model and the optimizer
    x, t = spiral.load_data()
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)

    # Bookkeeping for the training loop
    data_size = len(x)
    max_iters = data_size // batch_size
    total_loss, loss_count = 0, 0
    loss_list = []

    for epoch in range(max_epoch):
        # (3) Reshuffle inputs and labels with one shared permutation
        perm = np.random.permutation(data_size)
        x, t = x[perm], t[perm]

        for iters in range(max_iters):
            window = slice(iters * batch_size, (iters + 1) * batch_size)

            # (4) Forward/backward pass, then one parameter update
            loss = model.forward(x[window], t[window])
            model.backward()
            optimizer.update(model.params, model.grads)

            total_loss += loss
            loss_count += 1

            # (5) Every 10th iteration: report the running mean and reset it
            if (iters + 1) % 10 != 0:
                continue
            avg_loss = total_loss / loss_count
            print(f"| epoch {epoch+1} | iter {iters+1} / {max_iters} | loss {avg_loss}")
            loss_list.append(avg_loss)
            total_loss, loss_count = 0, 0

    print(f"loss_list: \n{loss_list}")

    # Loss curve
    steps = np.arange(len(loss_list))
    plt.plot(steps, np.asarray(loss_list))
    plt.xlabel('iterations (x10)')
    plt.ylabel('loss')
    plt.show()

    # Class regions: classify every node of a fine grid that covers the data
    # with a 0.1 margin on each side
    h = 0.001
    grid_x = np.arange(x[:, 0].min() - 0.1, x[:, 0].max() + 0.1, h)
    grid_y = np.arange(x[:, 1].min() - 0.1, x[:, 1].max() + 0.1, h)
    xx, yy = np.meshgrid(grid_x, grid_y)
    X = np.c_[xx.ravel(), yy.ravel()]
    predict_cls = np.argmax(model.predict(X), axis=1)
    plt.contourf(xx, yy, predict_cls.reshape(xx.shape))
    plt.axis('off')

    # Data points: reload the data (x was shuffled above) and draw each
    # consecutive run of N rows with its own marker
    x, t = spiral.load_data()
    N = 100
    CLS_NUM = 3
    markers = ['o', 'x', '^']
    for cls, marker in zip(range(CLS_NUM), markers):
        points = x[cls * N:(cls + 1) * N]
        plt.scatter(points[:, 0], points[:, 1], s=40, marker=marker)
    plt.show()
# Split the training set into n equal parts, one per agent
# (np.split with an integer raises if the split is not even).
x_train_split = np.split(x_train, n)
t_train_split = np.split(t_train, n)

max_epochs = 101
each_train_size = x_train_split[0].shape[0]
# Batches never exceed one agent's share of the data.
batch_size = min(100, each_train_size)

# Configuration shared by all agents, stored as class attributes on Agent.
Agent.n = n
Agent.maxdeg, Agent.AdjG_init = maxdeg, Gadj
Agent.train_size, Agent.batch_size = each_train_size, batch_size

weight_decay_lambda = 0

# One agent per data shard. Each gets its own SGD instance whose lr is a
# callable that returns 0.01 whatever its argument.
agents = [
    Agent(idx, x_train_split[idx], t_train_split[idx], x_test, t_test,
          SGD(lr=lambda s: 0.01), weight_decay_lambda) for idx in range(n)
]

train_loss_list = []
train_acc_list = []
test_acc_list = []

# True division: this is a float.
iter_per_epoch = max(each_train_size / batch_size, 1)
epoch_cnt = 0

#####################

# NOTE(review): "degub_numericalGrad" looks like a misspelling of "debug";
# confirm that Agent really defines the method under this name.
grad_numerical = agents[0].degub_numericalGrad()
grad_backprop = agents[0].debug_backpropGrad()

### Check that the numerical gradient agrees with the gradient obtained by backpropagation.
예제 #6
0
    def backward(self, dout=1):
        """Backpropagate through the loss layer, then the layers in reverse.

        Args:
            dout: upstream gradient passed to the loss layer (default 1).

        Returns:
            Whatever the first layer's ``backward`` returns, or the loss
            layer's result when ``self.layers`` is empty.
        """
        grad = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            # Each layer consumes the gradient produced by the one after it.
            grad = layer.backward(grad)
        return grad


if __name__ == "__main__":
    # Demo run: build a small network and perform five training steps,
    # printing the summed loss before each update.
    from common.layers import Affine
    from common.activation import Sigmoid, Softmax
    from common.loss import cross_entropy_error
    from common.optimizer import SGD
    import numpy as np
    from common.dataset import load_data

    # Affine(2, 4) -> Sigmoid -> Affine(4, 3); the Softmax layer is imported
    # but left disabled.
    _model = NeuralNetworkModel()
    _model.add_layer(Affine(2, 4))
    _model.add_layer(Sigmoid())
    _model.add_layer(Affine(4, 3))
    # _model.add_layer(Softmax())
    _model.add_optimizer(SGD(), cross_entropy_error)

    # NOTE(review): presumably 100 samples per class over 3 classes --
    # confirm against common.dataset.load_data.
    _x, _t = load_data(100, 3)

    for i in range(5):
        _loss = _model.forward(_x, _t)
        print(np.sum(_loss))
        _model.backward()
        _model.optimizer.update(_model.params, _model.grads)
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD


# 0. Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000 # 2000 iterations


# 1. Experiment setup ==========
# Label -> value passed as weight_init_std: a fixed standard deviation, or
# the name of an activation ('sigmoid' / 'relu').
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
# standard deviation: 0.01
optimizer = SGD(lr=0.01) # learning rate 0.01


# One network and one loss history per initialization scheme.
networks = {}
train_loss = {}
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                                  output_size=10, weight_init_std=weight_type)
    # 10 outputs
    train_loss[key] = []


# 2. Start training ==========
for i in range(max_iterations):
    # Draw batch_size random row indices (with replacement) and take those
    # rows as the input mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
lr = 20.0
max_epoch = 40
max_grad = 0.25
dropout = 0.5

# Load the training, validation and test corpora
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_val, _, _ = ptb.load_data('val')
corpus_test, _, _ = ptb.load_data('test')

vocab_size = len(word_to_id)
xs = corpus[:-1]  # inputs
ts = corpus[1:]  # targets: the same sequence shifted by one word

model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
# Start from the configured learning rate. Constructing SGD() without it
# would train at the class default until the first decay below overwrites
# optimizer.lr.
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

best_ppl = float('inf')
for epoch in range(max_epoch):
    # Train one epoch at a time so validation can run in between.
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size,
                time_size=time_size, max_grad=max_grad)
    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print('valid preplexity: ', ppl)

    if best_ppl > ppl:
        # New best validation perplexity: save these parameters.
        best_ppl = ppl
        model.save_params()
    else:
        # No improvement: cut the learning rate to a quarter.
        lr /= 4.0
        optimizer.lr = lr
def main():
    """Train a SimpleRnnlm on the first 1000 PTB tokens with truncated BPTT.

    Prints the corpus, the mini-batch read positions and the perplexity of
    every epoch, then plots perplexity against the epoch index.
    """
    # Hyperparameters
    batch_size, time_size = 10, 5  # time_size: steps unrolled by truncated BPTT
    wordvec_size, hidden_size = 100, 500
    lr, max_epoch = 0.1, 100

    # Load the training corpus and keep only its first corpus_size tokens
    corpus_size = 1000
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus = corpus[:corpus_size]
    vocab_size = int(max(corpus) + 1)

    print("corpus: ", corpus)
    print("corpus size: ", len(corpus))

    # Inputs and targets: each target is the word ID that follows its input
    xs, ts = corpus[:-1], corpus[1:]
    print("corpus size: %d, vocabulary size: %d" % (corpus_size, vocab_size))
    data_size = len(xs)
    print("data_size: ", data_size)

    # Bookkeeping for the training loop
    max_iters = data_size // (batch_size * time_size)
    time_idx = 0
    total_loss, loss_count = 0, 0
    ppl_list = []

    # Model and optimizer
    model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)

    # (1) Start position of each mini-batch row: rows begin `jump` tokens
    # apart, e.g. jump=99 for the settings above -> [0, 99, 198, ...]
    jump = (corpus_size - 1) // batch_size
    print("jump: ", jump)
    offsets = [row * jump for row in range(batch_size)]
    print("offsets: ", offsets)

    # Training
    for epoch in range(max_epoch):
        for _ in range(max_iters):

            # (2) Assemble one mini-batch, wrapping around the end of the data
            batch_x = np.empty((batch_size, time_size), dtype=np.int32)
            batch_t = np.empty((batch_size, time_size), dtype=np.int32)
            for col in range(time_size):
                for row, offset in enumerate(offsets):
                    print(f"offset: {offset}, time_idx: {time_idx} -> {(offset + time_idx) % data_size}")
                    pos = (offset + time_idx) % data_size
                    batch_x[row, col] = xs[pos]
                    batch_t[row, col] = ts[pos]
                # Never reset: every batch continues where the previous one
                # stopped reading.
                time_idx += 1

            # Compute the gradients and update the parameters
            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)
            total_loss += loss
            loss_count += 1

        # (3) Perplexity of this epoch = exp(mean loss); then reset the sums
        ppl = np.exp(total_loss / loss_count)
        print('| epoch %d | perplexity %.2f' % (epoch+1, ppl))
        ppl_list.append(float(ppl))
        total_loss, loss_count = 0, 0

    # Draw the graph
    epoch_axis = np.arange(len(ppl_list))
    plt.plot(epoch_axis, ppl_list, label='train')
    plt.xlabel('epochs')
    plt.ylabel('perplexity')
    plt.show()
예제 #10
0
def main():
    """Train a GNN with mini-batch SGD and save the learning curves.

    Reads ``--D``, ``--T``, ``--epoch`` and ``--batch`` from the command
    line, splits the data returned by ``get_train()`` into training and
    validation parts with ``shuffle_split(split_size=0.7)``, trains for the
    requested number of epochs, prints the per-epoch loss and average
    accuracy of both parts, and writes the curves to
    ``src/graph/GNN_SGD.png``.
    """
    import os  # local import: only needed to create the output directory

    parser = argparse.ArgumentParser()
    parser.add_argument('--D', '-d', type=int, default=8,
                        help='Dimension of feature vector')
    parser.add_argument('--T', '-t', type=int, default=2,
                        help='Max step of aggregation')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of training epochs')
    parser.add_argument('--batch', '-b', type=int, default=256,
                        help='batch size')
    args = parser.parse_args()

    train_H, train_y, train_node_size = get_train()

    seed = 1996
    (train_H, train_y, val_H, val_y,
     train_node_size, val_node_size) = shuffle_split(
         train_H, train_y, train_node_size, split_size=0.7, seed=seed)

    D = args.D  # feature dimension
    T = args.T  # number of aggregation steps
    alpha = 0.0015  # learning rate
    max_epoch = args.epoch
    batch_size = args.batch

    # Mini-batches per epoch, rounded up so a final partial batch is included
    train_size = len(train_H)
    iter_per_epoch = (train_size + batch_size - 1) // batch_size

    # Feature vectors for the training and validation graphs
    train_x = get_feature(D, train_H, train_node_size)
    val_x = get_feature(D, val_H, val_node_size)

    model = GNN(D, T)
    optimizer = SGD(alpha=alpha)

    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []

    for epoch in range(max_epoch):
        # Re-seed from the epoch number so the shuffle order is the same on
        # every run.
        np.random.seed(int(epoch * 1234))
        shuffle_idx = np.random.permutation(train_H.shape[0])
        train_H = train_H[shuffle_idx]
        train_x = train_x[shuffle_idx]
        train_y = train_y[shuffle_idx]

        for num in range(iter_per_epoch):
            # Slices clamp at the end of the array, so the last (possibly
            # shorter) batch needs no special case.
            batch = slice(num * batch_size, (num + 1) * batch_size)
            batch_grads = _mean_batch_gradient(
                model, train_x[batch], train_H[batch], train_y[batch])
            optimizer.update(model.params, batch_grads)

        loss, train_score = _evaluate(model, train_x, train_H, train_y)
        val_loss, val_score = _evaluate(model, val_x, val_H, val_y)

        print(
            'epoch:{} loss:{:.5f} val_loss:{:.5f} avg_acc:{:.5f} val_avg_acc:{:.5f}'
            .format(epoch + 1, loss, val_loss, train_score, val_score))
        train_loss_list.append(loss)
        val_loss_list.append(val_loss)
        train_acc_list.append(train_score)
        val_acc_list.append(val_score)

    # Left panel: loss; right panel: average accuracy
    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))
    panels = (
        (ax1, 'loss', train_loss_list, val_loss_list),
        (ax2, 'average accuracy', train_acc_list, val_acc_list),
    )
    for ax, ylabel, train_curve, val_curve in panels:
        ax.plot(np.arange(len(train_curve)), train_curve, label='train')
        ax.plot(np.arange(len(val_curve)), val_curve, label='validation')
        ax.legend()
        ax.set_xlabel('epoch')
        ax.set_ylabel(ylabel)

    # Create the target directory first so a missing folder cannot discard
    # the result of a finished training run.
    out_path = 'src/graph/GNN_SGD.png'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    fig.savefig(out_path)
    plt.close()


def _mean_batch_gradient(model, batch_x, batch_H, batch_y):
    """Return the per-parameter gradient averaged over one mini-batch.

    The result is a dict with the keys of ``model.get_gradient``'s return
    value, or ``None`` when the batch is empty.
    """
    batch_len = len(batch_H)
    batch_grads = None
    for x_i, H_i, y_i in zip(batch_x, batch_H, batch_y):
        grad = model.get_gradient(x_i, H_i, y_i)
        if batch_grads is None:
            # The first sample fixes the keys and shapes of the accumulator.
            batch_grads = {key: np.zeros_like(val)
                           for key, val in grad.items()}
        for key in grad:
            batch_grads[key] += grad[key] / batch_len
    return batch_grads


def _evaluate(model, xs, Hs, ys):
    """Return ``(mean loss, average accuracy)`` of *model* on one dataset."""
    n_samples = len(Hs)
    loss = 0
    pred = np.zeros((len(ys), 1))
    for idx in range(n_samples):
        loss += model.loss(xs[idx], Hs[idx], ys[idx]) / n_samples
        # Threshold the model output at 1/2 to get a 0/1 prediction.
        pred[idx] = 0 if model.predict(xs[idx], Hs[idx]) < 1 / 2 else 1
    return loss, avg_acc(ys, pred)
예제 #11
0
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Cut down the training data in order to reproduce overfitting
x_train = x_train[:300]
t_train = t_train[:300]

# weight decay settings =======================
#weight_decay_lambda = 0 # when weight decay is not used
weight_decay_lambda = 0.1
# ====================================================

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)  # update the parameters with SGD, learning rate 0.01

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

# True division: this is a float (3.0 for 300 samples and batches of 100).
iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    # Draw batch_size random row indices (with replacement) and take those
    # rows as the input mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD, Momentum, AdaGrad, Adam

# 0: Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
# One optimizer per update rule, each built with its default arguments.
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
#optimizers['RMSprop'] = RMSprop()

# One identically configured network and one loss history per optimizer,
# keyed by the optimizer's name.
networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
from dataset import spiral
import matplotlib.pyplot as plt
from two_layer_net import TwoLayerNet


# Hyperparameter settings
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# If a random seed is passed to load_data(), any later load_data() call should use the same seed.
x, t = spiral.load_data()  # 300 coordinate pairs and 300 one-hot labels.
# 2 input neurons, a single hidden layer of 10 neurons, 3 output neurons.
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)  # learning rate is 1; stochastic gradient descent.

# Variables used during training
data_size = len(x)  # 300
max_iters = data_size // batch_size  # 300/30==10
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # データのシャッフル
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]

    for iters in range(max_iters):
예제 #14
0
    test = "../../data/titles-en-test.labeled"
    train_X, train_y, train_v = load_data(train)
    test_X, test_y, test_v = load_data(test)
    v = TfidfVectorizer(max_df=0.8)
    v.fit(list(map(lambda x: " ".join(x), train_X)))
    train_X, test_X = get_tdidf(v, train_X), get_tdidf(v, test_X)
    svd = TruncatedSVD(n_components=200, random_state=3939)
    svd.fit(train_X)
    train_X, test_X = get_reduced(svd, train_X), get_reduced(svd, test_X)
    train_X, train_y = np.array(train_X).astype(
        np.float32), np.array(train_y).astype(np.int32)
    test_X, test_y = np.array(test_X).astype(
        np.float32), np.array(test_y).astype(np.int32)
    print(train_X, train_y)
    model = Perceptron(len(train_X[0]), 1)
    optimizer = SGD(lr=0.5)
    batch_size = 32
    max_epoch = 50
    train_model(model, optimizer, train_X, train_y, batch_size, max_epoch)
    pred_y = model.predict(test_X)
    pred_y = sigmoid(pred_y)
    # print(pred_y)
    print(accuracy_score(test_y, np.int32(pred_y >= 0.5)))
    print(confusion_matrix(test_y, np.int32(pred_y >= 0.5)))
    print(classification_report(test_y, np.int32(pred_y >= 0.5)))
"""result
0.9256110520722636
[[1424   53]
 [ 157 1189]]
              precision    recall  f1-score   support
예제 #15
0
#!/usr/bin/env python
# coding: utf-8
import sys

sys.path.append('..')
from common.optimizer import SGD
from common.trainer import RnnlmTrainer
from dataset import ptb
from simple_rnnlm import SimpleRnnlm

# Hyperparameter settings
batch_size = 10
wordvec_size = 100
hidden_size = 100  # number of elements in the RNN hidden-state vector
time_size = 5  # number of time steps the RNN is unrolled for
lr = 0.1
max_epoch = 100

# Load the training data
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000  # shrink the dataset for testing
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)
xs = corpus[:-1]  # inputs
ts = corpus[1:]  # outputs (teacher labels)

# Create the model
model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)
def __train(weight_init_std):
    """Train three networks side by side and return their test accuracies.

    A plain network, a batch-normalized one and a "linearized" batch-norm
    one are built with identical layer sizes, and all three are updated on
    the same random mini-batches by one shared SGD optimizer. Test accuracy
    is recorded and printed once per epoch.

    Reads the module-level names ``learning_rate``, ``train_size``,
    ``batch_size``, ``max_epochs``, ``x_train``, ``t_train``, ``x_test``
    and ``t_test``.

    Args:
        weight_init_std: forwarded as ``weight_init_std`` to every network.

    Returns:
        ``(test_acc_list, bn_test_acc_list, bn_lin_test_acc_list)``: the
        per-epoch test accuracies of the plain, batch-norm and linearized
        batch-norm networks, in that order.
    """
    def build_network(**batchnorm_options):
        # A fresh hidden_size_list literal per call, so the networks never
        # share one list object.
        return MultiLayerNetExtend(input_size=784,
                                   hidden_size_list=[100, 100, 100, 100, 100],
                                   output_size=10,
                                   weight_init_std=weight_init_std,
                                   **batchnorm_options)

    # Construction order is kept as batch-norm, plain, linearized batch-norm.
    bn_network = build_network(use_batchnorm=True)
    network = build_network()
    bn_lin_network = build_network(use_batchnorm_linearized=True)

    optimizer = SGD(lr=learning_rate)

    test_acc_list = []
    bn_test_acc_list = []
    bn_lin_test_acc_list = []

    # Floor division keeps this an int. With true division, a batch size
    # that does not divide train_size gives a fractional value, so
    # `i % iter_per_epoch == 0` almost never holds and the loop would not
    # reach max_epochs.
    iter_per_epoch = max(train_size // batch_size, 1)
    epoch_cnt = 0

    # Effectively unbounded; the loop ends through the break below.
    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _network in (bn_network, network, bn_lin_network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch != 0:
            continue

        # Epoch boundary: evaluate every network on the test set.
        test_acc = network.accuracy(x_test, t_test)
        bn_test_acc = bn_network.accuracy(x_test, t_test)
        bn_lin_test_acc = bn_lin_network.accuracy(x_test, t_test)
        test_acc_list.append(test_acc)
        bn_test_acc_list.append(bn_test_acc)
        bn_lin_test_acc_list.append(bn_lin_test_acc)

        print(f"epoch:{epoch_cnt} | {test_acc} - {bn_test_acc} - {bn_lin_test_acc}")
        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break

    return test_acc_list, bn_test_acc_list, bn_lin_test_acc_list
예제 #17
0
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

# Load MNIST and keep only the first 300 training samples.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

x_train = x_train[:300]
t_train = t_train[:300]

# Coefficient passed to the network as weight_decay_lambda.
weight_decay_lambda = 0.1

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        weight_decay_lambda=weight_decay_lambda)

optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

# True division: this is a float (3.0 for 300 samples and batches of 100).
iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    # Draw batch_size random row indices (with replacement) and take those
    # rows as the input mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
def f(x, y):
    """Evaluate the quadratic test surface ``x**2 / 20 + y**2`` at (x, y)."""
    x_term = x**2 / 20.0
    y_term = y**2
    return x_term + y_term


def df(x, y):
    """Return the gradient of ``x**2 / 20 + y**2`` as ``(d/dx, d/dy)``."""
    slope_x = x / 10.0
    slope_y = 2.0 * y
    return slope_x, slope_y


# Starting point shared by every optimizer.
init_pos = (-7.0, 2.0)
params = {}
params['x'], params['y'] = init_pos[0], init_pos[1]
grads = {}
grads['x'], grads['y'] = 0, 0

# Optimizers to compare, each with its own learning rate; the OrderedDict
# fixes the iteration order.
optimizers = OrderedDict()
optimizers['SGD'] = SGD(lr=0.95)
optimizers['Momentum'] = Momentum(lr=0.1)
optimizers['AdaGrad'] = AdaGrad(lr=1.5)
optimizers['Adam'] = Adam(lr=0.3)

idx = 1

for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    # Reset to the starting point before running this optimizer.
    params['x'], params['y'] = init_pos[0], init_pos[1]

    for i in range(30):
        # Record the current position.
        x_history.append(params['x'])
        y_history.append(params['y'])