# Train a two-layer net on the spiral toy dataset using the common Trainer helper.
import sys
sys.path.append('..')
from common.optimizer import SGD
from common.trainer import Trainer
import numpy as np
from dataset import spiral
import matplotlib.pyplot as plt
from two_layer_net import TwoLayerNet

# Hyperparameter settings
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# Load the data; create the model and the optimizer
x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

trainer = Trainer(model, optimizer)
# eval_interval=10: average and report the loss every 10 iterations.
trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
trainer.plot()
# Compare optimizers (SGD / Momentum / AdaGrad / Adam) on MNIST by training one
# identical MultiLayerNet per optimizer and recording each training-loss curve.
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import *

# 0: Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
# NOTE(review): `np` is expected to be imported earlier in the file -- not visible in this chunk.

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()

# One fresh, identically-configured network per optimizer so curves are comparable.
networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2: Start training ==========
# (the loop body continues beyond this chunk)
for i in range(max_iterations):
    # Sample a random mini-batch of indices (with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
sys.path.append(os.pardir) # 부모 디렉터리의 파일을 가져올 수 있도록 설정 import numpy as np import matplotlib.pyplot as plt from dataset.mnist import load_mnist from common.multi_layer_net import MultiLayerNet from common.optimizer import SGD (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) # 오버피팅을 재현하기 위해 학습 데이터 수를 줄임 x_train = x_train[:300] t_train = t_train[:300] network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100], output_size=10) optimizer = SGD(lr=0.01) # 학습률이 0.01인 SGD로 매개변수 갱신 max_epochs = 201 train_size = x_train.shape[0] batch_size = 100 train_loss_list = [] train_acc_list = [] test_acc_list = [] iter_per_epoch = max(train_size / batch_size, 1) epoch_cnt = 0 for i in range(1000000000): batch_mask = np.random.choice(train_size, batch_size) x_batch = x_train[batch_mask]
def main():
    """Train a TwoLayerNet on the spiral dataset with a hand-written
    mini-batch SGD loop, then plot the loss curve and the decision boundary.

    Relies on module-level imports (np, plt, spiral, TwoLayerNet, SGD) that
    are not visible in this chunk.
    """
    # (1) Hyperparameter settings
    max_epoch = 300
    batch_size = 30
    hidden_size = 10
    learning_rate = 1.0

    # (2) Load the data; create the model and the optimizer
    x, t = spiral.load_data()
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)

    # Variables used during training
    data_size = len(x)
    max_iters = data_size // batch_size
    total_loss = 0
    loss_count = 0
    loss_list = []

    for epoch in range(max_epoch):
        # (3) Shuffle the data each epoch
        idx = np.random.permutation(data_size)
        x = x[idx]
        t = t[idx]

        for iters in range(max_iters):
            batch_x = x[iters*batch_size:(iters+1)*batch_size]
            batch_t = t[iters*batch_size:(iters+1)*batch_size]

            # (4) Compute the gradients and update the parameters
            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)
            total_loss += loss
            loss_count += 1

            # (5) Periodically report training progress (every 10 iterations)
            if (iters + 1) % 10 == 0:
                avg_loss = total_loss / loss_count
                print(f"| epoch {epoch+1} | iter {iters+1} / {max_iters} | loss {avg_loss}")
                loss_list.append(avg_loss)
                total_loss, loss_count = 0, 0

    print(f"loss_list: \n{loss_list}")

    # Plot the training-loss curve
    plt.plot(np.arange(len(loss_list)), np.asarray(loss_list))
    plt.xlabel('iterations (x10)')
    plt.ylabel('loss')
    plt.show()

    # Plot the decision boundary by classifying a dense grid of points
    h = 0.001
    x_min, x_max = x[:, 0].min() - 0.1, x[:, 0].max() + 0.1
    y_min, y_max = x[:, 1].min() - 0.1, x[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    X = np.c_[xx.ravel(), yy.ravel()]
    score = model.predict(X)
    predict_cls = np.argmax(score, axis=1)
    Z = predict_cls.reshape(xx.shape)
    plt.contourf(xx, yy, Z)
    plt.axis('off')

    # Plot the data points (reload so the original class ordering is restored
    # after the in-place shuffles above)
    x, t = spiral.load_data()
    N = 100       # samples per class -- assumed spiral.load_data layout; TODO confirm
    CLS_NUM = 3
    markers = ['o', 'x', '^']
    for i in range(CLS_NUM):
        plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i])
    plt.show()
# Multi-agent (distributed) training setup: split the training data across n
# agents and sanity-check gradients on the first agent.
# NOTE(review): np, x_train, t_train, x_test, t_test, n, maxdeg, Gadj, Agent
# and SGD are defined earlier in the file -- not visible in this chunk.
x_train_split = np.split(x_train, n)
t_train_split = np.split(t_train, n)

max_epochs = 101
each_train_size = x_train_split[0].shape[0]
batch_size = min(100, each_train_size)

# Shared (class-level) configuration for all agents
Agent.n = n
Agent.maxdeg, Agent.AdjG_init = maxdeg, Gadj
Agent.train_size, Agent.batch_size = each_train_size, batch_size

weight_decay_lambda = 0

# NOTE(review): lr is passed as a callable (step -> rate); verify that this
# SGD implementation accepts a schedule function rather than a float.
agents = [
    Agent(idx, x_train_split[idx], t_train_split[idx], x_test, t_test,
          SGD(lr=lambda s: 0.01), weight_decay_lambda)
    for idx in range(n)
]

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(each_train_size / batch_size, 1)
epoch_cnt = 0

#####################
# NOTE(review): "degub" looks like a typo for "debug" -- confirm it matches the
# actual Agent method name before renaming either side.
grad_numerical = agents[0].degub_numericalGrad()
grad_backprop = agents[0].debug_backpropGrad()
### Confirm that the numerical gradient matches the gradient obtained via backpropagation.
def backward(self, dout=1):
    """Run backpropagation through the whole model.

    Starts at the loss layer, then walks the layer stack in reverse order,
    feeding each layer the gradient produced by the layer after it.

    Args:
        dout: upstream gradient seeded into the loss layer (defaults to 1).

    Returns:
        The gradient propagated all the way back to the model input.
    """
    grad = self.loss_layer.backward(dout)
    for pos in range(len(self.layers) - 1, -1, -1):
        grad = self.layers[pos].backward(grad)
    return grad


if __name__ == "__main__":
    from common.layers import Affine
    from common.activation import Sigmoid, Softmax
    from common.loss import cross_entropy_error
    from common.optimizer import SGD
    import numpy as np
    from common.dataset import load_data

    # Tiny smoke run: a 2-4-3 network trained for five SGD steps,
    # printing the summed loss after each forward pass.
    net = NeuralNetworkModel()
    net.add_layer(Affine(2, 4))
    net.add_layer(Sigmoid())
    net.add_layer(Affine(4, 3))
    # net.add_layer(Softmax())
    net.add_optimizer(SGD(), cross_entropy_error)

    data, labels = load_data(100, 3)
    for step in range(5):
        step_loss = net.forward(data, labels)
        print(np.sum(step_loss))
        net.backward()
        net.optimizer.update(net.params, net.grads)
# Compare weight initializations (std=0.01 / Xavier / He) on MNIST using SGD.
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

# 0. Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
# NOTE(review): load_mnist and np are expected to be imported earlier in the file.

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000  # 2000 iterations

# 1. Experiment setup ==========
# 'sigmoid' / 'relu' select Xavier / He initialization inside MultiLayerNet;
# a float means a fixed standard deviation (0.01 here).
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
optimizer = SGD(lr=0.01)  # learning rate 0.01

networks = {}
train_loss = {}
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_type)  # 10 outputs
    train_loss[key] = []

# 2. Start training ==========
# (the loop body continues beyond this chunk)
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
# Train BetterRnnlm on PTB, halving the learning rate whenever validation
# perplexity stops improving and checkpointing the best parameters.
# NOTE(review): batch_size, time_size, wordvec_size, hidden_size and the
# imports (ptb, BetterRnnlm, RnnlmTrainer, SGD, eval_perplexity) are defined
# earlier in the file, outside this chunk.
lr = 20.0
max_epoch = 40
max_grad = 0.25
dropout = 0.5

# Load the training data
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_val, _, _ = ptb.load_data('val')
corpus_test, _, _ = ptb.load_data('test')

vocab_size = len(word_to_id)
xs = corpus[:-1]  # inputs
# BUGFIX: this line was commented out, so trainer.fit(xs, ts, ...) below
# raised a NameError. The target of each input is simply the next word.
ts = corpus[1:]

model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
# BUGFIX: SGD() ignored the configured lr=20.0 and used its default until the
# first decay assigned optimizer.lr; pass the learning rate from the start.
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

best_ppl = float('inf')
for epoch in range(max_epoch):
    # One epoch of truncated-BPTT training with gradient clipping.
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size,
                time_size=time_size, max_grad=max_grad)

    # Evaluate on the validation corpus from a fresh hidden state.
    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print('valid perplexity: ', ppl)  # fixed typo: "preplexity"

    if best_ppl > ppl:
        # Improved: remember the best score and checkpoint the parameters.
        best_ppl = ppl
        model.save_params()
    else:
        # No improvement: decay the learning rate by 4x.
        lr /= 4.0
        optimizer.lr = lr
def main():
    """Train SimpleRnnlm on a 1000-word slice of PTB with a hand-written
    truncated-BPTT loop, then plot per-epoch perplexity.

    Relies on module-level imports (np, plt, ptb, SimpleRnnlm, SGD) that are
    not visible in this chunk.
    """
    # Hyperparameter settings
    batch_size = 10
    wordvec_size = 100
    hidden_size = 500
    time_size = 5  # number of time steps unrolled by truncated BPTT
    lr = 0.1
    max_epoch = 100

    # Load the training dataset
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus_size = 1000
    corpus = corpus[:corpus_size]
    vocab_size = int(max(corpus) + 1)
    print("corpus: ", corpus)
    print("corpus size: ", len(corpus))

    # Inputs
    xs = corpus[:-1]
    ts = corpus[1:]  # teacher labels = the next word ID of each input
    print("corpus size: %d, vocabulary size: %d" % (corpus_size, vocab_size))
    data_size = len(xs)
    print("data_size: ", data_size)

    # Variables used during training
    max_iters = data_size // (batch_size * time_size)
    time_idx = 0
    total_loss = 0
    loss_count = 0
    ppl_list = []

    # Create the model
    model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)

    # (1) Compute the start offset of each sample in the mini-batch
    jump = (corpus_size - 1) // batch_size  # e.g. corpus_size=1000, batch_size=20 -> (1000-1)//20 -> 49
    print("jump: ", jump)
    offsets = [i * jump for i in range(batch_size)]  # batch_size=20, jump=49 -> [0, 49, 98, ..., 931]
    print("offsets: ", offsets)

    # Training
    for epoch in range(max_epoch):
        for iter in range(max_iters):
            # (2) Fetch a mini-batch: column t holds the word at offset+time_idx
            # for each sample row, wrapping around the corpus with modulo.
            batch_x = np.empty((batch_size, time_size), dtype=np.int32)
            batch_t = np.empty((batch_size, time_size), dtype=np.int32)
            for t in range(time_size):
                for i, offset in enumerate(offsets):
                    print(f"offset: {offset}, time_idx: {time_idx} -> {(offset + time_idx) % data_size}")
                    batch_x[i, t] = xs[(offset + time_idx) % data_size]
                    batch_t[i, t] = ts[(offset + time_idx) % data_size]
                # time_idx is never reset, so each iteration reads the corpus at
                # a position shifted further from each sample's offset.
                time_idx += 1

            # Compute the gradients and update the parameters
            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)
            total_loss += loss
            loss_count += 1

        # (3) Evaluate perplexity once per epoch (exp of the mean loss)
        ppl = np.exp(total_loss / loss_count)
        print('| epoch %d | perplexity %.2f' % (epoch+1, ppl))
        ppl_list.append(float(ppl))
        total_loss, loss_count = 0, 0

    # Plot the learning curve
    x = np.arange(len(ppl_list))
    plt.plot(x, ppl_list, label='train')
    plt.xlabel('epochs')
    plt.ylabel('perplexity')
    plt.show()
def main():
    """Train a GNN binary classifier with mini-batch SGD and plot loss/accuracy.

    Command-line flags:
        --D / -d      feature-vector dimension
        --T / -t      number of aggregation steps
        --epoch / -e  number of training epochs
        --batch / -b  mini-batch size

    Relies on module-level imports (argparse, np, plt, GNN, SGD, get_train,
    shuffle_split, get_feature, avg_acc) that are not visible in this chunk.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--D', '-d', type=int, default=8,
                        help='Dimension of feature vector')
    parser.add_argument('--T', '-t', type=int, default=2,
                        help='Max step of aggregation')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of training dataset')
    parser.add_argument('--batch', '-b', type=int, default=256,
                        help='batch size')
    args = parser.parse_args()

    train_H, train_y, train_node_size = get_train()
    # Fixed seed keeps the 70/30 train/validation split reproducible.
    seed = 1996
    train_H, train_y, val_H, val_y, train_node_size, val_node_size = shuffle_split(
        train_H, train_y, train_node_size, split_size=0.7, seed=seed)

    # feature dimension
    D = args.D
    # step size
    T = args.T
    # learning rate
    alpha = 0.0015
    # epoch size
    max_epoch = args.epoch
    # batch size
    batch_size = args.batch

    # get steps per epoch (ceiling division: the remainder forms a final short batch)
    train_size = len(train_H)
    iter_per_epoch = train_size // batch_size if (
        train_size % batch_size) == 0 else (train_size // batch_size) + 1

    # make feature vector (train)
    train_x = get_feature(D, train_H, train_node_size)
    # make feature vector (validation)
    val_x = get_feature(D, val_H, val_node_size)

    model = GNN(D, T)
    optimizer = SGD(alpha=alpha)

    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []

    for epoch in range(max_epoch):
        # Deterministic per-epoch shuffle of the training set.
        np.random.seed(int(epoch * 1234))
        shuffle_idx = np.random.permutation(train_H.shape[0])
        train_H = train_H[shuffle_idx]
        train_x = train_x[shuffle_idx]
        train_y = train_y[shuffle_idx]

        for num in range(iter_per_epoch):
            if train_size > (num + 1) * batch_size:
                batch_H = train_H[num * batch_size:(num + 1) * batch_size]
                batch_x = train_x[num * batch_size:(num + 1) * batch_size]
                batch_y = train_y[num * batch_size:(num + 1) * batch_size]
            else:
                # Last (possibly short) batch takes whatever remains.
                batch_H = train_H[num * (batch_size):]
                batch_x = train_x[num * (batch_size):]
                batch_y = train_y[num * (batch_size):]

            # get batch gradient (averaged over the batch) and update parameters
            batch_grads = None
            for idx in range(len(batch_H)):
                grad = model.get_gradient(batch_x[idx], batch_H[idx], batch_y[idx])
                # BUGFIX(idiom): was `batch_grads == None`; None must be
                # compared with `is` (PEP 8), and `==` can misbehave on
                # array-like operands.
                if batch_grads is None:
                    batch_grads = {}
                    for key, val in grad.items():
                        batch_grads[key] = np.zeros_like(val)
                for key in grad.keys():
                    batch_grads[key] += (grad[key] / len(batch_H))
            optimizer.update(model.params, batch_grads)

        # train loss and average accuracy (predictions thresholded at 0.5)
        loss = 0
        train_pred = np.zeros((len(train_y), 1))
        for idx in range(len(train_H)):
            loss += model.loss(train_x[idx], train_H[idx], train_y[idx]) / len(train_H)
            predict = 0 if model.predict(train_x[idx], train_H[idx]) < 1 / 2 else 1
            train_pred[idx] = predict
        train_score = avg_acc(train_y, train_pred)

        # validation loss and average accuracy
        val_loss = 0
        val_pred = np.zeros((len(val_y), 1))
        for idx in range(len(val_H)):
            val_loss += model.loss(val_x[idx], val_H[idx], val_y[idx]) / len(val_H)
            predict = 0 if model.predict(val_x[idx], val_H[idx]) < 1 / 2 else 1
            val_pred[idx] = predict
        val_score = avg_acc(val_y, val_pred)

        print(
            'epoch:{} loss:{:.5f} val_loss:{:.5f} avg_acc:{:.5f} val_avg_acc:{:.5f}'
            .format(epoch + 1, loss, val_loss, train_score, val_score))

        train_loss_list.append(loss)
        val_loss_list.append(val_loss)
        train_acc_list.append(train_score)
        val_acc_list.append(val_score)

    # Plot loss (left) and average accuracy (right) curves and save to file.
    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))
    x = np.arange(len(train_loss_list))
    ax1.plot(x, train_loss_list, label='train')
    x = np.arange(len(val_loss_list))
    ax1.plot(x, val_loss_list, label='validation')
    ax1.legend()
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss')
    x = np.arange(len(train_acc_list))
    ax2.plot(x, train_acc_list, label='train')
    x = np.arange(len(val_acc_list))
    ax2.plot(x, val_acc_list, label='validation')
    ax2.legend()
    ax2.set_xlabel('epoch')
    ax2.set_ylabel('average accuracy')
    fig.savefig('src/graph/GNN_SGD.png')
    plt.close()
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) # 過学習を再現するために、学習データを削減 x_train = x_train[:300] t_train = t_train[:300] # weight decay(荷重減衰)の設定 ======================= #weight_decay_lambda = 0 # weight decayを使用しない場合 weight_decay_lambda = 0.1 # ==================================================== network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100], output_size=10, weight_decay_lambda=weight_decay_lambda) optimizer = SGD(lr=0.01) # 学習係数0.01のSGDでパラメータ更新 max_epochs = 201 train_size = x_train.shape[0] batch_size = 100 train_loss_list = [] train_acc_list = [] test_acc_list = [] iter_per_epoch = max(train_size / batch_size, 1) epoch_cnt = 0 for i in range(1000000000): batch_mask = np.random.choice(train_size, batch_size) x_batch = x_train[batch_mask]
# Compare optimizers (SGD / Momentum / AdaGrad / Adam) on MNIST by training one
# identically-configured MultiLayerNet per optimizer.
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD, Momentum, AdaGrad, Adam

# 0: Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
# NOTE(review): `np` is expected to be imported earlier in the file.

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
#optimizers['RMSprop'] = RMSprop()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2: Start training ==========
# (the loop body continues beyond this chunk)
for i in range(max_iterations):
# Manual mini-batch training setup for TwoLayerNet on the spiral dataset
# (the loop body continues beyond this chunk).
# NOTE(review): np and SGD are expected to be imported earlier in the file.
from dataset import spiral
import matplotlib.pyplot as plt
from two_layer_net import TwoLayerNet

# Hyperparameter settings
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# If load_data() were given a random seed, any later load_data() call should use the same one.
x, t = spiral.load_data()  # 300 coordinate pairs and 300 one-hot labels
# Input layer: 2 neurons; the single hidden layer: 10 neurons; output layer: 3 neurons.
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)  # stochastic gradient descent, learning rate 1.0

# Variables used during training
data_size = len(x)  # 300
max_iters = data_size // batch_size  # 300/30 == 10
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # Shuffle the data each epoch
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]

    for iters in range(max_iters):
# TF-IDF + TruncatedSVD features feeding a Perceptron classifier on the
# labeled-titles dataset, then evaluation with sklearn metrics.
# NOTE(review): `train`, load_data, get_tdidf, get_reduced, TfidfVectorizer,
# TruncatedSVD, Perceptron, SGD, train_model, sigmoid, np and the sklearn
# metric functions are defined/imported earlier in the file (not visible here).
test = "../../data/titles-en-test.labeled"
train_X, train_y, train_v = load_data(train)
test_X, test_y, test_v = load_data(test)

# Ignore terms that appear in more than 80% of the documents.
v = TfidfVectorizer(max_df=0.8)
v.fit(list(map(lambda x: " ".join(x), train_X)))
train_X, test_X = get_tdidf(v, train_X), get_tdidf(v, test_X)

# Reduce the sparse TF-IDF matrix to 200 dimensions (SVD fitted on train only).
svd = TruncatedSVD(n_components=200, random_state=3939)
svd.fit(train_X)
train_X, test_X = get_reduced(svd, train_X), get_reduced(svd, test_X)

train_X, train_y = np.array(train_X).astype(
    np.float32), np.array(train_y).astype(np.int32)
test_X, test_y = np.array(test_X).astype(
    np.float32), np.array(test_y).astype(np.int32)
print(train_X, train_y)

model = Perceptron(len(train_X[0]), 1)
optimizer = SGD(lr=0.5)
batch_size = 32
max_epoch = 50
train_model(model, optimizer, train_X, train_y, batch_size, max_epoch)

# Evaluate: sigmoid outputs >= 0.5 count as the positive class.
pred_y = model.predict(test_X)
pred_y = sigmoid(pred_y)
# print(pred_y)
print(accuracy_score(test_y, np.int32(pred_y >= 0.5)))
print(confusion_matrix(test_y, np.int32(pred_y >= 0.5)))
print(classification_report(test_y, np.int32(pred_y >= 0.5)))
"""result
0.9256110520722636
[[1424   53]
 [ 157 1189]]
              precision    recall  f1-score   support
#!/usr/bin/env python # coding: utf-8 import sys sys.path.append('..') from common.optimizer import SGD from common.trainer import RnnlmTrainer from dataset import ptb from simple_rnnlm import SimpleRnnlm # ハイパーパラメータの設定 batch_size = 10 wordvec_size = 100 hidden_size = 100 # RNNの隠れ状態ベクトルの要素数 time_size = 5 # RNNを展開するサイズ lr = 0.1 max_epoch = 100 # 学習データの読み込み corpus, word_to_id, id_to_word = ptb.load_data('train') corpus_size = 1000 # テスト用にデータセットを小さくする corpus = corpus[:corpus_size] vocab_size = int(max(corpus) + 1) xs = corpus[:-1] # 入力 ts = corpus[1:] # 出力(教師ラベル) # モデルの生成 model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size) optimizer = SGD(lr) trainer = RnnlmTrainer(model, optimizer)
def __train(weight_init_std):
    """Train three MultiLayerNetExtend variants (batch-norm, plain, linearized
    batch-norm) in lockstep on the same mini-batches and collect their
    per-epoch test accuracies.

    Relies on module-level names not visible in this chunk: np, x_train,
    t_train, x_test, t_test, train_size, batch_size, max_epochs,
    learning_rate, MultiLayerNetExtend and SGD.

    Returns:
        (test_acc_list, bn_test_acc_list, bn_lin_test_acc_list)
    """
    bn_network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_init_std)
    bn_lin_network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10, weight_init_std=weight_init_std,
        use_batchnorm_linearized=True)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    test_acc_list = []
    bn_train_acc_list = []
    bn_test_acc_list = []
    bn_lin_train_acc_list = []
    bn_lin_test_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    # Effectively infinite; broken out of once epoch_cnt reaches max_epochs.
    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # The same batch drives all three networks so their curves are comparable.
        for _network in (bn_network, network, bn_lin_network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            # Train-accuracy evaluation is commented out (expensive on full data).
            #train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            #bn_train_acc = bn_network.accuracy(x_train, t_train)
            bn_test_acc = bn_network.accuracy(x_test, t_test)
            #bn_lin_train_acc = bn_lin_network.accuracy(x_train, t_train)
            bn_lin_test_acc = bn_lin_network.accuracy(x_test, t_test)

            #train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            #bn_train_acc_list.append(bn_train_acc)
            bn_test_acc_list.append(bn_test_acc)
            #bn_lin_train_acc_list.append(bn_lin_train_acc)
            bn_lin_test_acc_list.append(bn_lin_test_acc)

            #print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc) + " - " + str(bn_lin_train_acc))
            print("epoch:" + str(epoch_cnt) + " | " + str(test_acc) + " - " + str(bn_test_acc) + " - " + str(bn_lin_test_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    #return train_acc_list, bn_train_acc_list, bn_lin_train_acc_list
    return test_acc_list, bn_test_acc_list, bn_lin_test_acc_list
# Weight-decay experiment setup: deep MultiLayerNet on a 300-sample MNIST
# subset with L2 regularization (lambda=0.1); the training loop continues
# beyond this chunk.
# NOTE(review): load_mnist and np are imported earlier in the file.
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Shrink the training set so overfitting is easy to reproduce.
x_train = x_train[:300]
t_train = t_train[:300]

weight_decay_lambda = 0.1

network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

# Effectively infinite; the (not shown) loop body breaks after max_epochs epochs.
for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
def f(x, y):
    """Objective used to visualize optimizer trajectories: an elongated bowl
    f(x, y) = x^2/20 + y^2 with its minimum at (0, 0)."""
    return x**2 / 20.0 + y**2


def df(x, y):
    """Analytic gradient of f: (df/dx, df/dy) = (x/10, 2y)."""
    return x / 10.0, 2.0 * y


init_pos = (-7.0, 2.0)
params = {}
params['x'], params['y'] = init_pos[0], init_pos[1]
grads = {}
grads['x'], grads['y'] = 0, 0

# Per-optimizer learning rates hand-tuned for this surface.
# NOTE(review): OrderedDict and the optimizer classes come from imports earlier
# in the file -- not visible in this chunk.
optimizers = OrderedDict()
optimizers['SGD'] = SGD(lr=0.95)
optimizers['Momentum'] = Momentum(lr=0.1)
optimizers['AdaGrad'] = AdaGrad(lr=1.5)
optimizers['Adam'] = Adam(lr=0.3)

idx = 1

# Trace 30 update steps from init_pos with each optimizer
# (the loop body continues beyond this chunk).
for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    params['x'], params['y'] = init_pos[0], init_pos[1]

    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])