def __train(weight_init_std):
    """Train two identical MLPs — one with batch normalization, one without —
    and record each network's training-set accuracy once per epoch.

    Relies on module-level data and hyperparameters (x_train, t_train,
    train_size, batch_size, learning_rate, max_epochs).

    Returns:
        (train_acc_list, bn_train_acc_list): per-epoch accuracies for the
        plain network and the batch-norm network respectively.
    """
    bn_network = MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10,
        weight_init_std=weight_init_std,
        use_batchnorm=True,
    )
    network = MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10,
        weight_init_std=weight_init_std,
    )
    optimizer = SGD(lr=learning_rate)  # SGD keeps no state, so one instance serves both nets

    train_acc_list = []
    bn_train_acc_list = []
    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    # Effectively an infinite loop; terminated through the epoch counter below.
    for step in range(1000000000):
        # Draw one random mini-batch, shared by both networks.
        indices = np.random.choice(train_size, batch_size)
        x_batch, t_batch = x_train[indices], t_train[indices]

        # One SGD step on each network with identical data.
        for net in (bn_network, network):
            optimizer.update(net.params, net.gradient(x_batch, t_batch))

        # At every epoch boundary, evaluate both networks on the full training set.
        if step % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
# Weight-decay overfitting experiment (chunk of a larger script; load_mnist,
# MultiLayerNet and np are expected to be imported earlier in the file).
from common.optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Reduce the training data to make overfitting easy to reproduce.
x_train = x_train[:300]
t_train = t_train[:300]

# weight decay setting =======================
#weight_decay_lambda = 0  # when weight decay is not used
weight_decay_lambda = 0.1
# ====================================================

# Six hidden layers of 100 units each; the L2 penalty is applied inside MultiLayerNet.
network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10, weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

# Effectively infinite; an epoch-count check later in the script breaks out.
for i in range(1000000000):
    # Sample a random mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    # NOTE(review): the loop body continues beyond this chunk boundary.
"""Train an Rnnlm language model on the PTB corpus and report test perplexity."""
from common.util import eval_perplexity
from dataset import ptb
from rnnlm import Rnnlm

# Hyperparameters.
batch_size = 20
wordvec_size = 100
hidden_size = 100   # width of the RNN hidden state
time_size = 35      # truncated-BPTT length
lr = 20.0
max_epoch = 4
max_grad = 0.25     # gradient-clipping threshold

# Load corpora and derive the vocabulary size.
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_test, _, _ = ptb.load_data('test')
vocab_size = len(word_to_id)

# Language-model targets are the inputs shifted one step forward.
xs, ts = corpus[:-1], corpus[1:]

model = Rnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad, eval_interval=20)
trainer.plot(ylim=(0, 500))

# Evaluate on held-out data starting from a fresh hidden state.
model.reset_state()
ppl_test = eval_perplexity(model, corpus_test)
print('test perplexity: ', ppl_test)

model.save_params()
# Weight-initialization comparison on MNIST: std=0.01 vs Xavier vs He.
sys.path.append(os.pardir)  # make the book's common/ and dataset/ packages importable
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# "sigmoid"/"relu" select Xavier/He initialization inside MultiLayerNet.
weight_init_types = {"std=0.01": 0.01, "Xavier": "sigmoid", "He": "relu"}
optimizer = SGD(lr=0.01)  # one shared (stateless) optimizer for all three networks

networks = {}
train_loss = {}
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100,100,100,100],
                                  output_size=10, weight_init_std=weight_type)
    train_loss[key] = []

for i in range(max_iterations):
    # Same mini-batch is used for every initialization scheme.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    for key in weight_init_types.keys():
        # NOTE(review): the loop body continues beyond this chunk boundary.
# Multi-agent training setup: split MNIST across n agents and sanity-check
# gradients (chunk of a larger script; np, Agent, SGD, x_train, t_train, n,
# maxdeg, Gadj are defined earlier in the file).
x_train_split = np.split(x_train, n)   # np.split requires train size divisible by n
t_train_split = np.split(t_train, n)

max_epochs = 101
each_train_size = x_train_split[0].shape[0]
batch_size = min(100, each_train_size)

# Shared (class-level) configuration for all agents.
Agent.n = n
Agent.maxdeg, Agent.AdjG_init = maxdeg, Gadj
Agent.train_size, Agent.batch_size = each_train_size, batch_size

weight_decay_lambda = 0
# NOTE(review): lr is passed as a callable (lambda s: 0.01) — presumably this SGD
# accepts a step-dependent learning-rate schedule; confirm against the SGD class.
agents = [Agent(idx, x_train_split[idx], t_train_split[idx], x_test, t_test,
                SGD(lr=lambda s: 0.01), weight_decay_lambda) for idx in range(n)]

train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = max(each_train_size / batch_size, 1)
epoch_cnt = 0

#####################
# Gradient check on agent 0.
# NOTE(review): "degub_numericalGrad" looks like a typo of "debug_numericalGrad",
# but it must match the actual Agent method name — verify before renaming.
grad_numerical = agents[0].degub_numericalGrad()
grad_backprop = agents[0].debug_backpropGrad()
### Verify that the numerical gradient agrees with the backprop gradient.
for key in grad_numerical.keys():
    # NOTE(review): the loop body continues beyond this chunk boundary.
xs = corpus[:-1] # 입력 ts = corpus[1:] # 정답 레이블 data_size = len(xs) print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size)) # 학습 시 사용하는 변수 max_iters = data_size // (batch_size * time_size) time_idx = 0 total_loss = 0 loss_count = 0 ppl_list = [] # perplexity # 모델 생성 model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size) optimizer = SGD(lr) # 각 미니배치에서 샘플을 읽기 시작한 위치를 계산 jump = (corpus_size - 1) // batch_size offsets = [i * jump for i in range(batch_size)] for epoch in range(max_epoch): for iter in range(max_iters): # 미니배치 획득 batch_x = np.empty((batch_size, time_size), dtype='i') batch_t = np.empty((batch_size, time_size), dtype='i') for t in range(time_size): for i, offset in enumerate(offsets): batch_x[i, t] = xs[(offset + time_idx) % data_size] batch_t[i, t] = ts[(offset + time_idx) % data_size] time_idx += 1
def main():
    """Train a GNN binary classifier with SGD and plot loss/accuracy curves.

    Loads the training set via get_train(), holds out 30% for validation,
    trains for --epoch epochs with mini-batches of --batch graphs, prints
    per-epoch metrics, and saves the curves to src/graph/GNN_SGD.png.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--D', '-d', type=int, default=8, help='Dimension of feature vector')
    parser.add_argument('--T', '-t', type=int, default=2, help='Max step of aggregation')
    parser.add_argument('--epoch', '-e', type=int, default=100, help='Number of training dataset')
    parser.add_argument('--batch', '-b', type=int, default=256, help='batch size')
    args = parser.parse_args()

    train_H, train_y, train_node_size = get_train()
    seed = 1996
    train_H, train_y, val_H, val_y, train_node_size, val_node_size = shuffle_split(
        train_H, train_y, train_node_size, split_size=0.7, seed=seed)

    D = args.D              # feature dimension
    T = args.T              # aggregation step count
    alpha = 0.0015          # learning rate
    max_epoch = args.epoch
    batch_size = args.batch

    # Number of mini-batches per epoch, counting a final partial batch.
    train_size = len(train_H)
    iter_per_epoch = train_size // batch_size if (
        train_size % batch_size) == 0 else (train_size // batch_size) + 1

    # Initial feature vectors for train and validation graphs.
    train_x = get_feature(D, train_H, train_node_size)
    val_x = get_feature(D, val_H, val_node_size)

    model = GNN(D, T)
    optimizer = SGD(alpha=alpha)

    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []

    for epoch in range(max_epoch):
        # Reshuffle the training set each epoch (seeded, so reproducible).
        np.random.seed(int(epoch * 1234))
        shuffle_idx = np.random.permutation(train_H.shape[0])
        train_H = train_H[shuffle_idx]
        train_x = train_x[shuffle_idx]
        train_y = train_y[shuffle_idx]

        for num in range(iter_per_epoch):
            if train_size > (num + 1) * batch_size:
                batch_H = train_H[num * batch_size:(num + 1) * batch_size]
                batch_x = train_x[num * batch_size:(num + 1) * batch_size]
                batch_y = train_y[num * batch_size:(num + 1) * batch_size]
            else:
                # Final (possibly smaller) batch takes the remainder.
                batch_H = train_H[num * batch_size:]
                batch_x = train_x[num * batch_size:]
                batch_y = train_y[num * batch_size:]

            # Average the per-sample gradients over the batch, then update once.
            batch_grads = None
            for idx in range(len(batch_H)):
                grad = model.get_gradient(batch_x[idx], batch_H[idx], batch_y[idx])
                # FIX: was "batch_grads == None"; None must be compared by
                # identity ("is None"), not equality (PEP 8).
                if batch_grads is None:
                    batch_grads = {}
                    for key, val in grad.items():
                        batch_grads[key] = np.zeros_like(val)
                for key in grad.keys():
                    batch_grads[key] += (grad[key] / len(batch_H))
            optimizer.update(model.params, batch_grads)

        # Training loss and average accuracy over the full training set.
        loss = 0
        train_pred = np.zeros((len(train_y), 1))
        for idx in range(len(train_H)):
            loss += model.loss(train_x[idx], train_H[idx], train_y[idx]) / len(train_H)
            predict = 0 if model.predict(train_x[idx], train_H[idx]) < 1 / 2 else 1
            train_pred[idx] = predict
        train_score = avg_acc(train_y, train_pred)

        # Validation loss and average accuracy.
        val_loss = 0
        val_pred = np.zeros((len(val_y), 1))
        for idx in range(len(val_H)):
            val_loss += model.loss(val_x[idx], val_H[idx], val_y[idx]) / len(val_H)
            predict = 0 if model.predict(val_x[idx], val_H[idx]) < 1 / 2 else 1
            val_pred[idx] = predict
        val_score = avg_acc(val_y, val_pred)

        print(
            'epoch:{} loss:{:.5f} val_loss:{:.5f} avg_acc:{:.5f} val_avg_acc:{:.5f}'
            .format(epoch + 1, loss, val_loss, train_score, val_score))
        train_loss_list.append(loss)
        val_loss_list.append(val_loss)
        train_acc_list.append(train_score)
        val_acc_list.append(val_score)

    # Plot loss (left) and average accuracy (right) curves and save to disk.
    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))
    x = np.arange(len(train_loss_list))
    ax1.plot(x, train_loss_list, label='train')
    x = np.arange(len(val_loss_list))
    ax1.plot(x, val_loss_list, label='validation')
    ax1.legend()
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss')
    x = np.arange(len(train_acc_list))
    ax2.plot(x, train_acc_list, label='train')
    x = np.arange(len(val_acc_list))
    ax2.plot(x, val_acc_list, label='validation')
    ax2.legend()
    ax2.set_xlabel('epoch')
    ax2.set_ylabel('average accuracy')
    fig.savefig('src/graph/GNN_SGD.png')
    plt.close()
# Compare optimizer trajectories on f(x, y) = x^2/20 + y^2 (chunk of a larger
# script; SGD, Momentum, AdaGrad, Adam and OrderedDict are imported earlier).

def f(x, y):
    """Objective function: an elongated bowl, shallow along x and steep along y."""
    return x**2 / 20.0 + y**2

def df(x, y):
    """Analytic gradient of f: returns (df/dx, df/dy)."""
    return x / 10.0, 2.0 * y

init_pos = (-7.0, 2.0)
params = {}
params['x'], params['y'] = init_pos[0], init_pos[1]
grads = {}
grads['x'], grads['y'] = 0, 0

# Fixed insertion order so the optimizers are processed deterministically.
optimizers = OrderedDict()
optimizers['SGD'] = SGD(lr=0.95)
optimizers['Momentum'] = Momentum(lr=0.1)
optimizers['AdaGrad'] = AdaGrad(lr=1.5)
optimizers['Adam'] = Adam(lr=0.3)

idx = 1
for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    # Restart every optimizer from the same initial position.
    params['x'], params['y'] = init_pos[0], init_pos[1]

    # Record 30 optimization steps.
    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])
        # NOTE(review): the loop body continues beyond this chunk boundary.
"""Fit a two-layer network to the spiral toy dataset using the book's Trainer."""
import os
import sys
sys.path.append(os.pardir)  # make the book's packages importable
from common.optimizer import SGD
from common.trainer import Trainer
from dataset import spiral
from ch01.two_layer_net import TwoLayerNet

# Hyperparameters.
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# 2-D points with one of three spiral-arm labels.
train_x, train_t = spiral.load_data()

# 2 -> hidden_size -> 3 network, optimized with plain SGD.
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

# Trainer handles batching, parameter updates, and periodic evaluation.
trainer = Trainer(model, optimizer)
trainer.fit(train_x, train_t, max_epoch, batch_size, eval_interval=10)
trainer.plot()
# Optimizer comparison on MNIST: SGD vs Momentum vs AdaGrad vs Adam (chunk of a
# larger script; np is imported earlier in the file).
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD, Momentum, AdaGrad, Adam

# 0: Read the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1: Experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
#optimizers['RMSprop'] = RMSprop()

# One identical network per optimizer so only the update rule differs.
networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

# 2: Start training ==========
for i in range(max_iterations):
    # NOTE(review): the loop body continues beyond this chunk boundary.
# Train BetterRnnlm on PTB with per-epoch validation (chunk of a larger script;
# wordvec_size, hidden_size, batch_size, ptb, BetterRnnlm, SGD, RnnlmTrainer and
# eval_perplexity are defined or imported earlier in the file).
time_size = 35     # truncated-BPTT length
lr = 20.0
max_epoch = 40
max_grad = 0.25    # gradient-clipping threshold
dropout = 0.5

corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_val, _, _ = ptb.load_data('val')
corpus_test, _, _ = ptb.load_data('test')
vocab_size = len(word_to_id)

# Language-model targets are the inputs shifted by one position.
xs = corpus[:-1]
ts = corpus[1:]

model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

best_ppl = float('inf')
for epoch in range(max_epoch):
    # One epoch of fitting, then measure validation perplexity from a fresh state.
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size,
                time_size=time_size, max_grad=max_grad)
    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print('valid perplexity: ', ppl)

    # Track the best validation perplexity seen so far.
    if best_ppl > ppl:
        best_ppl = ppl
        # NOTE(review): the if-body continues beyond this chunk boundary.
def backward(self, dout=1):
    # NOTE(review): this is a method of the model class defined before this chunk.
    """Backpropagate through the loss layer and then each layer in reverse order.

    Returns the gradient with respect to the network input.
    """
    dout = self.loss_layer.backward(dout)
    for layer in reversed(self.layers):
        dout = layer.backward(dout)
    return dout

if __name__ == "__main__":
    # Manual training loop for a two-layer net on the spiral dataset.
    max_epoch = 300
    batch_size = 30
    hidden_size = 10
    learning_rate = 1.0

    x, t = spiral.load_data()
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)

    # Variables used during training.
    data_size = len(x)
    max_iters = data_size // batch_size
    total_loss = 0
    loss_count = 0
    loss_list = []

    for epoch in range(max_epoch):
        # Shuffle the data each epoch.
        idx = np.random.permutation(data_size)
        x = x[idx]
        t = t[idx]
        for iters in range(max_iters):
            # NOTE(review): the loop body continues beyond this chunk boundary.
# Manual training loop for a two-layer net on the spiral dataset (chunk of a
# larger script; SGD and np are imported earlier in the file).
from dataset import spiral
import matplotlib.pyplot as plt
from two_layer_net import TwoLayerNet

# Hyperparameters.
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# If load_data() were given a random seed, later load_data() calls should use the same one.
x, t = spiral.load_data()  # 300 coordinates and 300 one-hot labels

# Input layer: 2 neurons; the single hidden layer: 10 neurons; output layer: 3 neurons.
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)  # learning rate is 1; stochastic gradient descent

# Variables used during training.
data_size = len(x)                   # 300
max_iters = data_size // batch_size  # 300/30 == 10
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # Shuffle the data.
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]
    for iters in range(max_iters):
        # NOTE(review): the loop body continues beyond this chunk boundary.
test = "../../data/titles-en-test.labeled" train_X, train_y, train_v = load_data(train) test_X, test_y, test_v = load_data(test) v = TfidfVectorizer(max_df=0.8) v.fit(list(map(lambda x: " ".join(x), train_X))) train_X, test_X = get_tdidf(v, train_X), get_tdidf(v, test_X) svd = TruncatedSVD(n_components=200, random_state=3939) svd.fit(train_X) train_X, test_X = get_reduced(svd, train_X), get_reduced(svd, test_X) train_X, train_y = np.array(train_X).astype( np.float32), np.array(train_y).astype(np.int32) test_X, test_y = np.array(test_X).astype( np.float32), np.array(test_y).astype(np.int32) print(train_X, train_y) model = Perceptron(len(train_X[0]), 1) optimizer = SGD(lr=0.5) batch_size = 32 max_epoch = 50 train_model(model, optimizer, train_X, train_y, batch_size, max_epoch) pred_y = model.predict(test_X) pred_y = sigmoid(pred_y) # print(pred_y) print(accuracy_score(test_y, np.int32(pred_y >= 0.5))) print(confusion_matrix(test_y, np.int32(pred_y >= 0.5))) print(classification_report(test_y, np.int32(pred_y >= 0.5))) """result 0.9256110520722636 [[1424 53] [ 157 1189]] precision recall f1-score support
def backward(self, dout=1):
    # NOTE(review): method of the model class whose definition precedes this chunk.
    """Backpropagate dout through the loss layer, then each layer in reverse.

    Returns the gradient with respect to the network input.
    """
    dout = self.loss_layer.backward(dout)
    for layer in reversed(self.layers):
        dout = layer.backward(dout)
    return dout

if __name__ == "__main__":
    # Smoke test: five full-batch gradient-descent steps on toy data; the summed
    # loss printed each iteration should decrease if backprop is wired correctly.
    from common.layers import Affine
    from common.activation import Sigmoid, Softmax
    from common.loss import cross_entropy_error
    from common.optimizer import SGD
    import numpy as np
    from common.dataset import load_data

    # Build a 2 -> 4 -> 3 network with a sigmoid hidden activation.
    _model = NeuralNetworkModel()
    _model.add_layer(Affine(2, 4))
    _model.add_layer(Sigmoid())
    _model.add_layer(Affine(4, 3))
    # _model.add_layer(Softmax())
    _model.add_optimizer(SGD(), cross_entropy_error)

    _x, _t = load_data(100, 3)
    for i in range(5):
        _loss = _model.forward(_x, _t)
        print(np.sum(_loss))
        _model.backward()
        _model.optimizer.update(_model.params, _model.grads)
# Weight-initialization comparison on MNIST (chunk of a larger script; np and
# load_mnist are imported earlier in the file).
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

# 0. Read the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# 1. Experiment setup ==========
# 'sigmoid'/'relu' select Xavier/He initialization inside MultiLayerNet.
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
optimizer = SGD(lr=0.01)

networks = {}
train_loss = {}
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                                  output_size=10, weight_init_std=weight_type)
    train_loss[key] = []

# 2. Start training ==========
for i in range(max_iterations):
    # Same mini-batch is fed to every network per iteration.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # NOTE(review): the loop body continues beyond this chunk boundary.
# Overfitting / weight-decay experiment (chunk of a larger script; load_mnist,
# MultiLayerNet and np are imported earlier in the file).
from common.optimizer import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Shrink the training set to reproduce overfitting.
x_train = x_train[:300]
t_train = t_train[:300]

# weight decay setting =======================
weight_decay_lambda = 0  # when weight decay is not used
# weight_decay_lambda = 0.1
# ====================================================

network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10, weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)  # update parameters with SGD at learning rate 0.01

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

# Effectively infinite; an epoch-count check later in the script breaks out.
for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    # NOTE(review): the loop body continues beyond this chunk boundary.
# Label-encode and zero-pad token sequences, then train an RNN language model
# (chunk of a larger script; le/LabelEncoder, zero_padding, get_le, RNNLM, SGD,
# train_model, batch_pred, itertools and train_X/train_y/test_X/test_y are
# defined or imported earlier in the file).
le.fit(list(itertools.chain.from_iterable(train_X)))
le = dict(zip(le.classes_, le.transform(le.classes_)))  # token -> integer id
train_X, test_X = zero_padding(get_le(le, train_X)), zero_padding(
    get_le(le, test_X))

# Preprocess y the same way with a fresh encoder.
le = LabelEncoder()
le.fit(list(itertools.chain.from_iterable(train_y)))
le = dict(zip(le.classes_, le.transform(le.classes_)))
train_y, test_y = zero_padding(get_le(le, train_y)), zero_padding(
    get_le(le, test_y))

batch_size = 32
# Vocabulary size = number of distinct encoded tokens plus one (padding id).
model = RNNLM(vocab_size=len(set(itertools.chain.from_iterable(train_X))) + 1,
              wordvec_size=300, hidden_size=600)
optimizer = SGD(lr=0.001)
train_model(model, optimizer, train_X, train_y, max_epoch=30, batch_size=batch_size)

# prediction (the commented block was a single-batch experiment)
# pred_y = model.predict(test_X[:32])
# pred_y = pred_y.reshape(pred_y.shape[0]*pred_y.shape[1], -1)
# test_y = test_y[:32]
# test_y = test_y.reshape(test_y.shape[0]*test_y.shape[1])
# pred_y = softmax(pred_y).argmax(axis=1)
# print(test_y)
# print(pred_y)
pred_y = batch_pred(test_X, batch_size=batch_size)