def get_models():
    # tanh + backward
    rnn1 = Single_layer_RNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE,
                            output_size=OUTPUT_SIZE)
    optim1 = Adam()

    # tanh + backward_truncate
    rnn2 = Single_layer_RNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE,
                            output_size=OUTPUT_SIZE, bptt_truncate=BPTT_TRUNCATE)
    optim2 = Adam()

    # relu + backward_truncate
    rnn3 = Single_layer_RNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE,
                            output_size=OUTPUT_SIZE, bptt_truncate=BPTT_TRUNCATE,
                            activation_func='relu')
    optim3 = Adam()

    labels = [
        'model1: tanh + backward',
        'model2: tanh + backward_truncate',
        'model3: relu + backward_truncate'
    ]
    rnns = [rnn1, rnn2, rnn3]
    optims = [optim1, optim2, optim3]
    return labels, rnns, optims
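# Hypothetical usage sketch for the triple returned above: train each configuration
# with its own Adam instance. It assumes Single_layer_RNN exposes .grad() and .params
# like the other networks in this collection, and that x_train / y_train and the
# placeholder constant NUM_STEPS are defined elsewhere.
labels, rnns, optims = get_models()
for label, rnn, optim in zip(labels, rnns, optims):
    for step in range(NUM_STEPS):
        grads = rnn.grad(x_train, y_train)   # forward + backward pass
        optim.update(rnn.params, grads)      # one Adam update
    print('finished training', label)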
def train(network, x_train, y_train, x_test, y_test,
          iter_times=10000, hidden_size=10, batch_size=100, lr=0.1):
    nn = network
    optimizers = {
        'SGD': SGD(lr),
        'Momentum': Momentum(lr),
        'Nesterov': Nesterov(lr),
        'AdaGrad': AdaGrad(lr),
        'RMSProp': RMSProp(0.02),  # lr == 0.1 may make loss += ln(eps), eps == 1e-15
        'Adam': Adam(0.005)
    }
    opt = optimizers['Adam']

    for i in range(iter_times):
        if i % max(x_train.shape[0] // batch_size, 1) == 0:
            print('{:.1%}'.format(i / iter_times))
        batch_mask = np.random.choice(x_train.shape[0], batch_size)
        x_batch, y_batch = x_train[batch_mask], y_train[batch_mask]
        grads = nn.grad(x_batch, y_batch)
        opt.update(nn.params, grads)

    print('Train acc: {:.4} Test acc: {:.4}'.format(
        nn.accuracy(x_train, y_train), nn.accuracy(x_test, y_test)))
def main():
    # Hyperparameter settings
    window_size = 5
    hidden_size = 100
    batch_size = 100
    max_epoch = 10

    # Load the data
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    vocab_size = len(word_to_id)
    contexts, target = create_contexts_target(corpus, window_size)

    # Build the model, optimizer, and trainer
    model = CBOW(vocab_size, hidden_size, window_size, corpus)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    # Start training
    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()

    # Save the data needed for later use
    word_vecs = model.word_vecs
    params = {}
    params['word_vecs'] = word_vecs.astype(np.float16)
    params['word_to_id'] = word_to_id
    params['id_to_word'] = id_to_word
    pkl_file = 'cbow_params.pkl'
    with open(pkl_file, 'wb') as f:
        pickle.dump(params, f, -1)
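# Sketch of a possible follow-up: load the pickled CBOW parameters saved above and
# inspect word similarities. Assumes the most_similar(query, word_to_id, id_to_word,
# word_matrix, top) helper from common.util used elsewhere in this code base.
import pickle
from common.util import most_similar

with open('cbow_params.pkl', 'rb') as f:
    params = pickle.load(f)

word_vecs = params['word_vecs']
word_to_id = params['word_to_id']
id_to_word = params['id_to_word']

for query in ['you', 'year']:
    most_similar(query, word_to_id, id_to_word, word_vecs, top=5)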
def train_eval(x_train, x_test, is_peeky):
    if is_peeky:
        model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
    else:
        model = Seq2seq(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for epoch in range(max_epoch):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)

        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct,
                                        id_to_char, verbose)

        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print('val acc %.3f%%' % (acc * 100))
    return acc_list
def setup_actor_optimizer(self):
    logger.info('setting up actor optimizer')
    self.actor_loss = -tf.reduce_mean(self.critic_with_actor_tf)
    actor_shapes = [var.get_shape().as_list() for var in self.actor.trainable_vars]
    actor_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in actor_shapes])
    logger.info('  actor shapes: {}'.format(actor_shapes))
    logger.info('  actor params: {}'.format(actor_nb_params))
    self.actor_grads = U.flatgrad(self.actor_loss, self.actor.trainable_vars,
                                  clip_norm=self.clip_norm)
    self.actor_optimizer = Adam(var_list=self.actor.trainable_vars,
                                beta1=0.9, beta2=0.999, epsilon=1e-08)
def main():
    # Load the dataset
    (x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
    char_to_id, id_to_char = sequence.get_vocab()

    # Reversing the input sequence supposedly improves Seq2seq accuracy
    is_reverse = True
    if is_reverse:
        x_train, x_test = x_train[:, ::-1], x_test[:, ::-1]

    # Hyperparameter settings
    vocab_size = len(char_to_id)
    wordvec_size = 16
    hidden_size = 128
    batch_size = 128
    max_epoch = 25
    max_grad = 5.0

    # Build the model, optimizer, and trainer
    # model = Seq2seq(vocab_size, wordvec_size, hidden_size)
    model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for epoch in range(max_epoch):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)

        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct,
                                        id_to_char, verbose)

        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print(f'val acc {acc * 100}')
def main():
    # Load the data
    (x_train, t_train), (x_test, t_test) = sequence.load_data('date.txt')
    char_to_id, id_to_char = sequence.get_vocab()

    # Reverse the input sequences
    x_train, x_test = x_train[:, ::-1], x_test[:, ::-1]

    # Hyperparameter settings
    vocab_size = len(char_to_id)
    wordvec_size = 16
    hidden_size = 256
    batch_size = 128
    max_epoch = 10
    max_grad = 5.0

    model = AttentionSeq2seq(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for epoch in range(max_epoch):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)

        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct,
                                        id_to_char, verbose, is_reverse=True)

        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print('val acc %.3f%%' % (acc * 100))
def main():
    window_size = 1
    hidden_size = 5
    batch_size = 3
    max_epoch = 1000

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)

    vocab_size = len(word_to_id)
    contexts, target = create_contexts_target(corpus, window_size)
    target = convert_one_hot(target, vocab_size)
    contexts = convert_one_hot(contexts, vocab_size)

    model = SimpleCBOW(vocab_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()
def setup_critic_optimizer(self):
    logger.info('setting up critic optimizer')
    normalized_critic_target_tf = tf.clip_by_value(
        normalize(self.critic_target, self.ret_rms),
        self.return_range[0], self.return_range[1])
    self.critic_loss = tf.reduce_mean(
        tf.square(self.normalized_critic_tf - normalized_critic_target_tf))

    if self.critic_l2_reg > 0.:
        critic_reg_vars = [var for var in self.critic.trainable_vars
                           if 'kernel' in var.name and 'output' not in var.name]
        for var in critic_reg_vars:
            logger.info('  regularizing: {}'.format(var.name))
        logger.info('  applying l2 regularization with {}'.format(self.critic_l2_reg))
        critic_reg = tc.layers.apply_regularization(
            tc.layers.l2_regularizer(self.critic_l2_reg),
            weights_list=critic_reg_vars
        )
        self.critic_loss += critic_reg

    critic_shapes = [var.get_shape().as_list() for var in self.critic.trainable_vars]
    critic_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in critic_shapes])
    logger.info('  critic shapes: {}'.format(critic_shapes))
    logger.info('  critic params: {}'.format(critic_nb_params))
    self.critic_grads = U.flatgrad(self.critic_loss, self.critic.trainable_vars,
                                   clip_norm=self.clip_norm)
    self.critic_optimizer = Adam(var_list=self.critic.trainable_vars,
                                 beta1=0.9, beta2=0.999, epsilon=1e-08)
def test_train_word2vec_model():
    """Train a word2vec (CBOW) model."""
    window_size = 1
    hidden_size = 5  # dimensionality of the distributed word vectors
    batch_size = 3
    max_epoch = 1000

    text = 'You say goodbye and I say hello.'

    # Build the corpus
    corpus, word_to_id, id_to_word = preprocess(text)

    # Build contexts and targets
    vocab_size = len(word_to_id)
    contexts, target = create_context_target(corpus, window_size)
    target = convert_one_hot(target, vocab_size)
    contexts = convert_one_hot(contexts, vocab_size)
    print("one-hot target: ", target)
    print("one-hot contexts: ", contexts)

    # CBOW model
    model = SimpleCBOW(vocab_size, hidden_size)
    optimizer = Adam()

    # Trainer
    trainer = Trainer(model, optimizer)

    # Training
    trainer.fit(contexts, target, max_epoch=max_epoch, batch_size=batch_size)
    trainer.plot()

    # Retrieve the CBOW input weights (W_in)
    word_vecs = model.word_vecs
    for word_id, word in id_to_word.items():
        print(word, word_vecs[word_id])
import numpy as np

from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD, Momentum, AdaGrad, Adam

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()

networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in networks.keys():
        # one gradient step per optimizer, recording the mini-batch loss
        grads = networks[key].gradient(x_batch, t_batch)
        optimizers[key].update(networks[key].params, grads)
        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)
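# All of these snippets rely on an optimizer exposing update(params, grads), where
# both arguments are dicts of NumPy arrays keyed by parameter name. Below is a minimal
# NumPy sketch of Adam with that interface; the real common.optimizer.Adam may differ
# in implementation details such as how it applies bias correction.
import numpy as np

class Adam:
    """Minimal Adam sketch with the update(params, grads) dict interface."""
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        self.lr, self.beta1, self.beta2, self.eps = lr, beta1, beta2, eps
        self.m, self.v, self.t = None, None, 0

    def update(self, params, grads):
        if self.m is None:
            self.m = {k: np.zeros_like(v) for k, v in params.items()}
            self.v = {k: np.zeros_like(v) for k, v in params.items()}
        self.t += 1
        for key in params.keys():
            # exponential moving averages of the gradient and its square
            self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * grads[key]
            self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * (grads[key] ** 2)
            # bias-corrected estimates
            m_hat = self.m[key] / (1 - self.beta1 ** self.t)
            v_hat = self.v[key] / (1 - self.beta2 ** self.t)
            params[key] -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)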
if config.GPU:
    corpus = to_gpu(corpus_train)
    corpus_val = to_gpu(corpus_val)
    corpus_test = to_gpu(corpus_test)

vocab_size = len(preprocessing.word_to_id)
xs = sum(corpus_train, [])[:-1]
ts = sum(corpus_train, [])[1:]
corpus_val = sum(corpus_val, [])
corpus_test = sum(corpus_test, [])

model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
# optimizer = SGD(lr)
optimizer = Adam(lr=lr)
trainer = RnnlmTrainer(model, optimizer)

best_ppl = float('inf')
for epoch in range(max_epoch):
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size,
                time_size=time_size, max_grad=max_grad)

    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print('validation perplexity: ', ppl)
print("y train: ",y_train.shape) print("x_test: ",x_test.shape) print("y_test: ",y_test.shape) if(run): #MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latiture, Longitude network = MultiLayerNetRegression( input_size=8, hidden_size_list=[ 100,1000,100, ], output_size=1, ) optimizer = Adam(lr=learning_rate) train_acc_list = [] iter_per_epoch = max(train_size / batch_size, 1) epoch_cnt = 0 #学習 for i in range(1000000000): batch_mask = np.random.choice(train_size, batch_size) x_batch = x_train[batch_mask] y_batch = y_train[batch_mask] grads = network.gradient(x_batch, y_batch)
x, t, x_submission = hp_data.load(scale=True, label_log10=True, non_nan_ratio=0.8)
print('x.shape:', x.shape)
feature_count = x.shape[-1]

train_num = 1450
train_x, train_y = x[:train_num, :], t[:train_num, :]
test_x, test_y = x[train_num:, :], t[train_num:, :]

max_iterations = 30000
batch_size = 128

# Initialize network and optimizer
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
# optimizer = SGD(lr=0.01)
optimizer = Adam(lr=1e-3)

# network = MultiLayerRegression(input_size=feature_count, hidden_size_list=[100, 100, 100, 300],
#                                output_size=1, weight_init_std='relu', activation='relu',
#                                weight_decay_lambda=1e-4,
#                                use_dropout=True, dropout_ratio=0.2,
#                                use_batchnorm=True)
network = MultiLayerRegression(input_size=feature_count,
                               hidden_size_list=[300, 200, 100, 10],
                               output_size=1,
                               weight_init_std='relu', activation='relu',
                               weight_decay_lambda=1e-4,
                               use_dropout=True, dropout_ratio=0.3,
                               use_batchnorm=True)
 [0 0 0 0 1 0 0]
 [0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0]]

contexts:
[[[1 0 0 0 0 0 0]
  [0 0 1 0 0 0 0]]

 [[0 1 0 0 0 0 0]
  [0 0 0 1 0 0 0]]

 [[0 0 1 0 0 0 0]
  [0 0 0 0 1 0 0]]

 [[0 0 0 1 0 0 0]
  [0 1 0 0 0 0 0]]

 [[0 0 0 0 1 0 0]
  [0 0 0 0 0 1 0]]

 [[0 1 0 0 0 0 0]
  [0 0 0 0 0 0 1]]]
"""

model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()
def df(x, y):
    return x / 10.0, 2.0 * y


init_pos = (-7.0, 2.0)
params = {}
params['x'], params['y'] = init_pos[0], init_pos[1]
grads = {}
grads['x'], grads['y'] = 0, 0

optimizers = OrderedDict()
optimizers["SGD"] = SGD(lr=0.95)
optimizers["Momentum"] = Momentum(lr=0.1)
optimizers["AdaGrad"] = AdaGrad(lr=1.5)
optimizers["Adam"] = Adam(lr=0.3)

idx = 1
for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    params['x'], params['y'] = init_pos[0], init_pos[1]

    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])

        grads['x'], grads['y'] = df(params['x'], params['y'])
        optimizer.update(params, grads)
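# For reference, a sketch of how the recorded trajectories could be visualized on the
# loss surface whose gradient df() returns, i.e. f(x, y) = x**2 / 20 + y**2. Assumes
# matplotlib and reuses x_history, y_history, and key from the last iteration of the
# loop above; the plotting ranges are arbitrary.
import numpy as np
import matplotlib.pyplot as plt

def f(x, y):
    # objective consistent with df(): grad f = (x / 10, 2y)
    return x ** 2 / 20.0 + y ** 2

X, Y = np.meshgrid(np.arange(-10, 10, 0.01), np.arange(-5, 5, 0.01))
plt.contour(X, Y, f(X, Y))                         # loss-surface contours
plt.plot(x_history, y_history, 'o-', color='red')  # trajectory of the last optimizer run
plt.plot(0, 0, '+')                                # the minimum at the origin
plt.title(key)
plt.xlabel('x')
plt.ylabel('y')
plt.show()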
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--D', '-d', type=int, default=8,
                        help='Dimension of feature vector')
    parser.add_argument('--T', '-t', type=int, default=2,
                        help='Max step of aggregation')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of training epochs')
    parser.add_argument('--batch', '-b', type=int, default=256,
                        help='Batch size')
    parser.add_argument('--flag', '-f', action='store_true',
                        help='Make prediction file')
    args = parser.parse_args()

    train_H, train_y, train_node_size = get_train()
    seed = 1996
    train_H, train_y, val_H, val_y, train_node_size, val_node_size = shuffle_split(
        train_H, train_y, train_node_size, split_size=0.7, seed=seed)

    # feature dimension
    D = args.D
    # aggregation steps
    T = args.T
    # learning rate
    alpha = 0.015
    # number of epochs
    max_epoch = args.epoch
    # batch size
    batch_size = args.batch
    # iterations per epoch
    train_size = len(train_H)
    iter_per_epoch = train_size // batch_size if (train_size % batch_size) == 0 \
        else (train_size // batch_size) + 1
    make_pred = args.flag

    # make feature vectors (train)
    train_x = get_feature(D, train_H, train_node_size)
    # make feature vectors (validation)
    val_x = get_feature(D, val_H, val_node_size)

    model = GNN(D, T)
    optimizer = Adam(alpha=alpha, beta1=0.9, beta2=0.999, eps=1e-8)

    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []

    for epoch in range(max_epoch):
        np.random.seed(int(epoch * 1234))
        shuffle_idx = np.random.permutation(train_H.shape[0])
        train_H = train_H[shuffle_idx]
        train_x = train_x[shuffle_idx]
        train_y = train_y[shuffle_idx]

        for num in range(iter_per_epoch):
            if train_size > (num + 1) * batch_size:
                batch_H = train_H[num * batch_size:(num + 1) * batch_size]
                batch_x = train_x[num * batch_size:(num + 1) * batch_size]
                batch_y = train_y[num * batch_size:(num + 1) * batch_size]
            else:
                batch_H = train_H[num * batch_size:]
                batch_x = train_x[num * batch_size:]
                batch_y = train_y[num * batch_size:]

            # accumulate the batch gradient and update the parameters
            batch_grads = None
            for idx in range(len(batch_H)):
                grad = model.get_gradient(batch_x[idx], batch_H[idx], batch_y[idx])
                if batch_grads is None:
                    batch_grads = {}
                    for key, val in grad.items():
                        batch_grads[key] = np.zeros_like(val)
                for key in grad.keys():
                    batch_grads[key] += grad[key] / len(batch_H)
            optimizer.update(model.params, batch_grads)

        # training loss and average accuracy
        loss = 0
        train_pred = np.zeros((len(train_y), 1))
        for idx in range(len(train_H)):
            loss += model.loss(train_x[idx], train_H[idx], train_y[idx]) / len(train_H)
            predict = 0 if model.predict(train_x[idx], train_H[idx]) < 1 / 2 else 1
            train_pred[idx] = predict
        train_score = avg_acc(train_y, train_pred)

        # validation loss and average accuracy
        val_loss = 0
        val_pred = np.zeros((len(val_y), 1))
        for idx in range(len(val_H)):
            val_loss += model.loss(val_x[idx], val_H[idx], val_y[idx]) / len(val_H)
            predict = 0 if model.predict(val_x[idx], val_H[idx]) < 1 / 2 else 1
            val_pred[idx] = predict
        val_score = avg_acc(val_y, val_pred)

        print('epoch:{} loss:{:.5f} val_loss:{:.5f} avg_acc:{:.5f} val_avg_acc:{:.5f}'.format(
            epoch + 1, loss, val_loss, train_score, val_score))
        train_loss_list.append(loss)
        val_loss_list.append(val_loss)
        train_acc_list.append(train_score)
        val_acc_list.append(val_score)

    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))
    x = np.arange(len(train_loss_list))
    ax1.plot(x, train_loss_list, label='train')
    x = np.arange(len(val_loss_list))
    ax1.plot(x, val_loss_list, label='validation')
    ax1.legend()
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss')
    x = np.arange(len(train_acc_list))
    ax2.plot(x, train_acc_list, label='train')
    x = np.arange(len(val_acc_list))
    ax2.plot(x, val_acc_list, label='validation')
    ax2.legend()
    ax2.set_xlabel('epoch')
    ax2.set_ylabel('average accuracy')
    fig.savefig('src/graph/GNN_Adam.png')
    plt.close()

    if make_pred:
        # predict on the test data
        test_H, test_node_size = get_test()
        # make feature vectors (test)
        test_x = get_feature(D, test_H, test_node_size)
        with open('prediction.txt', mode='w') as f:
            for idx in range(len(test_node_size)):
                predict = 0 if model.predict(test_x[idx], test_H[idx]) < 1 / 2 else 1
                f.write('{}'.format(predict) + '\n')
max_grad = 5.0

x_test, x_train = preprocessing.divide_test_train(x_train, test_rate=0.1)
t_test, t_train = preprocessing.divide_test_train(t_train, test_rate=0.1)

model = Transformer(vocab_size, wordvec_size, head_size,
                    num_heads=8, num_encoders=1, num_decoders=1)
if os.path.isfile("../pkl/myTransformer_params.pkl"):
    model.load_params("../pkl/myTransformer_params.pkl")

optimizer = Adam(lr=0.00001)
# optimizer = SGD(lr=0.00005)
# optimizer = RMSprop(lr=0.00005)
trainer = Trainer(model, optimizer)

acc_list = []
for epoch in range(max_epoch):
    trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size,
                max_grad=max_grad, eval_interval=10)
    model.save_params('../pkl/myTransformer_params.pkl')

    correct_num = 0
ts = corpus[1:]

# Hyperparameter settings
vocab_size = len(word_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 1
max_epoch = 50
max_grad = 5.0
sample_size = 100
lr = 0.001
time_size = 35

# Build the model
model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = RnnlmTrainer(model, optimizer)

# Training
best_ppl = float('inf')
t1 = time.time()
for epoch in range(max_epoch):
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size, max_grad=max_grad)

    model.reset_state()
    ppl = eval_perplexity(model, corpus)
    print('valid perplexity: ', ppl)

    if best_ppl > ppl:
        best_ppl = ppl
        model.save_params()