xs = corpus[:-1]  # input
ts = corpus[1:]   # output (answer)
data_size = len(xs)
print(f'Corpus Size: {corpus_size}, Number of Vocab: {vocab_size}')

# variables for learning
max_iters = data_size // (batch_size * time_size)
time_idx = 0
total_loss = 0
loss_count = 0
ppl_list = []

# init model
model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)

# calculate start reading point of each sample of mini batch
jump = (corpus_size - 1) // batch_size
offsets = [i * jump for i in range(batch_size)]

for epoch in range(max_epoch):
    for iter in range(max_iters):
        # get mini batch
        batch_x = np.empty((batch_size, time_size), dtype='i')
        batch_t = np.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, t] = xs[(offset + time_idx) % data_size]
                batch_t[i, t] = ts[(offset + time_idx) % data_size]
            time_idx += 1
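To make the offset bookkeeping concrete, here is a small self-contained sketch (toy corpus and sizes invented for illustration): each batch row reads its own contiguous region of the corpus, which is what lets the RNN's hidden state stay meaningful from one iteration to the next.

import numpy as np

# toy settings, made up for illustration only
corpus = np.arange(100)                  # stand-in corpus of token ids
batch_size, time_size = 4, 5
xs = corpus[:-1]
data_size = len(xs)

jump = (len(corpus) - 1) // batch_size   # spacing between row start points
offsets = [i * jump for i in range(batch_size)]

time_idx = 0
batch_x = np.empty((batch_size, time_size), dtype='i')
for t in range(time_size):
    for i, offset in enumerate(offsets):
        batch_x[i, t] = xs[(offset + time_idx) % data_size]
    time_idx += 1

print(offsets)  # [0, 24, 48, 72]
print(batch_x)  # row i is xs[offsets[i] : offsets[i] + time_size]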
from two_layer_net import TwoLayerNet
from commons.optimizer import SGD
from datasets import spiral
import matplotlib.pyplot as plt
import numpy as np

# set hyperparameters
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# read data and set up model and optimizer
x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

# variables for learning
data_size = len(x)
max_iters = data_size // batch_size
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # shuffle data for mini batches
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]

    for iters in range(max_iters):
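        # --- sketch of the loop body the excerpt above truncates, assuming
        # the forward/backward and params/grads interface of TwoLayerNet and
        # the SGD optimizer used throughout these listings ---
        batch_x = x[iters * batch_size:(iters + 1) * batch_size]
        batch_t = t[iters * batch_size:(iters + 1) * batch_size]

        loss = model.forward(batch_x, batch_t)       # forward pass -> loss
        model.backward()                             # backprop fills grads
        optimizer.update(model.params, model.grads)  # gradient descent step
        total_loss += loss
        loss_count += 1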
hidden_size = 100  # number of elements in hidden layers of RNN
time_size = 35     # unfold size of RNN
lr = 20.0
max_epoch = 4
max_grad = 0.25

# read train dataset
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_test, _, _ = ptb.load_data('test')
vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]

# generate model
model = RNNLM(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

# train with gradient clipping
trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad,
            eval_interval=20)
trainer.plot(ylim=(0, 500))

# evaluate with test dataset
model.reset_state()
ppl_test = eval_perplexity(model, corpus_test)
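The gradient clipping that RnnlmTrainer applies when max_grad is passed can be summarized in a few lines. This is a minimal sketch (assuming grads is a list of NumPy arrays) that rescales all gradients whenever their combined L2 norm exceeds the threshold:

import numpy as np

def clip_grads(grads, max_norm):
    # rescale all gradients in place when their global L2 norm exceeds
    # max_norm; this is what max_grad=0.25 triggers during training
    total_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
    rate = max_norm / (total_norm + 1e-6)
    if rate < 1:
        for g in grads:
            g *= rate

Clipping by the global norm preserves the direction of the update while bounding its size, which is what keeps truncated-BPTT training stable even at lr = 20.0.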
from commons.optimizer import SGD
from commons.trainer import Trainer
from datasets import spiral
from two_layer_net import TwoLayerNet

# set hyperparameters
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

trainer = Trainer(model, optimizer)
trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
trainer.plot()
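Trainer packages the shuffle/minibatch/update loop written out by hand in the earlier listing. Below is a condensed skeleton of the idea; it is a sketch, not the actual commons.trainer.Trainer, which additionally tracks elapsed time, averages loss over eval_interval, and supports gradient clipping.

import numpy as np

class MinimalTrainer:
    # condensed sketch of what the Trainer abstraction encapsulates
    def __init__(self, model, optimizer):
        self.model, self.optimizer = model, optimizer
        self.loss_list = []

    def fit(self, x, t, max_epoch=10, batch_size=32, eval_interval=10):
        max_iters = len(x) // batch_size
        for epoch in range(max_epoch):
            idx = np.random.permutation(len(x))    # reshuffle every epoch
            x, t = x[idx], t[idx]
            for iters in range(max_iters):
                batch_x = x[iters * batch_size:(iters + 1) * batch_size]
                batch_t = t[iters * batch_size:(iters + 1) * batch_size]
                loss = self.model.forward(batch_x, batch_t)
                self.model.backward()
                self.optimizer.update(self.model.params, self.model.grads)
                if iters % eval_interval == 0:
                    self.loss_list.append(loss)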
# read train dataset
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_val, _, _ = ptb.load_data('val')
corpus_test, _, _ = ptb.load_data('test')

if config.GPU:
    corpus = to_gpu(corpus)
    corpus_val = to_gpu(corpus_val)
    corpus_test = to_gpu(corpus_test)

vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]

model = BetterRNNLM(vocab_size, wordvec_size, hidden_size, dropout)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

best_ppl = float('inf')
for epoch in range(max_epoch):
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size,
                time_size=time_size, max_grad=max_grad)

    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print('Validation Perplexity:', ppl)

    if best_ppl > ppl:
        best_ppl = ppl
        model.save_params()
    else:
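        # --- sketch of the truncated else branch: a standard choice here
        # (and the one this script appears to follow) is to decay the
        # learning rate whenever validation perplexity stops improving ---
        lr /= 4.0
        optimizer.lr = lr

For reference, perplexity is the exponential of the average cross-entropy loss, so a validation perplexity of p means the model is, on average, as uncertain about the next word as a uniform choice among p candidates.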
from collections import OrderedDict
# assumption: the optimizer classes live alongside SGD in commons.optimizer
from commons.optimizer import SGD, Momentum, AdaGrad, Adam

def f(x, y):
    return x**2 / 10 + y**2

def df(x, y):
    return 2 * x / 10, 2 * y

init_pos = (-7.0, 2.0)
params = {}
params["x"], params["y"] = init_pos[0], init_pos[1]
grads = {}
grads["x"], grads["y"] = 0, 0

optimizers = OrderedDict()
optimizers["SGD"] = SGD(lr=0.7)
optimizers["Momentum"] = Momentum(lr=0.1)
optimizers["AdaGrad"] = AdaGrad(lr=1.5)
optimizers["Adam"] = Adam(lr=0.3)

idx = 1

# iterate over the optimizers and record each one's trajectory
for key in optimizers:
    optimizer = optimizers[key]
    x_history = []  # parameter values visited during optimization
    y_history = []
    params["x"], params["y"] = init_pos[0], init_pos[1]  # reset to start

    # record the current parameters each iteration, then let the
    # optimizer update them
    for i in range(30):
        x_history.append(params["x"])
        y_history.append(params["y"])
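        # --- sketch of the truncated remainder of the loop: evaluate the
        # gradient at the current point and let the optimizer take a step ---
        grads["x"], grads["y"] = df(params["x"], params["y"])
        optimizer.update(params, grads)

The update rules being compared each reduce to a few lines. A minimal sketch of the first two, matching the standard definitions (AdaGrad and Adam expose the same update(params, grads) interface):

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params:
            params[key] -= self.lr * grads[key]  # W <- W - lr * dL/dW

class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr, self.momentum, self.v = lr, momentum, None

    def update(self, params, grads):
        if self.v is None:
            self.v = {key: 0 for key in params}
        for key in params:
            # v <- momentum * v - lr * grad; W <- W + v
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]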