# imports (RNNLMTrainer's module path is assumed; the rest follow the scripts below)
from commons.optimizer import SGD
from commons.trainer import RNNLMTrainer
from commons.util import eval_perplexity
from datasets import ptb
from rnnlm import RNNLM


def train_rnnlm():
    # set hyperparameters
    batch_size = 20
    wordvec_size = 100
    hidden_size = 100
    time_size = 35
    lr = 20.0
    max_epoch = 4
    max_grad = 0.25

    # read train/test datasets
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus_test, _, _ = ptb.load_data('test')
    vocab_size = len(word_to_id)
    xs = corpus[:-1]  # input
    ts = corpus[1:]   # output (answer)

    # generate model and train with gradient clipping
    model = RNNLM(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)
    trainer = RNNLMTrainer(model, optimizer)
    trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad)
    trainer.plot(ylim=(0, 500))

    # evaluate on test data
    model.reset_state()
    ppl_test = eval_perplexity(model, corpus_test)
    print(f'test perplexity: {ppl_test}')

    # save the trained parameters
    model.save_params()
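# max_grad above is the threshold for gradient clipping inside the trainer; a
# minimal sketch of the usual clipping rule, assuming grads is a list of numpy
# arrays (clip_grads_sketch is illustrative, not this repo's helper):
import numpy as np

def clip_grads_sketch(grads, max_norm):
    # global L2 norm across all parameter gradients
    total_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
    rate = max_norm / (total_norm + 1e-6)
    if rate < 1:
        for g in grads:
            g *= rate  # rescale in place so the global norm becomes max_norm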
# imports (module paths assumed from this repo's layout)
import pickle

import numpy as np

from cbow import CBOW
from commons.optimizer import Adam
from commons.trainer import Trainer
from commons.util import create_contexts_target
from datasets import ptb


def train():
    # set hyperparameters
    window_size = 5
    hidden_size = 100
    batch_size = 100
    max_epoch = 10

    # read dataset and build (context, target) pairs
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    vocab_size = len(word_to_id)
    contexts, target = create_contexts_target(corpus, window_size)

    # train the CBOW model
    model = CBOW(vocab_size, hidden_size, window_size, corpus)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)
    trainer.fit(contexts, target, max_epoch, batch_size, None)
    trainer.plot()

    # save the learned word vectors (float16 to keep the file small)
    word_vecs = model.word_vecs
    params = {}
    params['word_vecs'] = word_vecs.astype(np.float16)
    params['word_to_id'] = word_to_id
    params['id_to_word'] = id_to_word
    pkl_file = 'cbow_params.pkl'
    with open(pkl_file, 'wb') as f:
        pickle.dump(params, f, -1)
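# A minimal sketch of how the saved pickle could be used later: load the word
# vectors and run a nearest-neighbour query by cosine similarity (the query
# word 'you' and the top-5 cutoff are illustrative choices, not from the
# training script):
import pickle
import numpy as np

with open('cbow_params.pkl', 'rb') as f:
    params = pickle.load(f)
word_vecs = params['word_vecs'].astype(np.float32)  # back to float32 for math
word_to_id = params['word_to_id']
id_to_word = params['id_to_word']

query = 'you'
query_vec = word_vecs[word_to_id[query]]
# cosine similarity against every row; the small constant avoids division by zero
norms = np.linalg.norm(word_vecs, axis=1) * np.linalg.norm(query_vec) + 1e-8
similarity = word_vecs @ query_vec / norms
for i in (-similarity).argsort()[1:6]:  # index 0 is the query word itself
    print(f'{id_to_word[int(i)]}: {similarity[int(i)]:.3f}')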
# imports (module paths assumed from this repo's layout)
from better_rnnlm import BetterRNNLM
from commons.optimizer import SGD
from commons.trainer import RNNLMTrainer
from commons.util import eval_perplexity
from datasets import ptb


def train_better_rnnlm():
    # set hyperparameters
    batch_size = 20
    wordvec_size = 650
    hidden_size = 650
    time_size = 35
    lr = 20.0
    max_epoch = 40
    max_grad = 0.25
    dropout = 0.5

    # read train/test datasets
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus_test, _, _ = ptb.load_data('test')
    vocab_size = len(word_to_id)
    xs = corpus[:-1]
    ts = corpus[1:]

    model = BetterRNNLM(vocab_size, wordvec_size, hidden_size, dropout)
    optimizer = SGD(lr)
    trainer = RNNLMTrainer(model, optimizer)

    best_ppl = float('inf')
    for epoch in range(max_epoch):
        trainer.fit(xs, ts, 1, batch_size, time_size, max_grad)

        # evaluate on test data after every epoch
        model.reset_state()
        ppl = eval_perplexity(model, corpus_test)
        print(f'test perplexity: {ppl}')

        if ppl < best_ppl:  # lower perplexity is better
            best_ppl = ppl
            model.save_params('better_RNN_params.pkl')
        else:
            # anneal the learning rate when test perplexity stops improving
            lr /= 4.0
            optimizer.lr = lr

        model.reset_state()
# imports (commons.util is the assumed home of the count-based helpers)
from sklearn.utils.extmath import randomized_svd

from commons.util import create_co_matrix, most_similar, ppmi
from datasets import ptb


def main():
    window_size = 2
    wordvec_size = 100

    corpus, word_to_id, id_to_word = ptb.load_data('train')
    vocab_size = len(word_to_id)

    # count-based method: co-occurrence counts -> PPMI -> truncated SVD
    C = create_co_matrix(corpus, vocab_size, window_size=window_size)
    W = ppmi(C, verbose=True)
    U, S, V = randomized_svd(W, wordvec_size, n_iter=5, random_state=None)

    # query the dense word vectors (rows of U)
    queries = ['you', 'year', 'car', 'toyota']
    for query in queries:
        most_similar(query, word_to_id, id_to_word, U, top=5)
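# most_similar is assumed to live in commons.util; a minimal sketch of what
# such a helper does, ranking words by cosine similarity in the SVD space:
import numpy as np

def most_similar_sketch(query, word_to_id, id_to_word, word_matrix, top=5):
    if query not in word_to_id:
        print(f'{query} is not found')
        return
    query_vec = word_matrix[word_to_id[query]]
    # cosine similarity between the query vector and every word vector
    norms = np.linalg.norm(word_matrix, axis=1) * np.linalg.norm(query_vec) + 1e-8
    similarity = word_matrix @ query_vec / norms
    print(f'\n[query] {query}')
    count = 0
    for i in (-similarity).argsort():
        if id_to_word[int(i)] == query:
            continue  # skip the query word itself
        print(f' {id_to_word[int(i)]}: {similarity[int(i)]:.4f}')
        count += 1
        if count >= top:
            return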
# imports (SimpleRNNLM's module path is assumed, cf. simple_rnnlm below)
from commons.optimizer import SGD
from commons.trainer import RNNLMTrainer
from datasets import ptb
from simple_rnnlm import SimpleRNNLM


def main():
    # set hyperparameters
    batch_size = 10
    wordvec_size = 100
    hidden_size = 100
    time_size = 5
    lr = 0.1
    max_epoch = 100

    # read dataset (first 1000 words only)
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus_size = 1000
    corpus = corpus[:corpus_size]
    vocab_size = int(max(corpus) + 1)
    xs = corpus[:-1]
    ts = corpus[1:]

    # train the simple RNN language model
    model = SimpleRNNLM(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)
    trainer = RNNLMTrainer(model, optimizer)
    trainer.fit(xs, ts, max_epoch, batch_size, time_size)
    trainer.plot()
# imports (the generator models' module path is assumed)
import numpy as np

from datasets import ptb
from rnnlm_gen import GenBetterRNNLM, GenRNNLM


def generate_text():
    corpus, word_to_id, id_to_word = ptb.load_data('train')

    start_word = 'you'
    start_id = word_to_id[start_word]
    skip_words = ['N', '<unk>', '$']
    skip_ids = [word_to_id[word] for word in skip_words]

    # generate text with the plain RNNLM
    print('-' * 50)
    model = GenRNNLM()
    model.load_params()
    word_ids = model.generate(start_id, skip_ids)
    text = ' '.join([id_to_word[word_id] for word_id in word_ids])
    text = text.replace(' <eos> ', '.\n')
    print(text)

    # generate text with the improved RNNLM
    print('-' * 50)
    model = GenBetterRNNLM()
    model.load_params()
    word_ids = model.generate(start_id, skip_ids)
    text = ' '.join([id_to_word[word_id] for word_id in word_ids])
    text = text.replace(' <eos> ', '.\n')
    print(text)

    # feed a seed phrase word by word, then generate from its last word
    print('-' * 50)
    model.reset_state()
    for word in 'the meaning of life is'.split():
        if word == 'is':
            start_id = word_to_id[word]
            word_ids = model.generate(start_id, skip_ids)
        else:
            x = np.array(word_to_id[word]).reshape(1, 1)
            model.predict(x)
    print('the meaning of life is ?')
    text = ' '.join([id_to_word[word_id] for word_id in word_ids[1:]])
    text = text.split('<eos>')[0]
    print(text)
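# generate above is assumed to wrap the model's predict step; a minimal sketch
# of such a method: repeatedly sample the next word id from the softmax
# distribution over the scores, skipping any id in skip_ids.
import numpy as np

def softmax(x):
    x = x - x.max()  # shift for numerical stability
    e = np.exp(x)
    return e / e.sum()

def generate_sketch(model, start_id, skip_ids=None, sample_size=100):
    word_ids = [start_id]
    x = start_id
    while len(word_ids) < sample_size:
        score = model.predict(np.array(x).reshape(1, 1))
        p = softmax(score.flatten())
        sampled = int(np.random.choice(len(p), p=p))
        if (skip_ids is None) or (sampled not in skip_ids):
            x = sampled
            word_ids.append(sampled)
    return word_ids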
from commons.optimizer import SGD
from datasets import ptb
from simple_rnnlm import SimpleRnnlm
import matplotlib.pyplot as plt
import numpy as np

# set hyperparameters
batch_size = 10
wordvec_size = 100
hidden_size = 100  # number of elements in the RNN's hidden state vector
time_size = 5      # number of time steps to unroll for Truncated BPTT
lr = 0.1
max_epoch = 100

# read dataset (first 1000 words only)
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)
xs = corpus[:-1]  # input
ts = corpus[1:]   # output (answer)
data_size = len(xs)
print(f'Corpus Size: {corpus_size}, Number of Vocab: {vocab_size}')

# variables for learning
max_iters = data_size // (batch_size * time_size)
time_idx = 0
total_loss = 0
loss_count = 0
ppl_list = []
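# The script above stops after initializing the bookkeeping variables; a
# minimal sketch of the custom Truncated-BPTT training loop that would consume
# them, assuming SimpleRnnlm exposes forward/backward and params/grads like
# the trainer-based scripts elsewhere in this repo:
model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)

# each batch row reads the corpus from its own offset so sequences stay contiguous
jump = (corpus_size - 1) // batch_size
offsets = [i * jump for i in range(batch_size)]

for epoch in range(max_epoch):
    for iteration in range(max_iters):
        # gather a (batch_size, time_size) mini-batch, wrapping around the corpus
        batch_x = np.empty((batch_size, time_size), dtype='i')
        batch_t = np.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, t] = xs[(offset + time_idx) % data_size]
                batch_t[i, t] = ts[(offset + time_idx) % data_size]
            time_idx += 1

        # forward / backward / update
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        total_loss += loss
        loss_count += 1

    # perplexity = exp(average cross-entropy loss)
    ppl = np.exp(total_loss / loss_count)
    print(f'| epoch {epoch + 1} | perplexity {ppl:.2f}')
    ppl_list.append(float(ppl))
    total_loss, loss_count = 0, 0

# plot the perplexity curve
plt.plot(np.arange(len(ppl_list)), ppl_list)
plt.xlabel('epoch')
plt.ylabel('perplexity')
plt.show()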
from commons.optimizer import SGD
from commons.trainer import RnnlmTrainer
from commons.util import eval_perplexity
from datasets import ptb
from rnnlm import RNNLM

# set hyperparameters
batch_size = 20
wordvec_size = 100
hidden_size = 100  # number of elements in the hidden layers of the RNN
time_size = 35     # unfold size of the RNN
lr = 20.0
max_epoch = 4
max_grad = 0.25

# read train/test datasets
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_test, _, _ = ptb.load_data('test')
vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]

# generate model
model = RNNLM(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

# train with gradient clipping
trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad)
trainer.plot(ylim=(0, 500))

# evaluate on test data
model.reset_state()
ppl_test = eval_perplexity(model, corpus_test)
print(f'test perplexity: {ppl_test}')
from rnnlm import RNNLM
from better_rnnlm import BetterRNNLM
from datasets import ptb
from commons.util import eval_perplexity

if __name__ == '__main__':
    # select the model to evaluate
    model = RNNLM()
    # model = BetterRNNLM()

    # read the tuned params saved during training
    model.load_params()

    corpus, _, _ = ptb.load_data('test')

    model.reset_state()
    ppl_test = eval_perplexity(model, corpus)
    print('Test Perplexity:', ppl_test)
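# eval_perplexity is imported from commons.util; a minimal sketch of what such
# a helper computes, assuming model.forward(xs, ts) returns the mean
# cross-entropy loss over a (batch_size, time_size) mini-batch:
import numpy as np

def eval_perplexity_sketch(model, corpus, batch_size=10, time_size=35):
    corpus_size = len(corpus)
    total_loss, loss_count = 0.0, 0
    max_iters = (corpus_size - 1) // (batch_size * time_size)
    jump = (corpus_size - 1) // batch_size

    for iters in range(max_iters):
        # build sequential input/target batches, wrapping around the corpus
        xs = np.zeros((batch_size, time_size), dtype=np.int32)
        ts = np.zeros((batch_size, time_size), dtype=np.int32)
        time_offset = iters * time_size
        offsets = [time_offset + (i * jump) for i in range(batch_size)]
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                xs[i, t] = corpus[(offset + t) % corpus_size]
                ts[i, t] = corpus[(offset + t + 1) % corpus_size]

        total_loss += model.forward(xs, ts)
        loss_count += 1

    # perplexity = exp(average cross-entropy loss)
    return np.exp(total_loss / loss_count)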