def runTest(n_layers, pre_modelFile, hidden_size, reverse, modelFile, beam_size, input, corpus, diff_corpus):
    """Load a trained seq2seq chatbot plus a gensim word2vec model and run
    evaluation, either interactively or on random pairs from ``diff_corpus``.

    Args:
        n_layers: number of RNN layers for encoder and decoder.
        pre_modelFile: path to a binary word2vec file loadable by gensim.
        hidden_size: RNN hidden size.
        reverse: whether input sequences were reversed at training time.
        modelFile: seq2seq checkpoint with state dicts under 'en' and 'de'.
        beam_size: beam width used by the evaluators.
        input: truthy -> interactive loop; falsy -> 20 random samples.
            NOTE(review): this parameter shadows the ``input`` builtin.
        corpus: corpus used to build the vocabulary.
        diff_corpus: corpus whose pairs are used for random evaluation.
    """
    voc, pairs = loadPrepareData(corpus)
    diff_voc, diff_pairs = loadPrepareData(diff_corpus)
    # NOTE(review): num_embeddings is hard-coded to 300 instead of voc.n_words;
    # presumably matched to 300-dim pretrained vectors -- TODO confirm.
    embedding = nn.Embedding(300, hidden_size)
    #-----------------------------------------------------------------
    #my code
    # The triple-quoted block below is disabled code that loaded an
    # NGramLanguageModeler checkpoint to act as the embedding layer.
    '''
    EMBEDDING_DIM = 300 #Should be the same as hidden_size!
    if EMBEDDING_DIM != hidden_size:
        sys.exit("EMBEDDING_DIM do not equal to hidden_size. Please correct it.")
    CONTEXT_SIZE = 2
    pre_checkpoint = torch.load(pre_modelFile)
    pretrained_model = NGramLanguageModeler(voc.n_words, EMBEDDING_DIM, CONTEXT_SIZE)
    pretrained_model.load_state_dict(pre_checkpoint['w2v'])
    pretrained_model.train(False)
    embedding = pretrained_model
    '''
    if USE_CUDA:
        embedding = embedding.cuda()
    #-----------------------------------------------------------------
    # NOTE(review): encoder input size is also hard-coded to 300 -- must stay
    # consistent with the embedding above.
    encoder = EncoderRNN(300, hidden_size, embedding, n_layers)
    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.n_words, n_layers)
    # Map the checkpoint onto CPU when CUDA is unavailable.
    if USE_CUDA:
        checkpoint = torch.load(modelFile)
    else:
        checkpoint = torch.load(modelFile, map_location='cpu')
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])
    # train mode set to false, effect only on dropout, batchNorm
    encoder.train(False)
    decoder.train(False)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    print('Loading w2v_model ...')
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    w2v_model = gensim.models.KeyedVectors.load_word2vec_format(pre_modelFile, binary=True)
    print("Loading complete!")
    if input:
        evaluateInput(encoder, decoder, voc, beam_size, w2v_model)
    else:
        evaluateRandomly(encoder, decoder, voc, diff_pairs, reverse, beam_size, w2v_model, 20)
def draw_2D_word_vector(modelFile, corpus, EMBEDDING_DIM, CONTEXT_SIZE, frequency_boundary, batch_size):
    """Plot a 2-D t-SNE projection of the embeddings of all frequent words.

    Loads an NGramLanguageModeler checkpoint, collects the embedding of every
    vocabulary word whose corpus frequency exceeds ``frequency_boundary``, and
    passes the matrix to ``tsne`` which writes 'b{boundary}vectors2D.png'.

    Fixes: removed the dead ``index2vector`` dict (built inconsistently --
    scalar for index 0, one-element lists afterwards -- and never read) and
    the misleading ``below1000_count`` name (the boundary is a parameter).
    """
    checkpoint = torch.load(modelFile)
    voc, pairs = loadPrepareData(corpus)
    model = NGramLanguageModeler(voc.n_words, EMBEDDING_DIM, CONTEXT_SIZE)
    model.load_state_dict(checkpoint['w2v'])
    model.train(False)  # inference only: disable dropout/batch-norm updates
    # Seed labels/matrix with word 0 so np.concatenate has a row to append to.
    seed_word = voc.index2word[0]
    labels = [seed_word]
    seed_vec = np.array(get_word_vector(model, seed_word, voc, EMBEDDING_DIM).data)
    vectors2D = np.array([seed_vec])
    low_freq_count = 0
    for i in range(voc.n_words):
        word = voc.index2word[i]
        if voc.word2count[word] <= frequency_boundary:
            low_freq_count += 1  # too rare: excluded from the plot
        else:
            labels.append(word)
            vec = np.array(get_word_vector(model, word, voc, EMBEDDING_DIM).data)
            vectors2D = np.concatenate((vectors2D, [vec]), axis=0)
    print("{} words out of {} words are in low frequency({} times).".format(
        low_freq_count, voc.n_words, frequency_boundary))
    print("{} words left".format(voc.n_words - low_freq_count))
    print("Shape of vectors2D: {}".format(vectors2D.shape))
    file_name = 'b{}vectors2D.png'.format(frequency_boundary)
    # Checkpoint files are named '<iteration>_...', so the prefix is the iteration.
    iteration = os.path.split(modelFile)[-1].split('_')[0]
    tsne(corpus, voc.n_words, vectors2D, labels, file_name, iteration, batch_size, EMBEDDING_DIM)
def prep_net():
    """Build the movie-subtitles chatbot from its saved checkpoint.

    Returns:
        (beam_size, encoder, decoder, voc) ready for inference on ``device``.
    """
    model_path = './save/model/movie_subtitles/1-1_512/50000_backup_bidir_model.tar'
    corpus_path = './corpus/movie_subtitles.txt'
    n_iteration = 10000
    n_layers, hidden_size, reverse = parseFilename(model_path, True)
    beam_size = 1
    torch.set_grad_enabled(False)  # inference only

    voc, pairs = loadPrepareData(corpus_path)
    state = torch.load(model_path, map_location='cpu')

    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    decoder = LuongAttnDecoderRNN('dot', embedding, hidden_size, voc.n_words, n_layers)
    encoder.load_state_dict(state['en'])
    decoder.load_state_dict(state['de'])

    # Eval mode (affects dropout / batch-norm only), then move to the device.
    for net in (encoder, decoder):
        net.train(False)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    return beam_size, encoder, decoder, voc
def runTest(n_layers, hidden_size, reverse, modelFile, beam_size, input, corpus):
    """Restore an encoder/decoder checkpoint and evaluate it, interactively
    when ``input`` is truthy, otherwise on 20 random corpus pairs."""
    voc, pairs = loadPrepareData(corpus)
    state = torch.load(modelFile)

    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    decoder = LuongAttnDecoderRNN('dot', embedding, hidden_size, voc.n_words, n_layers)
    encoder.load_state_dict(state['en'])
    decoder.load_state_dict(state['de'])

    # Eval mode: only dropout / batch-norm behave differently.
    encoder.train(False)
    decoder.train(False)
    if USE_CUDA:
        encoder, decoder = encoder.cuda(), decoder.cuda()

    if input:
        evaluateInput(encoder, decoder, voc, beam_size)
    else:
        evaluateRandomly(encoder, decoder, voc, pairs, reverse, beam_size, 20)
def runTest(n_layers, hidden_size, reverse, modelFile, beam_size, inp, corpus):
    """Load a saved encoder/decoder pair and evaluate it interactively
    (``inp`` truthy) or on 20 random pairs from ``corpus``."""
    # TODO: beam_size controls how many answers are produced per input.
    # When beam_size is not 1, is the EOS token also emitted??
    torch.set_grad_enabled(False)
    voc, pairs = loadPrepareData(corpus)
    embedding = nn.Embedding(voc.n_words, hidden_size)
    # nn.Embedding is trainable; it embeds the input space X into a space Y.
    # nn.Embedding(voc.n_words, hidden_size) means there are voc.n_words input
    # elements (words) embedded into a hidden_size-dimensional space, i.e. each
    # word goes from a one-hot encoding to a hidden_size-dimensional vector.
    encoder = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.n_words, n_layers)
    checkpoint = torch.load(modelFile)
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])
    # train mode set to false, effect only on dropout, batchNorm
    encoder.train(False)
    decoder.train(False)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    if inp:
        evaluateInput(encoder, decoder, voc, beam_size)
    else:
        evaluateRandomly(encoder, decoder, voc, pairs, reverse, beam_size, 20)
def runTest(n_layers, hidden_size, batch_size, reverse, modelFile, beam_size, inp, corpus):
    """Restore the location-vocabulary encoder/decoder checkpoint and run
    interactive (``inp`` truthy) or random-sample evaluation."""
    torch.set_grad_enabled(False)
    voc, pairs = loadPrepareData(corpus)

    encoder = EncoderRNN(hidden_size, batch_size, n_layers)
    decoder = LuongAttnDecoderRNN('dot', hidden_size, batch_size, voc.loc_count, n_layers)

    state = torch.load(modelFile)
    encoder.load_state_dict(state['en'])
    decoder.load_state_dict(state['de'])

    # Eval mode (dropout / batch-norm only), then place on the target device.
    for net in (encoder, decoder):
        net.train(False)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    if inp:
        evaluateInput(encoder, decoder, voc, beam_size)
    else:
        evaluateRandomly(encoder, decoder, voc, pairs, reverse, beam_size, 20)
def runTest(n_layers, pre_modelFile, hidden_size, reverse, modelFile, beam_size, input, corpus, diff_corpus):
    """Evaluate a seq2seq model whose embedding layer is a pretrained
    NGramLanguageModeler loaded from ``pre_modelFile``.

    The vocabulary comes from ``corpus``; evaluation pairs come from
    ``diff_corpus``.  NOTE(review): parameter ``input`` shadows the builtin.
    """
    voc, pairs = loadPrepareData(corpus)
    diff_voc, diff_pairs = loadPrepareData(diff_corpus)
    #embedding = nn.Embedding(voc.n_words, hidden_size)
    #-----------------------------------------------------------------
    #my code
    EMBEDDING_DIM = 300 #Should be the same as hidden_size!
    if EMBEDDING_DIM != hidden_size:
        # Abort outright: the pretrained embedding width must match the RNN.
        sys.exit("EMBEDDING_DIM do not equal to hidden_size. Please correct it.")
    CONTEXT_SIZE = 2
    pre_checkpoint = torch.load(pre_modelFile)
    pretrained_model = NGramLanguageModeler(voc.n_words, EMBEDDING_DIM, CONTEXT_SIZE)
    pretrained_model.load_state_dict(pre_checkpoint['w2v'])
    pretrained_model.train(False)
    # The entire pretrained model stands in for an nn.Embedding layer.
    embedding = pretrained_model
    if USE_CUDA:
        embedding = embedding.cuda()
    #-----------------------------------------------------------------
    encoder = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.n_words, n_layers)
    # Map the checkpoint onto CPU when CUDA is unavailable.
    if USE_CUDA:
        checkpoint = torch.load(modelFile)
    else:
        checkpoint = torch.load(modelFile, map_location='cpu')
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])
    # train mode set to false, effect only on dropout, batchNorm
    encoder.train(False);
    decoder.train(False);
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    if input:
        evaluateInput(encoder, decoder, voc, beam_size)
    else:
        evaluateRandomly(encoder, decoder, voc, diff_pairs, reverse, beam_size, 20)
def load(filename, reverse=False, n_iteration=1, batch_size=64):
    """Generate ``n_iteration`` random training batches of ``batch_size``
    pairs from the corpus in ``filename`` and print them for inspection."""
    voc, pairs = loadPrepareData(filename)
    # training data
    corpus_name = os.path.split(filename)[-1].split('.')[0]
    training_batches = None
    print('Training pairs not found, generating ...')
    batches = []
    for _ in range(n_iteration):
        # Sample batch_size pairs (with replacement) and pack them.
        sample = [random.choice(pairs) for _ in range(batch_size)]
        batches.append(batch2TrainData(voc, sample, reverse))
    training_batches = batches
    print(len(training_batches))
    print(training_batches)
def test_word_vector(modelFile, corpus, EMBEDDING_DIM, CONTEXT_SIZE):
    """Interactive probe: read a word *index* from stdin ('q' quits), fetch
    the word's embedding from the NGramLanguageModeler checkpoint and print
    the word's corpus frequency."""
    checkpoint = torch.load(modelFile)
    voc, pairs = loadPrepareData(corpus)
    model = NGramLanguageModeler(voc.n_words, EMBEDDING_DIM, CONTEXT_SIZE)
    model.load_state_dict(checkpoint['w2v'])
    model.train(False)  # inference only
    while True:
        test_word = input('>')
        if test_word == 'q':
            break
        try:
            # Look up the embedding for the word at the given index.
            embeds = get_word_vector(model, voc.index2word[int(test_word)], voc, EMBEDDING_DIM)
            # BUG FIX: message previously read "Word freauency".
            print("Word frequency of '{}': {}".format(voc.index2word[int(test_word)],
                  voc.word2count[voc.index2word[int(test_word)]]))
        except KeyError:
            # Index not present in index2word / word2count.
            print("This index is vacant.")
        except ValueError:
            # Input was not an integer.
            print("Please input an index.")
def draw_manually(modelFile, corpus, EMBEDDING_DIM, CONTEXT_SIZE, frequency_boundary, batch_size):
    """Prompt for a space-separated word list and draw a t-SNE plot of the
    words' embeddings to 'manually_<first>2<last>.png'."""
    checkpoint = torch.load(modelFile)
    voc, pairs = loadPrepareData(corpus)
    model = NGramLanguageModeler(voc.n_words, EMBEDDING_DIM, CONTEXT_SIZE)
    model.load_state_dict(checkpoint['w2v'])
    model.train(False)

    words = input("Input space-separated words: ").split()
    # Seed labels/matrix with the first word, then append the rest.
    labels = [words[0]]
    first_vec = np.array(get_word_vector(model, words[0], voc, EMBEDDING_DIM).data)
    vectors2D = np.array([first_vec])
    for word in words[1:]:
        labels.append(word)
        vec = np.array(get_word_vector(model, word, voc, EMBEDDING_DIM).data)
        vectors2D = np.concatenate((vectors2D, [vec]), axis=0)

    print("Shape of vectors2D: {}".format(vectors2D.shape))
    file_name = 'manually_{}2{}.png'.format(words[0], words[-1])
    # Checkpoint files are named '<iteration>_...'.
    iteration = os.path.split(modelFile)[-1].split('_')[0]
    tsne(corpus, len(words), vectors2D, labels, file_name, iteration, batch_size, EMBEDDING_DIM)
def test_vector_relation(modelFile, corpus, EMBEDDING_DIM, CONTEXT_SIZE):
    """Read a word from stdin and print its four nearest vocabulary words,
    ranked by mean squared distance between embedding vectors."""
    checkpoint = torch.load(modelFile)
    voc, pairs = loadPrepareData(corpus)
    model = NGramLanguageModeler(voc.n_words, EMBEDDING_DIM, CONTEXT_SIZE)
    model.load_state_dict(checkpoint['w2v'])
    model.train(False)
    # NOTE(review): word1..word3 and the commented analogy line below are
    # remnants of a word-analogy experiment; word4 is immediately overwritten
    # by user input, so only the nearest-neighbour search runs.
    word1, word2, word3, word4 = "heaven", "hell", "good", "cat"
    word4 = input('>')
    test_word1 = np.array(get_word_vector(model, word1, voc, EMBEDDING_DIM).data)
    test_word2 = np.array(get_word_vector(model, word2, voc, EMBEDDING_DIM).data)
    test_word3 = np.array(get_word_vector(model, word3, voc, EMBEDDING_DIM).data)
    test_word4 = np.array(get_word_vector(model, word4, voc, EMBEDDING_DIM).data)
    #test_word4_like = test_word3 - (test_word1 - test_word2)
    test_word4_like = test_word4
    # Running top-4 minimum distances and their word indices (sentinel: huge
    # distance, index -1).
    _1st, _2nd, _3rd, _4th = 99999999, 99999999, 99999999, 99999999
    i_1st, i_2nd, i_3rd, i_4th = -1, -1, -1, -1
    for i in tqdm(range(0, voc.n_words)):
        i_vector = np.array(get_word_vector(model, voc.index2word[i], voc, EMBEDDING_DIM).data)
        # Mean squared difference over all embedding dimensions.
        distance = ((i_vector - test_word4_like) ** 2).mean(axis=None)
        #print(distance)
        # Insert into the sorted top-4, shifting the rest down.
        if distance < _1st:
            _4th, _3rd, _2nd, _1st = _3rd, _2nd, _1st, distance
            i_4th, i_3rd, i_2nd, i_1st = i_3rd, i_2nd, i_1st, i
            #print("1st index:", i)
        elif distance < _2nd:
            _4th, _3rd, _2nd = _3rd, _2nd, distance
            i_4th, i_3rd, i_2nd = i_3rd, i_2nd, i
        elif distance < _3rd:
            _4th, _3rd = _3rd, distance
            i_4th, i_3rd = i_3rd, i
        elif distance < _4th:
            _4th = distance
            i_4th = i
    _1st_word = voc.index2word[i_1st]
    _2nd_word = voc.index2word[i_2nd]
    _3rd_word = voc.index2word[i_3rd]
    _4th_word = voc.index2word[i_4th]
    print("Most likely words of {}: {} > {} > {} > {} > other_words".format(word4, _1st_word, _2nd_word, _3rd_word, _4th_word))
def trainWord2vec(corpus, iteration, hidden_size, frequency_boundary):
    """Train a gensim Word2Vec model on the input side of every corpus pair
    and save it under save_dir/model/<corpus>/gensim/hi<h>fb<f>/."""
    voc, pairs = loadPrepareData(corpus)
    corpus_name = os.path.split(corpus)[-1].split('.')[0]
    # Only pair[0] (the input sentence of each pair) is used as training text.
    sentences = []
    for pair in pairs:
        sentences.append(pair[0].split(' '))
    print("Sentences ready, start training...")
    # NOTE(review): `iter`, `size` and `model.iter` are gensim<4.0 names
    # (renamed to epochs / vector_size in gensim 4) -- confirm the pinned
    # gensim version before upgrading.
    model = Word2Vec(iter=iteration, size=hidden_size, window=10, min_count=frequency_boundary, workers=4)
    model.build_vocab(sentences)
    model.train(sentences, total_examples=model.corpus_count, epochs=model.iter)
    directory = os.path.join(
        save_dir, 'model', corpus_name, 'gensim',
        'hi{}fb{}'.format(hidden_size, frequency_boundary))
    if not os.path.exists(directory):
        os.makedirs(directory)
    model.save(os.path.join(directory, 'mymodel{}'.format(iteration)))
def runTest(n_layers, hidden_size, reverse, modelFile, attn_model, beam_size, k, p, v, inp, corpus):
    """Restore a (optionally variational) encoder/decoder checkpoint and run
    interactive evaluation (``inp`` truthy) or scoring over the corpus pairs.

    Fixes two ordering bugs in the original:
      * ``checkpoint`` was read inside the ``v`` branch before
        ``torch.load(modelFile)`` ran (NameError).
      * ``hidvar`` was passed to the evaluators even when ``v`` was falsy and
        it had never been defined (NameError). It is now initialised to None.
    """
    torch.set_grad_enabled(False)
    voc, pairs = loadPrepareData(corpus)
    embedding = nn.Embedding(voc.num_words, hidden_size)
    # Load the checkpoint up front: both branches below read from it.
    checkpoint = torch.load(modelFile)
    hidvar = None  # only set when the variational path is enabled
    if v:
        # Variational path: decoder works in a doubled hidden space and a
        # latent-variation module is restored from the checkpoint.
        embedding_decoder = nn.Embedding(voc.num_words, hidden_size * 2)
        encoder = EncoderRNN(hidden_size, embedding, n_layers)
        decoder = DecoderRNN(attn_model, embedding_decoder, hidden_size * 2, voc.num_words, n_layers)
        hidvar = LatentVariation(hidden_size * 2, hidden_size)
        hidvar.load_state_dict(checkpoint['hv'])
        hidvar = hidvar.to(device)
    else:
        encoder = EncoderRNN(voc.num_words, hidden_size, embedding, n_layers)
        decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, n_layers)
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])
    # train mode set to false, effect only on dropout, batchNorm
    encoder.train(False)
    decoder.train(False)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    if inp:
        evaluateInput(encoder, decoder, hidvar, voc, beam_size, k, p, hidvar)
    else:
        evaluateScore(encoder, decoder, hidvar, voc, pairs, reverse, beam_size, hidvar)
def runTest(corpus, rnn_layers, hidden_size, embed_size, node_size, capsule_size, gcn_layers, gcn_filters, capsule_num, saved_aspect_model, saved_review_model, beam_size, max_length, min_length, save_dir):
    """Load the saved aspect and review models and decode every test pair."""
    vocabs, train_pairs, valid_pairs, test_pairs = loadPrepareData(corpus, save_dir)

    print('Building aspect model ...')
    aspect_model = AspectModel(vocabs, embed_size, node_size, hidden_size,
                               capsule_size, gcn_layers, gcn_filters,
                               rnn_layers, capsule_num).to(device)
    print('Building review model ...')
    review_model = ReviewModel(vocabs, embed_size, node_size, hidden_size,
                               rnn_layers).to(device)

    # Each model is restored from its own checkpoint file.
    aspect_model.load_state_dict(torch.load(saved_aspect_model)['aspect_model'])
    review_model.load_state_dict(torch.load(saved_review_model)['review_model'])

    # Eval mode: only dropout / batch-norm are affected.
    aspect_model.train(False)
    review_model.train(False)

    evaluateRandomly(aspect_model, review_model, vocabs, test_pairs,
                     len(test_pairs), beam_size, max_length, min_length, save_dir)
def predict_word(modelFile, corpus, EMBEDDING_DIM, CONTEXT_SIZE):
    """Interactive demo: read two words and print the word the
    NGramLanguageModeler predicts as most likely to follow them.

    Fix: blank input used to crash with IndexError on ``test_word[0]``;
    empty lines are now skipped.
    """
    checkpoint = torch.load(modelFile)
    voc, pairs = loadPrepareData(corpus)
    model = NGramLanguageModeler(voc.n_words, EMBEDDING_DIM, CONTEXT_SIZE)
    model.load_state_dict(checkpoint['w2v'])
    model.train(False)  # inference only
    print("Please input 2 space-separated words(input 'q' to exit)")
    while True:
        test_word = input('>')
        test_word = test_word.split()
        if not test_word:
            continue  # BUG FIX: empty line -> re-prompt instead of IndexError
        if test_word[0] == 'q':
            break
        if len(test_word) != 2:
            print("You should input 2 words!")
        else:
            try:
                test_word_idxs = [voc.word2index[w] for w in test_word]
                test_word_var = Variable(torch.LongTensor(test_word_idxs))
                # Model returns log-probabilities over the vocabulary plus the
                # context embeddings; argmax picks the predicted next word.
                log_probs, embeds = model(test_word_var)
                _, i_predicted_word = torch.max(log_probs, 1)
                print("The next word of '{} {}' is '{}'".format(test_word[0], test_word[1], voc.index2word[i_predicted_word.data[0]]))
            except KeyError:
                # One of the two words is not in the vocabulary.
                print("Incorrect spelling or unseen word.")
def trainIters(n_iteration, learning_rate, batch_size, n_layers, hidden_size, attn_model='dot', decoder_learning_ratio=5.0):
    """Set up data, models and optimizers for seq2seq training.

    NOTE(review): the training loop below only unpacks each batch and never
    performs a train step -- the function appears unfinished/truncated.
    """
    voc, pairs = loadPrepareData()
    # NOTE(review): the same `choise` sample is reused for every one of the
    # n_iteration batches -- presumably each batch was meant to be sampled
    # independently; confirm before relying on this.
    choise = [random.choice(pairs) for _ in range(batch_size)]
    training_batches = [
        batch2TrainData(voc, choise) for _ in range(n_iteration)
    ]
    # model
    checkpoint = None
    print('Building encoder and decoder ...')
    encoder = EncoderRNN(voc, hidden_size, n_layers)
    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(voc, attn_model, hidden_size, n_layers)
    # optimizer
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    # initialize
    print('Initializing ...')
    start_iteration = 1
    perplexity = []
    print_loss = 0
    for iteration in tqdm(range(start_iteration, n_iteration + 1)):
        training_batch = training_batches[iteration - 1]
        # Unpacks the batch; no training step follows (see NOTE above).
        input_variable, lengths, target_variable, mask, max_target_len = training_batch
def runTest(n_layers, hidden_size, reverse, modelFile, beam_size, inp, corpus):
    """Restore an encoder/decoder checkpoint and evaluate it interactively
    (``inp`` truthy) or on 20 random corpus pairs."""
    voc, pairs = loadPrepareData(corpus)
    state = torch.load(modelFile)

    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    decoder = LuongAttnDecoderRNN('dot', embedding, hidden_size, voc.n_words, n_layers)
    encoder.load_state_dict(state['en'])
    decoder.load_state_dict(state['de'])

    # Eval mode: affects dropout / batch-norm only.
    for net in (encoder, decoder):
        net.train(False)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    if inp:
        evaluateInput(encoder, decoder, voc, beam_size)
    else:
        evaluateRandomly(encoder, decoder, voc, pairs, reverse, beam_size, 20)
def trainIters(args, corpus, reverse, n_epoch, learning_rate, batch_size, n_layers, hidden_size, print_every, loadFilename=None, attn_model='dot', decoder_learning_ratio=1.0):
    """Train the dual-encoder (user/business) expansion model with early
    stopping on validation loss; saves the best checkpoint per epoch.

    NOTE(review): `exit(0)` below makes everything after the data-size prints
    unreachable -- it looks like leftover debugging; confirm before removing.
    """
    print(args)
    currentDT = datetime.datetime.now()
    # Checkpoint directory is keyed by layer count, hidden size and timestamp.
    directory = os.path.join(
        save_dir, args.corpus, 'model',
        '{}_{}_{}'.format(n_layers, hidden_size, currentDT.strftime('%Y-%m-%d-%H:%M:%S')))
    print(directory)
    print(
        "corpus: {}, reverse={}, n_epoch={}, learning_rate={}, batch_size={}, n_layers={}, hidden_size={}, decoder_learning_ratio={}"
        .format(corpus, reverse, n_epoch, learning_rate, batch_size, n_layers, hidden_size, decoder_learning_ratio))
    data, length = loadPrepareData(args)
    print('load data...')
    print(len(data.train))
    print(len(data.dev))
    print(len(data.test))
    exit(0)  # NOTE(review): debugging exit -- all code below is dead
    user_length, item_length = length  #, user_length2, item_length2 = length
    # Build train/val/test batch lists from the user and item review texts.
    train_batches = batchify(data.train, data.user_text, user_length, data.item_text, item_length, batch_size, train_mask_idx=data.train_mask_idx, shuffle=True)
    val_batches = batchify(data.dev, data.user_text, user_length, data.item_text, item_length, batch_size)
    test_batches = batchify(data.test, data.user_text, user_length, data.item_text, item_length, batch_size)
    # model
    checkpoint = None
    print('Building encoder and decoder ...')
    embedding = nn.Embedding(data.voc.n_words, hidden_size)
    # Two encoders share one embedding: one for the user side, one for the
    # business/item side.
    encoderU = EncoderRNNlinear(data.voc.n_words, hidden_size, embedding, data.dmax, n_layers, args.encoder_dropout)
    encoderB = EncoderRNNlinear(data.voc.n_words, hidden_size, embedding, data.dmax, n_layers, args.encoder_dropout)
    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, data.voc.n_words, n_layers, args.decoder_dropout)
    if loadFilename:
        # Resume all three modules from a prior checkpoint.
        checkpoint = torch.load(loadFilename)
        encoderU.load_state_dict(checkpoint['enU'])
        encoderB.load_state_dict(checkpoint['enB'])
        decoder.load_state_dict(checkpoint['de'])
    # use cuda
    if USE_CUDA:
        encoderU = encoderU.cuda()
        encoderB = encoderB.cuda()
        decoder = decoder.cuda()
    # optimizer
    print('Building optimizers ...')
    encoderU_optimizer = optim.Adam(encoderU.parameters(), lr=learning_rate)
    encoderB_optimizer = optim.Adam(encoderB.parameters(), lr=learning_rate)
    # Decoder learns at a scaled rate relative to the encoders.
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoderU_optimizer.load_state_dict(checkpoint['enU_opt'])
        encoderB_optimizer.load_state_dict(checkpoint['enB_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])
    # initialize
    print('Initializing ...')
    start_epoch = 0
    perplexity = []
    best_val_loss = None
    print_loss = 0
    if loadFilename:
        start_epoch = checkpoint['epoch'] + 1
        perplexity = checkpoint['plt']
    for epoch in range(start_epoch, n_epoch):
        epoch_start_time = time.time()
        # train epoch
        encoderU.train()
        encoderB.train()
        decoder.train()
        print_loss = 0
        start_time = time.time()
        for batch, training_batch in enumerate(train_batches):
            input_variable, lengths, target_variable, mask, max_target_len = training_batch
            user_input_variable, business_input_variable = input_variable
            user_lengths, business_lengths = lengths
            # NOTE(review): `%` binds tighter than `+`, so this condition is
            # `batch + 5 == 5`, i.e. true only for batch 0 -- presumably
            # `(batch + 5) % 1000 == 5` was intended; confirm before fixing.
            if batch + 5 % 1000 == 5:
                print("user_lengths: ", user_lengths)
            loss = train(user_input_variable, business_input_variable, user_lengths, business_lengths, target_variable, mask, max_target_len, encoderU, encoderB, decoder, embedding, encoderU_optimizer, encoderB_optimizer, decoder_optimizer, batch_size)
            print_loss += loss
            perplexity.append(loss)
            #print("batch {} loss={}".format(batch, loss))
            if batch % print_every == 0 and batch > 0:
                cur_loss = print_loss / print_every
                elapsed = time.time() - start_time
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                        epoch, batch, len(train_batches), learning_rate,
                        elapsed * 1000 / print_every, cur_loss, math.exp(cur_loss)))
                print_loss = 0
                start_time = time.time()
        # evaluate
        val_loss = 0
        for val_batch in val_batches:
            input_variable, lengths, target_variable, mask, max_target_len = val_batch
            user_input_variable, business_input_variable = input_variable
            user_lengths, business_lengths = lengths
            loss = evaluate(user_input_variable, business_input_variable, user_lengths, business_lengths, target_variable, mask, max_target_len, encoderU, encoderB, decoder, embedding, encoderU_optimizer, encoderB_optimizer, decoder_optimizer, batch_size)
            val_loss += loss
        val_loss /= len(val_batches)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                         val_loss, math.exp(val_loss)))
        print('-' * 89)
        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save(
                {
                    'epoch': epoch,
                    'enU': encoderU.state_dict(),
                    'enB': encoderB.state_dict(),
                    'de': decoder.state_dict(),
                    'enU_opt': encoderU_optimizer.state_dict(),
                    'enB_opt': encoderB_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': loss,
                    'plt': perplexity
                },
                os.path.join(
                    directory,
                    '{}_{}.tar'.format(epoch, filename(reverse, 'expansion_model'))))
            best_val_loss = val_loss
        # Run on test data.
        test_loss = 0
        for test_batch in test_batches:
            input_variable, lengths, target_variable, mask, max_target_len = test_batch
            user_input_variable, business_input_variable = input_variable
            user_lengths, business_lengths = lengths
            loss = evaluate(user_input_variable, business_input_variable, user_lengths, business_lengths, target_variable, mask, max_target_len, encoderU, encoderB, decoder, embedding, encoderU_optimizer, encoderB_optimizer, decoder_optimizer, batch_size)
            test_loss += loss
        test_loss /= len(test_batches)
        print('-' * 89)
        print('| test loss {:5.2f} | test ppl {:8.2f}'.format(
            test_loss, math.exp(test_loss)))
        print('-' * 89)
        # Early stop as soon as validation loss worsens relative to the best.
        if val_loss > best_val_loss:
            break
def trainIters(corpus, reverse, n_epoch, learning_rate, batch_size, n_layers, hidden_size, print_every, loadFilename=None, attn_model='dot', decoder_learning_ratio=5.0):
    """Train the attr2seq text decoder with cached batches, per-epoch
    validation, best-checkpoint saving and early stopping."""
    print(
        "corpus: {}, reverse={}, n_epoch={}, learning_rate={}, batch_size={}, n_layers={}, hidden_size={}, decoder_learning_ratio={}"
        .format(corpus, reverse, n_epoch, learning_rate, batch_size, n_layers, hidden_size, decoder_learning_ratio))
    voc, pairs, valid_pairs, test_pairs = loadPrepareData(corpus)
    print('load data...')
    path = "data/attr2seq"
    # training data
    corpus_name = corpus
    training_batches = None
    # Batches are cached on disk; regenerate only when the cache is missing.
    try:
        training_batches = torch.load(
            os.path.join(
                save_dir, path,
                '{}_{}.tar'.format(filename(reverse, 'training_batches'), batch_size)))
    except FileNotFoundError:
        print('Training pairs not found, generating ...')
        training_batches = batchify(pairs, batch_size, voc, reverse)
        print('Complete building training pairs ...')
        torch.save(
            training_batches,
            os.path.join(
                save_dir, path,
                '{}_{}.tar'.format(filename(reverse, 'training_batches'), batch_size)))
    # validation/test data
    eval_batch_size = 10
    try:
        val_batches = torch.load(
            os.path.join(
                save_dir, path,
                '{}_{}.tar'.format(filename(reverse, 'val_batches'), eval_batch_size)))
    except FileNotFoundError:
        print('Validation pairs not found, generating ...')
        # evaluation=True: batches built for no-grad evaluation.
        val_batches = batchify(valid_pairs, eval_batch_size, voc, reverse, evaluation=True)
        print('Complete building validation pairs ...')
        torch.save(
            val_batches,
            os.path.join(
                save_dir, path,
                '{}_{}.tar'.format(filename(reverse, 'val_batches'), eval_batch_size)))
    try:
        test_batches = torch.load(
            os.path.join(
                save_dir, path,
                '{}_{}.tar'.format(filename(reverse, 'test_batches'), eval_batch_size)))
    except FileNotFoundError:
        print('Test pairs not found, generating ...')
        test_batches = batchify(test_pairs, eval_batch_size, voc, reverse, evaluation=True)
        print('Complete building test pairs ...')
        torch.save(
            test_batches,
            os.path.join(
                save_dir, path,
                '{}_{}.tar'.format(filename(reverse, 'test_batches'), eval_batch_size)))
    # model
    checkpoint = None
    print('Building encoder and decoder ...')
    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    attn_model = 'dot'
    decoder = DecoderRNN(embedding, hidden_size, voc.n_words, n_layers)
    if loadFilename:
        # Resume model weights from a prior checkpoint.
        checkpoint = torch.load(loadFilename)
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])
    # use cuda
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    # optimizer
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    # Decoder learns at a scaled rate relative to the encoder.
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(checkpoint['en_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])
    # initialize
    print('Initializing ...')
    start_epoch = 0
    perplexity = []
    best_val_loss = None
    print_loss = 0
    if loadFilename:
        start_epoch = checkpoint['epoch'] + 1
        perplexity = checkpoint['plt']
    for epoch in range(start_epoch, n_epoch):
        epoch_start_time = time.time()
        # train epoch
        encoder.train()
        decoder.train()
        print_loss = 0
        start_time = time.time()
        for batch, training_batch in enumerate(training_batches):
            # Attribute variables are unpacked but only the text side is
            # passed to train() here.
            input_variable_attr, input_variable, lengths, target_variable, mask, max_target_len = training_batch
            loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size)
            print_loss += loss
            perplexity.append(loss)
            #print("batch{} loss={}".format(batch, loss))
            if batch % print_every == 0 and batch > 0:
                cur_loss = print_loss / print_every
                elapsed = time.time() - start_time
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                        epoch, batch, len(training_batches), learning_rate,
                        elapsed * 1000 / print_every, cur_loss, math.exp(cur_loss)))
                print_loss = 0
                start_time = time.time()
        # evaluate
        val_loss = 0
        for val_batch in val_batches:
            input_variable_attr, input_variable, lengths, target_variable, mask, max_target_len = val_batch
            loss = evaluate(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding, encoder_optimizer, decoder_optimizer, eval_batch_size)
            val_loss += loss
        val_loss /= len(val_batches)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                         val_loss, math.exp(val_loss)))
        print('-' * 89)
        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            directory = os.path.join(save_dir, 'model', '{}_{}'.format(n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save(
                {
                    'epoch': epoch,
                    'en': encoder.state_dict(),
                    'de': decoder.state_dict(),
                    'en_opt': encoder_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': loss,
                    'plt': perplexity
                },
                os.path.join(
                    directory,
                    '{}_{}.tar'.format(epoch, filename(reverse, 'text_decoder_model'))))
            best_val_loss = val_loss
        # Run on test data.
        test_loss = 0
        for test_batch in test_batches:
            input_variable_attr, input_variable, lengths, target_variable, mask, max_target_len = test_batch
            loss = evaluate(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding, encoder_optimizer, decoder_optimizer, eval_batch_size)
            test_loss += loss
        test_loss /= len(test_batches)
        print('-' * 89)
        print('| test loss {:5.2f} | test ppl {:8.2f}'.format(
            test_loss, math.exp(test_loss)))
        print('-' * 89)
        # Early stop as soon as validation loss worsens relative to the best.
        if val_loss > best_val_loss:
            break
def train_word_vector(corpus, n_iteration, hidden_size, context_size, learning_rate, batch_size, loadFilename=None):
    """Train (or resume) an NGramLanguageModeler on trigrams sampled from the
    corpus, checkpointing every 500 iterations and at the end.

    ``batch_size`` here is the number of *sentences* sampled to build the
    trigram set, not a minibatch size -- training is one trigram at a time.
    """
    voc, pairs = loadPrepareData(corpus)
    corpus_name = os.path.split(corpus)[-1].split('.')[0]
    CONTEXT_SIZE = context_size
    EMBEDDING_DIM = hidden_size
    # Trigrams are cached on disk; regenerate only when the cache is missing.
    try:
        trigrams = torch.load(os.path.join(save_dir, 'training_data', corpus_name, '{}_{}_{}.tar'.format(n_iteration, \
                                                                                                          'training_batches', \
                                                                                                          batch_size)))
    except FileNotFoundError:
        # Sample batch_size sentences (input side of random pairs), wrap them
        # in SOS/EOS markers, then enumerate all ([w_i, w_i+1], w_i+2) trigrams.
        test_sentence = []
        for i in range(batch_size):
            pair = random.choice(pairs)
            test_sentence.append(pair[0].split())
            test_sentence[i].insert(0, "SOS")
            test_sentence[i].append("EOS")
        #print(test_sentence[:3])
        trigrams = []
        for j in range(len(test_sentence)):
            for i in range(len(test_sentence[j]) - 2):
                trigram = ([test_sentence[j][i], test_sentence[j][i + 1]], test_sentence[j][i + 2])
                trigrams.append(trigram)
        torch.save(trigrams, os.path.join(save_dir, 'training_data', corpus_name, '{}_{}_{}.tar'.format(n_iteration, \
                                                                                                         'training_batches', \
                                                                                                         batch_size)))
    #print the first 3, just so you can see what they look like
    #print(trigrams[:30])
    #print(voc.n_words())
    #vocab = set(test_sentence)
    #word_to_ix = {word: i for i, word in enumerate(vocab)}
    losses = []
    loss_function = nn.NLLLoss()
    model = NGramLanguageModeler(voc.n_words, EMBEDDING_DIM, CONTEXT_SIZE)
    if loadFilename:
        # Resume model weights from a prior checkpoint.
        checkpoint = torch.load(loadFilename)
        model.load_state_dict(checkpoint['w2v'])
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    if loadFilename:
        optimizer.load_state_dict(checkpoint['w2v_opt'])
    print("There are {} trigrams.".format(len(trigrams)))
    print("Total {} iterations.".format(n_iteration))
    start_iteration = 1
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1
        losses = checkpoint['losses']
    print("{} iterations left...".format(n_iteration - start_iteration + 1))
    for iteration in tqdm(range(start_iteration, n_iteration + 1)):
        total_loss = torch.Tensor([0])
        for context, target in trigrams:
            # Step 1. Prepare the inputs to be passed to the model (i.e, turn
            # the words into integer indices and wrap them in variables);
            # unseen words map to the UNK index.
            context_idxs = []
            for w in context:
                if w not in voc.word2index:
                    context_idxs.append(voc.word2index['UNK'])
                else:
                    context_idxs.append(voc.word2index[w])
            #context_idxs = [voc.word2index[w] for w in context]
            context_var = Variable(torch.LongTensor(context_idxs))
            # Step 2. Recall that torch *accumulates* gradients. Before passing
            # in a new instance, you need to zero out the gradients from the
            # old instance.
            model.zero_grad()
            # Step 3. Run the forward pass, getting log probabilities over
            # next words.
            log_probs, embeds = model(context_var)
            # Step 4. Compute your loss function. (Again, Torch wants the
            # target word wrapped in a variable.)
            if target not in voc.word2index:
                target = 'UNK'
            loss = loss_function(log_probs, Variable(
                torch.LongTensor([voc.word2index[target]])))
            # Step 5. Do the backward pass and update the gradient.
            loss.backward()
            optimizer.step()
            total_loss += loss.data
        losses.append(total_loss)
        # Periodic backup checkpoint.
        save_every = 500
        if (iteration % save_every == 0):
            directory = os.path.join(save_dir, 'model', corpus_name, 'hi{}_ba{}'.format(hidden_size, batch_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'iteration': iteration,
                'w2v': model.state_dict(),
                'w2v_opt': optimizer.state_dict(),
                'loss': loss,
                'losses': losses
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'backup_w2v_model')))
    print('\n')
    print("Training completed!")
    print('\n')
    print("Loss: {}".format(losses))  # The loss decreased every iteration over the training data!
    # Final checkpoint after the last iteration.
    directory = os.path.join(save_dir, 'model', corpus_name, 'hi{}_ba{}'.format(hidden_size, batch_size))
    if not os.path.exists(directory):
        os.makedirs(directory)
    torch.save({
        'iteration': n_iteration,
        'w2v': model.state_dict(),
        'w2v_opt': optimizer.state_dict(),
        'loss': loss,
        'losses': losses
    }, os.path.join(directory, '{}_{}.tar'.format(n_iteration, 'backup_w2v_model')))
def trainIters(corpus, learning_rate, lr_decay_epoch, lr_decay_ratio, batch_size,
               n_layers, hidden_size, embed_size, attr_size, attr_num, overall,
               save_dir, loadFilename=None):
    """Train the attribute/sketch/review generation model.

    Builds (or loads cached) train/val/test batches, assembles the three-part
    network (attribute encoder, sketch BiRNN encoder, review decoder), then
    loops epochs: train, validate, checkpoint on best validation loss, run the
    test set, and stop once validation loss exceeds the best seen so far.

    corpus         -- corpus name (used for checkpoint/log directory names)
    learning_rate  -- initial Adam learning rate for all three optimizers
    lr_decay_epoch, lr_decay_ratio -- schedule handed to adjust_learning_rate
    batch_size     -- training batch size (eval batches are fixed at 10)
    n_layers       -- RNN layer count for encoders/decoder
    hidden_size, embed_size, attr_size, attr_num -- model dimensions
    overall        -- number of overall-rating values (one-hot embedded)
    save_dir       -- root directory for batches, pickles and model checkpoints
    loadFilename   -- optional checkpoint to resume from

    Fix vs. original: `os.path.joion` -> `os.path.join` (the typo raised
    AttributeError the first time a best-validation save was attempted).
    """
    print("corpus={}, learning_rate={}, lr_decay_epoch={}, lr_decay_ratio={}, "
          "batch_size={}, n_layers={}, hidden_size={}, embed_size={}, "
          "attr_size={}, attr_num={}, overall={}, save_dir={}".format(
              corpus, learning_rate, lr_decay_epoch, lr_decay_ratio, batch_size,
              n_layers, hidden_size, embed_size, attr_size, attr_num, overall,
              save_dir))
    print('load data...')
    vocab, train_pairs, valid_pairs, test_pairs = loadPrepareData(corpus, save_dir)
    print('finish load data...')
    data_path = os.path.join(save_dir, "batches")
    # --- training batches: load cache or build from train_pairs -------------
    corpus_name = corpus
    training_batches = None
    try:
        training_batches = torch.load(
            os.path.join(data_path,
                         '{}_{}.tar'.format('training_batches', batch_size)))
    except FileNotFoundError:
        print('Training pairs not found, generating ...')
        training_batches = batchify(train_pairs, batch_size, vocab)
        print('Complete building training pairs ...')
        torch.save(training_batches,
                   os.path.join(data_path,
                                '{}_{}.tar'.format('training_batches', batch_size)))
    # --- validation/test batches (evaluation needs no gradients) ------------
    eval_batch_size = 10
    try:
        val_batches = torch.load(
            os.path.join(data_path,
                         '{}_{}.tar'.format('val_batches', eval_batch_size)))
    except FileNotFoundError:
        print('Validation pairs not found, generating ...')
        val_batches = batchify(valid_pairs, eval_batch_size, vocab, evaluation=True)
        print('Complete building validation pairs ...')
        torch.save(val_batches,
                   os.path.join(data_path,
                                '{}_{}.tar'.format('val_batches', eval_batch_size)))
    try:
        test_batches = torch.load(
            os.path.join(data_path,
                         '{}_{}.tar'.format('test_batches', eval_batch_size)))
    except FileNotFoundError:
        print('Test pairs not found, generating ...')
        test_batches = batchify(test_pairs, eval_batch_size, vocab, evaluation=True)
        print('Complete building test pairs ...')
        torch.save(test_batches,
                   os.path.join(data_path,
                                '{}_{}.tar'.format('test_batches', eval_batch_size)))
    # --- aspect word ids -----------------------------------------------------
    with open(os.path.join(save_dir, 'aspect_ids.pkl'), 'rb') as fp:
        ids = pickle.load(fp)
    # --- model ---------------------------------------------------------------
    checkpoint = None
    print('Building encoder and decoder ...')
    # topic encoder: user/item/overall-rating attribute embeddings
    with open(os.path.join(save_dir, 'user.pkl'), 'rb') as fp:
        user_dict = pickle.load(fp)
    with open(os.path.join(save_dir, 'item.pkl'), 'rb') as fp:
        item_dict = pickle.load(fp)
    num_user = len(user_dict)
    num_item = len(item_dict)
    num_over = overall
    attr_embeddings = []
    uemb = nn.Embedding(num_user, attr_size)
    attr_embeddings.append(uemb)
    iemb = nn.Embedding(num_item, attr_size)
    attr_embeddings.append(iemb)
    # overall rating embedded as a fixed one-hot block padded out to attr_size
    remb = from_pretrained(
        torch.cat((torch.eye(num_over),
                   torch.zeros(num_over, attr_size - num_over)), dim=1))
    attr_embeddings.append(remb)
    if USE_CUDA:
        # Module.cuda() moves parameters in place, so mutating the loop
        # variable still affects the modules stored in attr_embeddings.
        for attr_embedding in attr_embeddings:
            attr_embedding = attr_embedding.cuda()
    encoder = AttributeEncoder(attr_size, attr_num, hidden_size,
                               attr_embeddings, n_layers)
    # sketch encoder
    sketch_embedding = nn.Embedding(vocab.n_sketchs, embed_size)
    if USE_CUDA:
        sketch_embedding = sketch_embedding.cuda()
    birnn_encoder = EncoderRNN(embed_size, hidden_size, sketch_embedding, n_layers)
    # review decoder (fresh embeddings; sketch_embedding is rebuilt on purpose
    # so the decoder does not share weights with the sketch encoder)
    topic_embedding = nn.Embedding(vocab.n_topics, embed_size)
    sketch_embedding = nn.Embedding(vocab.n_sketchs, embed_size)
    word_embedding = nn.Embedding(vocab.n_words, embed_size)
    if USE_CUDA:
        topic_embedding = topic_embedding.cuda()
        sketch_embedding = sketch_embedding.cuda()
        word_embedding = word_embedding.cuda()
    aspect_ids = nn.Embedding(vocab.n_topics - 3, 100)  # remove [SOS] [EOS] [PAD]
    aspect_ids.weight.data.copy_(torch.from_numpy(np.array(ids)))
    aspect_ids.weight.requires_grad = False  # frozen lookup table
    attn_model = 'dot'
    review_decoder = ReviewAttnDecoderRNN(topic_embedding, sketch_embedding,
                                          word_embedding, embed_size, hidden_size,
                                          attr_size, vocab.n_words, aspect_ids,
                                          n_layers)
    if loadFilename:
        checkpoint = torch.load(loadFilename)
        encoder.load_state_dict(checkpoint['encoder'])
        birnn_encoder.load_state_dict(checkpoint['birnn_encoder'])
        review_decoder.load_state_dict(checkpoint['review_decoder'])
    # use cuda
    if USE_CUDA:
        encoder = encoder.cuda()
        birnn_encoder = birnn_encoder.cuda()
        review_decoder = review_decoder.cuda()
    # --- optimizers (frozen params, e.g. aspect_ids, are excluded) -----------
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, encoder.parameters()), lr=learning_rate)
    birnn_encoder_optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, birnn_encoder.parameters()),
        lr=learning_rate)
    review_decoder_optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, review_decoder.parameters()),
        lr=learning_rate)
    if loadFilename:
        encoder_optimizer.load_state_dict(checkpoint['encoder_opt'])
        birnn_encoder_optimizer.load_state_dict(checkpoint['birnn_encoder_opt'])
        review_decoder_optimizer.load_state_dict(checkpoint['review_decoder_opt'])
    # --- bookkeeping / resume ------------------------------------------------
    print('Initializing ...')
    step = 0
    epoch = 0
    perplexity = []
    _loss = []
    log_path = os.path.join('ckpt/' + corpus_name)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    writer = SummaryWriter(log_path)
    best_val_loss = None
    if loadFilename:
        step = checkpoint['step']
        epoch = checkpoint['epoch'] + 1
        perplexity = checkpoint['plt']
        _loss = checkpoint['loss']
        # replay history so TensorBoard curves stay continuous after resume
        for i in range(len(_loss)):
            writer.add_scalar("Train/loss", _loss[i], i)
            writer.add_scalar("Train/perplexity", perplexity[i], i)
    # --- epoch loop: train, validate, checkpoint-on-best, test, early stop ---
    while True:
        # learning rate adjust
        adjust_learning_rate(encoder_optimizer, epoch, learning_rate,
                             lr_decay_epoch, lr_decay_ratio)
        adjust_learning_rate(birnn_encoder_optimizer, epoch, learning_rate,
                             lr_decay_epoch, lr_decay_ratio)
        adjust_learning_rate(review_decoder_optimizer, epoch, learning_rate,
                             lr_decay_epoch, lr_decay_ratio)
        # train epoch
        encoder.train()
        birnn_encoder.train()
        review_decoder.train()
        tr_loss = 0
        for batch_idx, training_batch in enumerate(training_batches):
            attr_input, topic_input, sketch_output, review_input, review_output, mask = training_batch
            loss = train(attr_input, topic_input, sketch_output, review_input,
                         review_output, mask, encoder, birnn_encoder,
                         review_decoder, encoder_optimizer,
                         birnn_encoder_optimizer, review_decoder_optimizer)
            step += 1
            tr_loss += loss
            _loss.append(loss)
            perplexity.append(math.exp(loss))
            writer.add_scalar("Train/loss", loss, step)
            writer.add_scalar("Train/perplexity", math.exp(loss), step)
            print("epoch {} batch {} loss={} perplexity={} en_lr={:05.5f} bi_lr={:05.5f} de_lr={:05.5f}"
                  .format(epoch, batch_idx, loss, math.exp(loss),
                          encoder_optimizer.param_groups[0]['lr'],
                          birnn_encoder_optimizer.param_groups[0]['lr'],
                          review_decoder_optimizer.param_groups[0]['lr']))
        cur_loss = tr_loss / len(training_batches)
        print('\n' + '-' * 30)
        print('train | epoch {:3d} | average loss {:5.5f} | average ppl {:8.3f}'.
              format(epoch, cur_loss, math.exp(cur_loss)))
        print('-' * 30)
        # evaluate
        vl_loss = 0
        for val_batch in val_batches:
            attr_input, topic_input, sketch_output, review_input, review_output, mask = val_batch
            loss = evaluate(attr_input, topic_input, sketch_output, review_input,
                            review_output, mask, encoder, birnn_encoder,
                            review_decoder, encoder_optimizer,
                            birnn_encoder_optimizer, review_decoder_optimizer)
            vl_loss += loss
        vl_loss /= len(val_batches)
        writer.add_scalar("Valid/loss", vl_loss, step)
        print('\n' + '-' * 30)
        print('valid | epoch {:3d} | valid loss {:5.5f} | valid ppl {:8.3f}'.
              format(epoch, vl_loss, math.exp(vl_loss)))
        print('-' * 30)
        # Save the model if the validation loss is the best we've seen so far.
        model_path = os.path.join(save_dir, "model")  # fixed: was os.path.joion
        if not best_val_loss or vl_loss < best_val_loss:
            directory = os.path.join(
                model_path, '{}_{}_{}'.format(n_layers, hidden_size, batch_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save(
                {
                    'step': step,
                    'epoch': epoch,
                    'encoder': encoder.state_dict(),
                    'encoder_opt': encoder_optimizer.state_dict(),
                    'birnn_encoder': birnn_encoder.state_dict(),
                    'birnn_encoder_opt': birnn_encoder_optimizer.state_dict(),
                    'review_decoder': review_decoder.state_dict(),
                    'review_decoder_opt': review_decoder_optimizer.state_dict(),
                    'loss': _loss,
                    'plt': perplexity
                },
                os.path.join(directory,
                             '{}_{}.tar'.format(epoch, 'review_model')))
            best_val_loss = vl_loss
        # Run on test data.
        ts_loss = 0
        for test_batch in test_batches:
            attr_input, topic_input, sketch_output, review_input, review_output, mask = test_batch
            loss = evaluate(attr_input, topic_input, sketch_output, review_input,
                            review_output, mask, encoder, birnn_encoder,
                            review_decoder, encoder_optimizer,
                            birnn_encoder_optimizer, review_decoder_optimizer)
            ts_loss += loss
        ts_loss /= len(test_batches)
        writer.add_scalar("Test/loss", ts_loss, step)
        print('\n' + '-' * 30)
        print('| test loss {:5.2f} | test ppl {:8.2f}'.format(
            ts_loss, math.exp(ts_loss)))
        print('-' * 30 + '\n')
        # early stop: this epoch's validation loss exceeded the best recorded
        if vl_loss > best_val_loss:
            print('validation loss is larger than best validation loss. Break!')
            break
        epoch += 1
def trainIters(corpus, reverse, n_iteration, learning_rate, batch_size, n_layers,
               hidden_size, print_every, save_every, dropout, loadFilename=None,
               attn_model='dot', decoder_learning_ratio=5.0):
    """Seq2seq training loop using a precomputed concatenated-embedding dict.

    Each of the n_iteration training batches is built from batch_size randomly
    chosen pairs (no epoch structure).  Loss history and a full checkpoint are
    written every save_every iterations; average perplexity is printed every
    print_every iterations.  loadFilename resumes model/optimizer/history.
    """
    voc, pairs = loadPrepareData(corpus)
    # embedding_dict maps tokens to concatenated vectors; presumably consumed
    # inside train() instead of an nn.Embedding layer -- TODO confirm.
    embedding_dict = concate_embedding(pairs, voc, hidden_size)
    # training data: reuse cached batches if a previous run saved them
    corpus_name = os.path.split(corpus)[-1].split('.')[0]
    training_batches = None
    try:
        training_batches = torch.load(
            os.path.join(save_dir, 'training_data', corpus_name,
                         '{}_{}_{}.tar'.format(n_iteration,
                                               filename(reverse, 'training_batches'),
                                               batch_size)))
    except FileNotFoundError:
        print('Generating training batches...')
        training_batches = [
            batch2TrainData([random.choice(pairs) for _ in range(batch_size)],
                            voc, reverse) for _ in range(n_iteration)
        ]
        torch.save(training_batches,
                   os.path.join(save_dir, 'training_data', corpus_name,
                                '{}_{}_{}.tar'.format(n_iteration,
                                                      filename(reverse, 'training_batches'),
                                                      batch_size)))
    # model
    checkpoint = None
    print('Building encoder and decoder ...')
    encoder = EncoderRNN(hidden_size, batch_size, n_layers, dropout)
    attn_model = 'dot'  # forced to 'dot' regardless of the parameter value
    decoder = LuongAttnDecoderRNN(attn_model, hidden_size, batch_size,
                                  voc.loc_count, n_layers, dropout)
    if loadFilename:
        checkpoint = torch.load(loadFilename)
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])
    # use cuda
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    # optimizer: decoder learns decoder_learning_ratio times faster
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(checkpoint['en_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])
    # initialize (resume iteration counter and perplexity history if needed)
    print('Initializing ...')
    start_iteration = 1
    perplexity = []
    print_loss = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1
        perplexity = checkpoint['plt']
    for iteration in tqdm(range(start_iteration, n_iteration + 1)):
        training_batch = training_batches[iteration - 1]
        input_vec, input_lengths, target_vec, max_target_len = training_batch
        # print("input_lengths:", input_lengths)
        loss = train(input_vec, input_lengths, target_vec, max_target_len,
                     encoder, decoder, embedding_dict, encoder_optimizer,
                     decoder_optimizer, batch_size)
        print_loss += loss
        perplexity.append(loss)
        if iteration % print_every == 0:
            # exp of the mean loss over the window = perplexity
            print_loss_avg = math.exp(print_loss / print_every)
            print('%d %d%% %.4f' % (iteration, iteration / n_iteration * 100,
                                    print_loss_avg))
            print_loss = 0
        if (iteration % save_every == 0):
            directory = os.path.join(
                save_dir, 'model', corpus_name,
                '{}-{}_{}'.format(n_layers, batch_size, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save(
                {
                    'iteration': iteration,
                    'en': encoder.state_dict(),
                    'de': decoder.state_dict(),
                    'en_opt': encoder_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': loss,
                    'plt': perplexity
                },
                os.path.join(
                    directory,
                    '{}_{}.tar'.format(iteration,
                                       filename(reverse, 'backup_bidir_model'))))
new_pair_batch = [] end_of_group = [] count = 0 dict_pairs = {} for group in pair_batch: for pair in group: new_pair_batch.append(pair) count += 1 dict_pairs[count - 1] = pair end_of_group.append(count) return new_pair_batch, end_of_group, dict_pairs corpus_index = './data/movie_conversations.txt' corpus = './data/movie_lines.txt' voc, pairs = loadPrepareData(corpus, corpus_index, 3) #pprint.pprint(pairs[0]) #print("length of pair: ",len(pairs)) pairs = pairs_transform(pairs) #pprint.pprint(pairs[:5]) class EncoderRNN(nn.Module): def __init__(self, input_size, hidden_size, embedding, n_layers=1, dropout=0.1): super(EncoderRNN, self).__init__() self.n_layers = n_layers
def trainIters(corpus, learning_rate, lr_decay_epoch, lr_decay_ratio, weight_decay,
               batch_size, rnn_layers, hidden_size, embed_size, node_size, epochs,
               save_dir, load_file=None):
    """Train the ReviewModel for up to `epochs` epochs with per-epoch
    validation, checkpoint-on-best and early stopping.

    corpus        -- corpus name (also used for the TensorBoard log directory)
    learning_rate, weight_decay -- Adam hyperparameters
    lr_decay_epoch, lr_decay_ratio -- schedule handed to adjust_learning_rate
    batch_size    -- training batch size (validation batches are fixed at 10)
    rnn_layers, hidden_size, embed_size, node_size -- model dimensions
    save_dir      -- root directory for cached batches and model checkpoints
    load_file     -- optional checkpoint to resume from
    """
    print('load data...')
    vocabs, train_pairs, valid_pairs, test_pairs = loadPrepareData(corpus, save_dir)
    print('load data finish...')
    data_path = os.path.join(save_dir, "batches")
    if not os.path.exists(data_path):
        os.makedirs(data_path)
    corpus_name = corpus
    # training batches: load cache or build from train_pairs
    try:
        training_batches = torch.load(
            os.path.join(data_path,
                         '{}_{}.tar'.format('training_batches', batch_size)))
    except FileNotFoundError:
        print('Training pairs not found, generating ...')
        training_batches = batchify(train_pairs, batch_size, vocabs)
        print('Complete building training pairs ...')
        torch.save(training_batches,
                   os.path.join(data_path,
                                '{}_{}.tar'.format('training_batches', batch_size)))
    # validation/test data
    eval_batch_size = 10
    try:
        val_batches = torch.load(
            os.path.join(data_path,
                         '{}_{}.tar'.format('val_batches', eval_batch_size)))
    except FileNotFoundError:
        print('Validation pairs not found, generating ...')
        val_batches = batchify(valid_pairs, eval_batch_size, vocabs)
        print('Complete building validation pairs ...')
        torch.save(val_batches,
                   os.path.join(data_path,
                                '{}_{}.tar'.format('val_batches', eval_batch_size)))
    print('Building review model ...')
    review_model = ReviewModel(vocabs, embed_size, node_size, hidden_size,
                               rnn_layers).to(device)
    print('Building optimizers ...')
    review_optimizer = optim.Adam(review_model.parameters(), lr=learning_rate,
                                  weight_decay=weight_decay)
    # bookkeeping / resume
    print('Initializing ...')
    global_step = 1
    last_epoch = 1
    perplexities = []
    losses = []
    best_val_loss = None
    log_path = os.path.join('ckpt/' + corpus_name)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    writer = SummaryWriter(log_path)
    if load_file:
        checkpoint = torch.load(load_file)
        review_model.load_state_dict(checkpoint['review_model'])
        global_step = checkpoint['global_step']
        last_epoch = checkpoint['epoch'] + 1
        perplexities = checkpoint['perplexity']
        losses = checkpoint['loss']
        # replay history so TensorBoard curves stay continuous after resume
        for i in range(len(losses)):
            writer.add_scalar("Train/loss", losses[i], i)
            writer.add_scalar("Train/perplexity", perplexities[i], i)
    for epoch in tqdm(range(last_epoch, epochs + 1), desc="Epoch: ", leave=True):
        # train epoch
        review_model.train()
        tr_loss = 0
        steps = trange(len(training_batches), desc="Train Loss")
        for step in steps:
            context_input, aspect_input, review_input, review_output, extend_input = training_batches[step]
            loss = train(context_input, aspect_input, review_input, review_output,
                         extend_input, review_model, review_optimizer)
            global_step += 1
            tr_loss += loss
            losses.append(loss)
            perplexities.append(math.exp(loss))
            writer.add_scalar("Train/loss", loss, global_step)
            writer.add_scalar("Train/perplexity", math.exp(loss), global_step)
            steps.set_description("ReviewModel (Loss=%g, PPL=%g)" %
                                  (round(loss, 4), round(math.exp(loss), 4)))
        cur_loss = tr_loss / len(training_batches)
        cur_ppl = math.exp(cur_loss)
        print('\nTrain | Epoch: {:3d} | Avg Loss={:4.4f} | Avg PPL={:4.4f}\n'.format(
            epoch, cur_loss, cur_ppl))
        # evaluate (no gradients needed)
        review_model.eval()
        with torch.no_grad():
            vl_loss = 0
            for val_batch in val_batches:
                context_input, aspect_input, review_input, review_output, extend_input = val_batch
                loss = evaluate(context_input, aspect_input, review_input,
                                review_output, extend_input, review_model)
                vl_loss += loss
            vl_loss /= len(val_batches)
        vl_ppl = math.exp(vl_loss)
        writer.add_scalar("Valid/loss", vl_loss, global_step)
        writer.add_scalar("Valid/perplexity", vl_ppl, global_step)
        print('\nValid | Epoch: {:3d} | Avg Loss={:4.4f} | Avg PPL={:4.4f}\n'.format(
            epoch, vl_loss, vl_ppl))
        # Save the model if the validation loss is the best we've seen so far.
        model_path = os.path.join(save_dir, "model")
        if not best_val_loss or vl_loss < best_val_loss:
            directory = os.path.join(
                model_path, '{}_{}_{}'.format(batch_size, hidden_size, rnn_layers))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'global_step': global_step,
                'epoch': epoch,
                'review_model': review_model.state_dict(),
                'loss': losses,
                'perplexity': perplexities
            }, os.path.join(directory,
                            '{}_{}_{}.tar'.format(epoch, round(vl_loss, 4),
                                                  'review_model')))
            best_val_loss = vl_loss
        # early stop: this epoch's validation loss exceeded the best recorded
        if vl_loss > best_val_loss:
            print('validation loss is larger than best validation loss. Break!')
            break
        # learning rate adjust
        adjust_learning_rate(review_optimizer, epoch - last_epoch + 1,
                             learning_rate, lr_decay_epoch, lr_decay_ratio)
decoder = decoder.to(device) if inp: evaluateInput(encoder, decoder, voc, beam_size) else: evaluateRandomly(encoder, decoder, voc, pairs, reverse, beam_size, 20) if __name__ == '__main__': torch.set_grad_enabled(False) hidden_size = 512 n_layers = 1 attn_mode = 'dot' pinyin_voc, word_voc, pairs = loadPrepareData('data/touchpal_done.txt') pinyin_embedding = nn.Embedding(pinyin_voc.n_words, hidden_size) word_embedding = nn.Embedding(word_voc.n_words, hidden_size) encoder = EncoderRNN(pinyin_voc.n_words, hidden_size, pinyin_embedding, n_layers) decoder = LuongAttnDecoderRNN(attn_mode, word_embedding, hidden_size, word_voc.n_words, n_layers) checkpoint = torch.load( 'save/model/touchpal_done/1-1_512/6000_backup_bidir_model.tar') encoder.load_state_dict(checkpoint['en']) decoder.load_state_dict(checkpoint['de']) encoder.train(False) decoder.train(False)
help="Pretrained fine-tuned model.") args = parser.parse_args() print(args) # Load pre-trained model (weights) model_version = 'bert-base-uncased' model = BertMLMDecoder.from_pretrained(model_version) model_file = args.model_file model.load_state_dict(torch.load(model_file)) model.eval() cuda = torch.cuda.is_available() if cuda: model = model.cuda() data, length = loadPrepareData(args) user_length, item_length = length #, user_length2, item_length2 = length # Load pre-trained model tokenizer (vocabulary) tokenizer = BertTokenizer.from_pretrained( model_version, do_lower_case=model_version.endswith("uncased")) def tokenize_batch(batch): return [tokenizer.convert_tokens_to_ids(sent) for sent in batch] def untokenize_batch(batch): return [tokenizer.convert_ids_to_tokens(sent) for sent in batch]
def trainIters(corpus, reverse, n_iteration, learning_rate, batch_size, n_layers,
               hidden_size, print_every, save_every, dropout, loadFilename=None,
               attn_model='dot', decoder_learning_ratio=5.0):
    """Seq2seq training driver that appends progress lines to log.txt.

    Each of the n_iteration training batches is built from batch_size randomly
    chosen pairs (no epoch structure; the original TODO notes this should be
    reworked into epochs).  A full checkpoint is written every save_every
    iterations; a timestamped average-perplexity line is appended to log.txt
    every print_every iterations.  loadFilename resumes model/optimizer state.

    Fixes vs. original: the log-line local was named `str` (shadowing the
    builtin) and `import time` ran inside the logging branch on every
    print_every-th iteration; the import is hoisted and the local renamed.
    """
    import time  # used for the timestamped log lines below

    voc, pairs = loadPrepareData(corpus)
    # training data: reuse cached batches if a previous run saved them
    corpus_name = os.path.split(corpus)[-1].split('.')[0]
    training_batches = None
    try:
        training_batches = torch.load(
            os.path.join(save_dir, 'training_data', corpus_name,
                         '{}_{}_{}.tar'.format(n_iteration,
                                               filename(reverse, 'training_batches'),
                                               batch_size)))
    except FileNotFoundError:
        print('Training pairs not found, generating ...')
        # NOTE(review): unlike the sibling trainIters variants, this one does
        # not cache the generated batches with torch.save.
        training_batches = [
            batch2TrainData(voc,
                            [random.choice(pairs) for _ in range(batch_size)],
                            reverse) for _ in range(n_iteration)
        ]
    # model
    checkpoint = None
    print('Building encoder and decoder ...')
    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers, dropout)
    attn_model = 'dot'  # forced to 'dot' regardless of the parameter value
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.n_words, n_layers, dropout)
    if loadFilename:
        checkpoint = torch.load(loadFilename)
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])
    # use cuda
    # if torch.cuda.device_count()>1:
    #     encoder=nn.DataParallel(encoder)
    #     decoder=nn.DataParallel(decoder)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    # optimizer: decoder learns decoder_learning_ratio times faster
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(checkpoint['en_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])
    # initialize (resume iteration counter and perplexity history if needed)
    print('Initializing ...')
    start_iteration = 1
    perplexity = []
    print_loss = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1
        perplexity = checkpoint['plt']
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        input_variable, lengths, target_variable, mask, max_target_len = training_batch
        loss = train(input_variable, lengths, target_variable, mask,
                     max_target_len, encoder, decoder, embedding,
                     encoder_optimizer, decoder_optimizer, batch_size)
        print_loss += loss
        perplexity.append(loss)
        if iteration % print_every == 0:
            # exp of the mean loss over the window = perplexity
            print_loss_avg = math.exp(print_loss / print_every)
            #print('%d %d%% %.4f' % (iteration, iteration / n_iteration * 100, print_loss_avg))
            with open('log.txt', 'a') as f:
                template = ' Iter: {:0>6d} process: {:.2f} avg_loss: {:.4f} time: {}\n'
                log_line = template.format(
                    iteration, iteration / n_iteration * 100, print_loss_avg,
                    time.asctime(time.localtime(time.time())))
                f.write(log_line)
            print_loss = 0
        if (iteration % save_every == 0):
            directory = os.path.join(
                save_dir, 'model', corpus_name,
                '{}-{}_{}'.format(n_layers, n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save(
                {
                    'iteration': iteration,
                    'en': encoder.state_dict(),
                    'de': decoder.state_dict(),
                    'en_opt': encoder_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': loss,
                    'plt': perplexity
                },
                os.path.join(
                    directory,
                    '{}_{}.tar'.format(iteration,
                                       filename(reverse, 'backup_bidir_model'))))
def trainIters(corpus, pre_modelFile, reverse, n_iteration, learning_rate,
               batch_size, n_layers, hidden_size, print_every, save_every,
               loadFilename=None, attn_model='dot', decoder_learning_ratio=5.0):
    """Seq2seq training loop that feeds Google's pre-trained word2vec vectors
    (loaded with gensim from pre_modelFile, binary format) into train()
    alongside the model.  The nn.Embedding layer is sized 300 because the
    Google News word2vec vectors are 300-dimensional.

    Each of the n_iteration training batches is built from batch_size randomly
    chosen pairs.  A checkpoint is written every save_every iterations;
    average perplexity is printed every print_every iterations.  loadFilename
    resumes model/optimizer/history state.
    """
    voc, pairs = loadPrepareData(corpus)
    # training data: reuse cached batches if a previous run saved them
    corpus_name = os.path.split(corpus)[-1].split('.')[0]
    training_batches = None
    try:
        training_batches = torch.load(
            os.path.join(save_dir, 'training_data', corpus_name,
                         '{}_{}_{}.tar'.format(n_iteration,
                                               filename(reverse, 'training_batches'),
                                               batch_size)))
    except FileNotFoundError:
        print('Training pairs not found, generating ...')
        training_batches = [
            batch2TrainData(voc,
                            [random.choice(pairs) for _ in range(batch_size)],
                            reverse) for _ in range(n_iteration)
        ]
        torch.save(training_batches,
                   os.path.join(save_dir, 'training_data', corpus_name,
                                '{}_{}_{}.tar'.format(n_iteration,
                                                      filename(reverse, 'training_batches'),
                                                      batch_size)))
    # model
    checkpoint = None
    #print('Building pretrained word2vector model...')
    embedding = nn.Embedding(300, hidden_size)  #The dimension of google's model is 300
    #-----------------------------------------------------------------
    # Disabled experiment kept for reference: swap the nn.Embedding for a
    # pretrained NGramLanguageModeler loaded from pre_modelFile.
    '''
    EMBEDDING_DIM = 300 #Should be the same as hidden_size!
    if EMBEDDING_DIM != hidden_size:
        sys.exit("EMBEDDING_DIM do not equal to hidden_size. Please correct it.")
    CONTEXT_SIZE = 2
    pre_checkpoint = torch.load(pre_modelFile)
    pretrained_model = NGramLanguageModeler(voc.n_words, EMBEDDING_DIM, CONTEXT_SIZE)
    pretrained_model.load_state_dict(pre_checkpoint['w2v'])
    pretrained_model.train(False)
    embedding = pretrained_model
    '''
    if USE_CUDA:
        embedding = embedding.cuda()
    #-----------------------------------------------------------------
    #replace embedding by pretrained_model
    print('Building encoder and decoder ...')
    encoder = EncoderRNN(300, hidden_size, embedding, n_layers)
    attn_model = 'dot'  # forced to 'dot' regardless of the parameter value
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.n_words, n_layers)
    if loadFilename:
        checkpoint = torch.load(loadFilename)
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])
    # use cuda
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    # optimizer: decoder learns decoder_learning_ratio times faster
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(checkpoint['en_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])
    # Load Google's pre-trained Word2Vec model.
    print('Loading w2v_model ...')
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)
    w2v_model = gensim.models.KeyedVectors.load_word2vec_format(pre_modelFile,
                                                                binary=True)
    print("Loading complete!")
    # initialize (resume iteration counter and perplexity history if needed)
    print('Initializing ...')
    start_iteration = 1
    perplexity = []
    print_loss = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1
        perplexity = checkpoint['plt']
    for iteration in tqdm(range(start_iteration, n_iteration + 1)):
        training_batch = training_batches[iteration - 1]
        input_variable, lengths, target_variable, mask, max_target_len = training_batch
        loss = train(input_variable, lengths, target_variable, mask,
                     max_target_len, encoder, decoder, embedding,
                     encoder_optimizer, decoder_optimizer, batch_size,
                     w2v_model, voc)
        print_loss += loss
        perplexity.append(loss)
        if iteration % print_every == 0:
            # exp of the mean loss over the window = perplexity
            print_loss_avg = math.exp(print_loss / print_every)
            # perplexity.append(print_loss_avg)
            # plotPerplexity(perplexity, iteration)
            print('%d %d%% %.4f' % (iteration, iteration / n_iteration * 100,
                                    print_loss_avg))
            print_loss = 0
        if (iteration % save_every == 0):
            directory = os.path.join(
                save_dir, 'model', corpus_name,
                '{}-{}_{}'.format(n_layers, n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save(
                {
                    'iteration': iteration,
                    'en': encoder.state_dict(),
                    'de': decoder.state_dict(),
                    'en_opt': encoder_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': loss,
                    'plt': perplexity
                },
                os.path.join(
                    directory,
                    '{}_{}.tar'.format(iteration,
                                       filename(reverse, 'backup_bidir_model'))))
'de': decoder.state_dict(), 'en_opt': encoder_optimizer.state_dict(), 'de_opt': decoder_optimizer.state_dict(), 'loss': loss, 'plt': perplexity }, os.path.join( directory, '{}_{}.tar'.format(iteration, filename(reverse, 'backup_bidir_model')))) if __name__ == "__main__": corpus = "data/greeting.txt" voc, pairs = loadPrepareData(corpus) corpus_name = os.path.split(corpus)[-1].split('.')[0] # print(corpus_name) hidden_size = 768 embedding_dict = concate_embedding(pairs, voc, hidden_size) print(len(embedding_dict)) print(embedding_dict[3]) print(embedding_dict[0]) print('Generating training batches...') n_iteration = 10 batch_size = 16 reverse = False
def trainIters(corpus, reverse, n_iteration, learning_rate, batch_size, n_layers,
               hidden_size, print_every, save_every, loadFilename=None,
               attn_model='dot', decoder_learning_ratio=5.0):
    """Run the seq2seq training loop over n_iteration randomly sampled batches.

    Batches are cached on disk under save_dir and rebuilt on any load failure
    (deliberately broad except, per the original author's note).  Average
    perplexity is reported every print_every iterations and a full checkpoint
    (models, optimizers, history) is written every save_every iterations.
    loadFilename, when given, resumes all of that state.
    """
    voc, pairs = loadPrepareData(corpus)
    corpus_name = os.path.split(corpus)[-1].split('.')[0]
    cache_path = os.path.join(save_dir, 'training_data', corpus_name,
                              '{}_{}_{}.tar'.format(n_iteration,
                                                    filename(reverse, 'training_batches'),
                                                    batch_size))
    batches = None
    try:
        batches = torch.load(cache_path)
    except BaseException:  # OWEN: was FileNotFoundError
        print('Training pairs not found, generating ...')
        batches = [batch2TrainData(voc,
                                   [random.choice(pairs) for _ in range(batch_size)],
                                   reverse)
                   for _ in range(n_iteration)]
        torch.save(batches, cache_path)

    # Shared embedding feeding both the encoder and the attention decoder.
    ckpt = None
    print('Building encoder and decoder ...')
    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    attn_model = 'dot'  # always 'dot', overriding whatever was passed in
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.n_words, n_layers)
    if loadFilename:
        ckpt = torch.load(loadFilename)
        encoder.load_state_dict(ckpt['en'])
        decoder.load_state_dict(ckpt['de'])
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # The decoder trains decoder_learning_ratio times faster than the encoder.
    print('Building optimizers ...')
    enc_opt = optim.Adam(encoder.parameters(), lr=learning_rate)
    dec_opt = optim.Adam(decoder.parameters(),
                         lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        enc_opt.load_state_dict(ckpt['en_opt'])
        dec_opt.load_state_dict(ckpt['de_opt'])

    # Resume the iteration counter and perplexity history when restarting.
    print('Initializing ...')
    first_iter = 1
    perplexity = []
    running_loss = 0
    if loadFilename:
        first_iter = ckpt['iteration'] + 1
        perplexity = ckpt['plt']

    for it in tqdm(range(first_iter, n_iteration + 1)):
        input_variable, lengths, target_variable, mask, max_target_len = batches[it - 1]
        loss = train(input_variable, lengths, target_variable, mask,
                     max_target_len, encoder, decoder, embedding,
                     enc_opt, dec_opt, batch_size)
        running_loss += loss
        perplexity.append(loss)

        if it % print_every == 0:
            # exp of the windowed mean loss; also appended to the history,
            # matching the original bookkeeping.
            avg = math.exp(running_loss / print_every)
            perplexity.append(avg)
            print('%d %d%% %.4f' % (it, it / n_iteration * 100, avg))
            running_loss = 0

        if it % save_every == 0:
            out_dir = os.path.join(save_dir, 'model', corpus_name,
                                   '{}-{}_{}'.format(n_layers, n_layers, hidden_size))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            state = {'iteration': it,
                     'en': encoder.state_dict(),
                     'de': decoder.state_dict(),
                     'en_opt': enc_opt.state_dict(),
                     'de_opt': dec_opt.state_dict(),
                     'loss': loss,
                     'plt': perplexity}
            torch.save(state,
                       os.path.join(out_dir,
                                    '{}_{}.tar'.format(it, filename(reverse, 'backup_bidir_model'))))
def trainIters(corpus, reverse, n_epoch, learning_rate, batch_size, n_layers,
               hidden_size, print_every, loadFilename=None, attn_model='dot',
               decoder_learning_ratio=1.0):
    """Train the attribute/aspect expansion model for ``n_epoch`` epochs.

    Builds three encoders (user/item attributes, user/item aspects, and a
    title/summary RNN encoder) plus a Luong-attention decoder.  Training,
    validation and test batches are cached on disk under
    ``save_dir/data/expansion``.  After each epoch the model is evaluated on
    the validation set; the checkpoint is saved whenever validation loss
    improves, the test set is scored, and training stops early once
    validation loss worsens.

    NOTE(review): this shadows the earlier ``trainIters`` definition in this
    file — only this definition is visible to later callers.

    Args:
        corpus: corpus identifier (also used as corpus_name).
        reverse: whether pairs are reversed (affects cache file names).
        n_epoch: maximum number of training epochs.
        learning_rate: Adam learning rate for the encoders.
        batch_size: training batch size (eval batches are fixed at 10).
        n_layers: RNN depth.
        hidden_size: RNN hidden (and word-embedding) dimensionality.
        print_every: batches between progress printouts.
        loadFilename: optional checkpoint path to resume from.
        attn_model: Luong attention score function ('dot', 'general', 'concat').
        decoder_learning_ratio: decoder LR multiplier relative to the encoders.
    """
    print(
        "corpus: {}, reverse={}, n_epoch={}, learning_rate={}, batch_size={}, n_layers={}, hidden_size={}, decoder_learning_ratio={}"
        .format(corpus, reverse, n_epoch, learning_rate, batch_size, n_layers,
                hidden_size, decoder_learning_ratio))
    voc, pairs, valid_pairs, test_pairs = loadPrepareData(corpus)
    print('load data...')
    path = "data/expansion"
    corpus_name = corpus

    def _cached_batches(tag, label, pair_subset, bsz, evaluation=False):
        # Load batchified pairs from the on-disk cache; on a miss, build them
        # with batchify() and write the cache.  Replaces three copy-pasted
        # try/except stanzas.
        tar_path = os.path.join(
            save_dir, path, '{}_{}.tar'.format(filename(reverse, tag), bsz))
        try:
            return torch.load(tar_path)
        except FileNotFoundError:
            print('{} pairs not found, generating ...'.format(label))
            batches = batchify(pair_subset, bsz, voc, reverse,
                               evaluation=evaluation)
            print('Complete building {} pairs ...'.format(label.lower()))
            torch.save(batches, tar_path)
            return batches

    # training / validation / test data
    training_batches = _cached_batches('training_batches', 'Training',
                                       pairs, batch_size)
    eval_batch_size = 10
    val_batches = _cached_batches('val_batches', 'Validation',
                                  valid_pairs, eval_batch_size,
                                  evaluation=True)
    test_batches = _cached_batches('test_batches', 'Test',
                                   test_pairs, eval_batch_size,
                                   evaluation=True)

    # --- model ---
    checkpoint = None
    print('Building encoder and decoder ...')
    # aspect word ids: 15 | 20 main aspects, each with 100 words
    with open(os.path.join(save_dir, '15_aspect.pkl'), 'rb') as fp:
        aspect_ids = pickle.load(fp)
    aspect_num = 15
    # Convert the id list into a non-trainable tensor used to index the word
    # embedding inside the decoder.
    aspect_ids = Variable(torch.LongTensor(aspect_ids), requires_grad=False)

    # attribute embeddings (user and item)
    attr_size = 64
    attr_num = 2
    print(
        "corpus: {}, reverse={}, n_words={}, n_epoch={}, learning_rate={}, batch_size={}, n_layers={}, hidden_size={}, decoder_learning_ratio={}, attr_size={}, aspect_num={}"
        .format(corpus, reverse, voc.n_words, n_epoch, learning_rate,
                batch_size, n_layers, hidden_size, decoder_learning_ratio,
                attr_size, aspect_num))
    with open(os.path.join(save_dir, 'user_item.pkl'), 'rb') as fp:
        user_dict, item_dict = pickle.load(fp)
    num_user = len(user_dict)
    num_item = len(item_dict)

    attr_embeddings = [nn.Embedding(num_user, attr_size),
                       nn.Embedding(num_item, attr_size)]
    aspect_embeddings = [nn.Embedding(num_user, aspect_num),
                         nn.Embedding(num_item, aspect_num)]
    if USE_CUDA:
        # Module.cuda() moves parameters in place, but rebuilding the lists
        # makes the intent explicit (the old loop rebound its loop variable).
        attr_embeddings = [emb.cuda() for emb in attr_embeddings]
        aspect_embeddings = [emb.cuda() for emb in aspect_embeddings]
        aspect_ids = aspect_ids.cuda()

    encoder1 = AttributeEncoder(attr_size, attr_num, hidden_size,
                                attr_embeddings, n_layers)
    encoder2 = AttributeEncoder(aspect_num, attr_num, hidden_size,
                                aspect_embeddings, n_layers)
    embedding = nn.Embedding(voc.n_words, hidden_size)
    encoder3 = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)
    # Honor the caller-supplied attention type; previously a hard-coded
    # attn_model = 'dot' silently ignored the parameter.
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  attr_size, voc.n_words, aspect_ids, n_layers)
    if loadFilename:
        checkpoint = torch.load(loadFilename)
        encoder1.load_state_dict(checkpoint['en1'])
        encoder2.load_state_dict(checkpoint['en2'])
        encoder3.load_state_dict(checkpoint['en3'])
        decoder.load_state_dict(checkpoint['de'])
    if USE_CUDA:
        encoder1 = encoder1.cuda()
        encoder2 = encoder2.cuda()
        encoder3 = encoder3.cuda()
        decoder = decoder.cuda()

    # --- optimizers ---
    print('Building optimizers ...')
    encoder1_optimizer = optim.Adam(encoder1.parameters(), lr=learning_rate)
    encoder2_optimizer = optim.Adam(encoder2.parameters(), lr=learning_rate)
    encoder3_optimizer = optim.Adam(encoder3.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder1_optimizer.load_state_dict(checkpoint['en1_opt'])
        encoder2_optimizer.load_state_dict(checkpoint['en2_opt'])
        encoder3_optimizer.load_state_dict(checkpoint['en3_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])

    def _mean_eval_loss(batches):
        # Mean evaluate() loss over a list of batches (shared by the formerly
        # duplicated validation and test loops).
        # NOTE(review): models are left in train() mode here, as in the
        # original code — confirm whether evaluate() toggles eval() itself.
        total = 0
        for eval_batch in batches:
            (attr_input, summary_input, summary_input_lengths, title_input,
             title_input_lengths, target_variable, mask,
             max_target_len) = eval_batch
            total += evaluate(attr_input, summary_input, summary_input_lengths,
                              title_input, title_input_lengths,
                              target_variable, mask, max_target_len,
                              encoder1, encoder2, encoder3, decoder, embedding,
                              encoder1_optimizer, encoder2_optimizer,
                              encoder3_optimizer, decoder_optimizer,
                              batch_size)
        return total / len(batches)

    # --- initialize, possibly resuming from the checkpoint ---
    print('Initializing ...')
    start_epoch = 0
    perplexity = []
    best_val_loss = None
    print_loss = 0
    if loadFilename:
        start_epoch = checkpoint['epoch'] + 1
        perplexity = checkpoint['plt']

    for epoch in range(start_epoch, n_epoch):
        epoch_start_time = time.time()

        # train one epoch
        encoder1.train()
        encoder2.train()
        encoder3.train()
        decoder.train()
        print_loss = 0
        start_time = time.time()
        for batch, training_batch in enumerate(training_batches):
            (attr_input, summary_input, summary_input_lengths, title_input,
             title_input_lengths, target_variable, mask,
             max_target_len) = training_batch
            loss = train(attr_input, summary_input, summary_input_lengths,
                         title_input, title_input_lengths, target_variable,
                         mask, max_target_len, encoder1, encoder2, encoder3,
                         decoder, embedding, encoder1_optimizer,
                         encoder2_optimizer, encoder3_optimizer,
                         decoder_optimizer, batch_size)
            print_loss += loss
            perplexity.append(loss)
            if batch % print_every == 0 and batch > 0:
                cur_loss = print_loss / print_every
                elapsed = time.time() - start_time
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                        epoch, batch, len(training_batches), learning_rate,
                        elapsed * 1000 / print_every, cur_loss,
                        math.exp(cur_loss)))
                print_loss = 0
                start_time = time.time()

        # evaluate on the validation set
        val_loss = _mean_eval_loss(val_batches)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch,
                                         (time.time() - epoch_start_time),
                                         val_loss, math.exp(val_loss)))
        print('-' * 89)

        # Save the model if the validation loss is the best so far.
        # ('is None' rather than truthiness: a best loss of exactly 0.0 must
        # not be mistaken for "no best yet".)
        if best_val_loss is None or val_loss < best_val_loss:
            directory = os.path.join(save_dir, 'model',
                                     '{}_{}'.format(n_layers, hidden_size))
            os.makedirs(directory, exist_ok=True)
            torch.save(
                {
                    'epoch': epoch,
                    'en1': encoder1.state_dict(),
                    'en2': encoder2.state_dict(),
                    'en3': encoder3.state_dict(),
                    'de': decoder.state_dict(),
                    'en1_opt': encoder1_optimizer.state_dict(),
                    'en2_opt': encoder2_optimizer.state_dict(),
                    'en3_opt': encoder3_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    # Previously this stored whichever validation batch's loss
                    # happened to be computed last; the epoch mean is the
                    # meaningful quantity.
                    'loss': val_loss,
                    'plt': perplexity,
                },
                os.path.join(
                    directory,
                    '{}_{}.tar'.format(
                        epoch,
                        filename(reverse, 'lexicon_title_expansion_model'))))
            best_val_loss = val_loss

        # Run on test data.
        test_loss = _mean_eval_loss(test_batches)
        print('-' * 89)
        print('| test loss {:5.2f} | test ppl {:8.2f}'.format(
            test_loss, math.exp(test_loss)))
        print('-' * 89)

        # Early stop as soon as validation loss worsens.
        if val_loss > best_val_loss:
            break