import os
import time
import random
import datetime

import torch
import torch.nn as nn
import torch.optim as optim

# Project helpers (loadPrepareData, trimRareWords, batch2TrainData, Config,
# EncoderRNN, LuongAttnDecoderRNN, rainbow, train) come from the surrounding project.


def main():
    data_path = './data/chatbot.txt'
    voc, pairs = loadPrepareData(data_path)
    # Discard sentence pairs that contain rare (low-frequency) words
    MIN_COUNT = Config.MIN_COUNT
    pairs = trimRareWords(voc, pairs, MIN_COUNT)
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(Config.batch_size)])
                        for _ in range(Config.total_step)]
    # Word embedding layer, shared by the encoder and decoder
    embedding = nn.Embedding(voc.num_words, Config.hidden_size)
    # Build the encoder and decoder
    encoder = EncoderRNN(Config.hidden_size, embedding, Config.encoder_n_layers, Config.dropout)
    decoder = LuongAttnDecoderRNN(Config.attn_model, embedding, Config.hidden_size, voc.num_words,
                                  Config.decoder_n_layers, Config.dropout)
    # Set up the optimizers
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=Config.learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=Config.learning_rate * Config.decoder_learning_ratio)
    start_iteration = 1
    save_every = 4000  # save a checkpoint every this many steps
    for iteration in range(start_iteration, Config.total_step + 1):
        training_batch = training_batches[iteration - 1]
        input_variable, lengths, target_variable, mask, max_target_len = training_batch
        start_time = time.time()
        # Run a training iteration with this batch
        loss = train(input_variable, lengths, target_variable, mask, max_target_len,
                     encoder, decoder, embedding, encoder_optimizer, decoder_optimizer,
                     Config.batch_size, Config.clip)
        time_str = datetime.datetime.now().isoformat()
        log_str = "time: {}, Iteration: {}; Percent complete: {:.1f}%; loss: {:.4f}, spend_time: {:.6f}".format(
            time_str, iteration, iteration / Config.total_step * 100, loss, time.time() - start_time)
        rainbow(log_str)
        # Save checkpoint
        if iteration % save_every == 0:
            save_path = './save_model/'
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            torch.save({
                'iteration': iteration,
                'encoder': encoder.state_dict(),
                'decoder': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict()
            }, os.path.join(save_path, '{}_{}_model.tar'.format(iteration, 'checkpoint')))
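# --- Hedged sketch: restoring the checkpoint that main() saves above. ---
# Assumptions: the same Config, EncoderRNN, and LuongAttnDecoderRNN definitions
# are importable, and `Voc` (an assumed class name for the vocabulary object)
# can be rebuilt from the saved voc.__dict__. Illustrative only, not the
# project's own loading code.
def load_checkpoint(checkpoint_path):
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    # Rebuild the vocabulary without calling __init__, then restore its attributes
    voc = Voc.__new__(Voc)
    voc.__dict__.update(checkpoint['voc_dict'])
    embedding = nn.Embedding(voc.num_words, Config.hidden_size)
    embedding.load_state_dict(checkpoint['embedding'])
    encoder = EncoderRNN(Config.hidden_size, embedding, Config.encoder_n_layers, Config.dropout)
    decoder = LuongAttnDecoderRNN(Config.attn_model, embedding, Config.hidden_size,
                                  voc.num_words, Config.decoder_n_layers, Config.dropout)
    encoder.load_state_dict(checkpoint['encoder'])
    decoder.load_state_dict(checkpoint['decoder'])
    # Switch to evaluation mode for inference
    encoder.eval()
    decoder.eval()
    return voc, encoder, decoder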
def train(args):
    input_lang, output_lang, pairs = prepareData(args)
    print(random.choice(pairs))
    # Collect everything needed to rebuild the model in one dictionary
    model = {}
    model['hidden_size'] = 1000
    model['dropout'] = 0.1
    model['input_lang'] = input_lang
    model['output_lang'] = output_lang
    # +2 leaves headroom for the start/end-of-sentence tokens
    model['max_length'] = max(input_lang.max_length, output_lang.max_length) + 2
    print('Max length: {}'.format(model['max_length']))
    encoder1 = EncoderRNN(input_lang.n_words, model['hidden_size']).to(getDevice())
    encoder1.train()
    attn_decoder1 = AttnDecoderRNN(model['hidden_size'], output_lang.n_words,
                                   dropout_p=model['dropout'],
                                   max_length=model['max_length']).to(getDevice())
    attn_decoder1.train()
    n_iters = 30000
    training_pairs = [
        tensorsFromPair(input_lang, output_lang, random.choice(pairs))
        for _ in range(n_iters)
    ]
    trainIters(training_pairs, encoder1, attn_decoder1, n_iters, print_every=1000,
               optim=args.optim, learning_rate=args.learning_rate,
               max_length=model['max_length'])
    print('saving models...')
    model['encoder_state'] = encoder1.state_dict()
    model['decoder_state'] = attn_decoder1.state_dict()
    torch.save(model, "data/{}_model_checkpoint.pth".format(args.phase.split('_')[-1]))
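# --- Hedged sketch: reloading the dictionary that train() saves above. ---
# The keys mirror exactly what torch.save stores; EncoderRNN, AttnDecoderRNN,
# and getDevice come from the same project as train(). The path argument is
# whatever checkpoint file train() produced.
def load_model(path):
    model = torch.load(path, map_location=getDevice())
    encoder1 = EncoderRNN(model['input_lang'].n_words, model['hidden_size']).to(getDevice())
    attn_decoder1 = AttnDecoderRNN(model['hidden_size'], model['output_lang'].n_words,
                                   dropout_p=model['dropout'],
                                   max_length=model['max_length']).to(getDevice())
    encoder1.load_state_dict(model['encoder_state'])
    attn_decoder1.load_state_dict(model['decoder_state'])
    # Switch to evaluation mode for inference
    encoder1.eval()
    attn_decoder1.eval()
    return model, encoder1, attn_decoder1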
init_learning_rate = 0.01
decay_rate = 0.5
loss_list = []
for n in range(6):
    loss_all = 0.0
    epoch_num = n + 1
    # Halve the learning rate each epoch, as init_learning_rate/decay_rate imply
    # (the original passed a hard-coded 0.005 and never used these variables)
    rate = init_learning_rate * (decay_rate ** n)
    for i in range(360):
        batch_no = i * 5
        # Assumes trainIters returns the average loss for the call; without
        # accumulating it, the per-epoch average below would always be zero
        loss_all += trainIters(encoder1, attn_decoder1, batch_no,
                               print_every=5, learning_rate=rate)
        if i % 30 == 0:
            print('recorded!')
            evaluateRandomly(encoder1, attn_decoder1)
            torch.save(encoder1.state_dict(), 'encoder.pt')
            torch.save(attn_decoder1.state_dict(), 'decoder.pt')
    loss_epoch = loss_all / 360
    print('Avg. epoch loss: %f' % loss_epoch)
    loss_list.append(loss_epoch)
    print('Finished epoch %d' % epoch_num)
print(loss_list)
print('Done')
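# --- Optional: plot the per-epoch averages collected in loss_list. ---
# A minimal matplotlib sketch (assumes matplotlib is installed); purely illustrative.
import matplotlib.pyplot as plt

plt.plot(range(1, len(loss_list) + 1), loss_list, marker='o')
plt.xlabel('epoch')
plt.ylabel('average loss')
plt.title('Training loss per epoch')
plt.savefig('loss_curve.png')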