def main():
    """Train the batched attention seq2seq model on the eng-fra dataset."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True,
                                                 dir='data', filter=False)

    hidden_size = 512
    batch_size = 64
    iters = 50000

    encoder = EncoderRNN(input_lang.n_words, hidden_size)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1)

    # Wrap both models in DataParallel when several GPUs are available.
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        encoder = nn.DataParallel(encoder)
        attn_decoder = nn.DataParallel(attn_decoder)

    encoder = encoder.to(device)
    attn_decoder = attn_decoder.to(device)

    trainIters(device, pairs, input_lang, output_lang, encoder, attn_decoder,
               batch_size, iters, print_every=250)
def main():
    """Fine-tune a word encoder to match frozen image features (MSE objective).

    Fixes vs. original: converted Python-2 `print` statements to Python 3,
    opened pickle files in binary mode, and replaced the removed
    `loss.data[0]` access with `loss.item()`.
    """
    # Load vocabulary wrapper; pickle requires binary mode in Python 3.
    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    encoder = EncoderCNN(4096, embed_dim)
    encoder.load_state_dict(torch.load('searchimage.pkl'))
    # The image encoder is frozen: only the word encoder is trained.
    for p in encoder.parameters():
        p.requires_grad = False

    word_encoder = EncoderRNN(embed_dim, embed_dim, len(vocab), num_layers_rnn)
    word_encoder.load_state_dict(torch.load('searchword.pkl'))

    if torch.cuda.is_available():
        encoder.cuda()
        word_encoder.cuda()

    # Loss and optimizer (image-encoder parameters deliberately excluded).
    criterion = nn.MSELoss()
    params = list(word_encoder.parameters())
    optimizer = torch.optim.Adam(params, lr=2e-6, weight_decay=0.001)

    # Load data.
    with open(image_data_file, 'rb') as f:
        image_data = pickle.load(f)
    image_features = si.loadmat(image_feature_file)
    img_features = image_features['fc7'][0]
    img_features = np.concatenate(img_features)

    print('here')
    iteration = 0
    for i in range(10):  # epoch
        use_caption = i % 5
        print('Epoch', i)
        losses = []
        for x, y in make_mini_batch(img_features, image_data,
                                    use_caption=use_caption):
            encoder.zero_grad()
            word_encoder.zero_grad()
            word_padding, lengths = make_word_padding(y, vocab)
            x = Variable(torch.from_numpy(x).cuda())
            word_index = Variable(torch.from_numpy(word_padding).cuda())

            features = encoder(x)
            outputs = word_encoder(word_index, lengths)
            loss = torch.mean((features - outputs).pow(2))
            loss.backward()
            optimizer.step()

            # `.item()` replaces the removed 0-dim indexing `loss.data[0]`.
            losses.append(loss.item())
            if iteration % 100 == 0:
                print('loss', sum(losses) / float(len(losses)))
                losses = []
            iteration += 1

    torch.save(word_encoder.state_dict(), 'searchword.pkl')
    torch.save(encoder.state_dict(), 'searchimage.pkl')
def train(args, data, bidaf):
    """Train utterance/span encoders plus attention decoder with SGD.

    Fixes vs. original: both encoder optimizers referenced an undefined
    global `encoder`; examples were indexed with an undefined `i` and
    fetched from a misspelled `ata`; `print_loss_total` and `start` were
    never initialized. `bidaf` is accepted for interface compatibility.
    """
    import time  # local import: used only for progress reporting

    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")

    utte_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    span_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    decoder = AttnDecoderRNN(args, data.WORD.vocab.vectors).to(device)

    # Each optimizer must own the parameters of the model it updates.
    utte_encoder_optimizer = optim.SGD(utte_encoder.parameters(),
                                       lr=args.learning_rate)
    span_encoder_optimizer = optim.SGD(span_encoder.parameters(),
                                       lr=args.learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.NLLLoss()

    n_iters = 10 * len(data.train.examples)
    plot_loss_total = []
    print_every = 10000
    print_loss_total = 0  # accumulated loss, reset every `print_every` iters
    start = time.time()

    for iter in range(1, n_iters + 1):
        # Cycle through the training examples.
        example = data.train.examples[(iter - 1) % len(data.train.examples)]
        input_tensor = example.q_word
        target_tensor = example.ans
        span = example.span
        loss = train_each(input_tensor, target_tensor, utte_encoder,
                          span_encoder, decoder, utte_encoder_optimizer,
                          span_encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100,
                                         print_loss_avg))
def __init__(self, wm, input_length, batch_size, hidden_size, bidirectional,
             embedding_size, n_parameter, m_parameter, learning_rate, clip,
             alpha, beta, pre_trained_file=None):
    """Build (or resume) an encoder/decoder pair with Adam optimizers.

    wm               -- word-embedding matrix passed to the encoder
    pre_trained_file -- when given, models/optimizers/start epoch are
                        restored via self.load_model_state instead
    NOTE: `input_length` is accepted for interface compatibility but unused.
    """
    self.batch_size = batch_size
    self.hidden_size = hidden_size
    self.embedding_size = embedding_size
    self.bidirectional = bidirectional
    self.n_parameter = n_parameter
    self.m_parameter = m_parameter
    self.learning_rate = learning_rate
    self.wm = wm
    self.clip = clip
    self.alpha = alpha
    self.beta = beta

    # `is None` replaces the non-idiomatic `== None` equality test.
    if pre_trained_file is None:
        self.encoder = EncoderRNN(self.wm, self.embedding_size,
                                  hidden_size, bidirectional)
        self.decoder = AttnDecoderRNN(self.hidden_size, 10)
        self.enc_optimizer = optim.Adam(self.encoder.parameters(),
                                        lr=self.learning_rate)
        self.dec_optimizer = optim.Adam(self.decoder.parameters(),
                                        lr=self.learning_rate)
        self.start = 0
    else:
        self.resume_training = True
        (self.encoder, self.decoder, self.enc_optimizer, self.dec_optimizer,
         self.start) = self.load_model_state(pre_trained_file)

    # `device` is expected to be a module-level global — TODO confirm.
    self.decoder = self.decoder.to(device)
    self.encoder = self.encoder.to(device)
def example_test():
    """Smoke-test a batched encoder/decoder pair on dummy token ids."""
    encoder_test = EncoderRNN(10, 10, 2, max_length=3)
    decoder_test = AttnDecoderRNN('general', 10, 10, 2)
    print(encoder_test)
    print(decoder_test)

    encoder_hidden = encoder_test.init_hidden(batch_size=4)
    # Batch of 4 sequences, 3 tokens each.
    word_input = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))
    if USE_CUDA:
        encoder_test.cuda()
        word_input = word_input.cuda()
        encoder_hidden = encoder_hidden.cuda()

    encoder_outputs, encoder_hidden = encoder_test(
        word_input, encoder_hidden)  # S B H, L B H
    print(encoder_outputs.shape, encoder_hidden.shape)

    word_inputs = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))
    decoder_attns = torch.zeros(4, 3, 3)
    decoder_hidden = encoder_hidden
    decoder_context = Variable(torch.zeros(4, decoder_test.hidden_size))
    if USE_CUDA:
        decoder_test.cuda()
        word_inputs = word_inputs.cuda()
        decoder_context = decoder_context.cuda()

    # Decode the three target positions one step at a time, collecting the
    # attention weights for inspection.
    for step in range(3):
        decoder_output, decoder_context, decoder_hidden, decoder_attn = \
            decoder_test(word_inputs[:, step], decoder_context,
                         decoder_hidden, encoder_outputs)
        print(decoder_output.size(), decoder_hidden.size(), decoder_attn.size())
        decoder_attns[:, step, :] = decoder_attn.squeeze(1).cpu().data
def load_model():
    """Restore the trained encoder/decoder pair from local .pth checkpoints."""
    enc = EncoderRNN(human_n_chars, hidden_size, n_layers)
    dec = AttnDecoderRNN(attn_model, hidden_size, machine_n_chars, n_layers,
                         dropout_p=dropout_p)
    enc.load_state_dict(t.load('encoder.pth'))
    dec.load_state_dict(t.load('decoder.pth'))
    return enc, dec
def epoch_training(train_iter, val_iter, num_epoch=100, learning_rate=1e-4,
                   hidden_size=100, early_stop=False, patience=2, epsilon=1e-4):
    """Train an encoder/decoder, keeping the weights with the best validation
    BLEU; stop after `patience` epochs without improvement.

    Returns (best_val_loss, best_encoder, best_decoder, best_bleu).
    Fixes vs. original: dropped the unused, uninitialized
    `np.ndarray(patience)` buffer and corrected the misleading
    "lowest validation loss" comment (selection is by highest BLEU).
    NOTE: `early_stop` and `epsilon` are kept for interface compatibility
    but are not used.
    """
    # Define models.
    encoder = EncoderRNN(input_size=len(EN.vocab), hidden_size=hidden_size)
    decoder = DecoderRNN(hidden_size=hidden_size, output_size=len(DE.vocab))
    # Loss criterion ignores padding positions.
    criterion = nn.NLLLoss(ignore_index=PAD_token)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    res_loss = 13  # placeholder; overwritten on the first improving epoch
    res_encoder = None
    res_decoder = None
    res_epoch = 0
    base_bleu = 0
    not_updated = 0

    for epoch in range(num_epoch):
        tl = train(train_iter, encoder, decoder, encoder_optimizer,
                   decoder_optimizer, criterion)
        loss, val_bleu = evaluate(val_iter, encoder, decoder, criterion)
        logging.warning('******Epoch: ' + str(epoch) + ' Training Loss: ' +
                        str(tl) + ' Validation Loss: ' + str(loss) +
                        ' Validation Bleu: ' + str(val_bleu) + '*********')
        # Keep the model with the highest validation BLEU so far.
        if base_bleu <= val_bleu:
            base_bleu = val_bleu
            res_loss = loss
            res_encoder = encoder
            res_decoder = decoder
            res_epoch = epoch
            not_updated = 0
            logging.warning('Updated validation loss as ' + str(res_loss) +
                            'With validation Bleu as ' + str(base_bleu) +
                            ' at epoch ' + str(res_epoch))
        else:
            not_updated += 1
            if not_updated == patience:
                break

    print('Stop at Epoch: ' + str(res_epoch) + ", With Validation Loss: " +
          str(res_loss) + ", Validation Bleu: " + str(base_bleu))
    logging.warning('Stop at Epoch: ' + str(res_epoch) +
                    ", With Validation Loss: " + str(res_loss) +
                    ", Validation Bleu: " + str(base_bleu))
    return res_loss, res_encoder, res_decoder, base_bleu
def load_model_from_file(voc, file):
    """Load encoder/decoder weights for `voc` from a local checkpoint file."""
    checkpoint = torch.load(file)
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']

    # Word embedding shared by encoder and decoder.
    embedding = nn.Embedding(voc.num_words, hidden_size)
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)

    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    return encoder, decoder
def main():
    """Train the eng-fra translator for `num_iters` iterations, then evaluate."""
    # Data.
    input_lang, output_lang, pairs = prepare_data('eng', 'fra', reverse=True)

    encoder = EncoderRNN(input_lang.n_words, hidden_size, use_cuda)
    if use_attention:
        decoder = AttentionDecoderRNN(hidden_size, output_lang.n_words, use_cuda)
    else:
        decoder = DecoderRNN(hidden_size, output_lang.n_words, use_cuda)
    if use_cuda:
        encoder, decoder = encoder.cuda(), decoder.cuda()

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    # Decay learning rates at fixed iteration milestones.
    encoder_schedule = MultiStepLR(encoder_optimizer, [40000, 60000])
    decoder_schedule = MultiStepLR(decoder_optimizer, [40000, 60000])

    plot_losses = []
    print_total_loss = 0.
    plot_total_loss = 0.

    for iter in tqdm(range(1, num_iters + 1)):
        encoder_schedule.step()
        decoder_schedule.step()
        input_variable, output_variable = variable_from_pair(
            input_lang, output_lang, random.choice(pairs), use_cuda)
        loss = train(input_variable, output_variable, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_total_loss += loss
        plot_total_loss += loss

        if iter % print_every == 0:
            print_avg_loss = print_total_loss / print_every
            print_total_loss = 0
            tqdm.write("iter: {} Percent: {}% Loss: {}".format(
                iter, round(100 * iter / num_iters, 2), print_avg_loss))
        if iter % plot_every == 0:
            plot_avg_loss = plot_total_loss / plot_every
            plot_losses.append(plot_avg_loss)
            plot_total_loss = 0

    show_plot(plot_losses)
    evaluate_randomly(encoder, decoder, input_lang, output_lang, pairs)
def train(**kwargs):
    """Train the encoder/attention-decoder chatbot, checkpointing periodically."""
    opt = Config()
    for k, v in kwargs.items():  # override config fields from kwargs
        setattr(opt, k, v)
    if opt.use_gpu:
        torch.cuda.empty_cache()  # release cached GPU memory up front

    # Data.
    dataloader, datas = get_loader(opt)
    datas = dataloader.dataset.datas
    word2ix = datas['word2ix']
    sos = word2ix.get(datas.get('sos'))
    voc_length = len(word2ix)

    # Models.
    encoder = EncoderRNN(opt, voc_length)
    decoder = AttentionDecoderRNN(opt, voc_length)

    # Resume from a checkpoint when one is configured.
    if opt.model_ckpt:
        checkpoint = torch.load(opt.model_ckpt)
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])

    encoder = encoder.to(opt.device)
    decoder = decoder.to(opt.device)
    encoder.train()
    decoder.train()

    # Optimizers are built AFTER the .to(device) calls — keep this order.
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=opt.learning_rate)
    decoder_optimizer = torch.optim.Adam(
        decoder.parameters(), lr=opt.learning_rate * opt.decoder_learning_ratio)
    if opt.model_ckpt:
        encoder_optimizer.load_state_dict(checkpoint['en_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])

    print_loss = 0
    for epoch in range(opt.epoch):
        for i, data in enumerate(dataloader):
            # Train on one batch.
            loss = train_by_batch(sos, opt, data, encoder_optimizer,
                                  decoder_optimizer, encoder, decoder)
            print_loss += loss
            # Report the running average loss.
            if i % opt.print_every == 0:
                print_loss_avg = print_loss / opt.print_every
                print("Epoch: {}; Epoch Percent complete: {:.4f}%; Average loss: {:.8f}"
                      .format(epoch, epoch / opt.epoch * 100, print_loss_avg))
                print_loss = 0

        # Periodic checkpoint of models and optimizer states.
        if epoch % opt.save_every == 0:
            checkpoint_path = '{prefix}_{time}'.format(
                prefix=opt.prefix, time=time.strftime('%m%d_%H%M'))
            torch.save({
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
            }, checkpoint_path)
def main():
    """Train an abstract-to-title seq2seq model on academic-paper data."""
    # Load the word-embedding pickle: (word_index, embedding_map).
    with open(embedding_path, "rb") as pickle_file:
        word_embedding = pickle.load(pickle_file)
    word_index = word_embedding[0]
    embedding_map = word_embedding[1]
    output_size = len(word_index)

    # Initialize the models.
    hidden_size = 100
    encoder = EncoderRNN(hidden_size)
    decoder = DecoderRNN(hidden_size, output_size)

    # Load the training data.
    parser = AcademicParser("../train_data/Academic_papers/docs.json")
    abstracts = parser.get_paperAbstract()
    titles = parser.get_title()
    assert (len(abstracts) == len(titles))

    # Build (abstract, title) pairs, keeping only non-empty targets.
    train_set = []
    for abstract, title in zip(abstracts, titles):
        new_pair = variablesFromPair((abstract, title), word_index,
                                     embedding_map)
        if len(new_pair[1]) > 0:
            train_set.append(new_pair)

    trainIters(encoder, decoder, 20000, train_set)
def test_input_dropout_WITH_PROB_ZERO(self):
    """With input_dropout_p=0 two forward passes must be identical.

    Fix vs. original: converted Python-2 `print` statements (syntax errors
    under Python 3) to `print()` calls; behavior otherwise unchanged.
    """
    rnn = EncoderRNN(self.vocab_size, None, 50, 16, input_dropout_p=0,
                     n_layers=3, bidirectional=True, rnn_cell_name='lstm')
    print(rnn)
    for param in rnn.parameters():
        param.data.uniform_(-1, 1)

    output1, _ = rnn(self.input_var, self.lengths)
    if isinstance(_, tuple):
        # LSTM cell: hidden state is an (h, c) tuple.
        print('outputs', output1.size())
        print('hidden', [elem.size() for elem in _])
    else:
        print('outputs', output1.size())
        print('hidden', _.size())

    # A second pass with dropout probability 0 must reproduce the first.
    output2, _ = rnn(self.input_var, self.lengths)
    self.assertTrue(torch.equal(output1.data, output2.data))
def get_encoder_decoder(vocab):
    """
    Given the arguments, returns the correct combination of CNN/RNN/GAN
    encoders and decoders.
    """
    if args.pretrain_rnn:
        # Text-only pretraining path: RNN encoder over the vocabulary.
        encoder = EncoderRNN(len(vocab), args.embed_size,
                             args.encoder_rnn_hidden_size,
                             num_layers=args.num_layers).to(device)
    elif args.gan_embedding:
        # Use a pre-trained DCGAN discriminator as the image encoder.
        gan = torch.load('DCGAN_embed_2.tch').to(device)
        encoder = gan.discriminator
    elif args.progan_embedding:
        # Use a pre-trained proGAN discriminator as the image encoder.
        pro_gan = pg.ProGAN(depth=7, latent_size=256,
                            device=torch.device('cuda'))
        pro_gan.dis.load_state_dict(torch.load('progan_weights/GAN_DIS_6.pth'))
        pro_gan.gen.load_state_dict(torch.load('progan_weights/GAN_GEN_6.pth'))
        pro_gan.gen_shadow.load_state_dict(
            torch.load('progan_weights/GAN_GEN_SHADOW_6.pth'))
        print("Loaded proGAN weights.", flush=True)
        encoder = pro_gan.dis.to(device)
    else:
        encoder = EncoderCNN(args.embed_size).to(device)

    decoder = DecoderRNNOld(args.embed_size, args.decoder_rnn_hidden_size,
                            len(vocab), args.num_layers, vocab,
                            device=device).to(device)
    return encoder, decoder
def load_model_state(self, model_file):
    """Rebuild encoder/decoder/optimizers from a checkpoint, restoring states.

    BUG FIX: the original extracted the saved state dicts but never loaded
    them into the freshly constructed models/optimizers, so "resuming"
    silently restarted from random weights.

    Returns (encoder, decoder, enc_optimizer, dec_optimizer, epoch).
    """
    print("Resuming training from a given model...")
    model = torch.load(model_file, map_location=lambda storage, loc: storage)
    epoch = model['epoch']
    loss = model['loss']  # read for parity with the checkpoint format; unused

    encoder = EncoderRNN(self.wm, self.embedding_size,
                         self.hidden_size, self.bidirectional)
    decoder = AttnDecoderRNN("general", self.hidden_size, 10)
    encoder.load_state_dict(model['encoder_state_dict'])
    decoder.load_state_dict(model['decoder_state_dict'])

    enc_optimizer = optim.Adam(encoder.parameters(), lr=self.learning_rate)
    dec_optimizer = optim.Adam(decoder.parameters(), lr=self.learning_rate)
    enc_optimizer.load_state_dict(model['encoder_optimizer_state_dict'])
    dec_optimizer.load_state_dict(model['decoder_optimizer_state_dict'])

    return encoder, decoder, enc_optimizer, dec_optimizer, epoch
def test(opt):
    """Build a GreedySearchDecoder from a checkpoint.

    Returns (searcher, sos, eos, unknown, word2ix, ix2word).
    Fixes vs. original: `is None` instead of `== None`, and removed the
    unreachable `return False` that followed the `raise`.
    """
    # Data.
    dataloader = get_dataloader(opt)
    _data = dataloader.dataset._data
    word2ix, ix2word = _data['word2ix'], _data['ix2word']
    sos = word2ix.get(_data.get('sos'))
    eos = word2ix.get(_data.get('eos'))
    unknown = word2ix.get(_data.get('unknown'))
    voc_length = len(word2ix)

    # Models.
    encoder = EncoderRNN(opt, voc_length)
    decoder = LuongAttnDecoderRNN(opt, voc_length)

    # A checkpoint is mandatory for evaluation.
    if opt.model_ckpt is None:
        raise ValueError('model_ckpt is None.')
    checkpoint = torch.load(opt.model_ckpt, map_location=lambda s, l: s)
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])

    with torch.no_grad():
        # Switch to evaluation mode on the target device.
        encoder = encoder.to(opt.device)
        decoder = decoder.to(opt.device)
        encoder.eval()
        decoder.eval()
        # Build the greedy searcher.
        searcher = GreedySearchDecoder(encoder, decoder)
        return searcher, sos, eos, unknown, word2ix, ix2word
def runTest(n_layers, hidden_size, reverse, modelFile, beam_size, inp, corpus):
    """Load a checkpointed chatbot and evaluate it interactively or randomly."""
    torch.set_grad_enabled(False)

    voc, pairs = loadPrepareData(corpus)
    embedding = nn.Embedding(voc.num_words, hidden_size)
    encoder = EncoderRNN(hidden_size, embedding, n_layers)
    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, n_layers)

    checkpoint = torch.load(modelFile,
                            map_location=lambda storage, loc: storage)
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])

    # Evaluation mode: only affects dropout / batch-norm layers.
    encoder.train(False)
    decoder.train(False)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    if inp:
        evaluateInput(encoder, decoder, voc, beam_size)
    else:
        evaluateRandomly(encoder, decoder, voc, pairs, reverse, beam_size, 20)
def __init__(self, config, dataset):
    """Build encoder/decoder, their optimizers, and the NLL criterion.

    Fix vs. original: the decoder was moved to GPU with
    `cuda(device_id=...)`, a keyword no longer accepted by Module.cuda();
    the device index is now passed positionally, matching the encoder call,
    and the duplicated USE_CUDA check is merged.
    """
    self.config = config
    self.n_epochs = config.n_epochs
    self.encoder = EncoderRNN(n_dict=dataset.source.n_words, config=config)
    self.decoder = AttnDecoderRNN(n_dict=dataset.target.n_words, config=config)
    self.encoder_optimizer = config.optimizier(self.encoder.parameters(),
                                               lr=config.learning_rate)
    self.decoder_optimizer = config.optimizier(self.decoder.parameters(),
                                               lr=config.learning_rate)
    self.criterion = nn.NLLLoss()
    self.is_plot = config.is_plot
    self.clip_value = config.clip_value
    self.losses = []  # per-iteration training losses

    if self.config.USE_CUDA:
        self.encoder.cuda(self.config.gpu_id)
        self.decoder.cuda(self.config.gpu_id)
def main():
    """Train an attention seq2seq model on tab-separated pairs from argv[1]."""
    input_file = sys.argv[1]
    vocab = build.build_vocabulary(input_file)
    pairs = [tensors_from_pair(vocab, x.split("\t")) for x in open(input_file)]
    # Drop pairs whose source side exceeds the model's maximum length.
    pairs = [(x, y) for x, y in pairs if x.size(0) <= MAX_LENGTH]

    hidden_size = 256
    encoder1 = EncoderRNN(vocab.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, vocab.n_words,
                                   dropout_p=0.1).to(device)
    train_iter(pairs, encoder1, attn_decoder1, 75000, print_every=100)
def setUpClass(cls):
    """Prepare dataset, embeddings, and a trained Model shared by all tests."""
    cls.pre_processing = PreProcessing(sentences)
    cls.dataset = ds.process(cls.pre_processing)
    cls.word_embedding = WordEmbedding(source=cls.dataset.pairs)

    enc = EncoderRNN(cls.word_embedding, 300, 1).to(settings.device)
    dec = DecoderRNN(300, cls.word_embedding, 0.0, 1).to(settings.device)
    cls.model = Model(enc, dec)
    cls.model.train(cls.dataset)
def create_models(config, in_words, out_words):
    """Instantiate CUDA encoder/decoder from a string-valued config mapping."""
    logging.info('Creating models...')
    hidden = int(config['hidden_size'])
    layers = int(config['num_layers'])
    encoder = EncoderRNN(in_words, hidden, num_layers=layers).cuda()
    decoder = AttnDecoderRNN(hidden, out_words, num_layers=layers,
                             dropout_p=float(config['dropout_p'])).cuda()
    return encoder, decoder
def inference(sentence, language, MODEL_DIR, codersum):
    # Greedy-decode one input sentence with a checkpointed LSTM seq2seq model.
    # Returns the decoded words joined into a single string.
    # NOTE(review): weights are loaded with map_location="cpu" and the models
    # are never moved to GPU, yet decoder_input/decoder_context ARE moved to
    # GPU when config.USE_CUDA is set — confirm USE_CUDA is expected to be
    # False here, otherwise this mixes devices.
    encoder = EncoderRNN(language.n_words, config.HIDDEN_SIZE,
                         config.NUM_LAYER, max_length=config.MAX_LENGTH + 1)
    decoder = AttnDecoderRNN(config.ATT_MODEL, config.HIDDEN_SIZE,
                             language.n_words, config.NUM_LAYER,
                             dropout_p=config.DROPOUT)
    # Checkpoint paths are keyed by `codersum`.
    encoder_path = os.path.join(MODEL_DIR, "encoder_" + str(codersum) + ".pth")
    decoder_path = os.path.join(MODEL_DIR, "decoder_" + str(codersum) + ".pth")
    encoder.load_state_dict(torch.load(encoder_path, map_location="cpu"))
    decoder.load_state_dict(torch.load(decoder_path, map_location="cpu"))
    encoder.eval()
    decoder.eval()

    batch_size = 1
    input_index = indexes_from_sentence(language, sentence)
    input_index = pad_sentence(input_index)  # pad to the fixed input length
    input_variable = torch.LongTensor([input_index])

    # Encode the full input sequence.
    encoder_hidden, encoder_cell = encoder.init_hidden(batch_size)
    encoder_outputs, encoder_hidden, encoder_cell = encoder(
        input_variable, encoder_hidden, encoder_cell)

    # Decoder starts from token id 0 with the encoder's final state.
    decoder_input = torch.zeros(batch_size, 1).long()
    decoder_context = torch.zeros(batch_size, decoder.hidden_size)
    decoder_hidden = encoder_hidden
    decoder_cell = encoder_cell
    if config.USE_CUDA:
        decoder_input = decoder_input.cuda()
        decoder_context = decoder_context.cuda()

    decoded_words = []
    # Run through decoder
    for di in range(config.MAX_LENGTH):
        decoder_output, decoder_context, decoder_hidden, decoder_cell, _ = decoder(
            decoder_input, decoder_context, decoder_hidden, decoder_cell,
            encoder_outputs)
        # Choose top word from output
        topv, topi = decoder_output.data.topk(1)
        ni = topi[0][0]
        if ni == 0:
            # Token id 0 terminates decoding (presumably EOS/pad — verify).
            break
        else:
            decoded_words.append(language.index2word[ni.item()])
        # Feed the predicted token back as the next decoder input.
        decoder_input = torch.LongTensor([[ni]])
        if config.USE_CUDA:
            decoder_input = decoder_input.cuda()

    return "".join(decoded_words)
def main():
    """Evaluate a saved QA seq2seq model on the held-out 10% of the pairs."""
    input_lang, output_lang, pairs = prepare_data(
        'ques', 'ans', '../debug.json', reverse=False)
    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words,
                                  dropout_p=0.1, max_length=1000).to(device)

    # 90/10 train/test split (only the test slice is used here).
    rate = 0.9
    split = int(len(pairs) * rate)
    pairs_train, pairs_test = pairs[0:split], pairs[split:]

    encoder.load_state_dict(torch.load('model/encoder-0.model'))
    encoder.eval()
    attn_decoder.load_state_dict(torch.load('model/decoder-0.model'))
    attn_decoder.eval()

    evaluate_all(encoder, attn_decoder, pairs_test, max_length=1000,
                 input_lang=input_lang, output_lang=output_lang,
                 n=len(pairs_test))
    print('done test')
def main():
    """Train a text VAE on IMDB, checkpointing on best validation loss."""
    args = parse_arguments()
    hidden_size = 300
    embed_size = 50
    kld_weight = 0.05
    temperature = 0.9
    use_cuda = torch.cuda.is_available()

    print("[!] preparing dataset...")
    TEXT = data.Field(lower=True, fix_length=30)
    LABEL = data.Field(sequential=False)
    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
    TEXT.build_vocab(train_data, max_size=250000)
    LABEL.build_vocab(train_data)
    train_iter, test_iter = data.BucketIterator.splits(
        (train_data, test_data), batch_size=args.batch_size, repeat=False)
    vocab_size = len(TEXT.vocab) + 2

    print("[!] Instantiating models...")
    encoder = EncoderRNN(vocab_size, hidden_size, embed_size,
                         n_layers=2, dropout=0.5, use_cuda=use_cuda)
    decoder = DecoderRNN(embed_size, hidden_size, vocab_size,
                         n_layers=2, dropout=0.5, use_cuda=use_cuda)
    vae = VAE(encoder, decoder)
    optimizer = optim.Adam(vae.parameters(), lr=args.lr)
    if use_cuda:
        print("[!] Using CUDA...")
        vae.cuda()

    best_val_loss = None
    for e in range(1, args.epochs + 1):
        train(e, vae, optimizer, train_iter, vocab_size, kld_weight,
              temperature, args.grad_clip, use_cuda, TEXT)
        val_loss = evaluate(vae, test_iter, vocab_size, kld_weight, use_cuda)
        print("[Epoch: %d] val_loss:%5.3f | val_pp:%5.2fS"
              % (e, val_loss, math.exp(val_loss)))
        # Save the model if the validation loss is the best seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir("snapshot"):
                os.makedirs("snapshot")
            torch.save(vae.state_dict(), './snapshot/vae_{}.pt'.format(e))
            best_val_loss = val_loss
def __init__(self, wm, input_length, batch_size, hidden_size, bidirectional,
             embedding_size, n_parameter, m_parameter, learning_rate, clip,
             alpha, beta, pre_trained_file=None, decoder_type="original",
             teacher_forcing_ratio=0.7):
    """Build (or resume) an encoder and a selectable attention decoder.

    decoder_type -- "original" (Luong-style AttnDecoderRNN) or "bahdanau"
    pre_trained_file -- when given, states are restored via
                        self.load_model_state instead of fresh construction
    NOTE: `input_length` is accepted for interface compatibility but unused.
    """
    self.batch_size = batch_size
    self.hidden_size = hidden_size
    self.embedding_size = embedding_size
    self.bidirectional = bidirectional
    self.n_parameter = n_parameter
    self.m_parameter = m_parameter
    self.learning_rate = learning_rate
    self.wm = wm
    self.clip = clip
    self.alpha = alpha
    self.beta = beta
    self.loss_list = []  # per-iteration training losses
    self.teacher_forcing_ratio = teacher_forcing_ratio
    self.decoder_type = decoder_type

    # `is None` replaces the non-idiomatic `== None` equality test.
    if pre_trained_file is None:
        # Define encoder and decoder.
        self.encoder = EncoderRNN(self.wm, self.embedding_size, hidden_size,
                                  bidirectional, n_layers=1)
        # Select decoder type.
        if self.decoder_type == "original":
            self.decoder = AttnDecoderRNN("general", self.hidden_size, 10)
        elif self.decoder_type == "bahdanau":
            self.decoder = BahdanauAttnDecoderRNN(
                self.embedding_size, hidden_size, 10,
                discrete_representation=True)
        # Define the optimizers of encoder and decoder.
        self.enc_optimizer = optim.Adam(self.encoder.parameters(),
                                        lr=self.learning_rate)
        self.dec_optimizer = optim.Adam(self.decoder.parameters(),
                                        lr=self.learning_rate)
        self.start = 1
    else:
        self.resume_training = True
        (self.encoder, self.decoder, self.enc_optimizer, self.dec_optimizer,
         self.start) = self.load_model_state(pre_trained_file)

    # `device` is expected to be a module-level global — TODO confirm.
    self.decoder = self.decoder.to(device)
    self.encoder = self.encoder.to(device)
def trainIters(n_iteration, learning_rate, batch_size, n_layers, hidden_size,
               attn_model='dot', decoder_learning_ratio=5.0):
    """Prepare random training batches and train encoder/decoder over them.

    Fixes vs. original: (1) one sampled batch of pairs was reused for every
    iteration — a fresh random batch is now drawn per iteration; (2) the
    `attn_model` parameter was unconditionally overwritten with 'dot' — the
    default keeps the old behavior while honoring the argument; (3) removed
    the unused `checkpoint = None`.
    """
    voc, pairs = loadPrepareData()
    # One independent random batch per training iteration.
    training_batches = [
        batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
        for _ in range(n_iteration)
    ]

    # Models.
    print('Building encoder and decoder ...')
    encoder = EncoderRNN(voc, hidden_size, n_layers)
    decoder = LuongAttnDecoderRNN(voc, attn_model, hidden_size, n_layers)

    # Optimizers (decoder learns faster by `decoder_learning_ratio`).
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)

    # Initialize bookkeeping.
    print('Initializing ...')
    start_iteration = 1
    perplexity = []
    print_loss = 0

    for iteration in tqdm(range(start_iteration, n_iteration + 1)):
        training_batch = training_batches[iteration - 1]
        input_variable, lengths, target_variable, mask, max_target_len = training_batch
def eval():
    """Greedy-decode every batch from ./test/test.txt with a checkpointed model.

    Returns the list of decoded token sequences.
    Fixes vs. original: `is None` instead of `== None`, removed the
    unreachable `return False` after the `raise`, translated comments.
    NOTE: the name shadows the builtin `eval`; kept for interface
    compatibility with existing callers.
    """
    parameter = Config()
    # Load hyperparameters from the config object.
    save_dir = parameter.save_dir
    loadFilename = parameter.model_ckpt
    pretrained_embedding_path = parameter.pretrained_embedding_path
    dropout = parameter.dropout
    hidden_size = parameter.hidden_size
    num_layers = parameter.num_layers
    attn_model = parameter.method
    max_input_length = parameter.max_input_length
    max_generate_length = parameter.max_generate_length
    embedding_dim = parameter.embedding_dim

    # Load vocabulary and pre-trained embedding weights.
    voc = read_voc_file('./data/voc.pkl')
    embedding = get_weight(voc, pretrained_embedding_path)
    # Input batches.
    inputs = get_input_line('./test/test.txt')
    input_batches, lengths = get_batch_id(inputs)

    encoder = EncoderRNN(hidden_size, embedding, num_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  len(voc), num_layers, dropout)
    if loadFilename is None:
        raise ValueError('model_ckpt is None.')
    checkpoint = torch.load(loadFilename, map_location=lambda s, l: s)
    print(checkpoint['plt'])
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])

    answer = []
    with torch.no_grad():
        encoder.to(device)
        decoder.to(device)
        # Switch to evaluation mode (disables dropout).
        encoder.eval()
        decoder.eval()
        search = GreedySearchDecoder(encoder, decoder)
        for input_batch in input_batches:
            token, score = generate(input_batch, search, GO_ID, EOS_ID, device)
            print(token)
            answer.append(token)
    print(answer)
    return answer
def main():
    """Train the eng-fra translator briefly and visualize one attention map."""
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    print(random.choice(pairs))

    device = torch.device(args.device)
    print('device : {}'.format(device))

    encoder = EncoderRNN(input_lang.n_words, args.hidden_size).to(device)
    decoder = AttnDecoderRNN(args.hidden_size, output_lang.n_words,
                             dropout_p=0.1).to(device)
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=args.lr)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=args.lr)

    model = Translator(input_lang, output_lang, encoder, decoder,
                       encoder_optimizer, decoder_optimizer)
    trainIters(model, pairs, n_iters=10000, print_every=100, plot_every=100)

    evaluateRandomly(model, pairs)
    output_words, attentions = evaluate(model, "je suis trop froid .")
    plt.matshow(attentions.numpy())
def main():
    """Restore the 100k-iteration checkpoint and run random evaluations."""
    nIters = 100000
    loadFilename = os.path.join('checkpoints',
                                '{}_{}.tar'.format(nIters, 'checkpoint'))
    checkpoint = torch.load(loadFilename)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)

    # If loading a model trained on GPU to CPU
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']

    hidden_size = 256
    encoder = EncoderRNN(input_lang.n_words, hidden_size, device).to(device)
    decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, device,
                             dropout_p=0.1).to(device)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

    # Restore optimizer states and vocabularies from the checkpoint.
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    input_lang.__dict__ = checkpoint['input_lang']
    output_lang.__dict__ = checkpoint['output_lang']

    evaluateRandomly(device, pairs, encoder, decoder, input_lang, output_lang)
def loadmodel(model_file, wm, hidden_size, bidirectional):
    """
    Loads the trained model, returns the encoder and decoder for inferencing.
    We initialize 'empty models' in which we will load our parameters.
    It is important that the hyperparameters are the same as used for training.

    BUG FIX: the original extracted the saved state dicts from the
    checkpoint but never loaded them, so it returned randomly initialized
    models instead of the trained ones.

    Keyword arguments:
    model_file - string with the model location
    wm - embedding matrix
    hidden_size - hidden size
    bidirectional - whether we use bidirectional GRU layers
    """
    model = torch.load(model_file, map_location=lambda storage, loc: storage)
    epoch = model['epoch']  # read for parity with the checkpoint format
    loss = model['loss']

    encoder = EncoderRNN(wm, 300, hidden_size, bidirectional)
    decoder = AttnDecoderRNN(hidden_size, 10)
    encoder.load_state_dict(model['encoder_state_dict'])
    decoder.load_state_dict(model['decoder_state_dict'])

    # Optimizers are rebuilt (and restored) even though only the models are
    # returned, matching the original's construction.
    enc_optimizer = optim.Adam(encoder.parameters(), lr=0.0001)
    dec_optimizer = optim.Adam(decoder.parameters(), lr=0.0001)
    enc_optimizer.load_state_dict(model['encoder_optimizer_state_dict'])
    dec_optimizer.load_state_dict(model['decoder_optimizer_state_dict'])

    return encoder, decoder
def main():
    """Train the eng-fra attention model for 100k iterations."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)

    hidden_size = 256
    enc = EncoderRNN(input_lang.n_words, hidden_size, device).to(device)
    dec = AttnDecoderRNN(hidden_size, output_lang.n_words, device,
                         dropout_p=0.1).to(device)
    trainIters(device, pairs, input_lang, output_lang, enc, dec,
               100000, print_every=5000)