def example_test():
    encoder_test = EncoderRNN(10, 10, 2, max_length=3)
    decoder_test = AttnDecoderRNN('general', 10, 10, 2)
    print(encoder_test)
    print(decoder_test)

    encoder_hidden = encoder_test.init_hidden(batch_size=4)
    # word_input = Variable(torch.LongTensor([[1, 2, 3]]))
    word_input = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))
    if USE_CUDA:
        encoder_test.cuda()
        word_input = word_input.cuda()
        encoder_hidden = encoder_hidden.cuda()
    encoder_outputs, encoder_hidden = encoder_test(word_input, encoder_hidden)
    # S B H, L B H
    print(encoder_outputs.shape, encoder_hidden.shape)

    # word_inputs = Variable(torch.LongTensor([[1, 2, 3]]))
    word_inputs = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))
    decoder_attns = torch.zeros(4, 3, 3)
    decoder_hidden = encoder_hidden
    decoder_context = Variable(torch.zeros(4, decoder_test.hidden_size))
    if USE_CUDA:
        decoder_test.cuda()
        word_inputs = word_inputs.cuda()
        decoder_context = decoder_context.cuda()

    for i in range(3):
        decoder_output, decoder_context, decoder_hidden, decoder_attn = decoder_test(
            word_inputs[:, i], decoder_context, decoder_hidden, encoder_outputs)
        print(decoder_output.size(), decoder_hidden.size(), decoder_attn.size())
        decoder_attns[:, i, :] = decoder_attn.squeeze(1).cpu().data
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True, dir='data', filter=False)

    hidden_size = 512
    batch_size = 64
    iters = 50000

    # encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    encoder = EncoderRNN(input_lang.n_words, hidden_size)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        encoder = nn.DataParallel(encoder)
        attn_decoder = nn.DataParallel(attn_decoder)
    encoder = encoder.to(device)
    attn_decoder = attn_decoder.to(device)
    # attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

    trainIters(device, pairs, input_lang, output_lang, encoder, attn_decoder,
               batch_size, iters, print_every=250)
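# A minimal illustrative sketch (not part of the snippet above) of what the
# nn.DataParallel wrapping does: the module is replicated on every visible GPU
# and the batch dimension of the input is scattered across the replicas. The
# toy Linear module and the sizes here are assumptions used only for illustration.
import torch
import torch.nn as nn

toy = nn.Linear(16, 4)
if torch.cuda.device_count() > 1:
    toy = nn.DataParallel(toy)  # replicate the module across GPUs
toy = toy.to("cuda" if torch.cuda.is_available() else "cpu")

x = torch.randn(64, 16, device=next(toy.parameters()).device)
y = toy(x)          # dim 0 of x is split across replicas, outputs gathered back
print(y.shape)      # torch.Size([64, 4])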
def __init__(self, wm, input_length, batch_size, hidden_size, bidirectional,
             embedding_size, n_parameter, m_parameter, learning_rate, clip,
             alpha, beta, pre_trained_file=None):
    self.batch_size = batch_size
    self.hidden_size = hidden_size
    self.embedding_size = embedding_size
    self.bidirectional = bidirectional
    self.n_parameter = n_parameter
    self.m_parameter = m_parameter
    self.learning_rate = learning_rate
    self.wm = wm
    self.clip = clip
    self.alpha = alpha
    self.beta = beta

    if pre_trained_file is None:
        self.encoder = EncoderRNN(self.wm, self.embedding_size,
                                  hidden_size, bidirectional)
        self.decoder = AttnDecoderRNN(self.hidden_size, 10)
        self.enc_optimizer = optim.Adam(self.encoder.parameters(),
                                        lr=self.learning_rate)
        self.dec_optimizer = optim.Adam(self.decoder.parameters(),
                                        lr=self.learning_rate)
        self.start = 0
    else:
        self.resume_training = True
        self.encoder, self.decoder, self.enc_optimizer, self.dec_optimizer, \
            self.start = self.load_model_state(pre_trained_file)

    self.decoder = self.decoder.to(device)
    self.encoder = self.encoder.to(device)
def train(args, data, bidaf):
    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    utte_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    span_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    decoder = AttnDecoderRNN(args, data.WORD.vocab.vectors).to(device)

    # Each module gets its own optimizer.
    utte_encoder_optimizer = optim.SGD(utte_encoder.parameters(), lr=args.learning_rate)
    span_encoder_optimizer = optim.SGD(span_encoder.parameters(), lr=args.learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.NLLLoss()

    n_iters = 10 * len(data.train.examples)
    print_loss_total = 0
    plot_loss_total = []
    print_every = 10000
    start = time.time()  # timer for progress reporting

    for iter in range(1, n_iters + 1):
        # Cycle through the training examples.
        example = data.train.examples[(iter - 1) % len(data.train.examples)]
        input_tensor = example.q_word
        target_tensor = example.ans
        span = example.span

        loss = train_each(input_tensor, target_tensor, utte_encoder, span_encoder,
                          decoder, utte_encoder_optimizer, span_encoder_optimizer,
                          decoder_optimizer, criterion)
        print_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))
def load_model():
    encoder = EncoderRNN(human_n_chars, hidden_size, n_layers)
    decoder = AttnDecoderRNN(attn_model, hidden_size, machine_n_chars,
                             n_layers, dropout_p=dropout_p)
    encoder.load_state_dict(t.load('encoder.pth'))
    decoder.load_state_dict(t.load('decoder.pth'))
    return encoder, decoder
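# A hedged sketch of the save-side counterpart that would produce the
# 'encoder.pth' / 'decoder.pth' files loaded above; the name save_model is an
# assumption, but torch.save on a module's state_dict is the standard PyTorch
# pattern for weight-only checkpoints.
import torch as t

def save_model(encoder, decoder):
    t.save(encoder.state_dict(), 'encoder.pth')  # weights only, no optimizer state
    t.save(decoder.state_dict(), 'decoder.pth')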
def model_initialization(encoder_style, decoder_style, langs, embedding_size,
                         learning_rate, use_model):
    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Choose encoder style
    # TODO: Set up a choice for hierarchical or not
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    elif encoder_style == 'BiLSTMMax':
        encoder = EncoderBiLSTMMaxPooling(embedding_size, emb)
    elif encoder_style == 'HierarchicalBiLSTM':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalBiLSTM(**encoder_args)
    elif encoder_style == 'HierarchicalLIN':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalLIN(**encoder_args)
    else:
        # initialize hierarchical encoder rnn (both global and local)
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalEncoderRNN(**encoder_args)

    # Choose decoder style and training function
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size, langs['summary'].n_words)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) + list(decoder.parameters()),
                                   lr=learning_rate, lr_decay=0, weight_decay=0)
    # loss_optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()),
    #                             lr=learning_rate)

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        if not use_cuda:
            loss_optimizer.load_state_dict(
                torch.load(use_model[2], map_location=lambda storage, loc: storage))
        else:
            loss_optimizer.load_state_dict(torch.load(use_model[2]))

    return encoder, decoder, loss_optimizer, train_func
def load_model_state(self, model_file):
    print("Resuming training from a given model...")
    model = torch.load(model_file, map_location=lambda storage, loc: storage)
    epoch = model['epoch']
    encoder_state_dict = model['encoder_state_dict']
    encoder_optimizer_state_dict = model['encoder_optimizer_state_dict']
    decoder_state_dict = model['decoder_state_dict']
    decoder_optimizer_state_dict = model['decoder_optimizer_state_dict']
    loss = model['loss']

    encoder = EncoderRNN(self.wm, self.embedding_size,
                         self.hidden_size, self.bidirectional)
    decoder = AttnDecoderRNN("general", self.hidden_size, 10)
    enc_optimizer = optim.Adam(encoder.parameters(), lr=self.learning_rate)
    dec_optimizer = optim.Adam(decoder.parameters(), lr=self.learning_rate)

    # Restore the saved parameters and optimizer state into the fresh modules.
    encoder.load_state_dict(encoder_state_dict)
    decoder.load_state_dict(decoder_state_dict)
    enc_optimizer.load_state_dict(encoder_optimizer_state_dict)
    dec_optimizer.load_state_dict(decoder_optimizer_state_dict)

    return encoder, decoder, enc_optimizer, dec_optimizer, epoch
def __init__(self, config, dataset):
    self.config = config
    self.n_epochs = config.n_epochs

    self.encoder = EncoderRNN(n_dict=dataset.source.n_words, config=config)
    self.decoder = AttnDecoderRNN(n_dict=dataset.target.n_words, config=config)
    self.encoder_optimizer = config.optimizier(self.encoder.parameters(),
                                               lr=config.learning_rate)
    self.decoder_optimizer = config.optimizier(self.decoder.parameters(),
                                               lr=config.learning_rate)
    self.criterion = nn.NLLLoss()

    self.is_plot = config.is_plot
    self.clip_value = config.clip_value
    self.losses = []

    if self.config.USE_CUDA:
        self.encoder.cuda(self.config.gpu_id)
        self.decoder.cuda(self.config.gpu_id)
def main():
    input_file = sys.argv[1]
    vocab = build.build_vocabulary(input_file)
    pairs = [tensors_from_pair(vocab, x.split("\t")) for x in open(input_file)]
    pairs = [(x, y) for x, y in pairs if x.size(0) <= MAX_LENGTH]

    hidden_size = 256
    encoder1 = EncoderRNN(vocab.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, vocab.n_words, dropout_p=0.1).to(device)
    train_iter(pairs, encoder1, attn_decoder1, 75000, print_every=100)
def create_models(config, in_words, out_words):
    logging.info('Creating models...')
    encoder = EncoderRNN(in_words, int(config['hidden_size']),
                         num_layers=int(config['num_layers'])).cuda()
    decoder = AttnDecoderRNN(int(config['hidden_size']), out_words,
                             num_layers=int(config['num_layers']),
                             dropout_p=float(config['dropout_p'])).cuda()
    return encoder, decoder
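# A hedged usage sketch for create_models. The config values and vocabulary
# sizes below are illustrative assumptions; they are given as strings because
# the function casts them itself (as when read from an INI-style config file).
# Note that create_models calls .cuda(), so a CUDA device is required.
config = {'hidden_size': '256', 'num_layers': '2', 'dropout_p': '0.1'}
encoder, decoder = create_models(config, in_words=10000, out_words=12000)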
def main():
    input_lang, output_lang, pairs = prepare_data('ques', 'ans', '../debug.json', reverse=False)
    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words,
                                  dropout_p=0.1, max_length=1000).to(device)

    rate = 0.9
    pairs_train, pairs_test = pairs[0:int(len(pairs) * rate)], pairs[int(len(pairs) * rate):]

    encoder.load_state_dict(torch.load('model/encoder-0.model'))
    encoder.eval()
    attn_decoder.load_state_dict(torch.load('model/decoder-0.model'))
    attn_decoder.eval()

    evaluate_all(encoder, attn_decoder, pairs_test, max_length=1000,
                 input_lang=input_lang, output_lang=output_lang, n=len(pairs_test))
    # show_plot(loss_history)
    print('done test')
def loadmodel(model_file, wm, hidden_size, bidirectional):
    """Load the trained model and return the encoder and decoder for inference.

    We initialize 'empty' models into which the saved parameters are loaded.
    The hyperparameters must be the same as those used for training.

    Keyword arguments:
    model_file -- string with the model location
    wm -- embedding matrix
    hidden_size -- hidden size
    bidirectional -- whether we use bidirectional GRU layers
    """
    model = torch.load(model_file, map_location=lambda storage, loc: storage)
    epoch = model['epoch']
    encoder_state_dict = model['encoder_state_dict']
    encoder_optimizer_state_dict = model['encoder_optimizer_state_dict']
    decoder_state_dict = model['decoder_state_dict']
    decoder_optimizer_state_dict = model['decoder_optimizer_state_dict']
    loss = model['loss']

    encoder = EncoderRNN(wm, 300, hidden_size, bidirectional)
    decoder = AttnDecoderRNN(hidden_size, 10)
    enc_optimizer = optim.Adam(encoder.parameters(), lr=0.0001)
    dec_optimizer = optim.Adam(decoder.parameters(), lr=0.0001)

    # Load the saved parameters into the freshly built modules.
    encoder.load_state_dict(encoder_state_dict)
    decoder.load_state_dict(decoder_state_dict)

    return encoder, decoder
def main():
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    print(random.choice(pairs))

    device = torch.device(args.device)
    print('device : {}'.format(device))

    encoder = EncoderRNN(input_lang.n_words, args.hidden_size).to(device)
    decoder = AttnDecoderRNN(args.hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=args.lr)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=args.lr)

    model = Translator(input_lang, output_lang, encoder, decoder,
                       encoder_optimizer, decoder_optimizer)
    trainIters(model, pairs, n_iters=10000, print_every=100, plot_every=100)

    evaluateRandomly(model, pairs)
    output_words, attentions = evaluate(model, "je suis trop froid .")
    plt.matshow(attentions.numpy())
def main():
    nIters = 100000
    loadFilename = os.path.join('checkpoints', '{}_{}.tar'.format(nIters, 'checkpoint'))
    checkpoint = torch.load(loadFilename)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)

    # If loading a model trained on GPU to CPU
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']

    hidden_size = 256
    encoder = EncoderRNN(input_lang.n_words, hidden_size, device).to(device)
    decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, device, dropout_p=0.1).to(device)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    input_lang.__dict__ = checkpoint['input_lang']
    output_lang.__dict__ = checkpoint['output_lang']

    evaluateRandomly(device, pairs, encoder, decoder, input_lang, output_lang)
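# A hedged sketch (an assumption about the training script, which is not shown
# here) of how a checkpoint with the keys read above ('en', 'de', 'en_opt',
# 'de_opt', 'input_lang', 'output_lang') could have been written.
import os
import torch

def save_checkpoint_sketch(iteration, encoder, decoder, encoder_optimizer,
                           decoder_optimizer, input_lang, output_lang,
                           directory='checkpoints'):
    os.makedirs(directory, exist_ok=True)
    torch.save({
        'en': encoder.state_dict(),
        'de': decoder.state_dict(),
        'en_opt': encoder_optimizer.state_dict(),
        'de_opt': decoder_optimizer.state_dict(),
        'input_lang': input_lang.__dict__,
        'output_lang': output_lang.__dict__,
    }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))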
def inference(sentence, language, MODEL_DIR, codersum):
    encoder = EncoderRNN(language.n_words, config.HIDDEN_SIZE, config.NUM_LAYER,
                         max_length=config.MAX_LENGTH + 1)
    decoder = AttnDecoderRNN(config.ATT_MODEL, config.HIDDEN_SIZE, language.n_words,
                             config.NUM_LAYER, dropout_p=config.DROPOUT)

    encoder_path = os.path.join(MODEL_DIR, "encoder_" + str(codersum) + ".pth")
    decoder_path = os.path.join(MODEL_DIR, "decoder_" + str(codersum) + ".pth")
    encoder.load_state_dict(torch.load(encoder_path, map_location="cpu"))
    decoder.load_state_dict(torch.load(decoder_path, map_location="cpu"))
    encoder.eval()
    decoder.eval()

    batch_size = 1
    input_index = indexes_from_sentence(language, sentence)
    input_index = pad_sentence(input_index)  # pad to fixed length
    input_variable = torch.LongTensor([input_index])

    encoder_hidden, encoder_cell = encoder.init_hidden(batch_size)
    encoder_outputs, encoder_hidden, encoder_cell = encoder(
        input_variable, encoder_hidden, encoder_cell)

    decoder_input = torch.zeros(batch_size, 1).long()
    decoder_context = torch.zeros(batch_size, decoder.hidden_size)
    decoder_hidden = encoder_hidden
    decoder_cell = encoder_cell
    if config.USE_CUDA:
        decoder_input = decoder_input.cuda()
        decoder_context = decoder_context.cuda()

    decoded_words = []

    # Run through decoder
    for di in range(config.MAX_LENGTH):
        decoder_output, decoder_context, decoder_hidden, decoder_cell, _ = decoder(
            decoder_input, decoder_context, decoder_hidden, decoder_cell, encoder_outputs)

        # Choose top word from output
        topv, topi = decoder_output.data.topk(1)
        ni = topi[0][0]
        if ni == 0:
            break
        else:
            decoded_words.append(language.index2word[ni.item()])

        decoder_input = torch.LongTensor([[ni]])
        if config.USE_CUDA:
            decoder_input = decoder_input.cuda()

    return "".join(decoded_words)
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)

    hidden_size = 256
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size, device).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, device,
                                   dropout_p=0.1).to(device)
    trainIters(device, pairs, input_lang, output_lang, encoder1, attn_decoder1,
               100000, print_every=5000)
def main():
    input_lang, output_lang, pairs = prepare_data('eng', 'fra', True)
    print(random.choice(pairs))

    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
    train_iters(input_lang, output_lang, pairs, encoder, attn_decoder, 75000,
                print_every=5000)

    evaluate_randomly(encoder, attn_decoder, input_lang, output_lang, pairs)
    output_words, attentions = evaluate(encoder, attn_decoder, input_lang, output_lang,
                                        "je suis trop froid .")
    mat_plot(attentions.numpy())
def __init__(self, wm, input_length, batch_size, hidden_size, bidirectional,
             embedding_size, n_parameter, m_parameter, learning_rate, clip,
             alpha, beta, pre_trained_file=None, decoder_type="original",
             teacher_forcing_ratio=0.7):
    self.batch_size = batch_size
    self.hidden_size = hidden_size
    self.embedding_size = embedding_size
    self.bidirectional = bidirectional
    self.n_parameter = n_parameter
    self.m_parameter = m_parameter
    self.learning_rate = learning_rate
    self.wm = wm
    self.clip = clip
    self.alpha = alpha
    self.beta = beta
    self.loss_list = []
    self.teacher_forcing_ratio = teacher_forcing_ratio
    self.decoder_type = decoder_type

    if pre_trained_file is None:
        # define encoder and decoder
        self.encoder = EncoderRNN(self.wm, self.embedding_size, hidden_size,
                                  bidirectional, n_layers=1)
        # select decoder type
        if self.decoder_type == "original":
            self.decoder = AttnDecoderRNN("general", self.hidden_size, 10)
        elif self.decoder_type == "bahdanau":
            self.decoder = BahdanauAttnDecoderRNN(self.embedding_size, hidden_size, 10,
                                                  discrete_representation=True)
        # define optimizers for the encoder and decoder
        self.enc_optimizer = optim.Adam(self.encoder.parameters(), lr=self.learning_rate)
        self.dec_optimizer = optim.Adam(self.decoder.parameters(), lr=self.learning_rate)
        self.start = 1
    else:
        self.resume_training = True
        self.encoder, self.decoder, self.enc_optimizer, self.dec_optimizer, \
            self.start = self.load_model_state(pre_trained_file)

    self.decoder = self.decoder.to(device)
    self.encoder = self.encoder.to(device)
def generate_text(model, data_file, output):
    encoder_src = model['encoder_path']
    decoder_src = model['decoder_path']
    encoder_style = None

    # Choose model architecture
    if 'RNN' in encoder_src:
        encoder = EncoderRNN(embedding_size, emb)
        encoder_style = 'RNN'
    elif 'LSTM' in encoder_src:
        encoder = EncoderBiLSTM(embedding_size, emb)
        encoder_style = 'LSTM'
    else:
        encoder = EncoderLIN(embedding_size, emb)
        encoder_style = 'LIN'

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)
    encoder = load_model(encoder, encoder_src)
    decoder = load_model(decoder, decoder_src)

    data_path = os.path.join(data_file['data_dir'], data_file['data_name'] + '.json')
    with open(data_path) as f:
        valuation_data = json.load(f)
    assert valuation_data is not None

    valid_data, _ = loaddata(data_file['data_dir'], data_file['data_name'])
    data_length = len(valid_data)
    valid_data = data2index(valid_data, train_lang)

    text_generator = evaluate(encoder, decoder, valid_data, train_lang['summary'],
                              embedding_size, encoder_style=encoder_style,
                              iter_time=data_length, beam_size=1, verbose=False)

    print('Text generation begins\n', flush=True)
    with open(output, 'w') as f:
        for idx, line in enumerate(text_generator):
            print('Summary generated, No.{}'.format(idx + 1))
            f.write(line + '\n')
def run():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device :", device, "\n")

    # Preprocess data
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    print("Finished Preprocessing\n")

    # Seq2Seq Model
    hidden_size = 256
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

    metadata = (input_lang, output_lang, pairs)
    trainIters(encoder1, attn_decoder1, metadata, n_iters=500,
               print_every=100)  # originally n_iters=75000, print_every=5000

    # Check
    evaluateRandomly(encoder1, attn_decoder1, metadata)

    # Evaluate and Visualize
    output_words, attentions = evaluate(encoder1, attn_decoder1, metadata,
                                        "je suis trop froid .")
    plt.matshow(attentions.numpy())

    evaluateAndShowAttention(encoder1, attn_decoder1, metadata,
                             "elle a cinq ans de moins que moi .")
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata,
                             "elle est trop petit .")
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata,
                             "je ne crains pas de mourir .")
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata,
                             "c est un jeune directeur plein de talent .")
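# A minimal sketch of how evaluateAndShowAttention might render the attention
# matrix with matplotlib; the helper name and the assumption that `attentions`
# has shape (generated tokens x source tokens) follow the usual seq2seq
# tutorial convention and are not taken from the snippet above.
import matplotlib.pyplot as plt

def show_attention_sketch(input_sentence, output_words, attentions):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.numpy(), cmap='bone')
    fig.colorbar(cax)
    # Columns correspond to source tokens, rows to generated tokens.
    src_tokens = input_sentence.split(' ') + ['<EOS>']
    ax.set_xticks(range(len(src_tokens)))
    ax.set_xticklabels(src_tokens, rotation=90)
    ax.set_yticks(range(len(output_words)))
    ax.set_yticklabels(output_words)
    plt.show()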
def load_model_param(language, model_dir):
    encoder = EncoderRNN(language.n_words, config.HIDDEN_SIZE, config.NUM_LAYER,
                         max_length=17 + 1)
    decoder = AttnDecoderRNN(config.ATT_MODEL, config.HIDDEN_SIZE, language.n_words,
                             config.NUM_LAYER, dropout_p=config.DROPOUT)

    encoder_path = os.path.join(config.MODEL_DIR, "encoder.pth")
    decoder_path = os.path.join(config.MODEL_DIR, "decoder.pth")
    encoder.load_state_dict(torch.load(encoder_path, map_location="cpu"))
    decoder.load_state_dict(torch.load(decoder_path, map_location="cpu"))
    encoder.eval()
    decoder.eval()

    return encoder, decoder
def main():
    nIters = 50000
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loadFilename = os.path.join('checkpoints', '{}_{}.tar'.format(nIters, 'checkpoint'))
    checkpoint = torch.load(loadFilename, map_location=device)
    # input_lang, output_lang, pairs = prepareData('eng', 'fra', True, 'data', filter=False)

    # If loading a model trained on GPU to CPU
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']

    hidden_size = 512
    input_lang = Lang('fra')
    output_lang = Lang('eng')
    input_lang.__dict__ = checkpoint['input_lang']
    output_lang.__dict__ = checkpoint['output_lang']

    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0).to(device)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    encoder.eval()
    decoder.eval()
    # encoder_optimizer_sd = checkpoint['en_opt']
    # decoder_optimizer_sd = checkpoint['de_opt']

    _, _, test_pairs = prepareData('eng', 'fra', True, dir='test', filter=False)
    evaluateRandomly(device, test_pairs, encoder, decoder, input_lang, output_lang)
    decode_batch(device, test_pairs, encoder, decoder, input_lang, output_lang,
                 batch_size=64)
def main(args):
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, \
        fine_tune_encoder, data_name, word_map

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    if args.checkpoint is None:
        decoder = AttnDecoderRNN(attention_dim=args.attention_dim,
                                 embed_dim=args.embed_dim,
                                 decoder_dim=args.decoder_dim,
                                 vocab_size=len(vocab),
                                 dropout=args.dropout)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=args.decoder_lr)
        encoder = EncoderCNN()
        encoder.fine_tune(args.fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=args.encoder_lr) if args.fine_tune_encoder else None
    else:
        checkpoint = torch.load(args.checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad, encoder.parameters()),
                lr=args.encoder_lr)

    decoder = decoder.to(device)
    encoder = encoder.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Build data loaders
    train_loader = get_loader(args.image_dir, args.caption_path, vocab, transform,
                              args.batch_size, shuffle=True, num_workers=args.num_workers)
    val_loader = get_loader(args.image_dir_val, args.caption_path_val, vocab, transform,
                            args.batch_size, shuffle=True, num_workers=args.num_workers)

    for epoch in range(args.start_epoch, args.epochs):
        if args.epochs_since_improvement == 20:
            break
        if args.epochs_since_improvement > 0 and args.epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if args.fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            args.epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (args.epochs_since_improvement,))
        else:
            args.epochs_since_improvement = 0

        save_checkpoint(args.data_name, epoch, args.epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer, recent_bleu4, is_best)
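# A minimal sketch (an assumption, not the repository's definition) of the
# adjust_learning_rate helper called above: scale every parameter group's
# learning rate by the given shrink factor.
def adjust_learning_rate(optimizer, shrink_factor):
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * shrink_factor
    print("New learning rate: %f" % (optimizer.param_groups[0]['lr'],))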
def train(train_set, langs, embedding_size=600, learning_rate=0.01, iter_time=10,
          batch_size=32, get_loss=GET_LOSS, save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE, use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    else:
        encoder = EncoderRNN(embedding_size, emb)

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) + list(decoder.parameters()),
                                   lr=learning_rate, lr_decay=0, weight_decay=0)
    # decoder_optimizer = optim.Adagrad(decoder.parameters(), lr=learning_rate,
    #                                   lr_decay=0, weight_decay=0)

    criterion = nn.NLLLoss()

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)
            summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

            # Get the average loss on the sentences
            loss = sentenceloss(rt, re, rm, summary, encoder, decoder,
                                loss_optimizer, criterion, embedding_size,
                                encoder_style)
            total_loss += loss

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(start), iteration, total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
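# A minimal sketch (an assumption, not the repository's definition) of the
# addpaddings helper used above: right-pad every index sequence in the batch
# to the length of the longest one, so the batch forms a rectangular LongTensor.
def addpaddings(batch, pad_idx=0):
    max_len = max(len(seq) for seq in batch)
    return [seq + [pad_idx] * (max_len - len(seq)) for seq in batch]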
                             train_pairs, batch_size=config.BATCH_SIZE, max_length=17)
val_dataloader = PairsLoader(chinese, val_pairs, batch_size=config.BATCH_SIZE,
                             max_length=17)

# Initialize models
encoder = EncoderRNN(chinese.n_words, config.HIDDEN_SIZE, config.NUM_LAYER,
                     max_length=config.MAX_LENGTH + 1)
decoder = AttnDecoderRNN(config.ATT_MODEL, config.HIDDEN_SIZE, chinese.n_words,
                         config.NUM_LAYER, dropout_p=config.DROPOUT)

if config.RESTORE:
    encoder_path = os.path.join(config.MODEL_DIR, "encoder.pth")
    decoder_path = os.path.join(config.MODEL_DIR, "decoder.pth")
    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))

# Move models to GPU
if config.USE_CUDA:
    encoder.cuda()
    decoder.cuda()

# Initialize optimizers and criterion
encoder_src = './models/long4_encoder_2120'
decoder_src = './models/long4_decoder_2120'

encoder_style = None
if 'RNN' == ENCODER_STYLE:
    encoder = EncoderRNN(embedding_size, emb)
    encoder_style = 'RNN'
elif 'LSTM' == ENCODER_STYLE:
    encoder = EncoderBiLSTM(embedding_size, emb)
    encoder_style = 'BiLSTM'
else:
    encoder = EncoderLIN(embedding_size, emb)
    encoder_style = 'LIN'

decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)
encoder = load_model(encoder, encoder_src)
decoder = load_model(decoder, decoder_src)

valid_data, _ = loaddata(file_loc, 'valid')
data_length = len(valid_data)
valid_data = data2index(valid_data, train_lang)

text_generator = evaluate(encoder, decoder, valid_data, train_lang['summary'],
                          embedding_size, encoder_style=encoder_style,
                          iter_time=2, beam_size=1,
def eval_network(fn_in_model):
    # Input
    #  fn_in_model : filename of saved model
    #
    # Create filename for output
    fn_out_res = fn_in_model
    fn_out_res = fn_out_res.replace('.tar', '.txt')
    fn_out_res_test = fn_out_res.replace('/net_', '/res_test_')

    # Load and evaluate the network in filename 'fn_in_model'
    assert os.path.isfile(fn_in_model)
    print(' Checkpoint found...')
    print(' Processing model: ' + fn_in_model)
    print(' Writing to file: ' + fn_out_res_test)
    checkpoint = torch.load(fn_in_model, map_location='cpu')  # evaluate model on CPU
    input_lang = checkpoint['input_lang']
    output_lang = checkpoint['output_lang']
    emb_size = checkpoint['emb_size']
    nlayers = checkpoint['nlayers']
    dropout_p = checkpoint['dropout']
    input_size = input_lang.n_symbols
    output_size = output_lang.n_symbols
    samples_val = checkpoint['episodes_validation']
    disable_memory = checkpoint['disable_memory']
    max_length_eval = checkpoint['max_length_eval']

    if 'args' not in checkpoint or 'disable_attention' not in checkpoint['args']:
        use_attention = True
    else:
        args = checkpoint['args']
        use_attention = not args.disable_attention

    if disable_memory:
        encoder = WrapperEncoderRNN(emb_size, input_size, output_size, nlayers, dropout_p)
    else:
        encoder = MetaNetRNN(emb_size, input_size, output_size, nlayers, dropout_p)
    if use_attention:
        decoder = AttnDecoderRNN(emb_size, output_size, nlayers, dropout_p)
    else:
        decoder = DecoderRNN(emb_size, output_size, nlayers, dropout_p)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    decoder.load_state_dict(checkpoint['decoder_state_dict'])

    with open(fn_out_res_test, 'w') as f_test:
        with redirect_stdout(f_test):
            if 'episode' in checkpoint:
                print(' Loading epoch ' + str(checkpoint['episode']) + ' of '
                      + str(checkpoint['num_episodes']))
            describe_model(encoder)
            describe_model(decoder)

            if eval_type == 'val':
                print('Evaluating VALIDATION performance on pre-generated validation set')
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    samples_val, encoder, decoder, input_lang, output_lang,
                    max_length_eval, verbose=True)
                print('Acc Retrieval (val): ' + str(round(acc_val_retrieval, 1)))
                print('Acc Generalize (val): ' + str(round(acc_val_gen, 1)))
            elif eval_type == 'addprim_jump':
                print('Evaluating TEST performance on SCAN addprim_jump')
                print(' ...support set is just the isolated primitives')
                mybatch = scan_evaluation_prim_only('addprim_jump', 'test',
                                                    input_lang, output_lang)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    [mybatch], encoder, decoder, input_lang, output_lang,
                    max_length_eval, verbose=True)
            elif eval_type == 'length':
                print('Evaluating TEST performance on SCAN length')
                print(' ...over multiple support sets as contributed by the '
                      'pre-generated validation set')
                samples_val = scan_evaluation_val_support('length', 'test',
                                                          input_lang, output_lang,
                                                          samples_val)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    samples_val, encoder, decoder, input_lang, output_lang,
                    max_length_eval, verbose=True)
                print('Acc Retrieval (val): ' + str(round(acc_val_retrieval, 1)))
                print('Acc Generalize (val): ' + str(round(acc_val_gen, 1)))
            elif eval_type == 'template_around_right':
                print('Evaluating TEST performance on the SCAN around right')
                print(' ...with just direction mappings as support set')
                mybatch = scan_evaluation_dir_only('template_around_right', 'test',
                                                   input_lang, output_lang)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    [mybatch], encoder, decoder, input_lang, output_lang,
                    max_length_eval, verbose=True)
            else:
                assert False
print("use cuda: {}".format(args.cuda)) print("seed: {}".format(args.seed)) # Load pairs.pkl and lang.pkl with open(args.data_path + "/pairs.pkl", 'rb') as f: (train_pairs, test_pairs) = pkl.load(f) with open(args.data_path + "/lang.pkl", 'rb') as f: lang_tuple = pkl.load(f) lang = Lang(lang_tuple) # Prepare dataloader for training train_dataiter = DataIter(train_pairs, lang, args.vocab_size, args.batch_size, args.cuda) # Set encoder and decoder encoder = Encoder(args.vocab_size, args.hidden_size) decoder = AttnDecoderRNN(args.attn, args.hidden_size, args.vocab_size, args.n_layers, args.dropout, args.cuda) if args.cuda: encoder = encoder.cuda() decoder = decoder.cuda() # Set optimizer and criterion encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.lr, weight_decay=args.weight_decay) decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.lr, weight_decay=args.weight_decay) encoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau( optimizer=encoder_optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=0.00001) decoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')


hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1)

if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()

trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

######################################################################
#

evaluateRandomly(encoder1, attn_decoder1)

output_words, attentions = evaluate(encoder1, attn_decoder1, "je suis trop froid .")
plt.matshow(attentions.numpy())
for i in range(len(target_batch)):
    # Strip trailing padding zeros, then add SOS (1) and EOS (2) markers.
    while 0 in target_batch[i]:
        target_batch[i].pop(-1)
    target_batch[i] = [1] + target_batch[i] + [2]
    target_batch[i] = torch.from_numpy(
        np.array(target_batch[i], dtype=np.int64)).to(device).view(-1, 1)

hidden_size = 256
MAX_LENGTH = max(max_source_len, max_target_len)
encoder = EncoderRNN(len(describe_dic_i2w), hidden_size).to(device)
embedder = EmbedderRNN(len(code_dic_i2w), len(code_dic_i2w), dropout=0.1).to(device)
attn_decoder = AttnDecoderRNN(hidden_size, len(code_dic_i2w), dropout_p=0.1,
                              max_length=max(max_source_len, max_target_len)).to(device)

node_onehot_t = [[]]  # h
node_onehot_t[0] = node_list_onehot_dict
for i in range(K):
    node_onehot_t.append(node_list_onehot_dict)

# print(len(node_onehot_t[0][method_list[0]]))
# exit()
# encoder_outputs, encoder_hidden = encoder(source_batch[0][0], encoder.initHidden())
# decoder_input = torch.tensor(node_onehot_t[0][method_list[0]], dtype=torch.int64).to(device)
# t = torch.tensor(node_onehot_t[0][method_list[0]], dtype=torch.float32).view(1, 1, -1).to(device)
# print(t)