# build model
from _model import EncoderRNN, DecoderRNN, Seq2seq
from torch import optim
import torch.nn as nn

model = Seq2seq(
    embedding=embedding,
    encoder=EncoderRNN(embedding.shape[0], embedding.shape[1],
                       amp=1, n_layers=2, direction=2, dropout=0.5),
    decoder=DecoderRNN(embedding.shape[1], embedding.shape[0],
                       amp=1, n_layers=2, direction=1, dropout=0.5)
).to(device)
optimizer = optim.Adam(model.parameters())
criterion = nn.NLLLoss(ignore_index=PAD_token)

# define train, evaluate, predict
# train
import random

def train(input_tensor, target_tensor):
    model.train()
    batch_size = input_tensor.size(0)
    encoder_hidden = model.encoder.initHidden(batch_size, device)
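# The body of train() is truncated above. The standalone function below is a
# minimal sketch of how one teacher-forced training step could be completed.
# It is an assumption, not the code from _model: it presumes the seq2seq model
# can be called as model(input_tensor, target_tensor, encoder_hidden) and
# returns log-probabilities of shape (batch, target_len, vocab_size); adjust
# the call to the real Seq2seq.forward signature.
def train_step_sketch(model, optimizer, criterion, input_tensor, target_tensor, device):
    model.train()
    batch_size = input_tensor.size(0)
    encoder_hidden = model.encoder.initHidden(batch_size, device)

    optimizer.zero_grad()
    # Assumed call signature; the actual Seq2seq.forward may differ.
    decoder_outputs = model(input_tensor, target_tensor, encoder_hidden)
    # NLLLoss expects (N, C) log-probabilities and (N,) target indices;
    # PAD_token positions are ignored via ignore_index above.
    loss = criterion(decoder_outputs.reshape(-1, decoder_outputs.size(-1)),
                     target_tensor.reshape(-1))
    loss.backward()
    # Gradient clipping is a common safeguard for RNN seq2seq training.
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
    optimizer.step()
    return loss.item()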
encoder_n_layers = 1
encoder_direction = 2
encoder_dropout = 0.0
attn_size = encoder_direction * encoder_hidden_size
decoder_input_size = embedding.shape[1]
decoder_hidden_size = 256 * 2
decoder_output_size = embedding.shape[0]
decoder_n_layers = 1
decoder_direction = 1
decoder_dropout = 0.0

model = Seq2seq(
    embedding=embedding,
    encoder=EncoderRNN(encoder_input_size, encoder_hidden_size,
                       encoder_n_layers, encoder_direction, encoder_dropout),
    decoder=DecoderRNN(decoder_input_size, decoder_hidden_size, decoder_output_size,
                       attn_size, decoder_n_layers, decoder_direction, decoder_dropout),
    attention=Attention(encoder_direction * encoder_hidden_size,
                        decoder_n_layers * decoder_direction * decoder_hidden_size),
    dropout=0.5
).to(device)

# load model
print('loading pretrained model...')
checkpoint = torch.load(pretrained_ckpt)
model.load_state_dict(checkpoint['model_state_dict'])
print('done')

# define predict
import numpy as np

def predict(input_tensor):
    model.eval()
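# predict() is cut off above. The function below is a greedy-decoding sketch of
# how inference could proceed; the encoder/decoder call signatures are assumed
# (encoder -> (outputs, hidden), decoder -> (log_probs, hidden)) and may not
# match the classes in _model. SOS_token is assumed to be defined alongside
# PAD_token, and max_length is a placeholder, not the project's setting.
def predict_sketch(model, input_tensor, device, max_length=50):
    model.eval()
    with torch.no_grad():
        batch_size = input_tensor.size(0)
        encoder_hidden = model.encoder.initHidden(batch_size, device)
        # Assumed encoder signature: returns (encoder_outputs, encoder_hidden).
        encoder_outputs, encoder_hidden = model.encoder(input_tensor, encoder_hidden)

        # Start every sequence with the assumed SOS_token and decode greedily.
        decoder_input = torch.full((batch_size, 1), SOS_token, dtype=torch.long, device=device)
        decoder_hidden = encoder_hidden
        decoded_ids = []
        for _ in range(max_length):
            # Assumed decoder signature: returns (log_probs, decoder_hidden).
            log_probs, decoder_hidden = model.decoder(decoder_input, decoder_hidden, encoder_outputs)
            decoder_input = log_probs.argmax(dim=-1).view(batch_size, 1)
            decoded_ids.append(decoder_input)
        return torch.cat(decoded_ids, dim=1)  # (batch, max_length) token ids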
def main(args):
    print("Data preprocessing ...")
    if args.data_mode == "jieba":
        vocab = Vocabulary(args.data_set, args.vec_min)
        tokenizer = None
        data_transformer = DRCDDataset(args.data_set, args.data_sel, args.data_mode,
                                       args.with_ans, vocab, tokenizer)
        data_loader = DataLoader(data_transformer, batch_size=args.batch_size,
                                 shuffle=True, collate_fn=create_jieba_batch)
        embedding = build_emb(args.save_dir, args.vec_path, vocab, args.emb_size, args.loadEmbedding)
        embedding = embedding.to(device)
    elif args.data_mode == "bert":
        vocab = None
        tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
        data_transformer = DRCDDataset(args.data_set, args.data_sel, args.data_mode,
                                       args.with_ans, vocab, tokenizer)
        data_loader = DataLoader(data_transformer, batch_size=args.batch_size,
                                 shuffle=True, collate_fn=create_bert_batch)
        embedding = None

    print('Building encoder and decoder ...')
    if args.data_mode == "jieba":
        encoder = EncoderRNN(embedding, args.hidden_size, args.transfer_layer,
                             args.encoder_n_layers, args.dropout)
        vocab_size = vocab.num_words
    elif args.data_mode == "bert":
        encoder = BertEncoder(args.transfer_layer)
        embedding = encoder.embedding
        vocab_size = encoder.vocab_size
    if args.attn_model == 'none':
        decoder = DecoderRNN(embedding, args.hidden_size, vocab_size,
                             args.decoder_n_layers, args.dropout)
    else:
        decoder = LuongAttnDecoderRNN(args.attn_model, embedding, args.hidden_size,
                                      vocab_size, args.decoder_n_layers, args.dropout)

    # Load pretrained weights if checkpoint paths are provided
    if args.loadEncoder:
        print("Loading pretrained Encoder ...")
        checkpoint = torch.load(args.loadEncoder)
        prencoder_sd = checkpoint['en']
        encoder_sd = encoder.state_dict()
        # Keep only the pretrained entries whose keys exist in the current encoder
        prencoder_sd = {k: v for k, v in prencoder_sd.items() if k in encoder_sd}
        encoder_sd.update(prencoder_sd)
        encoder.load_state_dict(encoder_sd)
        if args.fixed_enc:
            # Freeze the encoder except for its output projection
            for param in encoder.parameters():
                param.requires_grad = False
            encoder.out.weight.requires_grad = True
            encoder.out.bias.requires_grad = True
    if args.loadDecoder:
        print("Loading pretrained Decoder ...")
        checkpoint = torch.load(args.loadDecoder)
        decoder_sd = checkpoint['de']
        decoder.load_state_dict(decoder_sd)
    if args.loadFilename:
        print("Loading pretrained Model ...")
        checkpoint = torch.load(args.loadFilename)
        encoder_sd = checkpoint['en']
        encoder.load_state_dict(encoder_sd)
        decoder_sd = checkpoint['de']
        decoder.load_state_dict(decoder_sd)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    if args.training_flag:
        print('Building optimizers ...')
        if args.fixed_enc:
            encoder_optimizer = optim.Adam(filter(lambda p: p.requires_grad, encoder.parameters()),
                                           lr=args.encoder_op_lr)
        else:
            encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.encoder_op_lr)
        decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.decoder_op_lr)
        if args.loadEncoder:
            checkpoint = torch.load(args.loadEncoder)
            prencoder_optimizer_sd = checkpoint['en_opt']
            encoder_optimizer_sd = encoder_optimizer.state_dict()
            # Keep only the pretrained optimizer entries the current optimizer also has
            prencoder_optimizer_sd = {k: v for k, v in prencoder_optimizer_sd.items()
                                      if k in encoder_optimizer_sd}
            encoder_optimizer_sd.update(prencoder_optimizer_sd)
            encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        if args.loadDecoder:
            checkpoint = torch.load(args.loadDecoder)
            decoder_optimizer_sd = checkpoint['de_opt']
            decoder_optimizer.load_state_dict(decoder_optimizer_sd)
        if args.loadFilename:
            checkpoint = torch.load(args.loadFilename)
            prencoder_optimizer_sd = checkpoint['en_opt']
            encoder_optimizer_sd = encoder_optimizer.state_dict()
            # Keep only the pretrained optimizer entries the current optimizer also has
            prencoder_optimizer_sd = {k: v for k, v in prencoder_optimizer_sd.items()
                                      if k in encoder_optimizer_sd}
            encoder_optimizer_sd.update(prencoder_optimizer_sd)
            encoder_optimizer.load_state_dict(encoder_optimizer_sd)
            decoder_optimizer_sd = checkpoint['de_opt']
            decoder_optimizer.load_state_dict(decoder_optimizer_sd)

        # Move any loaded optimizer state tensors onto the training device
        for state in encoder_optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)
        for state in decoder_optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

        print("Starting training!")
        trainEpochs(args.save_dir, args.data_mode, data_loader, vocab, tokenizer, args.attn_model,
                    encoder, decoder, encoder_optimizer, decoder_optimizer,
                    args.it_percent, args.checkpoint_epoch, args.num_epochs,
                    args.teacher_forcing_ratio, args.data_sel)

    # Set dropout layers to eval mode
    encoder.eval()
    decoder.eval()

    if args.dev_flag:
        dev_transformer = DRCDDataset(args.dev_set, args.data_sel, args.data_mode,
                                      args.with_ans, vocab, tokenizer)
        if args.data_mode == "jieba":
            dev_loader = DataLoader(dev_transformer, batch_size=args.batch_size,
                                    shuffle=True, collate_fn=create_jieba_batch)
        elif args.data_mode == "bert":
            dev_loader = DataLoader(dev_transformer, batch_size=args.batch_size,
                                    shuffle=True, collate_fn=create_bert_batch)
        print("Starting evaluation!")
        test_evaluation(args.eval_flag, args.data_mode, args.data_sel, dev_loader, args.attn_model,
                        vocab, tokenizer, encoder, decoder, args.max_length,
                        args.save_dir, args.checkpoint_epoch)

    if args.eval_flag:
        eval_transformer = DRCDDataset(args.eval_set, args.data_sel, args.data_mode,
                                       args.with_ans, vocab, tokenizer)
        if args.data_mode == "jieba":
            eval_loader = DataLoader(eval_transformer, batch_size=args.batch_size,
                                     shuffle=False, collate_fn=create_jieba_batch)
        elif args.data_mode == "bert":
            eval_loader = DataLoader(eval_transformer, batch_size=args.batch_size,
                                     shuffle=False, collate_fn=create_bert_batch)
        print("Starting evaluation!")
        test_evaluation(args.eval_flag, args.data_mode, args.data_sel, eval_loader, args.attn_model,
                        vocab, tokenizer, encoder, decoder, args.max_length,
                        args.save_dir, args.checkpoint_epoch)
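# main() expects an argparse-style namespace. The entry point below is a minimal
# sketch of how it could be driven from the command line; only a subset of the
# args.* fields referenced above is registered, and every default value is a
# placeholder, not the project's actual configuration.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Seq2seq question generation on DRCD")
    parser.add_argument("--data_mode", choices=["jieba", "bert"], default="jieba")
    parser.add_argument("--data_set", type=str, required=True)
    parser.add_argument("--dev_set", type=str, default=None)
    parser.add_argument("--eval_set", type=str, default=None)
    parser.add_argument("--batch_size", type=int, default=32)    # placeholder default
    parser.add_argument("--hidden_size", type=int, default=512)  # placeholder default
    parser.add_argument("--num_epochs", type=int, default=10)    # placeholder default
    parser.add_argument("--attn_model", type=str, default="dot")
    parser.add_argument("--training_flag", action="store_true")
    parser.add_argument("--dev_flag", action="store_true")
    parser.add_argument("--eval_flag", action="store_true")
    # The remaining args.* fields used in main() (save_dir, vec_path, dropout,
    # loadEncoder, loadDecoder, loadFilename, ...) must be registered the same way.
    main(parser.parse_args())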