Example #1
import torch
from torch.utils.data import TensorDataset, DataLoader

# test_X, BATCH_SIZE, embedding, device, PAD_token and pretrained_ckpt are
# assumed to be defined earlier in the script
test_dataset = TensorDataset(torch.from_numpy(test_X))

test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# build model
from _model import EncoderRNN, DecoderRNN, Seq2seq
from torch import optim
import torch.nn as nn

model = Seq2seq(embedding=embedding,
                encoder=EncoderRNN(embedding.shape[0],
                                   embedding.shape[1],
                                   amp=1,
                                   n_layers=2,
                                   direction=2,
                                   dropout=0.5),
                decoder=DecoderRNN(embedding.shape[1],
                                   embedding.shape[0],
                                   amp=1,
                                   n_layers=2,
                                   direction=1,
                                   dropout=0.5)).to(device)
optimizer = optim.Adam(model.parameters())
criterion = nn.NLLLoss(ignore_index=PAD_token)

# load model
print('loading pretrained model...')
checkpoint = torch.load(pretrained_ckpt)
model.load_state_dict(checkpoint['model_state_dict'])
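
The excerpt defines criterion but never uses it; a minimal, self-contained sketch of how NLLLoss with ignore_index=PAD_token is typically applied to decoder log-probabilities follows. The [batch, tgt_len, vocab] output shape and the dummy tensors are assumptions for illustration only.

import torch
import torch.nn as nn

PAD_token = 0                                   # assumed padding index
criterion = nn.NLLLoss(ignore_index=PAD_token)

# dummy shapes to illustrate the flattening: [batch, tgt_len, vocab] vs [batch, tgt_len]
decoder_log_probs = torch.log_softmax(torch.randn(4, 7, 100), dim=-1)
targets = torch.randint(0, 100, (4, 7))

# NLLLoss expects [N, vocab] against [N]; positions equal to PAD_token are ignored
loss = criterion(decoder_log_probs.view(-1, decoder_log_probs.size(-1)),
                 targets.view(-1))
print(loss.item())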
Example #2
# embedding, device, Seq2seq, EncoderRNN, DecoderRNN and Attention are
# defined or imported earlier in the script
encoder_input_size = embedding.shape[1]  # embedding dimension fed to the encoder
encoder_hidden_size = 256
encoder_n_layers = 1
encoder_direction = 2
encoder_dropout = 0.0

attn_size = encoder_direction * encoder_hidden_size
decoder_input_size = embedding.shape[1]
decoder_hidden_size = 256 * 2  # matches the bidirectional encoder output size
decoder_output_size = embedding.shape[0]
decoder_n_layers = 1
decoder_direction = 1
decoder_dropout = 0.0

model = Seq2seq(
    embedding=embedding,
    encoder=EncoderRNN(encoder_input_size, encoder_hidden_size,
                       encoder_n_layers, encoder_direction, encoder_dropout),
    decoder=DecoderRNN(decoder_input_size, decoder_hidden_size,
                       decoder_output_size, attn_size, decoder_n_layers,
                       decoder_direction, decoder_dropout),
    attention=Attention(
        encoder_direction * encoder_hidden_size,
        decoder_n_layers * decoder_direction * decoder_hidden_size),
    dropout=0.5).to(device)

# load model
print('loading pretrained model...')
checkpoint = torch.load(pretrained_ckpt)
model.load_state_dict(checkpoint['model_state_dict'])
print('done')

# define predict
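
The snippet stops at the "# define predict" comment, so the project's actual prediction routine is not shown. Purely as an illustration, a greedy-decoding helper for the model built above could look like the sketch below; the assumption that model(src_batch) returns per-step log-probabilities of shape [batch, steps, vocab] is hypothetical, as is the name src_batch.

# Hypothetical sketch only; the real predict routine is not part of this snippet.
# Assumes model(src_batch) returns log-probabilities of shape [batch, steps, vocab].
def predict(model, src_batch):
    model.eval()
    with torch.no_grad():
        log_probs = model(src_batch.to(device))  # assumed forward call
    return log_probs.argmax(dim=-1).cpu()        # greedy token choice per step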
Example #3
def main(args):

    print("Data preprocessing ...")
    if args.data_mode == "jieba":
        vocab = Vocabulary(args.data_set, args.vec_min)
        tokenizer = None
        data_transformer = DRCDDataset(args.data_set, args.data_sel,
                                       args.data_mode, args.with_ans, vocab,
                                       tokenizer)
        data_loader = DataLoader(data_transformer,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 collate_fn=create_jieba_batch)
        embedding = build_emb(args.save_dir, args.vec_path, vocab,
                              args.emb_size, args.loadEmbedding)
        embedding = embedding.to(device)
    elif args.data_mode == "bert":
        vocab = None
        tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
        data_transformer = DRCDDataset(args.data_set, args.data_sel,
                                       args.data_mode, args.with_ans, vocab,
                                       tokenizer)
        data_loader = DataLoader(data_transformer,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 collate_fn=create_bert_batch)
        embedding = None

    print('Building encoder and decoder ...')
    if args.data_mode == "jieba":
        encoder = EncoderRNN(embedding, args.hidden_size, args.transfer_layer,
                             args.encoder_n_layers, args.dropout)
        vocab_size = vocab.num_words
    elif args.data_mode == "bert":
        encoder = BertEncoder(args.transfer_layer)
        embedding = encoder.embedding
        vocab_size = encoder.vocab_size

    if args.attn_model == 'none':
        decoder = DecoderRNN(embedding, args.hidden_size, vocab_size,
                             args.decoder_n_layers, args.dropout)
    else:
        decoder = LuongAttnDecoderRNN(args.attn_model, embedding,
                                      args.hidden_size, vocab_size,
                                      args.decoder_n_layers, args.dropout)

    # Load pretrained weights if checkpoint paths are provided
    if args.loadEncoder:
        print("Loading pretrained Encoder ...")
        checkpoint = torch.load(args.loadEncoder)
        prencoder_sd = checkpoint['en']

        encoder_sd = encoder.state_dict()
        # keep only the pretrained weights whose keys exist in the current model
        prencoder_sd = {
            k: v
            for k, v in prencoder_sd.items() if k in encoder_sd
        }
        encoder_sd.update(prencoder_sd)
        encoder.load_state_dict(encoder_sd)

        if args.fixed_enc:
            for param in encoder.parameters():
                param.requires_grad = False
            encoder.out.weight.requires_grad = True
            encoder.out.bias.requires_grad = True

    if args.loadDecoder:
        print("Loading pretrained Decoder ...")
        checkpoint = torch.load(args.loadDecoder)
        decoder_sd = checkpoint['de']
        decoder.load_state_dict(decoder_sd)

    if args.loadFilename:
        print("Loading pretrained Model ...")
        checkpoint = torch.load(args.loadFilename)
        encoder_sd = checkpoint['en']
        encoder.load_state_dict(encoder_sd)
        decoder_sd = checkpoint['de']
        decoder.load_state_dict(decoder_sd)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    if args.training_flag:
        print('Building optimizers ...')
        if args.fixed_enc:
            encoder_optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                                  encoder.parameters()),
                                           lr=args.encoder_op_lr)
        else:
            encoder_optimizer = optim.Adam(encoder.parameters(),
                                           lr=args.encoder_op_lr)

        decoder_optimizer = optim.Adam(decoder.parameters(),
                                       lr=args.decoder_op_lr)

        if args.loadEncoder:
            checkpoint = torch.load(args.loadEncoder)
            prencoder_optimizer_sd = checkpoint['en_opt']

            encoder_optimizer_sd = encoder_optimizer.state_dict()
            # keep only the pretrained optimizer entries whose keys exist in
            # the freshly built optimizer
            prencoder_optimizer_sd = {
                k: v
                for k, v in prencoder_optimizer_sd.items()
                if k in encoder_optimizer_sd
            }
            encoder_optimizer_sd.update(prencoder_optimizer_sd)
            encoder_optimizer.load_state_dict(encoder_optimizer_sd)

        if args.loadDecoder:
            checkpoint = torch.load(args.loadDecoder)
            decoder_optimizer_sd = checkpoint['de_opt']
            decoder_optimizer.load_state_dict(decoder_optimizer_sd)

        if args.loadFilename:
            checkpoint = torch.load(args.loadFilename)
            prencoder_optimizer_sd = checkpoint['en_opt']

            encoder_optimizer_sd = encoder_optimizer.state_dict()
            # keep only the pretrained optimizer entries whose keys exist in
            # the freshly built optimizer
            prencoder_optimizer_sd = {
                k: v
                for k, v in prencoder_optimizer_sd.items()
                if k in encoder_optimizer_sd
            }
            encoder_optimizer_sd.update(prencoder_optimizer_sd)
            encoder_optimizer.load_state_dict(encoder_optimizer_sd)

            decoder_optimizer_sd = checkpoint['de_opt']
            decoder_optimizer.load_state_dict(decoder_optimizer_sd)

        # Move any optimizer state tensors onto the GPU if CUDA is available
        if torch.cuda.is_available():
            for state in encoder_optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.cuda()
            for state in decoder_optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.cuda()

        print("Starting training!")
        trainEpochs(args.save_dir, args.data_mode, data_loader, vocab,
                    tokenizer, args.attn_model, encoder, decoder,
                    encoder_optimizer, decoder_optimizer, args.it_percent,
                    args.checkpoint_epoch, args.num_epochs,
                    args.teacher_forcing_ratio, args.data_sel)

    # Set dropout layers to eval mode
    encoder.eval()
    decoder.eval()

    if args.dev_flag:
        dev_transformer = DRCDDataset(args.dev_set, args.data_sel,
                                      args.data_mode, args.with_ans, vocab,
                                      tokenizer)
        if args.data_mode == "jieba":
            dev_loader = DataLoader(dev_transformer,
                                    batch_size=args.batch_size,
                                    shuffle=True,
                                    collate_fn=create_jieba_batch)
        elif args.data_mode == "bert":
            dev_loader = DataLoader(dev_transformer,
                                    batch_size=args.batch_size,
                                    shuffle=True,
                                    collate_fn=create_bert_batch)

        print("Starting evaluation!")
        test_evaluation(args.eval_flag, args.data_mode, args.data_sel,
                        dev_loader, args.attn_model, vocab, tokenizer, encoder,
                        decoder, args.max_length, args.save_dir,
                        args.checkpoint_epoch)

    if args.eval_flag:
        eval_transformer = DRCDDataset(args.eval_set, args.data_sel,
                                       args.data_mode, args.with_ans, vocab,
                                       tokenizer)
        if args.data_mode == "jieba":
            eval_loader = DataLoader(eval_transformer,
                                     batch_size=args.batch_size,
                                     shuffle=False,
                                     collate_fn=create_jieba_batch)
        elif args.data_mode == "bert":
            eval_loader = DataLoader(eval_transformer,
                                     batch_size=args.batch_size,
                                     shuffle=False,
                                     collate_fn=create_bert_batch)

        print("Starting evaluation!")
        test_evaluation(args.eval_flag, args.data_mode, args.data_sel,
                        eval_loader, args.attn_model, vocab, tokenizer,
                        encoder, decoder, args.max_length, args.save_dir,
                        args.checkpoint_epoch)
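
main repeats the same "load only the matching keys" idiom for the encoder weights and both optimizer states. A small helper capturing that pattern is sketched below; it is not part of the original code and the usage lines are hypothetical.

# Sketch of the partial-loading idiom used in main (not part of the original code):
# copy over only those pretrained entries whose keys also exist in the target.
def load_matching_state(module, pretrained_sd):
    current_sd = module.state_dict()
    filtered = {k: v for k, v in pretrained_sd.items() if k in current_sd}
    current_sd.update(filtered)
    module.load_state_dict(current_sd)
    return module

# hypothetical usage:
# checkpoint = torch.load(args.loadEncoder)
# load_matching_state(encoder, checkpoint['en'])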