Example #1
    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size = 128
        bidirectional = True
        encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                             bidirectional=bidirectional, variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2, use_attention=True, bidirectional=bidirectional,
                             eos_id=tgt.eos_id, sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss, batch_size=32,
                          checkpoint_every=50,
                          print_every=10, expt_dir=opt.expt_dir)

    # remaining arguments follow the library's sample training script
    seq2seq = t.train(seq2seq, train,
                      num_epochs=6, dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)
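
The excerpts on this page omit their import headers. A minimal header they appear to assume, following the upstream IBM pytorch-seq2seq layout (several examples are forks, so exact module paths may differ):

import logging
import torch
import torchtext
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

from seq2seq.models import EncoderRNN, DecoderRNN, Seq2seq
from seq2seq.loss import Perplexity
from seq2seq.optim import Optimizer
from seq2seq.trainer import SupervisedTrainer
from seq2seq.dataset import SourceField, TargetField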
Example #2
                         rnn_cell=params['rnn_cell'])
    decoder = DecoderRNN(len(tgt.vocab),
                         max_len,
                         hidden_size * 2 if bidirectional else hidden_size,
                         dropout_p=0.2,
                         use_attention=True,
                         bidirectional=bidirectional,
                         rnn_cell=params['rnn_cell'],
                         n_layers=params['n_layers'],
                         eos_id=tgt.eos_id,
                         sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    if torch.cuda.is_available():
        seq2seq.cuda()

    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

    # Optimizer and learning rate scheduler can be customized by
    # explicitly constructing the objects and passing them to the trainer.
    #
    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                          max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)

logging.info(seq2seq)

# train
t = SupervisedTrainer(loss=loss,
                      batch_size=params['batch_size'],
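
Example #2 reads its hyperparameters from a params dictionary that is not part of the excerpt. A hypothetical dictionary covering the keys the snippet accesses (the values are illustrative only):

params = {
    'rnn_cell': 'lstm',   # cell type passed to EncoderRNN/DecoderRNN
    'n_layers': 2,        # number of recurrent layers
    'batch_size': 32,     # batch size handed to SupervisedTrainer
}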
Example #3
    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size = 128
        bidirectional = opt.bidirectional
        encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                             dropout_p=0.25, input_dropout_p=0.25,
                             bidirectional=bidirectional, n_layers=2,
                             variable_lengths=True, vocab=input_vocab)
        decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2, input_dropout_p=0.25, use_attention=opt.use_attn,
                             bidirectional=bidirectional, n_layers=2,
                             eos_id=tgt.eos_id, sos_id=tgt.sos_id, attn_mode=opt.attn_mode)
        seq2seq = Seq2seq(encoder, decoder)

        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.1, 0.1)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.

        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters(), lr=0.001), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        scheduler = ReduceLROnPlateau(optimizer.optimizer, 'min', factor=0.1, verbose=True, patience=9)
        optimizer.set_scheduler(scheduler)
    # NOTE: hidden_size is only defined inside the `if not opt.resume:` branch above
    expt_dir = opt.expt_dir + '_hidden_{}'.format(hidden_size)
    # train
    t = SupervisedTrainer(loss=loss, batch_size=64,
                          checkpoint_every=1800,
                          print_every=100, expt_dir=expt_dir, input_vocab=input_vocab, output_vocab=output_vocab)
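
Examples #1 and #3 only show the "if not opt.resume:" branch; the resume path is cut off. A sketch of how a saved model is typically reloaded with the library's Checkpoint utility (assuming the upstream pytorch-seq2seq API; opt.load_checkpoint is a hypothetical option name):

import os
from seq2seq.util.checkpoint import Checkpoint

if opt.resume and opt.load_checkpoint is not None:
    checkpoint_path = os.path.join(opt.expt_dir,
                                   Checkpoint.CHECKPOINT_DIR_NAME,
                                   opt.load_checkpoint)
    checkpoint = Checkpoint.load(checkpoint_path)  # restores the model and both vocabularies
    seq2seq = checkpoint.model
    input_vocab = checkpoint.input_vocab
    output_vocab = checkpoint.output_vocab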
    
Example #4
                      att_mlp=args.att_mlp,
                      att_type=args.att_type)
# decoder3 = DecoderRNN(args.decoder3_n_layer, args.vocab_size, max_len, hidden_size * 2 if bidirectional else hidden_size,
#                      dropout_p=0.2, use_attention=True, bidirectional=bidirectional,
#                      eos_id=eos_id, sos_id=sos_id)

# seq2seq = Seq2seq(args, decoder1, decoder2, decoder3)
seq2seq = Seq2seq(args, decoder1, decoder2)
seq2seq.cuda()
seq2seq = torch.nn.DataParallel(seq2seq)
cudnn.benchmark = True

print('Initialize model parameter ...')
if args.init == 'uniform':
    print('uniform init !')
    for param in seq2seq.parameters():
        param.data.uniform_(-args.init_weight, args.init_weight)
elif args.init == 'mos':
    print('mos init !')
    for m in seq2seq.modules():
        if type(m) in [nn.GRU, nn.LSTM, nn.RNN]:
            for name, param in m.named_parameters():
                if 'weight_ih' in name:
                    # torch.nn.init.xavier_uniform_(param.data)
                    torch.nn.init.uniform_(param.data, -0.1, 0.1)
                elif 'weight_hh' in name:
                    # torch.nn.init.orthogonal_(param.data)
                    torch.nn.init.uniform_(param.data, -0.1, 0.1)
                elif 'bias' in name:
                    param.data.fill_(0)
else:
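
The commented-out lines in the 'mos' branch reference Xavier and orthogonal initialization. A sketch of that variant for the recurrent modules (an alternative to the uniform initialization the example actually runs; not part of the original snippet):

for m in seq2seq.modules():
    if isinstance(m, (nn.GRU, nn.LSTM, nn.RNN)):
        for name, param in m.named_parameters():
            if 'weight_ih' in name:
                torch.nn.init.xavier_uniform_(param.data)  # input-to-hidden weights
            elif 'weight_hh' in name:
                torch.nn.init.orthogonal_(param.data)      # hidden-to-hidden weights
            elif 'bias' in name:
                param.data.fill_(0)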
Example #5
def offline_training(opt, target_file_path):

    # Prepare dataset with torchtext
    src = SourceField(tokenize=treebank_tokenizer)
    tgt = TargetField(tokenize=treebank_tokenizer)

    def sample_filter(sample):
        """Placeholder filter: keep every example (hook for future filtering)."""
        return True

    train = torchtext.data.TabularDataset(path=opt.train_path,
                                          format='tsv',
                                          fields=[('src', src), ('tgt', tgt)],
                                          filter_pred=sample_filter)
    dev = torchtext.data.TabularDataset(path=opt.dev_path,
                                        format='tsv',
                                        fields=[('src', src), ('tgt', tgt)],
                                        filter_pred=sample_filter)
    # NOTE: the test split is also read from opt.dev_path in this snippet
    test = torchtext.data.TabularDataset(path=opt.dev_path,
                                         format='tsv',
                                         fields=[('src', src), ('tgt', tgt)],
                                         filter_pred=sample_filter)
    src.build_vocab(train, max_size=opt.src_vocab_size)
    tgt.build_vocab(train, max_size=opt.tgt_vocab_size)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    if opt.loss == 'perplexity':
        loss = Perplexity(weight, pad)
    else:
        raise TypeError("unsupported loss type: {}".format(opt.loss))
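
    # The upstream pytorch-seq2seq sample also moves the loss criterion to the GPU
    # when one is available (assuming the same loss.cuda() helper exists in this fork):
    if torch.cuda.is_available():
        loss.cuda()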

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        encoder = EncoderRNN(vocab_size=len(src.vocab),
                             max_len=opt.max_length,
                             hidden_size=opt.hidden_size,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             n_layers=opt.n_layers,
                             bidirectional=opt.bidirectional,
                             rnn_cell=opt.rnn_cell,
                             variable_lengths=True,
                             embedding=input_vocab.vectors
                             if opt.use_pre_trained_embedding else None,
                             update_embedding=opt.update_embedding)
        decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                             max_len=opt.max_length,
                             hidden_size=opt.hidden_size *
                             2 if opt.bidirectional else opt.hidden_size,
                             sos_id=tgt.sos_id,
                             eos_id=tgt.eos_id,
                             n_layers=opt.n_layers,
                             rnn_cell=opt.rnn_cell,
                             bidirectional=opt.bidirectional,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             use_attention=opt.use_attention)
        seq2seq = Seq2seq(encoder=encoder, decoder=decoder)
        if opt.gpu >= 0 and torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
    # train
    trainer = SupervisedTrainer(loss=loss,
                                batch_size=opt.batch_size,
                                checkpoint_every=opt.checkpoint_every,
                                print_every=opt.print_every,
                                expt_dir=opt.expt_dir)
    seq2seq = trainer.train(model=seq2seq,
                            data=train,
                            num_epochs=opt.epochs,
                            resume=opt.resume,
                            dev_data=dev,
                            optimizer=optimizer,
                            teacher_forcing_ratio=opt.teacher_forcing_rate)
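
Not shown in Example #5: after training, the fitted model can be used for greedy decoding with the library's Predictor (a minimal sketch, assuming the upstream pytorch-seq2seq evaluator API; tokens stands for a tokenized source sentence and is hypothetical):

from seq2seq.evaluator import Predictor

predictor = Predictor(seq2seq, input_vocab, output_vocab)
print(predictor.predict(tokens))  # tokens: list of source tokens, e.g. from treebank_tokenizer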
Example #6
                             variable_lengths=True,
                             vectors=vocab.vectors)
        decoder = DecoderRNN(len(tgt.vocab),
                             max_len, (hidden_size *
                                       2) if bidirectional else hidden_size,
                             n_layers=num_layers,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            # param.data.uniform_(-0.08, 0.08)
            param.data.normal_(0.0, 0.1)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        optimizer = Optimizer(torch.optim.SGD(seq2seq.parameters(),
                                              lr=0.05,
                                              momentum=0.9),
                              max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

    # train
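
The final fragment breaks off at the "# train" comment. By analogy with Examples #1 and #5, the missing step would look roughly like this (a sketch only; the original hyperparameter values are not recoverable from the excerpt):

    t = SupervisedTrainer(loss=loss, batch_size=32,
                          checkpoint_every=50,
                          print_every=10, expt_dir=opt.expt_dir)
    seq2seq = t.train(seq2seq, train,
                      num_epochs=6, dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)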