Example #1
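The excerpt below starts partway through a training script built on the IBM pytorch-seq2seq package. A minimal sketch of the imports it relies on is shown first (assumed context, not part of the original excerpt); `opt`, `src`, `tgt`, `max_len`, `hidden_size`, `bidirectional`, and the `encoder` used in the first block are defined earlier in the script.

# Assumed imports, following the standard pytorch-seq2seq package layout:
import torch
from torch.optim.lr_scheduler import StepLR

from seq2seq.models import EncoderRNN, DecoderRNN, Seq2seq
from seq2seq.optim import Optimizer
from seq2seq.loss import Perplexity
from seq2seq.trainer import SupervisedTrainer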
        decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                             max_len=max_len,
                             hidden_size=hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=opt.dropout,
                             use_attention=True,
                             bidirectional=bidirectional,
                             n_layers=1,
                             rnn_cell='gru',
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
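        # Uniformly initialize all parameters in [-0.08, 0.08]; printing the first entries is only a sanity check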
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
            print(param.data[0:3])
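        # Record the norm of the randomly initialized embedding vectors, load the pre-trained vectors
        # from src.vocab, and rescale them to that norm (vectors_stats / init_vectors / normalize_vectors
        # are custom EncoderRNN helpers in this code base, not part of stock pytorch-seq2seq)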
        _, _, norm_val = encoder.vectors_stats()
        encoder.init_vectors(src.vocab.vectors)
        # encoder.scale_vectors(0.08)
        encoder.normalize_vectors(norm_val)
        encoder.vectors_stats()
        for param in seq2seq.parameters():
            print(param.data[0:3])

        if torch.cuda.is_available():
            seq2seq.cuda()

        # The optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters(), lr=0.001),
                              max_grad_norm=5)
        # optimizer = Optimizer(torch.optim.SGD(seq2seq.parameters(), lr=0.01, momentum=0.9), max_grad_norm=5)
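
    # A second model-initialization variant, close to the stock pytorch-seq2seq sample script, follows: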
    if not opt.resume:
        # Initialize model
        # hidden_size=128
        hidden_size = 300
        bidirectional = True

        encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                             bidirectional=bidirectional, variable_lengths=True)
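        # The decoder hidden size has to match the encoder output size; the bidirectional encoder
        # concatenates forward and backward states, so the size doubles to hidden_size * 2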
        decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2, use_attention=True, bidirectional=bidirectional,
                             eos_id=tgt.eos_id, sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
            print(param.data)
        encoder.vectors_stats()
        # encoder.init_vectors(src.vocab.vectors)
        # for param in seq2seq.parameters():
        #     print(param.data)

        if torch.cuda.is_available():
            seq2seq.cuda()

        # The optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
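        # StepLR halves the learning rate every 10 scheduler steps (gamma=0.5)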
        scheduler = StepLR(optimizer.optimizer, step_size=10, gamma=0.5)
        optimizer.set_scheduler(scheduler)

    # train
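The excerpt ends at the `# train` marker. In the stock pytorch-seq2seq sample script this point is typically followed by building a Perplexity loss and handing the model to a SupervisedTrainer; the sketch below shows that step under the assumption that `train` and `dev` torchtext datasets and `opt.expt_dir` exist.

    # Sketch of the training step that usually follows (assumed, not part of the excerpt):
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    t = SupervisedTrainer(loss=loss, batch_size=32,
                          checkpoint_every=50, print_every=10,
                          expt_dir=opt.expt_dir)
    seq2seq = t.train(seq2seq, train,
                      num_epochs=6, dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)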