Esempio n. 1
0
def initialize_model(opt, src, tgt, train):
    """Build vocabularies from the training data and assemble a Seq2seq model.

    Args:
        opt: option namespace carrying the model hyperparameters.
        src: source-side field; its vocabulary is built here (capped at
            ``opt.src_vocab`` entries).
        tgt: target-side field; its vocabulary is built here (capped at
            ``opt.tgt_vocab`` entries).
        train: data iterator whose ``dataset`` supplies the token counts.

    Returns:
        Tuple ``(seq2seq, input_vocab, output_vocab)`` with the model moved
        to the module-level ``device``.
    """
    # Vocabularies are derived from the training split only.
    src.build_vocab(train.dataset, max_size=opt.src_vocab)
    tgt.build_vocab(train.dataset, max_size=opt.tgt_vocab)

    # A bidirectional encoder emits concatenated forward/backward states,
    # so the decoder must accept twice the hidden width.
    enc_size = opt.hidden_size
    dec_size = 2 * enc_size if opt.bidirectional else enc_size

    encoder = EncoderRNN(
        len(src.vocab), opt.max_len, enc_size, opt.embedding_size,
        dropout_p=opt.dropout_p_encoder,
        n_layers=opt.n_layers,
        bidirectional=opt.bidirectional,
        rnn_cell=opt.rnn_cell,
        variable_lengths=True)
    decoder = DecoderRNN(
        len(tgt.vocab), opt.max_len, dec_size,
        dropout_p=opt.dropout_p_decoder,
        n_layers=opt.n_layers,
        attention_method=opt.attention_method,
        full_focus=opt.full_focus,
        bidirectional=opt.bidirectional,
        rnn_cell=opt.rnn_cell,
        eos_id=tgt.eos_id, sos_id=tgt.sos_id)

    model = Seq2seq(encoder, decoder)
    model.to(device)

    return model, src.vocab, tgt.vocab
Esempio n. 2
0
def initialize_model(parameters, src, tgt, train):
    """Build vocabularies, assemble a Seq2seq model and initialize its weights.

    Args:
        parameters: dict of hyperparameters. Required keys: ``hidden_size``,
            ``max_len``, ``embedding_size``, ``rnn_cell``,
            ``attention_method``, ``full_focus``. Optional keys:
            ``max_vocab_size`` (vocabulary cap, default 50000) and
            ``param_init`` (uniform-init bound, default 0.08).
        src: source-side field; its vocabulary is built here.
        tgt: target-side field; its vocabulary is built here.
        train: data iterator whose ``dataset`` supplies the token counts.

    Returns:
        Tuple ``(seq2seq, output_vocab)`` with the model moved to the
        module-level ``device``.
    """
    # The vocabulary cap was previously hard-coded to 50000; keep that as
    # the default but let callers override it through the parameter dict.
    max_vocab = parameters.get('max_vocab_size', 50000)
    src.build_vocab(train.dataset, max_size=max_vocab)
    tgt.build_vocab(train.dataset, max_size=max_vocab)

    output_vocab = tgt.vocab

    # Initialize model
    hidden_size = parameters['hidden_size']
    encoder = EncoderRNN(len(src.vocab),
                         parameters['max_len'],
                         hidden_size,
                         parameters['embedding_size'],
                         rnn_cell=parameters['rnn_cell'],
                         variable_lengths=True)
    decoder = DecoderRNN(len(tgt.vocab),
                         parameters['max_len'],
                         hidden_size,
                         attention_method=parameters['attention_method'],
                         full_focus=parameters['full_focus'],
                         rnn_cell=parameters['rnn_cell'],
                         eos_id=tgt.eos_id,
                         sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    seq2seq.to(device)

    # Uniform weight init; 0.08 was previously hard-coded (magic number),
    # now overridable via 'param_init' while preserving the old default.
    bound = parameters.get('param_init', 0.08)
    for param in seq2seq.parameters():
        param.data.uniform_(-bound, bound)

    return seq2seq, output_vocab
Esempio n. 3
0
def get_baseline_model(src, tgt, max_len=50, hidden_size=50, embedding_size=100):
    """Construct an untrained GRU-based Seq2seq baseline.

    Vocabularies are assumed to have been built on ``src``/``tgt`` already;
    weights are left at the framework's default initialization.

    Args:
        src: source-side field providing ``vocab``.
        tgt: target-side field providing ``vocab``, ``eos_id`` and ``sos_id``.
        max_len: maximum sequence length for encoder and decoder.
        hidden_size: RNN hidden-state width.
        embedding_size: encoder embedding dimension.

    Returns:
        The assembled ``Seq2seq`` model (not moved to any device).
    """
    cell = 'gru'
    encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                         embedding_size, rnn_cell=cell)
    decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size,
                         rnn_cell=cell,
                         eos_id=tgt.eos_id, sos_id=tgt.sos_id)
    return Seq2seq(encoder, decoder)
Esempio n. 4
0
def initialize_model(opt, src, tgt, train):
    """Build vocabularies and a (possibly multi-GPU) Seq2seq model.

    Args:
        opt: option namespace carrying the model hyperparameters.
        src: source-side field; vocabulary built here, capped at
            ``opt.src_vocab``.
        tgt: target-side field; vocabulary built here, capped at
            ``opt.tgt_vocab``.
        train: data iterator whose ``dataset`` supplies the token counts.

    Returns:
        Tuple ``(seq2seq, input_vocab, output_vocab)``; the model is wrapped
        in ``DataParallel`` when more than one GPU is visible and moved to
        the module-level ``device``.
    """
    src.build_vocab(train.dataset, max_size=opt.src_vocab)
    tgt.build_vocab(train.dataset, max_size=opt.tgt_vocab)
    input_vocab, output_vocab = src.vocab, tgt.vocab

    # Bidirectional encoders concatenate both directions' states, doubling
    # the width the decoder must consume.
    encoder_size = opt.hidden_size
    decoder_size = encoder_size * (2 if opt.bidirectional else 1)

    encoder = EncoderRNN(
        len(src.vocab), opt.max_len, encoder_size, opt.embedding_size,
        dropout_p=opt.dropout_p_encoder,
        n_layers=opt.n_layers,
        bidirectional=opt.bidirectional,
        rnn_cell=opt.rnn_cell,
        variable_lengths=True)
    decoder = DecoderRNN(
        len(tgt.vocab), opt.max_len, decoder_size,
        dropout_p=opt.dropout_p_decoder,
        n_layers=opt.n_layers,
        attention_method=opt.attention_method,
        full_focus=opt.full_focus,
        bidirectional=opt.bidirectional,
        rnn_cell=opt.rnn_cell,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id)
    model = Seq2seq(encoder, decoder)

    # Spread batches across every visible GPU when more than one exists.
    n_gpus = torch.cuda.device_count()
    if n_gpus > 1:
        logging.info("Using {} GPUs".format(n_gpus))
        model = torch.nn.DataParallel(model)

    model.to(device)

    return model, input_vocab, output_vocab
Esempio n. 5
0
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         variable_lengths=True)
    decoder = DecoderRNN(len(tgt.vocab),
                         max_len,
                         decoder_hidden_size,
                         dropout_p=opt.dropout_p_decoder,
                         n_layers=opt.n_layers,
                         use_attention=opt.attention,
                         attention_method=opt.attention_method,
                         full_focus=opt.full_focus,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         eos_id=tgt.eos_id,
                         sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    seq2seq.to(device)

    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

# Flatten the vocab objects into index->token lists via their `itos`
# ("index to string") attribute.
# NOTE(review): `input_vocab`/`output_vocab` are not defined in this chunk —
# presumably returned by an `initialize_model` call earlier in the file;
# confirm they are in scope here.
input_vocabulary = input_vocab.itos
output_vocabulary = output_vocab.itos

# random.seed(3)

# Debug dump of both vocabularies (Python 2 print syntax, kept disabled).
# print "Input vocabulary:"
# for i, word in enumerate(input_vocabulary):
#     print i, word
#
# print "Output vocabulary:"
Esempio n. 6
0
def initialize_model(opt, src, tgt, train):
    """Build vocabularies, assemble the positional-attention Seq2seq model
    and initialize its weights.

    Args:
        opt: option namespace carrying the model hyperparameters, including
            ``param_init`` (uniform-init bound; 0 disables) and
            ``param_init_glorot`` (Xavier re-init of matrix parameters).
        src: source-side field; vocabulary built here, capped at
            ``opt.src_vocab``.
        tgt: target-side field; vocabulary built here, capped at
            ``opt.tgt_vocab``.
        train: data iterator whose ``dataset`` supplies the token counts.

    Returns:
        Tuple ``(seq2seq, input_vocab, output_vocab)``; the model is wrapped
        in ``DataParallel`` when more than one GPU is visible and moved to
        the module-level ``device``.
    """
    # build vocabulary
    src.build_vocab(train.dataset, max_size=opt.src_vocab)
    tgt.build_vocab(train.dataset, max_size=opt.tgt_vocab)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # Initialize model; a bidirectional encoder concatenates both
    # directions' states, doubling the decoder's input width.
    hidden_size = opt.hidden_size
    decoder_hidden_size = hidden_size * 2 if opt.bidirectional else hidden_size
    encoder = EncoderRNN(len(src.vocab),
                         opt.max_len,
                         hidden_size,
                         opt.embedding_size,
                         dropout_p=opt.dropout_p_encoder,
                         n_layers=opt.n_layers,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         variable_lengths=True)
    decoder = DecoderRNN(
        len(tgt.vocab),
        opt.max_len,
        decoder_hidden_size,
        dropout_p=opt.dropout_p_decoder,
        n_layers=opt.n_layers,
        use_attention=opt.attention,
        attention_method=opt.attention_method,
        use_positional_attention=opt.positional_attention,
        bidirectional=opt.bidirectional,
        rnn_cell=opt.rnn_cell,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id,
        positioning_generator_size=opt.positioning_generator_size,
        attention_mixer=opt.attention_mixer)

    # initialize weights using uniform distribution
    def uniform_weights_init(m):
        """Uniform-init recurrent/linear/embedding weights; zero RNN biases."""
        # BUGFIX: this previously matched only nn.LSTM, so GRU / vanilla-RNN
        # cells (opt.rnn_cell is configurable) were silently left at their
        # default initialization. nn.RNNBase covers LSTM, GRU and RNN alike.
        if isinstance(m, nn.RNNBase):
            for name, param in m.named_parameters():
                if 'bias' in name:
                    nn.init.constant_(param, 0.0)
                elif 'weight' in name:
                    nn.init.uniform_(param, -opt.param_init, opt.param_init)
        if isinstance(m, (nn.Linear, nn.Embedding)):
            nn.init.uniform_(m.weight, -opt.param_init, opt.param_init)

    if opt.param_init > 0.0:
        encoder.apply(uniform_weights_init)
        decoder.apply(uniform_weights_init)

    seq2seq = Seq2seq(encoder, decoder)

    # This enables using all GPUs available.
    if torch.cuda.device_count() > 1:
        logging.info("Using {} GPUs".format(torch.cuda.device_count()))
        seq2seq = nn.DataParallel(seq2seq)

    # Xavier initialization overrides the uniform init for all matrix
    # (dim > 1) parameters when the flag is set.
    if opt.param_init_glorot:
        for p in seq2seq.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    seq2seq.to(device)

    return seq2seq, input_vocab, output_vocab