import os
import pickle

import torch

# EMBEDDING_DIR, EncoderRNN, DecoderRNN, map_vocab_to_embedding and train are
# assumed to be defined (or imported) elsewhere in this module.


def main():
    # Load the preprocessed vocabulary and map it onto the embedding matrix.
    with open(os.path.join(EMBEDDING_DIR, 'vocab.pkl'), 'rb') as vocab_file:
        vocabulary = pickle.load(vocab_file)
    print("Number of words in data set: %d" % len(vocabulary))
    embedding_matrix, vocab_to_index = map_vocab_to_embedding(vocabulary)

    # Build the encoder/decoder pair on top of the pretrained embedding matrix.
    hidden_size = 600
    encoder = EncoderRNN(embedding_matrix, hidden_size)
    decoder = DecoderRNN(embedding_matrix, hidden_size)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
    # Load the preprocessed training pairs.
    with open(os.path.join(EMBEDDING_DIR, "train.pkl"), 'rb') as train_file:
        train_data = pickle.load(train_file)
    n_iters = 2000
    train(train_data, vocab_to_index, vocabulary, encoder, decoder, n_iters)
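
If the script is executed directly, a standard entry-point guard (not part of the original snippet) would invoke this function:

if __name__ == '__main__':
    main()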
Example 2
def build_model(src, tgt, hidden_size, mini_batch_size, bidirectional, dropout,
                attention, init_value):
    EXPERIMENT.param("Hidden", hidden_size)
    EXPERIMENT.param("Bidirectional", bidirectional)
    EXPERIMENT.param("Dropout", dropout)
    EXPERIMENT.param("Attention", attention)
    EXPERIMENT.param("Mini-batch", mini_batch_size)
    # Perplexity loss; padding positions in the target are masked out.
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    encoder = EncoderRNN(len(src.vocab),
                         MAX_LEN,
                         hidden_size,
                         rnn_cell="lstm",
                         bidirectional=bidirectional,
                         dropout_p=dropout,
                         variable_lengths=False)
    # The decoder's hidden size must match the encoder output, which is doubled
    # when the encoder is bidirectional.
    decoder = DecoderRNN(len(tgt.vocab),
                         MAX_LEN,
                         hidden_size * 2 if bidirectional else hidden_size,
                         rnn_cell="lstm",
                         bidirectional=bidirectional,
                         use_attention=attention,
                         eos_id=tgt.eos_id,
                         sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    using_cuda = torch.cuda.is_available()
    if using_cuda:
        # Moving the Seq2seq wrapper also moves its encoder and decoder submodules.
        seq2seq.cuda()
        loss.cuda()
    EXPERIMENT.param("CUDA", using_cuda)
    # Initialize all model parameters uniformly in [-init_value, init_value].
    for param in seq2seq.parameters():
        param.data.uniform_(-init_value, init_value)

    trainer = SupervisedTrainer(loss=loss,
                                batch_size=mini_batch_size,
                                checkpoint_every=5000,
                                random_seed=42,
                                print_every=1000)
    return seq2seq, trainer
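
A minimal usage sketch follows, assuming the torchtext SourceField/TargetField setup and the SupervisedTrainer.train call from the pytorch-seq2seq library that these classes appear to come from; the data path, vocabulary size, hyperparameter values, and epoch count are illustrative placeholders, not values from the original code.

import torchtext
from seq2seq.dataset import SourceField, TargetField

# Define the source/target fields and load a tab-separated parallel corpus.
src, tgt = SourceField(), TargetField()
train_set = torchtext.data.TabularDataset(
    path="data/train.tsv", format="tsv",   # placeholder path
    fields=[("src", src), ("tgt", tgt)])
src.build_vocab(train_set, max_size=50000)
tgt.build_vocab(train_set, max_size=50000)

# Build the model and trainer, then run supervised training.
model, trainer = build_model(src, tgt, hidden_size=256, mini_batch_size=32,
                             bidirectional=True, dropout=0.2, attention=True,
                             init_value=0.08)
model = trainer.train(model, train_set, num_epochs=10, teacher_forcing_ratio=0.5)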