コード例 #1
0
def example_test():
    """Smoke-test the encoder/decoder pair on a tiny 4x3 batch of token ids."""
    enc = EncoderRNN(10, 10, 2, max_length=3)
    dec = AttnDecoderRNN('general', 10, 10, 2)
    print(enc)
    print(dec)

    batch = [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]
    hidden = enc.init_hidden(batch_size=4)
    src = Variable(torch.LongTensor(batch))
    if USE_CUDA:
        enc.cuda()
        src = src.cuda()
        hidden = hidden.cuda()
    # Per the original annotation: outputs are (S, B, H), hidden is (L, B, H).
    enc_outputs, hidden = enc(src, hidden)
    print(enc_outputs.shape, hidden.shape)

    tgt = Variable(torch.LongTensor(batch))
    attn_history = torch.zeros(4, 3, 3)
    dec_hidden = hidden
    dec_context = Variable(torch.zeros(4, dec.hidden_size))

    if USE_CUDA:
        dec.cuda()
        tgt = tgt.cuda()
        dec_context = dec_context.cuda()

    # Step the decoder once per target position, collecting attention weights.
    for step in range(3):
        dec_output, dec_context, dec_hidden, dec_attn = dec(
            tgt[:, step], dec_context, dec_hidden, enc_outputs)
        print(dec_output.size(), dec_hidden.size(), dec_attn.size())
        attn_history[:, step, :] = dec_attn.squeeze(1).cpu().data
コード例 #2
0
def main():
    """Train an eng-fra attention seq2seq model, multi-GPU aware."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData(
        'eng', 'fra', True, dir='data', filter=False)

    hidden_size, batch_size, iters = 512, 64, 50000

    enc = EncoderRNN(input_lang.n_words, hidden_size)
    dec = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1)

    # Wrap both models in DataParallel when more than one GPU is visible.
    n_gpus = torch.cuda.device_count()
    if n_gpus > 1:
        print("Let's use", n_gpus, "GPUs!")
        enc = nn.DataParallel(enc)
        dec = nn.DataParallel(dec)
    enc = enc.to(device)
    dec = dec.to(device)

    trainIters(device, pairs, input_lang, output_lang, enc, dec,
               batch_size, iters, print_every=250)
コード例 #3
0
 def __init__(self, wm, input_length, batch_size, hidden_size, bidirectional,
              embedding_size, n_parameter, m_parameter, learning_rate, clip,
              alpha, beta, pre_trained_file=None):
     """Store hyperparameters and build (or resume) encoder/decoder.

     When *pre_trained_file* is given, models, optimizers and the starting
     epoch are restored via ``load_model_state``; otherwise fresh models and
     Adam optimizers are created and ``self.start`` is 0.
     """
     self.batch_size = batch_size
     self.hidden_size = hidden_size
     self.embedding_size = embedding_size
     self.bidirectional = bidirectional
     self.n_parameter = n_parameter
     self.m_parameter = m_parameter
     self.learning_rate = learning_rate
     self.wm = wm
     self.clip = clip
     self.alpha = alpha
     self.beta = beta
     # PEP 8 fix: compare against None with `is`, not `==`.
     if pre_trained_file is None:
         self.encoder = EncoderRNN(self.wm, self.embedding_size,
                                   hidden_size, bidirectional)
         self.decoder = AttnDecoderRNN(self.hidden_size, 10)
         self.enc_optimizer = optim.Adam(self.encoder.parameters(),
                                         lr=self.learning_rate)
         self.dec_optimizer = optim.Adam(self.decoder.parameters(),
                                         lr=self.learning_rate)
         self.start = 0
     else:
         self.resume_training = True
         (self.encoder, self.decoder, self.enc_optimizer,
          self.dec_optimizer, self.start) = self.load_model_state(
              pre_trained_file)
     self.decoder = self.decoder.to(device)
     self.encoder = self.encoder.to(device)
コード例 #4
0
ファイル: main.py プロジェクト: XiWen0426/Capstone-Project
def train(args, data, bidaf):
    """Train utterance/span encoders and attention decoder over the data set.

    Fixes applied: the two encoder optimizers previously referenced an
    undefined name ``encoder``; the loop body indexed examples with an
    undefined ``i``; ``ata.train`` was a typo for ``data.train``; and the
    running loss / timer variables were never initialized.

    NOTE(review): `bidaf` is accepted but unused here — confirm with callers.
    """
    import time  # local import: only needed for progress timing

    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    utte_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    span_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    decoder = AttnDecoderRNN(args, data.WORD.vocab.vectors).to(device)

    # Each optimizer must own its corresponding model's parameters.
    utte_encoder_optimizer = optim.SGD(utte_encoder.parameters(),
                                       lr=args.learning_rate)
    span_encoder_optimizer = optim.SGD(span_encoder.parameters(),
                                       lr=args.learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.NLLLoss()

    n_examples = len(data.train.examples)
    n_iters = 10 * n_examples  # ten passes over the training set
    print_every = 10000
    print_loss_total = 0
    start = time.time()
    for it in range(1, n_iters + 1):
        # Cycle through the training examples in order.
        example = data.train.examples[(it - 1) % n_examples]
        input_tensor = example.q_word
        target_tensor = example.ans
        span = example.span
        loss = train_each(input_tensor, target_tensor, utte_encoder,
                          span_encoder, decoder, utte_encoder_optimizer,
                          span_encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss

        if it % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (timeSince(start, it / n_iters), it,
                   it / n_iters * 100, print_loss_avg))
コード例 #5
0
def load_model():
    """Rebuild the encoder/decoder and restore their saved weights."""
    enc = EncoderRNN(human_n_chars, hidden_size, n_layers)
    dec = AttnDecoderRNN(attn_model, hidden_size, machine_n_chars, n_layers,
                         dropout_p=dropout_p)
    enc.load_state_dict(t.load('encoder.pth'))
    dec.load_state_dict(t.load('decoder.pth'))
    return enc, dec
コード例 #6
0
ファイル: train.py プロジェクト: rchanda/Data2Doc
def model_initialization(encoder_style, decoder_style, langs, embedding_size,
                         learning_rate, use_model):
    """Build embedding, encoder, decoder, optimizer and the train function."""
    # Shared record embedding over the three record vocabularies.
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Pick an encoder architecture by name.
    # TODO: Set up a choice for hierarchical or not
    hier_args = {"hidden_size": embedding_size, "local_embed": emb}
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    elif encoder_style == 'BiLSTMMax':
        encoder = EncoderBiLSTMMaxPooling(embedding_size, emb)
    elif encoder_style == 'HierarchicalBiLSTM':
        encoder = HierarchicalBiLSTM(**hier_args)
    elif encoder_style == 'HierarchicalLIN':
        encoder = HierarchicalLIN(**hier_args)
    else:
        # Default: hierarchical encoder RNN (both global and local levels).
        encoder = HierarchicalEncoderRNN(**hier_args)

    # The decoder choice also fixes the training routine.
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size, langs['summary'].n_words)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # One Adagrad optimizer over both models' parameters.
    loss_optimizer = optim.Adagrad(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=learning_rate, lr_decay=0, weight_decay=0)

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        if use_cuda:
            loss_optimizer.load_state_dict(torch.load(use_model[2]))
        else:
            # Remap checkpointed optimizer tensors onto the CPU.
            loss_optimizer.load_state_dict(
                torch.load(use_model[2],
                           map_location=lambda storage, loc: storage))

    return encoder, decoder, loss_optimizer, train_func
コード例 #7
0
 def load_model_state(self, model_file):
     """Restore encoder/decoder and their optimizers from a checkpoint.

     Fix: the saved state dicts were read from the checkpoint but never
     loaded into the freshly constructed models and optimizers, so
     "resuming" silently restarted from random weights.
     """
     print("Resuming training from a given model...")
     checkpoint = torch.load(model_file,
                             map_location=lambda storage, loc: storage)
     epoch = checkpoint['epoch']
     encoder = EncoderRNN(self.wm, self.embedding_size,
                          self.hidden_size, self.bidirectional)
     decoder = AttnDecoderRNN("general", self.hidden_size, 10)
     enc_optimizer = optim.Adam(encoder.parameters(), lr=self.learning_rate)
     dec_optimizer = optim.Adam(decoder.parameters(), lr=self.learning_rate)
     # Apply the checkpointed parameters and optimizer state.
     encoder.load_state_dict(checkpoint['encoder_state_dict'])
     decoder.load_state_dict(checkpoint['decoder_state_dict'])
     enc_optimizer.load_state_dict(checkpoint['encoder_optimizer_state_dict'])
     dec_optimizer.load_state_dict(checkpoint['decoder_optimizer_state_dict'])
     return encoder, decoder, enc_optimizer, dec_optimizer, epoch
コード例 #8
0
 def __init__(self, config, dataset):
     """Wire up encoder, decoder, optimizers and loss from *config*."""
     self.config = config
     self.n_epochs = config.n_epochs
     self.encoder = EncoderRNN(n_dict=dataset.source.n_words, config=config)
     self.decoder = AttnDecoderRNN(n_dict=dataset.target.n_words,
                                   config=config)
     make_optimizer = config.optimizier  # attribute name as spelled in config
     self.encoder_optimizer = make_optimizer(self.encoder.parameters(),
                                             lr=config.learning_rate)
     self.decoder_optimizer = make_optimizer(self.decoder.parameters(),
                                             lr=config.learning_rate)
     self.criterion = nn.NLLLoss()
     self.is_plot = config.is_plot
     self.clip_value = config.clip_value
     self.losses = []
     # Move both models onto the configured GPU when CUDA is enabled.
     if self.config.USE_CUDA:
         self.encoder.cuda(self.config.gpu_id)
         self.decoder.cuda(device_id=self.config.gpu_id)
コード例 #9
0
ファイル: train.py プロジェクト: ecly/namas_pytorch
def main():
    """Train a seq2seq model on tab-separated pairs read from argv[1]."""
    input_file = sys.argv[1]
    vocab = build.build_vocabulary(input_file)
    pairs = [tensors_from_pair(vocab, line.split("\t"))
             for line in open(input_file)]
    # Keep only pairs whose source sequence fits within MAX_LENGTH.
    pairs = [(src, tgt) for src, tgt in pairs if src.size(0) <= MAX_LENGTH]

    hidden_size = 256
    enc = EncoderRNN(vocab.n_words, hidden_size).to(device)
    dec = AttnDecoderRNN(hidden_size, vocab.n_words, dropout_p=0.1).to(device)
    train_iter(pairs, enc, dec, 75000, print_every=100)
コード例 #10
0
def create_models(config, in_words, out_words):
    """Instantiate CUDA-resident encoder/decoder sized from *config*."""
    logging.info('Creating models...')
    hidden = int(config['hidden_size'])
    layers = int(config['num_layers'])
    encoder = EncoderRNN(in_words, hidden, num_layers=layers).cuda()
    decoder = AttnDecoderRNN(hidden, out_words, num_layers=layers,
                             dropout_p=float(config['dropout_p'])).cuda()
    return encoder, decoder
コード例 #11
0
def main():
    """Evaluate a saved QA seq2seq model on the held-out 10% split."""
    input_lang, output_lang, pairs = prepare_data('ques', 'ans',
                                                  '../debug.json',
                                                  reverse=False)
    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words,
                                  dropout_p=0.1, max_length=1000).to(device)

    # 90/10 train/test split.
    rate = 0.9
    split = int(len(pairs) * rate)
    pairs_train, pairs_test = pairs[:split], pairs[split:]

    encoder.load_state_dict(torch.load('model/encoder-0.model'))
    encoder.eval()
    attn_decoder.load_state_dict(torch.load('model/decoder-0.model'))
    attn_decoder.eval()

    evaluate_all(encoder, attn_decoder, pairs_test, max_length=1000,
                 input_lang=input_lang, output_lang=output_lang,
                 n=len(pairs_test))
    # show_plot(loss_history)
    print('done test')
コード例 #12
0
def loadmodel(model_file, wm, hidden_size, bidirectional):
    """
    Loads the trained model, returns the encoder and decoder for inferencing.
    We initialize 'empty models' in which we will load our parameters.
    It is important that the hyperparameters are the same as used for training.

    Fix: the checkpointed state dicts were read but never applied, so the
    returned models had freshly initialised (random) weights.

    Keyword arguments:
    model_file - string with the model location
    wm - embedding matrix
    hidden_size - hidden size
    bidirectional - whether we use bidirectional GRU layers
    """
    checkpoint = torch.load(model_file,
                            map_location=lambda storage, loc: storage)
    encoder = EncoderRNN(wm, 300, hidden_size, bidirectional)
    decoder = AttnDecoderRNN(hidden_size, 10)
    # Apply the saved parameters to the freshly built models.
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    decoder.load_state_dict(checkpoint['decoder_state_dict'])
    return encoder, decoder
コード例 #13
0
ファイル: main.py プロジェクト: komeme/basic_nmt
def main():
    """Train, evaluate and visualise an eng-fra attention NMT model."""
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    print(random.choice(pairs))

    device = torch.device(args.device)
    print('device : {}'.format(device))

    enc = EncoderRNN(input_lang.n_words, args.hidden_size).to(device)
    dec = AttnDecoderRNN(args.hidden_size, output_lang.n_words,
                         dropout_p=0.1).to(device)

    enc_opt = optim.SGD(enc.parameters(), lr=args.lr)
    dec_opt = optim.SGD(dec.parameters(), lr=args.lr)

    model = Translator(input_lang, output_lang, enc, dec, enc_opt, dec_opt)

    trainIters(model, pairs, n_iters=10000, print_every=100, plot_every=100)
    evaluateRandomly(model, pairs)

    # Plot the attention matrix for a single example sentence.
    output_words, attentions = evaluate(model, "je suis trop froid .")
    plt.matshow(attentions.numpy())
コード例 #14
0
ファイル: eval.py プロジェクト: jianyiyang5/mt
def main():
    """Load a 100k-iteration checkpoint and evaluate random eng-fra pairs."""
    nIters = 100000
    ckpt_path = os.path.join('checkpoints',
                             '{}_{}.tar'.format(nIters, 'checkpoint'))
    checkpoint = torch.load(ckpt_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    hidden_size = 256
    encoder = EncoderRNN(input_lang.n_words, hidden_size, device).to(device)
    decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, device,
                             dropout_p=0.1).to(device)
    # Restore model weights from the checkpoint.
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    # Overwrite the vocabularies with the ones the model was trained on.
    input_lang.__dict__ = checkpoint['input_lang']
    output_lang.__dict__ = checkpoint['output_lang']
    evaluateRandomly(device, pairs, encoder, decoder, input_lang, output_lang)
コード例 #15
0
def inference(sentence, language, MODEL_DIR, codersum):
    """Greedy-decode *sentence* with a saved encoder/decoder checkpoint pair."""
    encoder = EncoderRNN(language.n_words, config.HIDDEN_SIZE,
                         config.NUM_LAYER, max_length=config.MAX_LENGTH + 1)
    decoder = AttnDecoderRNN(config.ATT_MODEL, config.HIDDEN_SIZE,
                             language.n_words, config.NUM_LAYER,
                             dropout_p=config.DROPOUT)

    enc_path = os.path.join(MODEL_DIR, "encoder_" + str(codersum) + ".pth")
    dec_path = os.path.join(MODEL_DIR, "decoder_" + str(codersum) + ".pth")
    encoder.load_state_dict(torch.load(enc_path, map_location="cpu"))
    decoder.load_state_dict(torch.load(dec_path, map_location="cpu"))
    encoder.eval()
    decoder.eval()

    batch_size = 1
    # Index and pad the input sentence to the fixed length.
    indices = pad_sentence(indexes_from_sentence(language, sentence))
    source = torch.LongTensor([indices])
    hidden, cell = encoder.init_hidden(batch_size)
    encoder_outputs, hidden, cell = encoder(source, hidden, cell)

    # Greedy decoding, seeded with a zero token id and zero context.
    dec_input = torch.zeros(batch_size, 1).long()
    dec_context = torch.zeros(batch_size, decoder.hidden_size)
    if config.USE_CUDA:
        dec_input = dec_input.cuda()
        dec_context = dec_context.cuda()

    words = []
    for _ in range(config.MAX_LENGTH):
        output, dec_context, hidden, cell, _attn = decoder(
            dec_input, dec_context, hidden, cell, encoder_outputs)

        # Take the single most likely token; token id 0 terminates decoding.
        _topv, topi = output.data.topk(1)
        token = topi[0][0]
        if token == 0:
            break
        words.append(language.index2word[token.item()])

        # Feed the chosen token back in as the next decoder input.
        dec_input = torch.LongTensor([[token]])
        if config.USE_CUDA:
            dec_input = dec_input.cuda()

    return "".join(words)
コード例 #16
0
ファイル: train.py プロジェクト: jianyiyang5/mt
def main():
    """Train an eng-fra attention NMT model for 100k iterations."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    hidden_size = 256
    enc = EncoderRNN(input_lang.n_words, hidden_size, device).to(device)
    dec = AttnDecoderRNN(hidden_size, output_lang.n_words, device,
                         dropout_p=0.1).to(device)
    trainIters(device, pairs, input_lang, output_lang, enc, dec, 100000,
               print_every=5000)
コード例 #17
0
ファイル: train.py プロジェクト: huaiwen/seq2seq
def main():
    """Train an eng-fra model, sample translations, and plot attention."""
    input_lang, output_lang, pairs = prepare_data('eng', 'fra', True)
    print(random.choice(pairs))

    enc = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    dec = AttnDecoderRNN(hidden_size, output_lang.n_words,
                         dropout_p=0.1).to(device)

    train_iters(input_lang, output_lang, pairs, enc, dec, 75000,
                print_every=5000)

    evaluate_randomly(enc, dec, input_lang, output_lang, pairs)

    # Visualise attention weights for one example sentence.
    output_words, attentions = evaluate(enc, dec, input_lang, output_lang,
                                        "je suis trop froid .")
    mat_plot(attentions.numpy())
コード例 #18
0
 def __init__(self, wm, input_length, batch_size, hidden_size, bidirectional,
              embedding_size, n_parameter, m_parameter, learning_rate, clip,
              alpha, beta, pre_trained_file=None, decoder_type="original",
              teacher_forcing_ratio=0.7):
     """Store hyperparameters and build (or resume) the seq2seq model.

     *decoder_type* selects between the "original" attention decoder and
     the Bahdanau-style decoder ("bahdanau"). When *pre_trained_file* is
     given, models/optimizers/start epoch are restored from it instead.
     """
     self.batch_size = batch_size
     self.hidden_size = hidden_size
     self.embedding_size = embedding_size
     self.bidirectional = bidirectional
     self.n_parameter = n_parameter
     self.m_parameter = m_parameter
     self.learning_rate = learning_rate
     self.wm = wm
     self.clip = clip
     self.alpha = alpha
     self.beta = beta
     self.loss_list = []
     self.teacher_forcing_ratio = teacher_forcing_ratio
     self.decoder_type = decoder_type

     # PEP 8 fix: identity comparison with None uses `is`, not `==`.
     if pre_trained_file is None:
         # Define encoder and decoder.
         self.encoder = EncoderRNN(self.wm, self.embedding_size,
                                   hidden_size, bidirectional, n_layers=1)

         # Select decoder type.
         if self.decoder_type == "original":
             self.decoder = AttnDecoderRNN("general", self.hidden_size, 10)
         elif self.decoder_type == "bahdanau":
             self.decoder = BahdanauAttnDecoderRNN(
                 self.embedding_size, hidden_size, 10,
                 discrete_representation=True)

         # Define optimizer of encoder and decoder.
         self.enc_optimizer = optim.Adam(self.encoder.parameters(),
                                         lr=self.learning_rate)
         self.dec_optimizer = optim.Adam(self.decoder.parameters(),
                                         lr=self.learning_rate)
         self.start = 1
     else:
         self.resume_training = True
         (self.encoder, self.decoder, self.enc_optimizer,
          self.dec_optimizer, self.start) = self.load_model_state(
              pre_trained_file)
     self.decoder = self.decoder.to(device)
     self.encoder = self.encoder.to(device)
コード例 #19
0
ファイル: evaluate.py プロジェクト: rchanda/Data2Doc
def generate_text(model, data_file, output):
    """Generate summaries with a saved encoder/decoder and write to *output*.

    The encoder class is inferred from the checkpoint file name.
    NOTE(review): relies on module-level `emb`, `embedding_size`, `langs`
    and `train_lang` being defined — confirm at call sites.
    """
    encoder_src = model['encoder_path']
    decoder_src = model['decoder_path']

    # Choose the encoder architecture from the checkpoint's file name.
    if 'RNN' in encoder_src:
        encoder = EncoderRNN(embedding_size, emb)
        encoder_style = 'RNN'
    elif 'LSTM' in encoder_src:
        encoder = EncoderBiLSTM(embedding_size, emb)
        encoder_style = 'LSTM'
    else:
        encoder = EncoderLIN(embedding_size, emb)
        encoder_style = 'LIN'

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)
    encoder = load_model(encoder, encoder_src)
    decoder = load_model(decoder, decoder_src)

    data_path = os.path.join(data_file['data_dir'],
                             data_file['data_name'] + '.json')
    with open(data_path) as f:
        valuation_data = json.load(f)
    assert valuation_data is not None

    valid_data, _ = loaddata(data_file['data_dir'], data_file['data_name'])
    data_length = len(valid_data)
    valid_data = data2index(valid_data, train_lang)
    text_generator = evaluate(encoder, decoder, valid_data,
                              train_lang['summary'], embedding_size,
                              encoder_style=encoder_style,
                              iter_time=data_length, beam_size=1,
                              verbose=False)
    # Fix: corrected misspelled progress messages
    # ("begin" -> "begins", "Summery" -> "Summary").
    print('The text generation begins\n', flush=True)
    with open(output, 'w') as f:
        for idx, line in enumerate(text_generator):
            print('Summary generated, No{}'.format(idx + 1))
            f.write(line + '\n')
コード例 #20
0
ファイル: main.py プロジェクト: subong0508/ESC-20FALL
def run():
    """Preprocess the eng-fra data, train, and visualise attention.

    Fix: the decoder was never moved to `device`; with encoder on GPU and
    decoder on CPU, training crashes with a device-mismatch error.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device :", device, "\n")
    # Preprocess data
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    print("Finished Preprocessing\n")
    # Seq2Seq Model
    hidden_size = 256
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words,
                                   dropout_p=0.1).to(device)
    metadata = (input_lang, output_lang, pairs)
    # Originally n_iters=75000, print_every=5000 for a full run.
    trainIters(encoder1, attn_decoder1, metadata, n_iters=500, print_every=100)

    # Check
    evaluateRandomly(encoder1, attn_decoder1, metadata)

    # Evaluate and Visualize
    output_words, attentions = evaluate(encoder1, attn_decoder1, metadata,
                                        "je suis trop froid .")
    plt.matshow(attentions.numpy())

    evaluateAndShowAttention(encoder1, attn_decoder1, metadata,
                             "elle a cinq ans de moins que moi .")
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata,
                             "elle est trop petit .")
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata,
                             "je ne crains pas de mourir .")
    evaluateAndShowAttention(encoder1, attn_decoder1, metadata,
                             "c est un jeune directeur plein de talent .")
コード例 #21
0
def load_model_param(language, model_dir, max_length=17):
    """Rebuild encoder/decoder from *model_dir* and load their weights.

    Args:
        language: vocabulary object exposing ``n_words`` (project type).
        model_dir: directory holding ``encoder.pth`` / ``decoder.pth``.
        max_length: source length used at training time (defaults to the
            previously hard-coded 17; the encoder is built with
            ``max_length + 1`` as before).

    Fixes: ``model_dir`` was accepted but ignored in favour of
    ``config.MODEL_DIR`` — it is now honoured; the magic ``17`` is a
    backward-compatible parameter.
    """
    encoder = EncoderRNN(language.n_words, config.HIDDEN_SIZE,
                         config.NUM_LAYER, max_length=max_length + 1)
    decoder = AttnDecoderRNN(config.ATT_MODEL, config.HIDDEN_SIZE,
                             language.n_words, config.NUM_LAYER,
                             dropout_p=config.DROPOUT)

    encoder_path = os.path.join(model_dir, "encoder.pth")
    decoder_path = os.path.join(model_dir, "decoder.pth")

    encoder.load_state_dict(torch.load(encoder_path, map_location="cpu"))
    decoder.load_state_dict(torch.load(decoder_path, map_location="cpu"))
    encoder.eval()
    decoder.eval()
    return encoder, decoder
コード例 #22
0
def main():
    """Load a 50k-iteration checkpoint and run evaluation + batch decoding.

    Fix: removed the two no-op bare-name statements (`encoder_sd` and
    `decoder_sd` alone on a line) that did nothing.
    """
    nIters = 50000
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loadFilename = os.path.join('checkpoints',
                                '{}_{}.tar'.format(nIters, 'checkpoint'))
    # map_location handles loading a GPU-trained checkpoint onto CPU.
    checkpoint = torch.load(loadFilename, map_location=device)

    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    hidden_size = 512
    # Rebuild the vocabularies the model was trained with.
    input_lang = Lang('fra')
    output_lang = Lang('eng')
    input_lang.__dict__ = checkpoint['input_lang']
    output_lang.__dict__ = checkpoint['output_lang']
    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, output_lang.n_words,
                             dropout_p=0).to(device)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    encoder.eval()
    decoder.eval()
    _, _, test_pairs = prepareData('eng', 'fra', True, dir='test',
                                   filter=False)
    evaluateRandomly(device, test_pairs, encoder, decoder, input_lang,
                     output_lang)
    decode_batch(device, test_pairs, encoder, decoder, input_lang,
                 output_lang, batch_size=64)
コード例 #23
0
def main(args):
    """Train an image-captioning model (CNN encoder + attention RNN decoder),
    optionally resuming from a checkpoint, and save the best model by BLEU-4.

    NOTE(review): this function mixes module-level globals with same-named
    `args` attributes (e.g. the loop reads `args.start_epoch` /
    `args.epochs_since_improvement` while the resume branch assigns the
    globals `start_epoch` / `epochs_since_improvement`) — confirm which is
    authoritative. `device` is presumably a module-level global; verify.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Load vocabulary wrapper
    # NOTE: pickle.load executes arbitrary code — only load trusted files.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    if args.checkpoint is None:
        # Fresh start: build decoder (+ its Adam optimizer over trainable
        # params) and the CNN encoder, with an encoder optimizer only when
        # fine-tuning is requested.
        decoder = AttnDecoderRNN(attention_dim=args.attention_dim,
                                 embed_dim=args.embed_dim,
                                 decoder_dim=args.decoder_dim,
                                 vocab_size=len(vocab),
                                 dropout=args.dropout)
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=args.decoder_lr)
        encoder = EncoderCNN()
        encoder.fine_tune(args.fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=args.encoder_lr) if args.fine_tune_encoder else None
    else:
        # Resume: restore models, optimizers and training counters from the
        # checkpoint file.
        checkpoint = torch.load(args.checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # Enable encoder fine-tuning after the fact when the checkpoint was
        # saved without an encoder optimizer.
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=args.encoder_lr)
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    criterion = nn.CrossEntropyLoss().to(device)

    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Build data loader
    train_loader = get_loader(args.image_dir,
                              args.caption_path,
                              vocab,
                              transform,
                              args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)

    val_loader = get_loader(args.image_dir_val,
                            args.caption_path_val,
                            vocab,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    for epoch in range(args.start_epoch, args.epochs):
        # Stop after 20 stagnant epochs; decay LR by 0.8 every 8 stagnant
        # epochs (encoder LR too when fine-tuning).
        if args.epochs_since_improvement == 20:
            break
        if args.epochs_since_improvement > 0 and args.epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if args.fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One full training pass over the train loader.
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # Validate and compare against the best BLEU-4 seen so far.
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            args.epochs_since_improvement += 1
            print("\nEpoch since last improvement: %d\n" %
                  (args.epochs_since_improvement, ))
        else:
            args.epochs_since_improvement = 0

        # Persist the latest (and best-so-far) model state.
        save_checkpoint(args.data_name, epoch, args.epochs_since_improvement,
                        encoder, decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
コード例 #24
0
def train(train_set,
          langs,
          embedding_size=600,
          learning_rate=0.01,
          iter_time=10,
          batch_size=32,
          get_loss=GET_LOSS,
          save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE,
          use_model=USE_MODEL):
    """The training procedure.

    Builds the record embedding, encoder and attention decoder, then runs
    `iter_time` epochs of Adagrad training over batches of `train_set`,
    reporting the running average loss every `get_loss` iterations and
    saving weights every `save_model` epochs. Returns (encoder, decoder).
    """
    # Wall-clock reference for the periodic progress reports.
    start = time.time()

    # Shared record embedding over the three record-field vocabularies.
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Pick the encoder architecture; anything unrecognized falls back to RNN.
    encoder_cls = {'LIN': EncoderLIN, 'BiLSTM': EncoderBiLSTM}.get(
        encoder_style, EncoderRNN)
    encoder = encoder_cls(embedding_size, emb)
    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Optionally warm-start from checkpoints: (encoder path, decoder path).
    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    # Single Adagrad optimizer over the parameters of both modules.
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) +
                                   list(decoder.parameters()),
                                   lr=learning_rate,
                                   lr_decay=0,
                                   weight_decay=0)

    criterion = nn.NLLLoss()

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        print("Epoch #%d" % (epo))

        for dt in data_iter(train_set, batch_size=batch_size):
            iteration += 1
            _, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Pad each field to a rectangular batch, then wrap as
            # non-differentiable LongTensor Variables.
            rt = Variable(torch.LongTensor(addpaddings(rt)),
                          requires_grad=False)
            re = Variable(torch.LongTensor(addpaddings(re)),
                          requires_grad=False)
            rm = Variable(torch.LongTensor(addpaddings(rm)),
                          requires_grad=False)
            summary = Variable(torch.LongTensor(addpaddings(summary)),
                               requires_grad=False)

            if use_cuda:
                rt, re, rm = rt.cuda(), re.cuda(), rm.cuda()
                summary = summary.cuda()

            # Average per-sentence loss for this batch; the optimizer step
            # happens inside sentenceloss.
            total_loss += sentenceloss(rt, re, rm, summary, encoder, decoder,
                                       loss_optimizer, criterion,
                                       embedding_size, encoder_style)

            # Periodic progress report; reset the running total afterwards.
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(start), iteration, total_loss / get_loss))
                total_loss = 0

        # Checkpoint both modules every `save_model` epochs.
        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
コード例 #25
0
                               train_pairs,
                               batch_size=config.BATCH_SIZE,
                               max_length=17)
# Validation pairs, batched the same way as the training loader.
val_dataloader = PairsLoader(chinese,
                             val_pairs,
                             batch_size=config.BATCH_SIZE,
                             max_length=17)

# Initialize models: encoder/attention-decoder over the `chinese`
# vocabulary, sized from the config module.
encoder = EncoderRNN(chinese.n_words,
                     config.HIDDEN_SIZE,
                     config.NUM_LAYER,
                     # +1 presumably leaves room for an end-of-sequence
                     # token -- TODO confirm against EncoderRNN.
                     max_length=config.MAX_LENGTH + 1)
decoder = AttnDecoderRNN(config.ATT_MODEL,
                         config.HIDDEN_SIZE,
                         chinese.n_words,
                         config.NUM_LAYER,
                         dropout_p=config.DROPOUT)
# Optionally resume from previously saved weights.
if config.RESTORE:
    encoder_path = os.path.join(config.MODEL_DIR, "encoder.pth")
    decoder_path = os.path.join(config.MODEL_DIR, "decoder.pth")

    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))

# Move models to GPU
if config.USE_CUDA:
    encoder.cuda()
    decoder.cuda()

# Initialize optimizers and criterion
コード例 #26
0
# Checkpoint paths for the trained encoder/decoder weights.
encoder_src = './models/long4_encoder_2120'
decoder_src = './models/long4_decoder_2120'

encoder_style = None

# Select the encoder architecture from the ENCODER_STYLE flag; any value
# other than 'RNN'/'LSTM' falls back to the linear encoder.
if 'RNN' == ENCODER_STYLE:
    encoder = EncoderRNN(embedding_size, emb)
    encoder_style = 'RNN'
elif 'LSTM' == ENCODER_STYLE:
    encoder = EncoderBiLSTM(embedding_size, emb)
    encoder_style = 'BiLSTM'
else:
    encoder = EncoderLIN(embedding_size, emb)
    encoder_style = 'LIN'

decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

# Restore the saved weights into the freshly built modules.
encoder = load_model(encoder, encoder_src)
decoder = load_model(decoder, decoder_src)

# Index the validation split with the training vocabulary, then generate
# summaries (beam_size=1, i.e. greedy decoding).
valid_data, _ = loaddata(file_loc, 'valid')
data_length = len(valid_data)
valid_data = data2index(valid_data, train_lang)
text_generator = evaluate(encoder,
                          decoder,
                          valid_data,
                          train_lang['summary'],
                          embedding_size,
                          encoder_style=encoder_style,
                          iter_time=2,
                          beam_size=1,
コード例 #27
0
def eval_network(fn_in_model):
    """Load a saved seq2seq checkpoint and evaluate it per `eval_type`.

    Args:
        fn_in_model: filename of a saved '.tar' model checkpoint.

    Side effects:
        Writes all evaluation output to a results file derived from the
        checkpoint name ('/net_X.tar' -> '/res_test_X.txt').

    Raises:
        FileNotFoundError: if `fn_in_model` does not exist.
        ValueError: if the module-level `eval_type` is unrecognized.
    """
    # Derive the results filename from the checkpoint filename.
    fn_out_res = fn_in_model.replace('.tar', '.txt')
    fn_out_res_test = fn_out_res.replace('/net_', '/res_test_')

    # Fail loudly if the checkpoint is missing. A bare `assert` is
    # stripped under `python -O`, so raise an explicit exception instead.
    if not os.path.isfile(fn_in_model):
        raise FileNotFoundError('checkpoint not found: ' + fn_in_model)
    print('  Checkpoint found...')
    print('  Processing model: ' + fn_in_model)
    print('  Writing to file: ' + fn_out_res_test)
    checkpoint = torch.load(fn_in_model,
                            map_location='cpu')  # evaluate model on CPU
    # Unpack the training-time configuration stored in the checkpoint.
    input_lang = checkpoint['input_lang']
    output_lang = checkpoint['output_lang']
    emb_size = checkpoint['emb_size']
    nlayers = checkpoint['nlayers']
    dropout_p = checkpoint['dropout']
    input_size = input_lang.n_symbols
    output_size = output_lang.n_symbols
    samples_val = checkpoint['episodes_validation']
    disable_memory = checkpoint['disable_memory']
    max_length_eval = checkpoint['max_length_eval']
    # Older checkpoints predate the disable_attention flag; default to
    # attention on for those.
    if 'args' not in checkpoint or 'disable_attention' not in checkpoint[
            'args']:
        use_attention = True
    else:
        args = checkpoint['args']
        use_attention = not args.disable_attention
    # Rebuild the exact architecture the checkpoint was trained with.
    if disable_memory:
        encoder = WrapperEncoderRNN(emb_size, input_size, output_size, nlayers,
                                    dropout_p)
    else:
        encoder = MetaNetRNN(emb_size, input_size, output_size, nlayers,
                             dropout_p)
    if use_attention:
        decoder = AttnDecoderRNN(emb_size, output_size, nlayers, dropout_p)
    else:
        decoder = DecoderRNN(emb_size, output_size, nlayers, dropout_p)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    decoder.load_state_dict(checkpoint['decoder_state_dict'])

    # Everything printed inside this block lands in the results file.
    with open(fn_out_res_test, 'w') as f_test:
        with redirect_stdout(f_test):
            if 'episode' in checkpoint:
                print(' Loading epoch ' + str(checkpoint['episode']) + ' of ' +
                      str(checkpoint['num_episodes']))
            describe_model(encoder)
            describe_model(decoder)
            if eval_type == 'val':
                print(
                    'Evaluating VALIDATION performance on pre-generated validation set'
                )
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    samples_val,
                    encoder,
                    decoder,
                    input_lang,
                    output_lang,
                    max_length_eval,
                    verbose=True)
                print('Acc Retrieval (val): ' +
                      str(round(acc_val_retrieval, 1)))
                print('Acc Generalize (val): ' + str(round(acc_val_gen, 1)))
            elif eval_type == 'addprim_jump':
                print('Evaluating TEST performance on SCAN addprim_jump')
                print('  ...support set is just the isolated primitives')
                mybatch = scan_evaluation_prim_only('addprim_jump', 'test',
                                                    input_lang, output_lang)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    [mybatch],
                    encoder,
                    decoder,
                    input_lang,
                    output_lang,
                    max_length_eval,
                    verbose=True)
            elif eval_type == 'length':
                print('Evaluating TEST performance on SCAN length')
                print(
                    '  ...over multiple support sets as contributed by the pre-generated validation set'
                )
                samples_val = scan_evaluation_val_support(
                    'length', 'test', input_lang, output_lang, samples_val)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    samples_val,
                    encoder,
                    decoder,
                    input_lang,
                    output_lang,
                    max_length_eval,
                    verbose=True)
                print('Acc Retrieval (val): ' +
                      str(round(acc_val_retrieval, 1)))
                print('Acc Generalize (val): ' + str(round(acc_val_gen, 1)))
            elif eval_type == 'template_around_right':
                print('Evaluating TEST performance on the SCAN around right')
                print(' ...with just direction mappings as support set')
                mybatch = scan_evaluation_dir_only('template_around_right',
                                                   'test', input_lang,
                                                   output_lang)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    [mybatch],
                    encoder,
                    decoder,
                    input_lang,
                    output_lang,
                    max_length_eval,
                    verbose=True)
            else:
                # Explicit error instead of `assert False` so the failure
                # survives `python -O` and names the offending value.
                raise ValueError('unrecognized eval_type: ' + str(eval_type))
コード例 #28
0
    # Echo the run configuration.
    print("use cuda: {}".format(args.cuda))
    print("seed: {}".format(args.seed))

    # Load pairs.pkl and lang.pkl
    with open(args.data_path + "/pairs.pkl", 'rb') as f:
        (train_pairs, test_pairs) = pkl.load(f)
    with open(args.data_path + "/lang.pkl", 'rb') as f:
        lang_tuple = pkl.load(f)
    lang = Lang(lang_tuple)

    # Prepare dataloader for training
    train_dataiter = DataIter(train_pairs, lang, args.vocab_size, args.batch_size, args.cuda)

    # Set encoder and decoder
    encoder = Encoder(args.vocab_size, args.hidden_size)
    decoder = AttnDecoderRNN(args.attn, args.hidden_size, args.vocab_size, args.n_layers, args.dropout, args.cuda)
    if args.cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # Set optimizer and criterion: Adam for both modules, each paired with
    # a scheduler that shrinks the LR 10x after 5 epochs without
    # improvement, down to a floor of 1e-5.
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    encoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=encoder_optimizer, 
        mode='min', 
        factor=0.1, 
        patience=5, 
        verbose=True,
        min_lr=0.00001)
    decoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
コード例 #29
0

def evaluateRandomly(encoder, decoder, n=10):
    """Decode n randomly chosen sentence pairs and print each triple:
    source ('>'), reference ('='), and model output ('<')."""
    for _ in range(n):
        sample = random.choice(pairs)
        print('>', sample[0])
        print('=', sample[1])
        decoded_words, _attn = evaluate(encoder, decoder, sample[0])
        print('<', ' '.join(decoded_words))
        print('')


# Build the tutorial encoder/attention-decoder pair.
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1)

if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()

# Train for 75k iterations, printing progress every 5k.
trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

######################################################################
#

# Spot-check the trained model on random pairs, then visualize the
# attention weights for one sentence.
evaluateRandomly(encoder1, attn_decoder1)

output_words, attentions = evaluate(encoder1, attn_decoder1,
                                    "je suis trop froid .")
plt.matshow(attentions.numpy())
コード例 #30
0
ファイル: run.py プロジェクト: RuYunW/ADG-Seq2Seq
    # Strip 0 tokens (presumably padding) from the tail of each target
    # sequence, wrap it with 1/2 markers (presumably SOS/EOS -- TODO
    # confirm against the vocabulary), and ship to `device` as an
    # int64 column vector. NOTE(review): `while 0 in seq: seq.pop(-1)`
    # also pops non-zero tail tokens whenever a 0 occurs mid-sequence;
    # correct only if 0 appears exclusively as trailing padding.
    for i in range(len(target_batch)):
        while 0 in target_batch[i]:
            target_batch[i].pop(-1)
        target_batch[i] = [1] + target_batch[i] + [2]
        target_batch[i] = torch.from_numpy(
            np.array(target_batch[i], dtype=np.int64)).to(device).view(-1, 1)

hidden_size = 256
# The decoder must accommodate the longer of source/target sequences.
MAX_LENGTH = max(max_source_len, max_target_len)

# Describe-side encoder, code-side embedder, and attention decoder over
# the code vocabulary.
encoder = EncoderRNN(len(describe_dic_i2w), hidden_size).to(device)
embedder = EmbedderRNN(len(code_dic_i2w), len(code_dic_i2w),
                       dropout=0.1).to(device)
attn_decoder = AttnDecoderRNN(hidden_size,
                              len(code_dic_i2w),
                              dropout_p=0.1,
                              max_length=max(max_source_len,
                                             max_target_len)).to(device)

# One-hot node table per hop: slot 0 plus K further entries.
# NOTE(review): every slot aliases the SAME dict object, so mutating it
# through one hop is visible through all hops -- confirm intended.
node_onehot_t = [[]]  # h
node_onehot_t[0] = node_list_onehot_dict
for i in range(K):
    node_onehot_t.append(node_list_onehot_dict)

# print(len(node_onehot_t[0][method_list[0]]))
# exit()

# encoder_outputs, encoder_hidden = encoder(source_batch[0][0], encoder.initHidden())
# decoder_input = torch.tensor(node_onehot_t[0][method_list[0]], dtype=torch.int64).to(device)
# t = torch.tensor(node_onehot_t[0][method_list[0]], dtype=torch.float32).view(1, 1, -1).to(device)
# print(t)