Example #1
def example_test():
    encoder_test = EncoderRNN(10, 10, 2, max_length=3)
    decoder_test = AttnDecoderRNN('general', 10, 10, 2)
    print(encoder_test)
    print(decoder_test)

    encoder_hidden = encoder_test.init_hidden(batch_size=4)
    # word_input = Variable(torch.LongTensor([[1, 2, 3]]))
    word_input = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))
    if USE_CUDA:
        encoder_test.cuda()
        word_input = word_input.cuda()
        encoder_hidden = encoder_hidden.cuda()
    encoder_outputs, encoder_hidden = encoder_test(
        word_input, encoder_hidden)  # outputs: (seq, batch, hidden); hidden: (layers, batch, hidden)
    print(encoder_outputs.shape, encoder_hidden.shape)
    # word_inputs = Variable(torch.LongTensor([[1, 2, 3]]))
    word_inputs = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))
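    # Collect one (decoder step x encoder step) attention map per batch element.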
    decoder_attns = torch.zeros(4, 3, 3)
    decoder_hidden = encoder_hidden
    decoder_context = Variable(torch.zeros(4, decoder_test.hidden_size))

    if USE_CUDA:
        decoder_test.cuda()
        word_inputs = word_inputs.cuda()
        decoder_context = decoder_context.cuda()

    for i in range(3):
        decoder_output, decoder_context, decoder_hidden, decoder_attn = decoder_test(
            word_inputs[:, i], decoder_context, decoder_hidden, encoder_outputs)
        print(decoder_output.size(), decoder_hidden.size(), decoder_attn.size())
        decoder_attns[:, i, :] = decoder_attn.squeeze(1).cpu().data
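
    # Sketch (an addition, not part of the original test): inspect the
    # attention maps gathered above for one batch element. Assumes
    # matplotlib is available.
    import matplotlib.pyplot as plt
    plt.matshow(decoder_attns[0].numpy())  # rows: decoder steps, cols: encoder positions
    plt.show()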
Example #2
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')


hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1)

if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()

trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

######################################################################
#

evaluateRandomly(encoder1, attn_decoder1)

output_words, attentions = evaluate(encoder1, attn_decoder1,
                                    "je suis trop froid .")
plt.matshow(attentions.numpy())


def showAttention(input_sentence, output_words, attentions):
    # Set up figure with colorbar
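    # (The snippet is truncated here. A sketch completing it in the style of
    # the PyTorch seq2seq tutorial that this code follows:)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.numpy(), cmap='bone')
    fig.colorbar(cax)

    # Label the axes with the input and output tokens
    ax.set_xticklabels([''] + input_sentence.split(' ') + ['<EOS>'], rotation=90)
    ax.set_yticklabels([''] + output_words)

    plt.show()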
Example #3
def eval_network(fn_in_model):
    # Input
    #  fn_in_model : filename of saved model
    #
    # Create filename for output
    fn_out_res = fn_in_model
    fn_out_res = fn_out_res.replace('.tar', '.txt')
    fn_out_res_test = fn_out_res.replace('/net_', '/res_test_')

    # Load and evaluate the network in filename 'fn_in_model'
    assert os.path.isfile(fn_in_model)
    print('  Checkpoint found...')
    print('  Processing model: ' + fn_in_model)
    print('  Writing to file: ' + fn_out_res_test)
    checkpoint = torch.load(fn_in_model,
                            map_location='cpu')  # evaluate model on CPU
    input_lang = checkpoint['input_lang']
    output_lang = checkpoint['output_lang']
    emb_size = checkpoint['emb_size']
    nlayers = checkpoint['nlayers']
    dropout_p = checkpoint['dropout']
    input_size = input_lang.n_symbols
    output_size = output_lang.n_symbols
    samples_val = checkpoint['episodes_validation']
    disable_memory = checkpoint['disable_memory']
    max_length_eval = checkpoint['max_length_eval']
    if 'args' not in checkpoint or 'disable_attention' not in checkpoint['args']:
        use_attention = True
    else:
        args = checkpoint['args']
        use_attention = not args.disable_attention
    if disable_memory:
        encoder = WrapperEncoderRNN(emb_size, input_size, output_size, nlayers,
                                    dropout_p)
    else:
        encoder = MetaNetRNN(emb_size, input_size, output_size, nlayers,
                             dropout_p)
    if use_attention:
        decoder = AttnDecoderRNN(emb_size, output_size, nlayers, dropout_p)
    else:
        decoder = DecoderRNN(emb_size, output_size, nlayers, dropout_p)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    decoder.load_state_dict(checkpoint['decoder_state_dict'])

    with open(fn_out_res_test, 'w') as f_test:
        with redirect_stdout(f_test):
            if 'episode' in checkpoint:
                print(' Loading epoch ' + str(checkpoint['episode']) + ' of ' +
                      str(checkpoint['num_episodes']))
            describe_model(encoder)
            describe_model(decoder)
            if eval_type == 'val':
                print(
                    'Evaluating VALIDATION performance on pre-generated validation set'
                )
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    samples_val,
                    encoder,
                    decoder,
                    input_lang,
                    output_lang,
                    max_length_eval,
                    verbose=True)
                print('Acc Retrieval (val): ' +
                      str(round(acc_val_retrieval, 1)))
                print('Acc Generalize (val): ' + str(round(acc_val_gen, 1)))
            elif eval_type == 'addprim_jump':
                print('Evaluating TEST performance on SCAN addprim_jump')
                print('  ...support set is just the isolated primitives')
                mybatch = scan_evaluation_prim_only('addprim_jump', 'test',
                                                    input_lang, output_lang)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    [mybatch],
                    encoder,
                    decoder,
                    input_lang,
                    output_lang,
                    max_length_eval,
                    verbose=True)
            elif eval_type == 'length':
                print('Evaluating TEST performance on SCAN length')
                print(
                    '  ...over multiple support sets as contributed by the pre-generated validation set'
                )
                samples_val = scan_evaluation_val_support(
                    'length', 'test', input_lang, output_lang, samples_val)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    samples_val,
                    encoder,
                    decoder,
                    input_lang,
                    output_lang,
                    max_length_eval,
                    verbose=True)
                print('Acc Retrieval (val): ' +
                      str(round(acc_val_retrieval, 1)))
                print('Acc Generalize (val): ' + str(round(acc_val_gen, 1)))
            elif eval_type == 'template_around_right':
                print('Evaluating TEST performance on the SCAN around right')
                print(' ...with just direction mappings as support set')
                mybatch = scan_evaluation_dir_only('template_around_right',
                                                   'test', input_lang,
                                                   output_lang)
                acc_val_gen, acc_val_retrieval = evaluation_battery(
                    [mybatch],
                    encoder,
                    decoder,
                    input_lang,
                    output_lang,
                    max_length_eval,
                    verbose=True)
            else:
                assert False, 'unrecognized eval_type: ' + eval_type
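
# Hypothetical usage (the path is an assumption; eval_network expects a
# '.tar' checkpoint whose basename starts with 'net_'):
#   eval_network('out_models/net_example.tar')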
Example #4
def train(train_set,
          langs,
          embedding_size=600,
          learning_rate=0.01,
          iter_time=10,
          batch_size=32,
          get_loss=GET_LOSS,
          save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE,
          use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    else:
        encoder = EncoderRNN(embedding_size, emb)

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) +
                                   list(decoder.parameters()),
                                   lr=learning_rate,
                                   lr_decay=0,
                                   weight_decay=0)
    # decoder_optimizer = optim.Adagrad(decoder.parameters(), lr=learning_rate, lr_decay=0, weight_decay=0)

    criterion = nn.NLLLoss()

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        print("Epoch #%d" % (epo))
        # Get data

        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)
            summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm = rt.cuda(), re.cuda(), rm.cuda()
                summary = summary.cuda()

            # Get the average loss on the sentences
            loss = sentenceloss(rt, re, rm, summary, encoder, decoder,
                                loss_optimizer, criterion, embedding_size,
                                encoder_style)
            total_loss += loss

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(start), iteration, total_loss / get_loss))
                total_loss = 0
        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
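
# Hypothetical invocation (train_set and langs come from data-preparation
# code outside this excerpt):
#   encoder, decoder = train(train_set, langs, iter_time=10, batch_size=32)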
Example #5
decoder = AttnDecoderRNN(config.ATT_MODEL,
                         config.HIDDEN_SIZE,
                         chinese.n_words,
                         config.NUM_LAYER,
                         dropout_p=config.DROPOUT)
if config.RESTORE:
    encoder_path = os.path.join(config.MODEL_DIR, "encoder.pth")
    decoder_path = os.path.join(config.MODEL_DIR, "decoder.pth")

    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))

# Move models to GPU
if config.USE_CUDA:
    encoder.cuda()
    decoder.cuda()

# Initialize optimizers and criterion
encoder_optimizer = optim.Adam(encoder.parameters(), lr=config.LR)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=config.LR)
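# LanguageModelCriterion is presumably a padding-aware (masked) negative
# log-likelihood, playing the same role as the commented-out
# nn.NLLLoss(ignore_index=0) alternative.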
criterion = LanguageModelCriterion()  # nn.NLLLoss(ignore_index=0)

# Keep track of time elapsed and running averages
start = time.time()
plot_losses = []
print_loss_total = 0
plot_loss_total = 0

for epoch in range(1, config.NUM_ITER + 1):

    # Get training data for this cycle
Example #6
    # Load pairs.pkl and lang.pkl
    with open(args.data_path + "/pairs.pkl", 'rb') as f:
        (train_pairs, test_pairs) = pkl.load(f)
    with open(args.data_path + "/lang.pkl", 'rb') as f:
        lang_tuple = pkl.load(f)
    lang = Lang(lang_tuple)

    # Prepare dataloader for training
    train_dataiter = DataIter(train_pairs, lang, args.vocab_size, args.batch_size, args.cuda)

    # Set encoder and decoder
    encoder = Encoder(args.vocab_size, args.hidden_size)
    decoder = AttnDecoderRNN(args.attn, args.hidden_size, args.vocab_size, args.n_layers, args.dropout, args.cuda)
    if args.cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    
    # Set optimizer and criterion
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    encoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=encoder_optimizer, 
        mode='min', 
        factor=0.1, 
        patience=5, 
        verbose=True,
        min_lr=0.00001)
    decoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=decoder_optimizer, 
        mode='min', 
        factor=0.1,
        patience=5,
        verbose=True,
        min_lr=0.00001)  # tail reconstructed to mirror encoder_scheduler above
Example #7
class Train(object):
    """

    """

    # TODO: remove the dataset from this class and add an apply() method
    # that accepts a dataset parameter and trains on it.

    def __init__(self, config, dataset):
        self.config = config
        self.n_epochs = config.n_epochs
        self.encoder = EncoderRNN(n_dict=dataset.source.n_words, config=config)
        self.decoder = AttnDecoderRNN(n_dict=dataset.target.n_words,
                                      config=config)
        self.encoder_optimizer = config.optimizier(self.encoder.parameters(),
                                                   lr=config.learning_rate)
        self.decoder_optimizer = config.optimizier(self.decoder.parameters(),
                                                   lr=config.learning_rate)
        self.criterion = nn.NLLLoss()
        self.is_plot = config.is_plot
        self.clip_value = config.clip_value
        self.losses = []
        if self.config.USE_CUDA:
            self.encoder.cuda(self.config.gpu_id)
        if self.config.USE_CUDA:
            self.decoder.cuda(device_id=self.config.gpu_id)

    def train(self, dataset):
        if self.is_plot:
            fig, ax = plt.subplots()
            grid(True)
            plt.ion()
        for epoch in range(self.n_epochs):
            training_pair = dataset.get_sample_var()
            loss, result_output = self.step(training_pair)
            print("At Epoch : {:5},Get loss : {:10}\n".format(epoch, loss))
            self.losses.append(loss)
            if self.is_plot:
                ax.plot(range(epoch + 1), self.losses, "b")
                plt.pause(0.0001)
                plt.show()
            if epoch % 100 == 0:
                print(''.join([
                    dataset.target.index2word[i]
                    for i in training_pair[1].squeeze(1).data.tolist()
                ]))
                print(''.join(
                    [dataset.target.index2word[i] for i in result_output]))

    def step(self, training_pair):
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()

        input_variable = training_pair[0]
        target_variable = training_pair[1]

        loss = 0
        input_length = input_variable.size()[0]
        target_length = target_variable.size()[0]

        encoder_hidden = self.encoder.init_hidden()
        encoder_outputs, encoder_hidden = self.encoder(input_variable,
                                                       encoder_hidden)
        decoder_input = Variable(torch.LongTensor([[self.config.SOS_token]]))
        decoder_context = Variable(torch.zeros(1, self.decoder.hidden_dim))
        decoder_hidden = encoder_hidden
        if self.config.USE_CUDA:
            decoder_input = decoder_input.cuda(device_id=self.config.gpu_id)
            decoder_context = decoder_context.cuda(
                device_id=self.config.gpu_id)
            assert type(decoder_input.data) == torch.cuda.LongTensor
            assert type(decoder_context.data) == torch.cuda.FloatTensor
        result_output = []
        for di in range(target_length):
            decoder_output, \
            decoder_context, \
            decoder_hidden, \
            decoder_attention = self.decoder(decoder_input,
                                             decoder_context,
                                             decoder_hidden,
                                             encoder_outputs)
            loss += self.criterion(decoder_output[0], target_variable[di])
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            decoder_input = Variable(torch.LongTensor([[ni]]))
            if self.config.USE_CUDA:
                decoder_input = decoder_input.cuda(
                    device_id=self.config.gpu_id)
            result_output.append(ni)
            if ni == self.config.EOS_token: break
        loss.backward()
        # TODO: clip value
        torch.nn.utils.clip_grad_norm(self.encoder.parameters(),
                                      self.clip_value)
        torch.nn.utils.clip_grad_norm(self.decoder.parameters(),
                                      self.clip_value)

        self.encoder_optimizer.step()
        self.decoder_optimizer.step()
        if self.config.USE_CUDA:
            return loss.cpu().data[0] / target_length, result_output
        return loss.data[0] / target_length, result_output
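
# Hypothetical usage (the config and dataset objects are assumptions of
# this sketch, not defined in the excerpt):
#   trainer = Train(config, dataset)
#   trainer.train(dataset)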
Example #8
def main(args):

    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))
    ])

    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    data_loader = get_loader(args.image_dir, args.caption_path, vocab, transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    encoder = EncoderCNN(args.embed_size)
    decoder = AttnDecoderRNN(args.embed_size, args.hidden_size, len(vocab), args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    total_step = len(data_loader)
    decoder_hidden = decoder.init_hidden()

    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            images = cuda_variable(images, volatile=True)
            captions = cuda_variable(captions)
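            # Pack the padded captions so the loss is computed only over
            # real (non-padding) tokens.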
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)

            outputs = decoder(captions, decoder_hidden, features, lengths)
            # outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      %(epoch, args.num_epochs, i, total_step,
                        loss.data[0], np.exp(loss.data[0])))

            if (i+1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path,
                                        'decoder-%d-%d.pkl' %(epoch+1, i+1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path,
                                        'encoder-%d-%d.pkl' %(epoch+1, i+1)))
Example #9
# Word vector
word_vector = KeyedVectors.load_word2vec_format("word_vector/koran.vec",
                                                binary=True)

hidden_size = 64
max_len = 50
encoder = EncoderEmbeddingInputRNN(src_lang.n_words, hidden_size, word_vector)
attn_decoder = AttnDecoderRNN(hidden_size,
                              tgt_lang.n_words,
                              dropout_p=0.1,
                              max_length=max_len)

if use_cuda:
    encoder = encoder.cuda()
    attn_decoder = attn_decoder.cuda()

num_iter = 100000
trainer = Trainer(src_lang, tgt_lang, pairs)
trainer.train(encoder,
              attn_decoder,
              num_iter,
              print_every=num_iter // 100,
              max_len=max_len)
trainer.evaluateRandomly(encoder, attn_decoder, max_len=max_len)
# trainer.evaluateAll(encoder, attn_decoder)

encoder.saveState('model/encoder-embedding2-h64' + str(num_iter) + '.pt')
attn_decoder.saveState('model/decoder-embedding2-h64' + str(num_iter) + '.pt')
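# saveState presumably wraps torch.save on each module's state_dict; the
# files written above can be restored later with load_state_dict.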

# Open testfile as test and build pairs from it
Example #10
    return print_losses


# Create data loader
dataloader = DataLoader(args.data_base_dir, args.label_path,
                        args.max_aspect_ratio, args.max_encoder_l_h,
                        args.max_encoder_l_w, args.max_decoder_l)

# Create the modules of the algorithm
cnn1 = CNN()
encoder1 = EncoderBRNN(args.batch_size, args.num_layers_encoder,
                       args.hidden_dim_encoder, use_cuda)
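# The decoder's first argument is half the encoder's hidden dimension,
# presumably because the bidirectional EncoderBRNN devotes
# hidden_dim_encoder // 2 units to each direction.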
decoder1 = AttnDecoderRNN(args.hidden_dim_encoder // 2,
                          args.output_dim_decoder, args.num_layers_decoder,
                          args.max_length_decoder, dataloader.vocab_size)

if use_cuda:
    cnn1 = cnn1.cuda()
    encoder1 = encoder1.cuda()
    decoder1 = decoder1.cuda()

trainIters(args.batch_size,
           cnn1,
           encoder1,
           decoder1,
           dataloader,
           args.learning_rate,
           n_iters=75000,
           print_every=10,
           use_cuda=use_cuda)