Ejemplo n.º 1
0
    # Accumulate this epoch's loss into both running windows
    # (one for console reporting, one for plotting).
    print_loss_total += loss
    plot_loss_total += loss

    # Every PRINT_STEP epochs: report the average loss over the window.
    if epoch % config.PRINT_STEP == 0:
        print_loss_avg = print_loss_total / config.PRINT_STEP
        print_loss_total = 0  # reset the reporting window
        # e.g. "3m 12s (500 5%) 2.3456": elapsed/remaining time, epoch,
        # percent complete, windowed average loss
        print_summary = '%s (%d %d%%) %.4f' % (time_since(
            start, epoch / config.NUM_ITER), epoch, epoch / config.NUM_ITER *
                                               100, print_loss_avg)
        print(print_summary)

    # Every CHECKPOINT_STEP epochs: save both halves of the model.
    # NOTE(review): the filenames are fixed, so each checkpoint overwrites
    # the previous one — confirm that keeping only the latest is intended.
    if epoch % config.CHECKPOINT_STEP == 0:
        encoder_path = os.path.join(config.MODEL_DIR, "encoder.pth")
        decoder_path = os.path.join(config.MODEL_DIR, "decoder.pth")
        torch.save(encoder.state_dict(), encoder_path)
        torch.save(decoder.state_dict(), decoder_path)
"""
def evaluate(sentence, max_length=MAX_LENGTH):
    # Run a single sentence through the trained encoder and prepare the
    # decoder's start-of-sequence inputs.
    # NOTE(review): the function body is truncated in this view; the decoding
    # loop that presumably uses max_length is not visible here.
    input_index, output_index = val_dataloader.indexes_from_sentence(sentence)
    input_variable = Variable(torch.LongTensor(input_index))
    # NOTE(review): output_variable is never used in the visible code.
    output_variable = Variable(torch.LongTensor(output_index))
    # NOTE(review): this overwrites the input_variable built two lines above —
    # one of the two constructions is dead code; confirm which is intended.
    input_variable = variable_from_sentence(chinese, sentence)
    input_length = input_variable.size()[0]

    # Run through encoder
    encoder_hidden = encoder.init_hidden()
    encoder_outputs, encoder_hidden = encoder(input_variable, encoder_hidden)

    # Create starting vectors for decoder
    decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS
    decoder_context = Variable(torch.zeros(1, decoder.hidden_size))
Ejemplo n.º 2
0
def train(train_set,
          langs,
          embedding_size=600,
          learning_rate=0.01,
          iter_time=10,
          batch_size=32,
          get_loss=GET_LOSS,
          save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE,
          use_model=USE_MODEL):
    """Train the encoder/decoder pair on train_set and return both models.

    langs maps field names ('rt', 're', 'rm', 'summary') to vocabularies;
    encoder_style selects the encoder architecture; use_model, when given,
    is a pair of checkpoint paths to warm-start from. Loss is reported
    every get_loss iterations and weights are saved every save_model epochs.
    """
    # Wall-clock reference for progress reporting.
    start = time.time()

    # Shared record embedding built from the vocabulary sizes of each field.
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Pick the encoder architecture; anything unrecognized falls back to
    # the plain RNN encoder.
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    else:
        encoder = EncoderRNN(embedding_size, emb)

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Optionally resume from previously saved weights.
    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    # A single Adagrad optimizer drives all encoder + decoder parameters.
    trainable = list(encoder.parameters()) + list(decoder.parameters())
    loss_optimizer = optim.Adagrad(trainable,
                                   lr=learning_rate,
                                   lr_decay=0,
                                   weight_decay=0)

    criterion = nn.NLLLoss()

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        print("Epoch #%d" % (epo))

        # Stream mini-batches from the training set.
        for batch in data_iter(train_set, batch_size=batch_size):
            iteration += 1
            _, idx_data = get_batch(batch)

            # Pad each field to uniform length within the batch.
            rt, re, rm, summary = (addpaddings(field) for field in idx_data)

            # Wrap inputs (and the decoding target) as non-grad variables.
            rt, re, rm, summary = (
                Variable(torch.LongTensor(field), requires_grad=False)
                for field in (rt, re, rm, summary))

            if use_cuda:
                rt, re, rm, summary = (field.cuda()
                                       for field in (rt, re, rm, summary))

            # Average loss over the sentences of this batch (one
            # optimization step happens inside sentenceloss).
            loss = sentenceloss(rt, re, rm, summary, encoder, decoder,
                                loss_optimizer, criterion, embedding_size,
                                encoder_style)
            total_loss += loss

            # Windowed loss reporting every get_loss iterations.
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(start), iteration, total_loss / get_loss))
                total_loss = 0

        # Checkpoint both halves every save_model epochs.
        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
Ejemplo n.º 3
0
                               1,
                               dropout_p=0.1)
if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()

logger.info('train start. ')
# Training: run 75000 iterations, printing progress every 5000.
trainIters(input_lang,
           output_lang,
           pairs,
           encoder1,
           attn_decoder1,
           75000,
           print_every=5000)
logger.info('train end. ')

# Save the encoder and decoder state dicts.
# FIX: pass the destination path directly so torch.save opens and closes
# the file itself — the previous open(..., 'wb') handles were never closed,
# so buffered data could be lost and descriptors leaked.
torch.save(encoder1.state_dict(),
           './data/%s_%s_encoder1.stat' % (input, output))
torch.save(attn_decoder1.state_dict(),
           './data/%s_%s_attn_decoder1.stat' % (input, output))
logger.info('stat saved.')

# Save the entire networks (architecture + weights).
torch.save(encoder1,
           './data/%s_%s_encoder1.model' % (input, output))
torch.save(attn_decoder1,
           './data/%s_%s_attn_decoder1.model' % (input, output))
logger.info('model saved.')
Ejemplo n.º 4
0
     max_length_eval)
 # Report progress: elapsed/remaining time, percent complete, windowed
 # training loss, and both validation accuracies.
 print(
     '{:s} ({:d} {:.0f}% finished) TrainLoss: {:.4f}, ValAccRetrieval: {:.1f}, ValAccGeneralize: {:.1f}'
     .format(
         timeSince(start,
                   float(episode) / float(num_episodes)),
         episode,
         float(episode) / float(num_episodes) * 100.,
         avg_train_loss / counter, acc_val_retrieval,
         acc_val_gen))
 # Reset the loss window after reporting.
 avg_train_loss = 0.
 counter = 0
 # Every 1000 episodes (and at the very end): bundle the model weights
 # with the vocabularies and hyperparameters needed to reload them.
 # NOTE(review): the torch.save call consuming this state dict is not
 # visible in this view — confirm it follows immediately after.
 if episode % 1000 == 0 or episode == num_episodes:
     state = {
         'encoder_state_dict': encoder.state_dict(),
         'decoder_state_dict': decoder.state_dict(),
         'input_lang': input_lang,
         'output_lang': output_lang,
         'episodes_validation': samples_val,
         'episode_type': episode_type,
         'emb_size': emb_size,
         'dropout': dropout_p,
         'nlayers': nlayers,
         'episode': episode,
         'disable_memory': disable_memory,
         'disable_recon_loss': disable_recon_loss,
         'use_attention': use_attention,
         'max_length_eval': max_length_eval,
         'num_episodes': num_episodes,
         'args': args
     }
Ejemplo n.º 5
0
def main(args):
    """Train an EncoderCNN / AttnDecoderRNN image-captioning pair.

    Expects args to provide model_path, crop_size, vocab_path, image_dir,
    caption_path, batch_size, num_workers, embed_size, hidden_size,
    num_layers, learning_rate, num_epochs, log_step and save_step.
    Checkpoints are written under args.model_path.
    """
    # Make sure the checkpoint directory exists before training starts.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Random crop/flip augmentation plus ImageNet mean/std normalization.
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224,
                              0.225))
    ])

    # NOTE(review): pickle.load is only safe on trusted vocab files.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    data_loader = get_loader(args.image_dir, args.caption_path, vocab, transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    encoder = EncoderCNN(args.embed_size)
    decoder = AttnDecoderRNN(args.embed_size, args.hidden_size, len(vocab), args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    # Only the decoder and the encoder's trainable head (linear projection
    # and its batch norm) are optimized; the CNN backbone stays frozen.
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    total_step = len(data_loader)
    # NOTE(review): the decoder hidden state is built once and reused for
    # every batch — confirm AttnDecoderRNN re-initializes it internally.
    decoder_hidden = decoder.init_hidden()

    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # BUG FIX: the original wrapped images with volatile=True, which
            # disables autograd for the entire downstream graph and breaks
            # loss.backward() during training (volatile is inference-only).
            images = cuda_variable(images)
            captions = cuda_variable(captions)
            # Flatten the padded captions into one packed target sequence.
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)

            outputs = decoder(captions, decoder_hidden, features, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Periodic console logging of loss and perplexity.
            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      %(epoch, args.num_epochs, i, total_step,
                        loss.data[0], np.exp(loss.data[0])))

            # Periodic checkpointing of both model halves.
            if (i+1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path,
                                        'decoder-%d-%d.pkl' %(epoch+1, i+1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path,
                                        'encoder-%d-%d.pkl' %(epoch+1, i+1)))
                                     )  # detach from history as input

                    # Accumulate per-step NLL against the gold target token.
                    loss += criterion(decoder_output, target_tensor[di])
                    # Stop decoding early once the model emits end-of-string.
                    if decoder_input.item() == dataset.end_of_string_token_idx:
                        break

            loss.backward()

            encoder_optimizer.step()
            decoder_optimizer.step()

            # Every 1000 batches: checkpoint both state dicts in one file.
            if i % 1000 == 0:
                torch.save(
                    {
                        "encoder": encoder.state_dict(),
                        "decoder": decoder.state_dict(),
                    },
                    f"{PATH}/{i}.pt",
                )

            # Track length-normalized loss for the progress display.
            loss_total += loss.item() / target_length
        # NOTE(review): if progress is a tqdm bar, set_description expects a
        # string — passing the float loss_total may raise; confirm or cast.
        progress.set_description(loss_total)

    # Final checkpoint after all epochs complete.
    torch.save(
        {
            "encoder": encoder.state_dict(),
            "decoder": decoder.state_dict(),
        },
        f"{PATH}/final.pt",
    )
Ejemplo n.º 7
0
        # One optimization step over the current batch; train() returns the
        # scalar batch loss.
        loss = train(
                     title, text, new_batch, words_padding_mask,target,
                     embedder, encoder, decoder ,embedder_optimizer,
                     encoder_optimizer, decoder_optimizer, criterion)

        # Accumulate the batch loss into the reporting/plotting windows.
        print_loss_total += loss
        plot_loss_total += loss
        print_loss  += loss
    print(print_loss )
    print_loss = 0
    # Skip reporting and checkpointing on the very first epoch.
    if epoch == 0: continue
    
    

    # Report average loss every print_every epochs.
    # NOTE(review): print_loss_total is never reset in the visible code, so
    # this "average" keeps growing with epoch — confirm whether a reset to 0
    # was intended after reporting.
    if epoch%print_every == 0:
        print_loss_avg = print_loss_total / print_every
        loss_list.append(print_loss_avg)
        print_summary = '%s (%d %d%%) %.4f' % (time_since(start, epoch / n_epochs), epoch, epoch / n_epochs * 100, print_loss_avg)
        print(print_summary)

    # Record the windowed loss for plotting, then reset that window.
    if epoch % plot_every == 0:
        plot_loss_avg = plot_loss_total / plot_every
        plot_losses.append(plot_loss_avg)
        plot_loss_total = 0

    # Every 5 epochs: save full model objects (epoch-stamped file) and a
    # rolling state-dict checkpoint (fixed path, overwritten each time).
    if epoch %5 == 0:
        torch.save({'embedder':embedder,'encoder': encoder, 'decoder': decoder}, str(epoch)+'model.pkl')
    if epoch %5 == 0:
        torch.save({'embedder':embedder.state_dict(),'encoder': encoder.state_dict(), 'decoder': decoder.state_dict()},'check/checkpoint.pkl')