Example 1
def evaluation_loop():
  """Generate a set of captions for evaluation"""
  dataloader = torch.utils.data.DataLoader(
    coco_data_loader.CocoDataValid(),
    batch_size = BATCH_SIZE,
    num_workers = 16,
    shuffle = True,
  )

  # Load the trained captioning model and switch to inference mode
  model = caption_net.CaptionNet().cuda()
  model.load_state_dict(torch.load('caption_net.t7'))
  model.eval()

  valid_out = []
  for batch_ix, (image_ids, images) in enumerate(dataloader):
    print('Evaluation %d/%d' % (batch_ix, len(dataloader)))

    images = Variable(images).cuda()
    captions = model(images)

    # Collect results in the COCO caption format: one {'image_id', 'caption'} entry per image
    for image_id, caption in zip(image_ids, captions):
      caption = ' '.join(caption)
      valid_out.append({
        'image_id': int(image_id),  # plain int so the entry is JSON-serializable
        'caption': caption,
      })

  with open('valid.json', 'w') as json_out_file:
    json.dump(valid_out, json_out_file, indent = 2)
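
The valid.json produced above is a list of {'image_id', 'caption'} entries, i.e. the COCO caption results format, so it can be scored with the standard COCO caption evaluation toolkit. The snippet below is a sketch, not part of the original example: it assumes pycocotools and pycocoevalcap are installed, and the annotation path 'annotations/captions_val2014.json' is a placeholder to adjust for your setup.

from pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap

coco = COCO('annotations/captions_val2014.json')     # ground-truth captions (path is an assumption)
coco_res = coco.loadRes('valid.json')                # captions generated by evaluation_loop()
coco_eval = COCOEvalCap(coco, coco_res)
coco_eval.params['image_id'] = coco_res.getImgIds()  # only score images we generated captions for
coco_eval.evaluate()
for metric, score in coco_eval.eval.items():         # BLEU, METEOR, ROUGE_L, CIDEr, ...
  print('%s: %.3f' % (metric, score))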
Example 2
def caption_single_image(imgfile):
  """Generate a caption for a new image"""
  # Load the trained captioning model and switch to inference mode
  model = caption_net.CaptionNet().cuda()
  model.load_state_dict(torch.load('caption_net.t7'))
  model.eval()

  # Preprocess the image the same way as the training data:
  # resize/pad to the network's input size, then convert to a tensor
  img = Image.open(imgfile)
  transforms = torchvision.transforms.Compose([
    torchvision.transforms.Lambda(coco_data_loader.resize_and_pad),
    torchvision.transforms.ToTensor(),
  ])
  img = transforms(img).unsqueeze(0)

  # Run the forward pass on the GPU and print the generated caption
  img = Variable(img).cuda()
  out = model(img)

  print(out)
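
caption_single_image takes a path to an image file on disk. A minimal command-line wrapper (not part of the original snippet; the script name in the usage string is illustrative) could look like this:

if __name__ == '__main__':
  import sys
  if len(sys.argv) != 2:
    sys.exit('usage: python caption.py <image-file>')
  caption_single_image(sys.argv[1])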
Example 3
def main():
    """Run a single test image through a freshly initialized CaptionNet"""
    # Fix random seeds for reproducibility
    np.random.seed(RNG_SEED)
    torch.manual_seed(RNG_SEED)

    #test_vgg_on_image()
    model = caption_net.CaptionNet().cuda()

    # Preprocess the test image and run the forward pass on the GPU
    img = Image.open(TEST_IMAGE)
    transforms = torchvision.transforms.Compose([
        torchvision.transforms.Lambda(resize_and_pad),
        torchvision.transforms.ToTensor(),
    ])
    img = transforms(img).unsqueeze(0)
    img = Variable(img).cuda()
    out = model(img)

    print(out)
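
Both this example and the previous one preprocess images with resize_and_pad, which lives in coco_data_loader and is not shown here. Purely as an illustration of what such a helper might do (the real implementation and target size are in coco_data_loader; IMG_SIZE = 224 below is a hypothetical value), a letterboxing version could look like:

from PIL import Image

IMG_SIZE = 224  # hypothetical target size; the real value lives in coco_data_loader

def resize_and_pad(img):
  """Sketch: scale the longer side to IMG_SIZE, keep the aspect ratio,
  and pad the remainder with black to get a square image"""
  img = img.convert('RGB')
  w, h = img.size
  scale = IMG_SIZE / max(w, h)
  img = img.resize((max(1, int(w * scale)), max(1, int(h * scale))), Image.BILINEAR)
  padded = Image.new('RGB', (IMG_SIZE, IMG_SIZE))
  padded.paste(img, ((IMG_SIZE - img.size[0]) // 2, (IMG_SIZE - img.size[1]) // 2))
  return padded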
Example 4
def training_loop():
  """Train CaptionNet on the COCO training set, with per-epoch validation and early stopping"""
  train_dataloader = torch.utils.data.DataLoader(
    coco_data_loader.CocoData(mode = 'train'),
    batch_size = BATCH_SIZE,
    num_workers = 16,
    shuffle = True,
  )

  # Initialize model
  model = caption_net.CaptionNet().cuda()
  model.train()
  optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
  best_validation_loss = 1e8

  for epoch in range(EPOCHS):
    for batch_ix, (images, sentences, wordvecs) in enumerate(train_dataloader):
      optimizer.zero_grad()
      images = Variable(images).cuda()
      # Forward pass: perplexity-based loss of the reference captions given the images
      batch_loss = model.forward_perplexity(images, sentences, wordvecs)

      # Update parameters
      batch_loss.backward()
      optimizer.step()
      print('Epoch %d, batch %d/%d, loss %0.9f' % (epoch, batch_ix, len(train_dataloader), batch_loss))

      if (batch_ix+1) % SAVE_MODEL_EVERY == 0:
        print('Saving...')
        torch.save(model.state_dict(), 'caption_net.t7')

    # Calculate validation loss at end of epoch
    validation_loss = get_validation_loss(model)
    print('Epoch %d, validation loss %0.9f' % (epoch, validation_loss))

    # Save if validation loss improved, otherwise stop early
    if validation_loss < best_validation_loss:
      best_validation_loss = validation_loss
      print('Saving...')
      torch.save(model.state_dict(), 'caption_net.t7')
    else:
      break
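
The loop above calls get_validation_loss at the end of every epoch, but that function is not part of this example. A minimal sketch, assuming CocoData also accepts mode = 'val' and that forward_perplexity returns a scalar loss as in the training loop:

def get_validation_loss(model):
  """Sketch: average forward_perplexity loss over the validation split"""
  valid_dataloader = torch.utils.data.DataLoader(
    coco_data_loader.CocoData(mode = 'val'),  # 'val' mode is an assumption
    batch_size = BATCH_SIZE,
    num_workers = 16,
  )
  model.eval()
  total_loss, batches = 0.0, 0
  for images, sentences, wordvecs in valid_dataloader:
    images = Variable(images).cuda()
    batch_loss = model.forward_perplexity(images, sentences, wordvecs)
    total_loss += float(batch_loss)
    batches += 1
  model.train()  # restore training mode before returning to the caller
  return total_loss / max(batches, 1)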