Example #1
0
            "folder_path":
            dir_path,
            "folder_name":
            osp.basename(dir_path),
            "file_name":
            ['_'.join(osp.basename(p).split('_')[:-1]) for p in image_path],
            "image_paths":
            image_path
        })

# Generate a caption for every image of every folder record and make sure a
# per-folder output directory exists.
# NOTE(review): the loop body appears truncated at the end of this span — the
# cleaned `text` is computed but never written out in the visible lines;
# presumably a save step follows. Verify against the full file.
for info in tqdm(data_infos):
    for i, image_path in enumerate(info['image_paths']):
        # Load + preprocess one image, then move it to the inference device.
        image = load_image(image_path, transform=transform)
        image_tensor = image.to(device)
        # CNN encoder produces a feature vector; RNN decoder greedily
        # samples a sequence of word ids from it.
        features = caption_cnn(image_tensor, subset='test')
        sampled_ids = caption_rnn.sample(features)
        sampled_ids = sampled_ids[0].cpu().numpy(
        )  # (1, max_seq_length) -> (max_seq_length)

        # Convert word_ids to words, stopping at the '<end>' token.
        sampled_caption = []
        for word_id in sampled_ids:
            word = dataset.ixtoword[word_id]
            sampled_caption.append(word)
            if word == '<end>':
                break
        sentence = ' '.join(sampled_caption)
        print(sentence)
        # Strip the special start/end markers to get the bare caption text.
        text = sentence.replace('<start>', '').replace('<end>', '').strip()
        # Create the per-folder output directory on first use.
        if not osp.exists(osp.join(OUTPUT_DIR, info['folder_name'])):
            os.mkdir(osp.join(OUTPUT_DIR, info['folder_name']))
Example #2
0
        # NOTE(review): this span starts mid-call — the line below is the
        # final argument of the decoder constructor (whose opening is above
        # the visible span).
        cfg.CAP.num_layers
    ).cuda()
    # Restore pretrained weights and switch both models to inference mode
    # (eval() disables dropout / batch-norm updates).
    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))
    encoder.eval()
    decoder.eval()

    # Create the output directory if it does not already exist.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Caption each batch and render the image with its generated sentence.
    for i, data in enumerate(dataloader):
        imgs, captions, cap_lens, class_ids, keys = prepare_data(data)
        #targets = pack_padded_sequence(captions.unsqueeze(0), cap_lens, batch_first=True)[0]
        # Encode the last image in `imgs` (presumably the highest-resolution
        # scale — confirm against prepare_data) and sample word ids without
        # tracking gradients.
        with torch.no_grad():
            features = encoder(imgs[-1])
            sampled_ids = decoder.sample(features)
        sampled_ids = sampled_ids[0].cpu().numpy()
        # Map sampled word ids back to words, stopping at the '<end>' token.
        sampled_captions = []
        for word_id in sampled_ids:
            word = dataset.ixtoword[word_id]
            sampled_captions.append(word)
            if word == '<end>':
                break
        sentence = ' '.join(sampled_captions)
        # Un-normalize the tensor and convert CHW -> HWC for matplotlib.
        original_img = unnorm(imgs[-1].squeeze()).cpu().numpy().transpose((1, 2, 0))
        fig = plt.figure()
        plt.imshow(original_img)
        plt.title(sentence)
        #ax1 = fig.add_axes((0.1,0.4,0.8,0.5))
        #ax1.set_title(sentence)
        # Assumes keys[0] is a 'directory/filename' relative path with exactly
        # one '/' — TODO confirm the dataset's key format.
        # NOTE(review): loop body is truncated below this span.
        directory, filename = keys[0].split('/')