"folder_path": dir_path, "folder_name": osp.basename(dir_path), "file_name": ['_'.join(osp.basename(p).split('_')[:-1]) for p in image_path], "image_paths": image_path }) for info in tqdm(data_infos): for i, image_path in enumerate(info['image_paths']): image = load_image(image_path, transform=transform) image_tensor = image.to(device) features = caption_cnn(image_tensor, subset='test') sampled_ids = caption_rnn.sample(features) sampled_ids = sampled_ids[0].cpu().numpy( ) # (1, max_seq_length) -> (max_seq_length) # Convert word_ids to words sampled_caption = [] for word_id in sampled_ids: word = dataset.ixtoword[word_id] sampled_caption.append(word) if word == '<end>': break sentence = ' '.join(sampled_caption) print(sentence) text = sentence.replace('<start>', '').replace('<end>', '').strip() if not osp.exists(osp.join(OUTPUT_DIR, info['folder_name'])): os.mkdir(osp.join(OUTPUT_DIR, info['folder_name']))
# NOTE(review): this chunk begins mid-call — cfg.CAP.num_layers is the final
# argument of a decoder constructor whose opening is outside this view.
    cfg.CAP.num_layers
).cuda()

# Restore trained weights and switch both networks to inference mode.
encoder.load_state_dict(torch.load(encoder_path))
decoder.load_state_dict(torch.load(decoder_path))
encoder.eval()
decoder.eval()

# Make sure the output directory exists before the loop writes anything.
if not os.path.exists(args.output_dir):
    os.makedirs(args.output_dir)

# Caption one batch at a time; keys identify the source image files.
for i, data in enumerate(dataloader):
    imgs, captions, cap_lens, class_ids, keys = prepare_data(data)
    #targets = pack_padded_sequence(captions.unsqueeze(0), cap_lens, batch_first=True)[0]

    # Inference only — no autograd graph. NOTE(review): the extent of this
    # with-block was lost in the collapsed source; grouping the three model
    # calls under it is the natural reading — confirm against the original.
    with torch.no_grad():
        # Encode the last (presumably highest-resolution) image of the batch,
        # then sample a word-id sequence from the decoder.
        features = encoder(imgs[-1])
        sampled_ids = decoder.sample(features)
        sampled_ids = sampled_ids[0].cpu().numpy()

    # Map sampled word ids back to words, stopping at the '<end>' token.
    sampled_captions = []
    for word_id in sampled_ids:
        word = dataset.ixtoword[word_id]
        sampled_captions.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_captions)

    # Un-normalize the input image for display and title the figure with the
    # generated caption. transpose((1, 2, 0)) converts CHW -> HWC for imshow.
    original_img = unnorm(imgs[-1].squeeze()).cpu().numpy().transpose((1, 2, 0))
    fig = plt.figure()
    plt.imshow(original_img)
    plt.title(sentence)
    #ax1 = fig.add_axes((0.1,0.4,0.8,0.5))
    #ax1.set_title(sentence)

    # keys[0] looks like "<directory>/<filename>"; split into output path parts.
    # NOTE(review): assumes exactly one '/' in the key — verify against dataset.
    directory, filename = keys[0].split('/')