def evaluate():
    coco_dataset = CocoCaptions(
        root="../data/train2014/train2014",
        annFile=train_ann_file
    )
    capgen = CaptionGenerator(coco_dataset,
                              k=k,
                              early_stop=train_early_stop,
                              load_knn=load_knn)

    # evaluate on the validation split
    val_dataset = CocoCaptions(
        root="../data/val2014/val2014",
        annFile=valid_ann_file
    )
    best_captions, cap_map = capgen.evaluate(val_dataset, early_stop=val_early_stop)

    # best_captions: list of dicts {'image_id': img_id, 'caption': caption}
    with open(res_file, 'w') as f:
        json.dump(best_captions, f)

    # evaluate the best captions against the ground truth
    coco_result = capgen.coco.coco.loadRes(res_file)
    cocoEval = COCOEvalCap(capgen.coco.coco, coco_result)
    cocoEval.params['image_id'] = coco_result.getImgIds()
    cocoEval.evaluate()

    indices = ["BLEU 1-gram", "BLEU 2-gram", "BLEU 3-gram", "BLEU 4-gram",
               "METEOR", "ROUGE_L", "CIDEr", "SPICE"]
    data = [cocoEval.eval[metric] for metric in
            ("Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4",
             "METEOR", "ROUGE_L", "CIDEr", "SPICE")]
    results = pd.DataFrame(
        columns=[f"k={k}_Train_num={train_early_stop}_Val_num={val_early_stop}"],
        index=indices,
        data=data)
    results.to_excel(out_file)
    print(f"Results saved to {out_file}")
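# Hedged note (added): evaluate() above reads its configuration from module-level names.
# The assignments below are illustrative assumptions showing which names it expects
# (only the annotation paths appear elsewhere in this code); they are not settings
# from the original.
# train_ann_file = "../annotations/annotations_trainval2014/annotations/captions_train2014.json"
# valid_ann_file = "../annotations/annotations_trainval2014/annotations/captions_val2014.json"
# k, train_early_stop, val_early_stop = 5, 1000, 100
# load_knn = "./models/knn_k=5_num_1000"
# res_file = "./results/val2014_k=5_num_1000_results"
# out_file = "./results/val2014_k=5_num_1000.xlsx"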
def calc_bleu():
    image_dir = "/home/roberto/Documentos/TFM-UOC/pytorch-tutorial/tutorials/03-advanced/image_captioning/data/"
    ann_file = image_dir + "annotations/captions_train2014.json"
    cap = CocoCaptions(root=image_dir + "train2014/", annFile=ann_file)
    print('Number of samples: ', len(cap))
    img, target = cap[0]  # load the first sample
    print(target)
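# Hedged sketch (added, not from the original): calc_bleu() above only loads and prints a
# sample, so here is one way to score a candidate caption against the COCO references with
# the pycocoevalcap Bleu scorer used elsewhere in this code. The function name and the
# default candidate string are illustrative assumptions.
def calc_bleu_example(cap_dataset, index=0, candidate="a man riding a wave on a surfboard"):
    from pycocoevalcap.bleu.bleu import Bleu

    _, target = cap_dataset[index]
    references = {index: [str(t) for t in target]}   # ground-truth captions for this image
    hypothesis = {index: [candidate]}                # single candidate caption to score
    bleu_scores, _ = Bleu(4).compute_score(references, hypothesis)
    return bleu_scores                               # [BLEU-1, BLEU-2, BLEU-3, BLEU-4]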
def _get_extract_dataloader(data_path, image_shape=None, batch_size=1, num_workers=0):
    from torch.utils.data.dataloader import DataLoader

    transform = get_transform(image_shape)
    dataset = CocoCaptions(data_path, data_path / 'captions.json', transform)
    return DataLoader(dataset, batch_size, num_workers=num_workers)
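# Hedged usage sketch (added): _get_extract_dataloader joins data_path / 'captions.json',
# so it expects a pathlib.Path. The directory layout and image_shape below are
# illustrative assumptions, not taken from the original code.
if __name__ == '__main__':
    from pathlib import Path

    extract_loader = _get_extract_dataloader(Path('../data/train2014'), image_shape=(224, 224))
    image, captions = next(iter(extract_loader))   # a single-image batch and its captions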
def __init__(self, root, annFile, filepath, transform=None, freq_threshold=5, train=True):
    self.data = CocoCaptions(root=root, annFile=annFile, transform=transform)
    self.vocab = Vocabulary(freq_threshold)
    self.vocab.build_vocabulary(self.data, filepath)
    self.train = train
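# Hedged usage sketch (added): the enclosing class name is not shown above, so the same
# vocabulary-building flow is repeated here outside the class. The paths and vocab file
# name are illustrative assumptions; the role of the filepath argument is assumed to
# match the __init__ above.
if __name__ == '__main__':
    data = CocoCaptions(root='../data/train2014',
                        annFile='../annotations/captions_train2014.json')
    vocab = Vocabulary(5)                      # frequency threshold, as in the __init__ default
    vocab.build_vocabulary(data, 'vocab.pkl')  # filepath argument, same call as in the __init__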
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

image_dir = "./data/"
json_path = image_dir + "annotations/captions_train2014.json"

# coco = COCO(json_path)
# img, target, description = coco[3]
# print(target)
# imgplot = plt.imshow(img)
# plt.show()

cap = CocoCaptions(root=image_dir + "train2014/", annFile=json_path)
print('Number of samples: ', len(cap))
img, target = cap[2]  # load the third sample
print(target)
imgplot = plt.imshow(img)
plt.show()
def main():
    if len(sys.argv) == 2:
        sample_random = True
        load_knn = sys.argv[1]
    elif len(sys.argv) == 3:
        sample_random = False
        load_knn = sys.argv[1]
        img_path = sys.argv[2]
    else:
        raise Exception(
            f"Got {len(sys.argv)-1} args, was expecting 1 or 2 (path_to_knn-model, [img_path])"
        )

    ### CHANGE PARAMETERS HERE ###
    train_ann_file = "../annotations/annotations_trainval2014/annotations/captions_train2014.json"
    valid_ann_file = "../annotations/annotations_trainval2014/annotations/captions_val2014.json"
    coco_dataset = CocoCaptions(root="../data/train2014/train2014", annFile=train_ann_file)
    k = int(load_knn.split("knn_k=")[-1].split("_num")[0])
    train_early_stop = int(load_knn.split("_num_")[-1])
    res_file = f"./results/val2014_k={k}_num_{train_early_stop}_results"
    capgen = CaptionGenerator(coco_dataset, k=k, early_stop=train_early_stop, load_knn=load_knn)

    # get images from the validation file
    if sample_random:
        val_dataset = CocoCaptions(root="../data/val2014/val2014", annFile=valid_ann_file)
        sample_imgs = []
        sample_img_ids = []
        for i in range(5):
            idx = random.choice(range(len(val_dataset)))
            img_id = val_dataset.ids[idx]
            sample_img_ids.append(img_id)
            img, caps = val_dataset[idx]
            sample_imgs.append(img)
    else:
        sample_imgs = [img_path]

    img_names = sample_img_ids if sample_random else img_path
    print(f"Getting caption prediction for images: {img_names}")
    best_captions = capgen.get_captions(sample_imgs)

    if sample_random:
        # evaluate and save results
        results, references, hypothesis = evaluate(img_names, best_captions, capgen)
        with open(res_file, 'w') as f:
            json.dump(results, f)
        print("-------------------------------------RESULTS-------------------------------------")
        # evaluate results
        # coco_result = capgen.coco.coco.loadRes(res_file)
        # cocoEval = COCOEvalCap(capgen.coco.coco, coco_result)
        # cocoEval.params['image_id'] = coco_result.getImgIds()
        # output = cocoEval.evaluate()
        print("--------------------------------------------------------------------------------")
        for idx, img_id in enumerate(sample_img_ids):
            real_caption = references[img_id][img_id][0]
            bleu, scores = Bleu(4).compute_score(references[img_id], hypothesis[img_id], verbose=0)
            # bleu = scores[2]  # 3-gram
            print(
                f"ID: {img_id} \n Real caption (1 of 5): {real_caption} \n Sampled caption: {best_captions[idx]} \n BLEU: {bleu}"
            )
    else:
        print(f"Sampled caption: {best_captions[0]}")
        out_path = "results/" + img_path.split("/")[-1]
        if img_path.startswith('http'):
            img = Image.open(requests.get(img_path, stream=True).raw)
        else:
            img = Image.open(img_path)
        plt.axis('off')
        plt.title(img_path)
        plt.imshow(img)
        plt.figtext(0.5, 0.01, best_captions[0], wrap=True,
                    horizontalalignment='center', fontsize=12)
        plt.savefig(out_path)
        print(f"Output saved to {out_path}")
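# Standard entry point (added; assumed rather than shown in the original) so main()
# runs when this script is executed directly.
if __name__ == '__main__':
    main()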
def _load_data(self, root, split, img_input_size):
    coco = CocoCaptions(root=f'{root}/{split}2014',
                        annFile=f'{root}/annotations/captions_{split}2014.json',
                        transform=get_image_transforms(img_input_size))
    captions = self._extract_captions(coco)
    return coco, captions
NUM_IMAGES_SAVE = 4

# data
print(IMAGE_PATH)

compose = T.Compose([
    T.Resize(IMAGE_SIZE),
    T.CenterCrop(IMAGE_SIZE),
    T.ToTensor(),
])


def collate_fn(batch):
    # keep the variable-length caption lists as-is instead of stacking them into tensors
    return tuple(zip(*batch))


ds = CocoCaptions(root=IMAGE_PATH, annFile=ANNO_PATH, transform=compose)
dl = DataLoader(ds, BATCH_SIZE, shuffle=True, num_workers=8, collate_fn=collate_fn)

vae_params = dict(image_size=IMAGE_SIZE,
                  num_layers=NUM_LAYERS,
                  num_tokens=NUM_TOKENS,
                  codebook_dim=EMB_DIM,
                  hidden_dim=HID_DIM,
                  num_resnet_blocks=NUM_RESNET_BLOCKS)

vae = DiscreteVAE(**vae_params,
                  smooth_l1_loss=SMOOTH_L1_LOSS,
def __init__(self, root, annFile):
    CocoCaptions.__init__(self, root, annFile, transform=img_transform)
    self.pred = []