def convert_box_labeling(
        label_file: Path = typer.Argument(..., exists=True, dir_okay=False),
        image_dir: Path = typer.Argument(..., exists=True, file_okay=False),
        cats=typer.Option(None, '-c'),
        output: Path = typer.Option('/dev/stdout', '-o',
                                    help='default output to stdout')):
    """ convert human labeling result into coco format """
    # `cats` is either a comma-separated string given with -c, or None, in
    # which case the category list is derived from the `type` fields found
    # in the label file.
    if cats is not None:
        categories = cats.split(',')
    else:
        with open(label_file) as label_fp:
            labeling = json.load(label_fp)
        cats = set()
        for entry in labeling.values():
            cats.update(item['type'] for item in entry['data'])

        # Category sets for which a fixed ordering is known.
        known_orderings = (
            ['body', 'head', 'realface'],
            ['head', 'realface'],
        )
        for ordering in known_orderings:
            if cats == set(ordering):
                categories = ordering
                break
        else:
            # Only a single category can be ordered without a hint.
            if len(cats) != 1:
                raise Exception(
                    f'Cannot auto predict cats from {cats}, using argument like "-c body,head,face"'
                )
            categories = list(cats)

    dataset = COCO.from_label_file(labeling_file_name=label_file,
                                   image_dir=image_dir,
                                   categories_list=categories)
    dataset.to_json(out_file=output, indent=consts.INDENT)
def print_evaluation_scores_old(json_file):
    """Run COCO evaluation on a result file and collect the mAP scores.

    Args:
        json_file: detection results, passed to ``COCO.loadRes``.

    Returns:
        dict mapping ``'mAP(bbox)/<field>'`` (and ``'mAP(segm)/<field>'``
        when ``cfg.MODE_MASK`` is set) to the first six entries of
        ``COCOeval.stats``.
    """
    # The evaluator class lives in pycocotools.cocoeval; pycocotools.mask is
    # a module and cannot be called as COCOeval(...).
    from pycocotools.cocoeval import COCOeval
    from coco import COCO

    assert cfg.DATA.BASEDIR and os.path.isdir(cfg.DATA.BASEDIR)
    annofile = os.path.join(cfg.DATA.BASEDIR, 'annotations',
                            'instances_{}.json'.format(cfg.DATA.VAL))
    coco = COCO(annofile)
    cocoDt = coco.loadRes(json_file)

    fields = [
        'IoU=0.5:0.95', 'IoU=0.5', 'IoU=0.75', 'small', 'medium', 'large'
    ]
    iou_types = ['bbox']
    if cfg.MODE_MASK:
        iou_types.append('segm')

    ret = {}
    for iou_type in iou_types:
        cocoEval = COCOeval(coco, cocoDt, iou_type)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        for k, field in enumerate(fields):
            ret['mAP({})/{}'.format(iou_type, field)] = cocoEval.stats[k]
    return ret
def build_vocabulary(config):
    """ Build the vocabulary from the training captions and return it.

    Captions are loaded from ``config.train_caption_file`` and filtered with
    ``config.max_caption_length``; the vocabulary is created with
    ``config.vocabulary_size``.
    """
    caption_data = COCO(config.train_caption_file)
    caption_data.filter_by_cap_len(config.max_caption_length)

    vocab = Vocabulary(config.vocabulary_size)
    captions = caption_data.all_captions()
    vocab.build(captions)

    # NOTE: writing the vocabulary to disk is currently disabled:
    # vocabulary.save(config.vocabulary_file)
    return vocab
def __init__(self, annotation_file=None): """ Constructor of KITTI helper class for reading and visualizing annotations. :param annotation_file (str): location of annotation file :param image_folder (str): location to the folder that hosts images. :return: """ # load dataset COCO.__init__(self, annotation_file)
def cityperson_eval(src_pth, annFile, CUT_WH=None, ignore_uncertain=False,
                    use_iod_for_ignore=False, catIds=None,
                    use_citypersons_standard=True, tiny_scale=1.0,
                    iou_ths=None, setup_labels=None):
    """Evaluate bbox results against ``annFile`` for each evaluation setup.

    One line per evaluated setup is written through ``COCOeval.summarize``
    into ``results.txt`` in the current working directory.

    Args:
        src_pth: result JSON file, or a directory containing ``bbox.json``.
        annFile: ground-truth annotation file passed to ``COCO``.
        CUT_WH: stored in ``Params.CUT_WH``; ``(1, 1)`` when None.
        ignore_uncertain: forwarded to ``COCOeval``.
        use_iod_for_ignore: forwarded to ``COCOeval``.
        catIds: only its emptiness is used (``given_catIds`` flag);
            None is treated as empty.
        use_citypersons_standard: stored in ``Params.CITYPERSON_STANDARD``;
            when false, ``filter_type='size'`` is passed to ``COCOeval``.
        tiny_scale: stored in ``Params.TINY_SCALE``.
        iou_ths: stored in ``Params.IOU_THS``.
        setup_labels: if given, only setups whose label is in this
            collection are evaluated.
    """
    if catIds is None:
        catIds = []

    if os.path.isdir(src_pth):
        resFile = src_pth + '/' + 'bbox.json'
    else:
        resFile = src_pth

    Params.CITYPERSON_STANDARD = use_citypersons_standard
    kwargs = {} if use_citypersons_standard else {'filter_type': 'size'}
    if CUT_WH is None:
        CUT_WH = (1, 1)

    Params.TINY_SCALE = tiny_scale
    Params.IOU_THS = iou_ths
    kwargs.update({
        'use_iod_for_ignore': use_iod_for_ignore,
        'ignore_uncertain': ignore_uncertain
    })
    kwargs['given_catIds'] = len(catIds) > 0

    annType = 'bbox'  # specify type here
    print('Running demo for *%s* results.' % annType)

    # running evaluation
    print('CUT_WH:', CUT_WH)
    print('use_citypersons_standard:', use_citypersons_standard)
    print('tiny_scale:', tiny_scale)
    print(kwargs)

    Params.CUT_WH = CUT_WH
    setupLbl = Params().SetupLbl

    # The context manager closes the result file even if evaluation raises.
    with open("results.txt", "w") as res_file:
        for id_setup, label in enumerate(setupLbl):
            if setup_labels is not None and label not in setup_labels:
                continue
            # Ground truth and detections are reloaded for every setup.
            cocoGt = COCO(annFile)
            cocoDt = cocoGt.loadRes(resFile)
            imgIds = sorted(cocoGt.getImgIds())
            cocoEval = COCOeval(cocoGt, cocoDt, annType, **kwargs)
            cocoEval.params.imgIds = imgIds
            cocoEval.evaluate(id_setup)
            cocoEval.accumulate()
            cocoEval.summarize(id_setup, res_file)
def validate(annFile, dt_path):
    """Return the ``summarize_nofile`` result for evaluation setups 0..3.

    Args:
        annFile: ground-truth annotation file passed to ``COCO``.
        dt_path: detection results passed to ``loadRes``.

    Returns:
        list with one entry per setup id, in order.
    """

    def _evaluate_setup(setup_id):
        # Ground truth and detections are rebuilt for every setup.
        ground_truth = COCO(annFile)
        detections = ground_truth.loadRes(dt_path)
        image_ids = sorted(ground_truth.getImgIds())
        evaluator = COCOeval(ground_truth, detections, 'bbox')
        evaluator.params.imgIds = image_ids
        evaluator.evaluate(setup_id)
        evaluator.accumulate()
        return evaluator.summarize_nofile(setup_id)

    return [_evaluate_setup(setup_id) for setup_id in range(4)]
def __init__(self, root, json, vocab, transform=None):
    """Store the image root, caption annotations and vocabulary wrapper.

    Args:
        root: image directory.
        json: coco annotation file path.
        vocab: vocabulary wrapper.
        transform: image transformer.
    """
    annotations = COCO(json)
    self.root = root
    self.coco = annotations
    # One dataset entry per annotation id.
    self.ids = list(annotations.anns.keys())
    self.vocab = vocab
    self.transform = transform
def evaluate(test_annotation_file, user_submission_file, aesthetic, fps=None, mem=None):
    """Evaluate a submission and build the score dictionary.

    Args:
        test_annotation_file: ground-truth annotations passed to ``COCO``.
        user_submission_file: results passed to ``loadRes``.
        aesthetic: enables multi-label evaluation; also forwarded to
            ``loadRes`` and ``cocoEval.params.multi_label``.
        fps: optional throughput figure; reported as ``"FPS"``.
        mem: optional memory figure; reported as ``"GPU Memory (MB)"``.

    Returns:
        dict of metric name -> value. When both ``fps`` and ``mem`` are
        given it also contains ``"3S"``: ``calculate_final(score, fps, mem)``
        if the score thresholds are met, otherwise 0.
    """
    print("\n----------Starting Evaluation----------\n")
    cocoGT = COCO(test_annotation_file)
    cocoDt = cocoGT.loadRes(user_submission_file, aesthetic=aesthetic)
    cocoEval = COCOeval(cocoGT, cocoDt, 'bbox')
    cocoEval.params.multi_label = aesthetic
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    stats = cocoEval.stats

    # The two IoU-specific entries need distinct keys; with one shared key
    # the stats[1] value was overwritten by stats[2] and never reported.
    # NOTE(review): labels assume the standard COCOeval ordering
    # (stats[1] = IoU 0.5, stats[2] = IoU 0.75) - confirm for this evaluator.
    output = {
        "AP": stats[0],
        "AP IoU@0.5": stats[1],
        "AP IoU@0.75": stats[2],
    }
    if aesthetic:
        # NOTE(review): the F-2 key below looks like it was mangled by an
        # e-mail obfuscator; left untouched because consumers may read it.
        output.update({
            "Multi-Label Precision": stats[12],
            "Multi-Label Recall": stats[13],
            "Multi-Label F-2 Score ([email protected])": stats[14],
        })
        score = stats[14]
    else:
        score = stats[0]

    if fps is not None and mem is not None:
        output["FPS"] = fps
        output["GPU Memory (MB)"] = mem
        # Aesthetic runs must additionally reach an AP of 0.5.
        qualifies = score >= 0.5 and (not aesthetic or stats[0] >= 0.5)
        if qualifies:
            output["3S"] = calculate_final(score, fps, mem)
        else:
            print(
                "Score is too low for consideration. Minimum score for mAP is 0.5 and multi-label f2-score is 0.5."
            )
            output["3S"] = 0

    print("\n----------Completed Evaluation----------\n")
    return output
def from_image_dir(
        image_dir: Path = typer.Argument(..., exists=True, file_okay=False),
        with_box: bool = typer.Option(False, '--with-box/--no-box', '-wb/-nb'),
        output_coco: Path = typer.Option('/dev/stdout', '-o', dir_okay=False)):
    """ create empty coco file from image dir """
    logger.debug('enter: from image dir')
    # Build the dataset from the directory listing, then serialise it.
    dataset = COCO.from_image_dir(image_dir=image_dir, with_box=with_box)
    dataset.to_json(output_coco, indent=consts.INDENT)
def evaluate(
        gt_file: Path = typer.Argument(..., exists=True, dir_okay=False),
        dt_file: Path = typer.Argument(..., exists=True, dir_okay=False),
        ann_type: EvaluateType = EvaluateType.bbox):
    """ evaluate coco result """
    ground_truth = COCO(gt_file)
    # The detection file is handed over as a plain string path.
    detections_path = str(dt_file)
    ground_truth.evaluate(detections_path, ann_type=ann_type)
def to_str_id(
        coco_file: Path = typer.Argument(..., exists=True, dir_okay=False)):
    # Writes the result of COCO.to_str_id() next to the input file as
    # "<stem>.str_id<suffix>" and reports the output path on stdout.
    new_name = f'{coco_file.stem}.str_id{coco_file.suffix}'
    output_file = coco_file.with_name(new_name)
    converted = COCO(coco_file).to_str_id()
    converted.to_json(output_file, indent=consts.INDENT)
    print(f'output to {output_file}')
def preprocess_dataset(split_name, coco_split_name, batch_stream_length,
                       vocab=None, aligned=True):
    """Write one dataset split as HDF5 sequence files.

    Args:
        split_name: split whose image ids are read (one integer per line)
            from ``SPLITS_PATTERN % split_name``.
        coco_split_name: substituted into ``COCO_ANNO_PATH`` to find the
            annotation file.
        batch_stream_length: assigned to the sequence generator.
        vocab: vocabulary handed to the generator; when None the generator's
            vocabulary is dumped to ``OUTPUT_DIR/vocabulary.txt``.
        aligned: enables align/pad/truncate in the generator and selects the
            output directory suffix.

    Returns:
        ``sg.vocabulary_inverted`` of the sequence generator.
    """
    with open(SPLITS_PATTERN % split_name, 'r') as split_file:
        split_image_ids = [int(line) for line in split_file]

    output_dataset_name = split_name
    if aligned:
        output_dataset_name += '_aligned_%d' % MAX_WORDS
    else:
        output_dataset_name += '_unaligned'
    output_path = OUTPUT_DIR_PATTERN % output_dataset_name

    coco = COCO(COCO_ANNO_PATH % coco_split_name)
    sg = CocoSequenceGenerator(coco, BUFFER_SIZE,
                               split_ids=split_image_ids, vocab=vocab,
                               align=aligned, pad=aligned, truncate=aligned)
    sg.batch_stream_length = batch_stream_length

    writer = HDF5SequenceWriter(sg, output_dir=output_path)
    writer.write_to_exhaustion()
    writer.write_filelists()

    if vocab is None:
        vocab_out_path = '%s/vocabulary.txt' % OUTPUT_DIR
        sg.dump_vocabulary(vocab_out_path)

    image_out_path = '%s/image_list.txt' % output_path
    image_dummy_labels_out_path = '%s/image_list.with_dummy_labels.txt' % output_path
    sg.dump_image_file(image_out_path, image_dummy_labels_out_path)

    num_outs = sg.num_outs
    num_pads = sg.num_pads
    num_truncates = sg.num_truncates
    # Single parenthesised argument: prints the same text on Python 2 and 3.
    print('Padded %d/%d sequences; truncated %d/%d sequences' %
          (num_pads, num_outs, num_truncates, num_outs))
    return sg.vocabulary_inverted
def eval_ssd300_mlperf_coco(args):
    """Load an SSD300 checkpoint and run ``coco_eval`` on COCO val2017.

    Reads ``args.no_cuda``, ``args.data``, ``args.checkpoint``,
    ``args.device`` and ``args.threshold``.
    """
    from coco import COCO

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    val_trans = SSDTransformer(dboxes, (300, 300), val=True)

    annotation_path = os.path.join(args.data,
                                   "annotations/instances_val2017.json")
    image_root = os.path.join(args.data, "val2017")

    cocoGt = COCO(annotation_file=annotation_path)
    val_coco = COCODetection(image_root, annotation_path, val_trans)

    # Reverse of val_coco.label_map.
    inv_map = {}
    for key, value in val_coco.label_map.items():
        inv_map[value] = key

    ssd300 = SSD300(val_coco.labelnum)

    def _keep_storage(storage, loc):
        return storage

    print("loading model checkpoint", args.checkpoint)
    od = torch.load(args.checkpoint, map_location=_keep_storage)
    ssd300.load_state_dict(od["model"])

    loss_func = Loss(dboxes)
    if use_cuda:
        ssd300.cuda(args.device)
        loss_func.cuda(args.device)

    coco_eval(ssd300, val_coco, cocoGt, encoder, inv_map, args.threshold,
              args.device)
def __init__(self, anno_path, image_dir, db):
    """Constructor. Builds the GUI from its Glade description, loads the
    annotations, pre-initialises the DB and shows the first image."""
    builder = Gtk.Builder()
    builder.add_from_file('coco_minus_people.glade')
    builder.connect_signals(self)

    # (attribute on self, widget id in the Glade file)
    widget_ids = (
        ('window', 'application_window'),
        ('image_widget', 'current_frame'),
        ('filename_label', 'datum_name'),
        ('status_label', 'status_label'),
        ('label_combo', 'label_combo'),
        ('skip_checkbutton', 'skip_checkbutton'),
    )
    for attr_name, widget_id in widget_ids:
        setattr(self, attr_name, builder.get_object(widget_id))

    self.coco = COCO(anno_path)
    self.db = db
    self.last_save = self._time()

    # we'll iterate through this list
    self.image_ids = sorted(self.coco.imgs.keys())
    self.image_dir = image_dir
    self.image_index = 0

    logging.info('Pre-initialising DB')
    pre_init_db(self.db, self.coco)
    logging.info('Pre-initialisation done!')

    self.refresh_image()
def visualize(
        coco_file: Path = typer.Argument(..., exists=True, dir_okay=False),
        img_dir: Path = typer.Argument(..., exists=True, file_okay=False),
):
    """ visualize inputs """
    try:
        logger.debug('visualize {} with images in {}', coco_file, img_dir)
        dataset = COCO(coco_file)
        dataset.visualize(img_dir=img_dir)
    except (json.JSONDecodeError, TypeError):
        # The input could not be used as a COCO file: treat it as a
        # detection file against an empty ground truth built from img_dir.
        logger.info(
            f'non-coco input file {coco_file} detect, trying to convert')
        placeholder_gt = COCO.from_image_dir(img_dir).tmp_file_name()
        converted = COCO.from_detect_file(str(coco_file), placeholder_gt)
        converted.print_stat().visualize(img_dir=img_dir)
def build_vocab(json, threshold):
    """Build a simple vocabulary wrapper.

    Args:
        json: coco caption annotation file path.
        threshold: minimum number of occurrences for a word to be kept.

    Returns:
        Vocabulary holding the four special tokens followed by the kept words.
    """
    annotations = COCO(json).anns
    ann_ids = annotations.keys()
    total = len(ann_ids)

    word_counts = Counter()
    for done, ann_id in enumerate(ann_ids, start=1):
        text = str(annotations[ann_id]['caption'])
        word_counts.update(nltk.tokenize.word_tokenize(text.lower()))
        if done % 1000 == 0:
            print("[{}/{}] Tokenized the captions.".format(done, total))

    # Create a vocab wrapper and add some special tokens.
    vocab = Vocabulary()
    for special_token in ('<pad>', '<start>', '<end>', '<unk>'):
        vocab.add_word(special_token)

    # Words seen fewer than `threshold` times are discarded.
    for word, count in word_counts.items():
        if count >= threshold:
            vocab.add_word(word)
    return vocab
def evaluation(annFile, resFile, outFile="results.txt"):
    """Run the multi-setup evaluation (setups 0..3) and write the summary.

    Args:
        annFile: ground-truth annotation file passed to ``COCO``.
        resFile: detection results passed to ``loadRes``.
        outFile: path of the text file the summaries are written to.
    """
    from coco import COCO  # IMPORT THEIR COCO, not pycocotools
    from eval_MR_multisetup import COCOeval

    # running evaluation
    # Honour the outFile argument; the context manager closes the file even
    # if evaluation raises.
    with open(outFile, "w") as res_file:
        for id_setup in range(4):
            cocoGt = COCO(annFile)
            cocoDt = cocoGt.loadRes(resFile)
            imgIds = sorted(cocoGt.getImgIds())
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds
            cocoEval.evaluate(id_setup)
            cocoEval.accumulate()
            cocoEval.summarize(id_setup, res_file)
def print_stat(
        coco_files: List[Path] = typer.Argument(..., exists=True,
                                                dir_okay=False)):
    """ print coco stats (img length, ann length, cat length) """
    for coco_path in coco_files:
        # Header line first, then the statistics of that file.
        print(f'Stat of {coco_path}:')
        dataset = COCO(coco_path)
        dataset.print_stat()
def mMR(resFile, annFile):
    '''
    Evaluate detection results for setup 3 and write the summary to
    "results.txt" in the current working directory.

    :param resFile: json file detect result : list = [dict, dict ...]
        dict = {'image_id':, 'bbox':, 'score':, 'category_id': }
        bbox = [x, y, w, h]
        image_id = *** (no .jpg)
        category_id = 1 for person
        score must be sorted from high to low
    :param annFile: json file, format is same as mscoco dataset,
        for example instances_val_{}2014.json
    :return: None
    '''
    # NOTE(review): annType is not defined in this function; it is
    # presumably a module-level name - confirm.
    # The context manager closes the result file even if evaluation raises.
    with open("results.txt", "w") as res_file:
        for id_setup in range(3, 4):
            cocoGt = COCO(annFile)
            cocoDt = cocoGt.loadRes(resFile)
            imgIds = sorted(cocoGt.getImgIds())
            cocoEval = COCOeval(cocoGt, cocoDt, annType)
            cocoEval.params.imgIds = imgIds
            cocoEval.evaluate(id_setup)
            cocoEval.accumulate()
            cocoEval.summarize(id_setup, res_file)
def eval_json_reasonable(annFile, resFile):
    """Evaluate setups 0..5 and return the summary value of setup 0.

    The per-setup summaries are written to ``results.txt`` in the directory
    that contains ``resFile``.

    Args:
        annFile: ground-truth annotation file passed to ``COCO``.
        resFile: detection results passed to ``loadRes``.

    Returns:
        The value returned by ``cocoEval.summarize`` for setup 0.
    """
    dt_path = os.path.split(resFile)[0]
    respath = os.path.join(dt_path, 'results.txt')
    mr_reasonable = None
    # NOTE(review): annType is not defined in this function; it is
    # presumably a module-level name - confirm.
    # The context manager closes the result file even if evaluation raises.
    with open(respath, "w") as res_file:
        for id_setup in range(6):
            cocoGt = COCO(annFile)
            cocoDt = cocoGt.loadRes(resFile)
            imgIds = sorted(cocoGt.getImgIds())
            cocoEval = COCOeval(cocoGt, cocoDt, annType)
            cocoEval.params.imgIds = imgIds
            cocoEval.evaluate(id_setup)
            cocoEval.accumulate()
            mean_mr = cocoEval.summarize(id_setup, res_file)
            if id_setup == 0:
                mr_reasonable = mean_mr
        print('')
    return mr_reasonable
def eval_ssd_r34_mlperf_coco(args):
    """Build SSD-R34, optionally load a checkpoint, and run ``coco_eval``.

    With ``args.dummy`` set no dataset is loaded: ground truth, encoder,
    label map and data loader are passed as None and 81 labels are used.
    """
    from coco import COCO

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    dboxes = dboxes_R34_coco(args.image_size, args.strides)
    encoder = Encoder(dboxes)
    val_trans = SSDTransformer(dboxes,
                               (args.image_size[0], args.image_size[1]),
                               val=True)

    if args.dummy:
        cocoGt = None
        encoder = None
        inv_map = None
        val_dataloader = None
        labelnum = 81
    else:
        val_annotate = os.path.join(args.data,
                                    "annotations/instances_val2017.json")
        val_coco_root = os.path.join(args.data, "val2017")
        cocoGt = COCO(annotation_file=val_annotate)
        val_coco = COCODetection(val_coco_root, val_annotate, val_trans)
        inv_map = {v: k for k, v in val_coco.label_map.items()}

        loader_kwargs = dict(batch_size=args.batch_size,
                             shuffle=False,
                             sampler=None,
                             num_workers=args.workers)
        # Outside accuracy mode the last incomplete batch is dropped.
        if not args.accuracy_mode:
            loader_kwargs['drop_last'] = True
        val_dataloader = DataLoader(val_coco, **loader_kwargs)
        labelnum = val_coco.labelnum

    ssd_r34 = SSD_R34(labelnum, strides=args.strides)

    if args.checkpoint:
        print("loading model checkpoint", args.checkpoint)
        od = torch.load(args.checkpoint,
                        map_location=lambda storage, loc: storage)
        ssd_r34.load_state_dict(od["model"])

    if use_cuda:
        ssd_r34.cuda(args.device)

    coco_eval(ssd_r34, val_dataloader, cocoGt, encoder, inv_map, args)
def split_dataset(
        coco_file: str = typer.Argument(..., exists=True, dir_okay=False),
        image_dir: str = typer.Argument(..., exists=True, file_okay=False),
):
    # Output names are derived only when the input path contains 'all':
    # every occurrence of 'all' becomes 'train' (front part) or 'val'
    # (tail part). Otherwise COCO.split_dataset uses its own defaults.

    def _renamed(path, subset):
        return path.replace('all', subset)

    split_args = EasyDict()
    if 'all' in coco_file:
        split_args.front = _renamed(coco_file, 'train')
        split_args.tail = _renamed(coco_file, 'val')
    if 'all' in image_dir:
        split_args.front_dir_name = _renamed(image_dir, 'train')
        split_args.tail_dir_name = _renamed(image_dir, 'val')

    dataset = COCO(coco_file)
    dataset.split_dataset(image_dir=image_dir,
                          indent=consts.INDENT,
                          **split_args)
def __init__(self, image_set, year):
    """Set up config, paths and the COCO API handle, then call ``test()``.

    Args:
        image_set: stored as ``self._image_set``.
        year: stored as ``self._year``.
    """
    # The base-class initialisation is disabled here:
    # imdb.__init__(self, 'coco_' + year + '_' + image_set)

    # COCO specific config options
    self.config = dict(
        top_k=2000,
        use_salt=True,
        cleanup=True,
        crowd_thresh=0.7,
        min_size=2,
    )

    # name, paths
    self._year = year
    self._image_set = image_set
    self._data_path = osp.join(cfg.DATA_DIR, 'coco')

    # load COCO API and the category records
    self._COCO = COCO(self._get_ann_file())
    category_ids = self._COCO.getCatIds()
    cats = self._COCO.loadCats(category_ids)

    self.test()
def process_dataset(split_name, coco_split_name, batch_stream_length,
                    vocab=None, aligned=True):
    """Write one dataset split as HDF5 sequence files.

    Args:
        split_name: split whose image names are read (one per line, any
            '.jpg' removed) from ``SPLITS_PATTERN % split_name``.
        coco_split_name: substituted into ``COCO_IMAGE_PATTERN`` to get the
            image root.
        batch_stream_length: assigned to the sequence generator.
        vocab: vocabulary handed to the generator; when None the generator's
            vocabulary is dumped to ``OUTPUT_DIR/vocabulary.txt``.
        aligned: enables align/pad/truncate in the generator and selects the
            output directory suffix.

    Returns:
        ``sg.vocabulary_inverted`` of the sequence generator.
    """
    with open(SPLITS_PATTERN % split_name, 'r') as split_file:
        # strip() removes the trailing newline of each line
        split_image_ids = [
            line.strip().replace('.jpg', '') for line in split_file
        ]

    output_dataset_name = split_name
    if aligned:
        output_dataset_name += '_aligned_%d' % MAX_WORDS
    else:
        output_dataset_name += '_unaligned'
    output_path = OUTPUT_DIR_PATTERN % output_dataset_name

    coco = COCO()
    image_root = COCO_IMAGE_PATTERN % coco_split_name
    sg = CocoSequenceGenerator(coco, split_name, BUFFER_SIZE, image_root,
                               split_ids=split_image_ids, vocab=vocab,
                               align=aligned, pad=aligned, truncate=aligned)
    sg.batch_stream_length = batch_stream_length

    writer = HDF5SequenceWriter(sg, output_dir=output_path)
    writer.write_to_exhaustion()
    writer.write_filelists()

    if vocab is None:
        vocab_out_path = '%s/vocabulary.txt' % OUTPUT_DIR
        sg.dump_vocabulary(vocab_out_path)

    image_out_path = '%s/image_list.txt' % output_path
    image_dummy_labels_out_path = '%s/image_list.with_dummy_labels.txt' % output_path
    sg.dump_image_file(image_out_path, image_dummy_labels_out_path)

    num_outs = sg.num_outs
    num_pads = sg.num_pads
    num_truncates = sg.num_truncates
    # Single parenthesised argument: prints the same text on Python 2 and 3.
    print('Padded %d/%d sequences; truncated %d/%d sequences' %
          (num_pads, num_outs, num_truncates, num_outs))
    return sg.vocabulary_inverted
def prepare_train_coco_data():
    """ Prepare relevant COCO data for training the model.

    Returns:
        (coco, dataset): the loaded COCO object and the DataSet built from
        every image id present in ``coco.imgToAnns``.
    """
    image_dir = train_coco_image_dir
    annotation_file = train_coco_annotation_file
    data_dir = train_coco_data_dir

    coco = COCO(annotation_file)
    img_ids = list(coco.imgToAnns.keys())

    img_files, img_heights, img_widths = [], [], []
    anchor_files, gt_classes, gt_bboxes = [], [], []

    for img_id in img_ids:
        info = coco.imgs[img_id]
        img_files.append(os.path.join(image_dir, info['file_name']))
        img_heights.append(info['height'])
        img_widths.append(info['width'])

        # Anchor file: "<image stem>_<basic_model>_anchor.npz" in data_dir.
        stem = os.path.splitext(info['file_name'])[0]
        anchor_files.append(
            os.path.join(data_dir, stem + '_' + basic_model + '_anchor.npz'))

        classes = []
        bboxes = []
        for ann in coco.imgToAnns[img_id]:
            classes.append(coco_category_to_class[ann['category_id']])
            box = ann['bbox']
            # Stored as [bbox[1], bbox[0], bbox[3] + 1, bbox[2] + 1].
            bboxes.append([box[1], box[0], box[3] + 1, box[2] + 1])
        gt_classes.append(classes)
        gt_bboxes.append(bboxes)

    print("Building the training dataset...")
    dataset = DataSet(img_ids, img_files, img_heights, img_widths,
                      batch_size, anchor_files, gt_classes, gt_bboxes,
                      True, True)
    print("Dataset built.")
    return coco, dataset
def evaluate_predictions_on_coco(coco_gt, json_result_file, iou_type="bbox"):
    """Evaluate a result file against ``coco_gt`` and return the evaluator.

    Args:
        coco_gt: ground-truth object providing ``loadEntireRes``.
        json_result_file: result file; when falsy an empty ``COCO()`` is
            used as the detection set.
        iou_type: evaluation type passed to ``COCOeval``.

    Returns:
        The ``COCOeval`` instance after evaluate/accumulate/summarize.
    """
    import json
    # The project-local modules are used instead of pycocotools.
    from coco import COCO
    from cocoeval import COCOeval

    if json_result_file:
        coco_dt = coco_gt.loadEntireRes(str(json_result_file))
    else:
        coco_dt = COCO()

    evaluator = COCOeval(coco_gt, coco_dt, iou_type)
    evaluator.evaluate()
    evaluator.accumulate()
    evaluator.summarize()
    return evaluator
def merge(
        inputs_cocos: List[Path] = typer.Argument(..., exists=True,
                                                  dir_okay=False),
        output: Path = typer.Option(..., '-o', '--output', writable=True,
                                    dir_okay=False)):
    """ merge all coco inputs """
    assert len(inputs_cocos) >= 2

    # The first input is the destination; every other one is merged into it.
    first_path, *other_paths = inputs_cocos
    dst = COCO(first_path)
    for coco_path in other_paths:
        logger.debug(coco_path)
        dst.merge(COCO(coco_path))
    dst.to_json(out_file=output, indent=consts.INDENT)
def eval_ssd_r34_mlperf_coco(args):
    """Load an SSD-R34 checkpoint and evaluate it on COCO val2017.

    ``args.onnx`` selects the evaluation routine: ``'export'`` uses
    ``coco_eval_export``, ``'eval'`` uses ``coco_eval_onnx``, anything else
    uses ``coco_eval``. The routine's return value is returned.
    """
    from coco import COCO

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    dboxes = dboxes_R34_coco(args.image_size, args.strides)
    encoder = Encoder(dboxes)
    val_trans = SSDTransformer(dboxes,
                               (args.image_size[0], args.image_size[1]),
                               val=True)

    annotation_path = os.path.join(args.data,
                                   "annotations/instances_val2017.json")
    image_root = os.path.join(args.data, "val2017")
    cocoGt = COCO(annotation_file=annotation_path)
    val_coco = COCODetection(image_root, annotation_path, val_trans)
    inv_map = {v: k for k, v in val_coco.label_map.items()}

    print('ssd r34')
    ssd_r34 = SSD_R34(val_coco.labelnum, strides=args.strides)

    print("loading model checkpoint", args.checkpoint)
    od = torch.load(args.checkpoint,
                    map_location=lambda storage, loc: storage)
    ssd_r34.load_state_dict(od["model"])

    loss_func = Loss(dboxes)
    if use_cuda:
        ssd_r34.cuda(args.device)
        loss_func.cuda(args.device)

    # Pick the evaluation routine; all three take the same arguments.
    if args.onnx == 'export':
        run_eval = coco_eval_export
    elif args.onnx == 'eval':
        run_eval = coco_eval_onnx
    else:
        run_eval = coco_eval
    return run_eval(ssd_r34, val_coco, cocoGt, encoder, inv_map,
                    args.threshold, args.device, use_cuda)
def _load_all(self, anno_file, shuffle):
    """
    Load every annotated image listed in the annotation json file.

    Results are stored in ``self.image_set_index`` (relative image paths)
    and ``self.labels`` (one array per image with rows
    ``[cat_id, xmin, ymin, xmax, ymax, 0]``, coordinates divided by the
    image width/height). Images without annotations are skipped.

    Parameters:
    ----------
    anno_file: str
        annotation json file
    shuffle: bool
        whether to shuffle image list
    """
    coco = COCO(anno_file)
    image_set_index = []
    labels = []

    for img_id in coco.getImgIds():
        # filename
        image_info = coco.loadImgs(img_id)[0]
        filename = image_info["file_name"]
        # The sub-directory is the second '_'-separated part of the name.
        subdir = filename.split('_')[1]
        height = image_info["height"]
        width = image_info["width"]

        # label
        rows = []
        for anno in coco.loadAnns(coco.getAnnIds(imgIds=img_id)):
            cat_id = int(anno["category_id"])
            bbox = anno["bbox"]
            assert len(bbox) == 4
            xmin = float(bbox[0]) / width
            ymin = float(bbox[1]) / height
            xmax = xmin + float(bbox[2]) / width
            ymax = ymin + float(bbox[3]) / height
            rows.append([cat_id, xmin, ymin, xmax, ymax, 0])

        if not rows:
            continue
        labels.append(np.array(rows))
        image_set_index.append(os.path.join(subdir, filename))

    if shuffle:
        import random
        # Shuffle one index list so paths and labels stay paired.
        order = list(range(len(image_set_index)))
        random.shuffle(order)
        image_set_index = [image_set_index[i] for i in order]
        labels = [labels[i] for i in order]

    # store the results
    self.image_set_index = image_set_index
    self.labels = labels
class CocoDataset(Dataset):
    """COCO Custom Dataset compatible with torch.utils.data.DataLoader."""

    def __init__(self, root, json, vocab, transform=None):
        """Store the image root, caption annotations and vocabulary wrapper.

        Args:
            root: image directory.
            json: coco annotation file path.
            vocab: vocabulary wrapper.
            transform: image transformer.
        """
        annotations = COCO(json)
        self.root = root
        self.coco = annotations
        # One dataset entry per annotation id.
        self.ids = list(annotations.anns.keys())
        self.vocab = vocab
        self.transform = transform

    def __getitem__(self, index):
        """Return one (image, caption tensor) pair."""
        annotation = self.coco.anns[self.ids[index]]
        img_id = annotation['image_id']
        raw_caption = annotation['caption']
        file_name = self.coco.loadImgs(img_id)[0]['file_name']

        # Load the image from disk and apply the optional transform.
        image = Image.open(os.path.join(self.root, file_name)).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        # numericalize: '<BOS>', the caption tokens, then '<EOS>'.
        tokens = tokenizer(str(raw_caption))
        token_ids = [self.vocab('<BOS>')]
        token_ids += [self.vocab(token) for token in tokens]
        token_ids.append(self.vocab('<EOS>'))
        return image, torch.Tensor(token_ids)

    def __len__(self):
        """Number of annotations in the dataset."""
        return len(self.ids)