import json

from pycocotools.coco import COCO
from refer import REFER


def main():
  ref_data_root = '<path/to/refer/data/folder>'
  all_refs = []
  for dataset, split_bys in [
      ('refcoco', ['google', 'unc']),
      ('refcoco+', ['unc']),
      ('refcocog', ['google', 'umd'])
  ]:
    for split_by in split_bys:
      refer = REFER(ref_data_root, dataset, split_by)
      for ref_id in refer.getRefIds():
        ref = refer.Refs[ref_id]
        ann = refer.refToAnn[ref_id]
        ref['ann'] = ann
        ref['dataset'] = dataset
        ref['dataset_partition'] = split_by
        all_refs.append(ref)

  coco_annotations_file = '<path/to/instances_train2014.json>'
  coco = COCO(coco_annotations_file)
  ref_image_ids = set(x['image_id'] for x in all_refs)
  coco_anns = {image_id: {'info': coco.imgs[image_id],
                          'anns': coco.imgToAnns[image_id]}
               for image_id in ref_image_ids}

  out_file = '<path/to/refcoco.json>'
  with open(out_file, 'w') as f:
    json.dump({'ref': all_refs, 'coco_anns': coco_anns}, f)
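
A minimal sketch of reading the combined file back (the placeholder path mirrors the one above):

import json

with open('<path/to/refcoco.json>') as f:
    data = json.load(f)
all_refs, coco_anns = data['ref'], data['coco_anns']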
Example #2
import os
import random
import time

import numpy as np
import torch
from torch.utils.data import Dataset

from refer import REFER


class RefDataset(Dataset):
    def __init__(self, split):
        self.refer = REFER(dataset='refcoco+', splitBy='unc')
        self.ref_ids = self.refer.getRefIds(split=split)

        self.image_embeds = np.load(
            os.path.join("data", "embeddings", "FINALImageEmbeddings.npy"))
        self.image_ids = list(
            np.load(os.path.join("data", "embeddings", "FINALImageIDs.npy")))
        before_text_embeds = time.time()
        self.text_embeds = np.concatenate(
            (np.load(
                os.path.join("data", "embeddings",
                             "FINALTextEmbeddings1of2.npy")),
             np.load(
                 os.path.join("data", "embeddings",
                              "FINALTextEmbeddings2of2.npy"))),
            axis=0)
        after_text_embeds = time.time()
        print("Text Embedding Time: ", after_text_embeds - before_text_embeds)
        assert (len(self.text_embeds) == 141564)
        assert (self.text_embeds[0].shape[1] == 3072)
        print('Found {} referred objects in {} split.'.format(
            len(self.ref_ids), split))

    def __len__(self):
        return len(self.ref_ids)

    def __getitem__(self, i):
        ref_id = self.ref_ids[i]
        ref = self.refer.loadRefs(ref_id)[0]

        image_id = ref['image_id']
        image = self.refer.Imgs[image_id]
        image_idx = self.image_ids.index(image_id)
        image_embed = self.image_embeds[image_idx, :, :, :]

        height = image['height']
        width = image['width']
        bound_box = torch.Tensor(self.refer.getRefBox(ref_id))
        bound_box[0] /= width
        bound_box[1] /= height
        bound_box[2] /= width
        bound_box[3] /= height
        #bound_box = bound_box.unsqueeze(dim=0)

        #whole_file_name = ref['file_name']
        #file_name = whole_file_name[:whole_file_name.rfind("_")]+".jpg"

        sent = random.choice(ref['sentences'])
        ref_expr = sent['raw']
        text_id = sent['sent_id']

        text_idx = text_id
        text_embed = torch.from_numpy(self.text_embeds[text_idx])

        return image_embed, text_embed, bound_box
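
A minimal usage sketch for the class above (the batch size is an arbitrary choice):

from torch.utils.data import DataLoader

dataset = RefDataset(split='train')
loader = DataLoader(dataset, batch_size=32, shuffle=True)
image_embed, text_embed, bound_box = next(iter(loader))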
Example #3

File: create_cache.py  Project: hyzcn/VQD
import copy
import json
import os
import os.path as osp

from refer import REFER


def create_cache(**kwargs):

    data_root = kwargs.get('data_root')
    dataset = kwargs.get('dataset')
    splitBy = kwargs.get('splitBy')
    splits = kwargs.get('splits')
    refer = REFER(data_root, dataset, splitBy)

    # print stats about the given dataset
    print('dataset [%s_%s] contains: ' % (dataset, splitBy))
    ref_ids = refer.getRefIds()
    image_ids = refer.getImgIds()
    print('%s expressions for %s refs in %s images.' %
          (len(refer.Sents), len(ref_ids), len(image_ids)))

    checkpoint_dir = osp.join('cache', 'prepro', dataset + "_" + splitBy)
    if not osp.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    for split in splits + ['train']:
        ref_ids = refer.getRefIds(split=split)
        print('%s refs are in split [%s].' % (len(ref_ids), split))
        #have to sample various sentences and their tokens from here.
        data = []
        for ref_id in ref_ids:
            ref = refer.Refs[ref_id]
            image_id = ref['image_id']
            ref['image_info'] = refer.Imgs[image_id]
            sentences = ref.pop('sentences')
            ref.pop('sent_ids')
            coco_boxes_info = refer.imgToAnns[image_id]
            coco_boxes = [box_ann['bbox'] for box_ann in coco_boxes_info]
            gtbox = refer.refToAnn[ref_id]['bbox']
            for sentence in sentences:
                entnew = copy.deepcopy(ref)
                entnew['boxes'] = coco_boxes
                entnew['sentence'] = sentence
                entnew['gtbox'] = gtbox
                data.append(entnew)

        data_json = osp.join(checkpoint_dir, split + '.json')
        with open(data_json, 'w') as f:
            json.dump(data, f)
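
A hypothetical invocation (the argument values are illustrative, not from the project):

create_cache(data_root='data', dataset='refcoco', splitBy='unc',
             splits=['val', 'testA', 'testB'])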
Example #4
from torch.utils.data import Dataset

from refer import REFER


class RefDataset(Dataset):
    def __init__(self):
        self.refer = REFER(dataset='refcoco+', splitBy='unc')
        self.ref_ids = self.refer.getRefIds()

    def __len__(self):
        return len(self.ref_ids)

    def __getitem__(self, i):

        ref_id = self.ref_ids[i]
        ref = self.refer.loadRefs(ref_id)[0]

        # note: this loop keeps only the last sentence listed for the ref
        for sent in ref['sentences']:
            s = sent['raw']
            sid = sent['sent_id']

        return s, sid
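
A short usage sketch (each item is the last (raw sentence, sent_id) pair for its ref):

dataset = RefDataset()
raw_sentence, sent_id = dataset[0]
print(raw_sentence, sent_id)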
Example #5
import os

import cv2

from refer import REFER


def gen_ref_coco_data():

    dataroot = "/projectnb/llamagrp/shawnlin/ref-exp-gen/dataset/refer2/refer/data"
    dataset = "refcoco"
    refer = REFER(dataroot, dataset, "google")

    ref_ids = refer.getRefIds(split="test")[:]
    print("total ref ids:", len(ref_ids))
    for ref_id in ref_ids[:]:
        ref = refer.Refs[ref_id]
        img_id = ref["image_id"]
        ann_id = ref["ann_id"]
        img_path = os.path.join(refer.IMAGE_DIR,
                                refer.Imgs[img_id]["file_name"])
        img = cv2.imread(img_path)
        #im_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ref_expr = "\n".join([s["raw"] for s in ref["sentences"]])
        #print(ref_expr)
        #print("img_id", img_id)

        yield (img, ref_expr, img_id, ann_id, ref_id)
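
A usage sketch for the generator above:

for img, ref_expr, img_id, ann_id, ref_id in gen_ref_coco_data():
    print(img_id, ann_id, ref_expr.replace('\n', ' | '))
    break  # inspect only the first item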
Example #6

if __name__ == '__main__':

    import os.path as osp
    import sys
    ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
    sys.path.insert(0, osp.join(ROOT_DIR, 'lib', 'datasets'))
    from refer import REFER

    # load refer of dataset
    dataset = 'refcoco'
    refer = REFER(dataset, splitBy='google')

    # mimic some Res
    val_refIds = refer.getRefIds(split='test')
    ref_id = 49767
    print "GD: %s" % refer.Refs[ref_id]['sentences']
    Res = [{'ref_id': ref_id, 'sent': 'left bottle'}]

    # evaluate some refer expressions
    refEval = RefEvaluation(refer, Res)
    refEval.evaluate()

    # print output evaluation scores
    for metric, score in refEval.eval.items():
        print('%s: %.3f' % (metric, score))

    # demo how to use evalImgs to retrieve low score result
    # evals = [eva for eva in refEval.evalRefs if eva['CIDEr'] < 30]
    # print('ground truth sents')
Example #7
import os.path as osp
import cv2
import argparse

from refer import REFER

parser = argparse.ArgumentParser(description='Data preparation')
parser.add_argument('--data_root', type=str)  # contains refclef, refcoco, refcoco+, refcocog and images
parser.add_argument('--output_dir', type=str)
parser.add_argument('--dataset', type=str, choices=['refcoco', 'refcoco+', 'refcocog', 'refclef'], default='refcoco')
parser.add_argument('--split', type=str, default='umd')
parser.add_argument('--generate_mask', action='store_true')
args = parser.parse_args()

refer = REFER(args.data_root, args.dataset, args.split)

print('dataset [%s_%s] contains: ' % (args.dataset, args.split))
ref_ids = refer.getRefIds()
image_ids = refer.getImgIds()
print('%s expressions for %s refs in %s images.' % (len(refer.Sents), len(ref_ids), len(image_ids)))

print('\nAmong them:')
if args.dataset == 'refclef':
    if args.split == 'unc':
        splits = ['train', 'val', 'testA', 'testB', 'testC']
    else:
        splits = ['train', 'val', 'test']
elif args.dataset == 'refcoco':
    splits = ['train', 'val', 'testA', 'testB']
elif args.dataset == 'refcoco+':
    splits = ['train', 'val', 'testA', 'testB']
elif args.dataset == 'refcocog':
    splits = ['train', 'val', 'test']  # we don't have a test split for refcocog right now
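
# A plausible continuation (sketch): report per-split counts with
# getRefIds(split=...), the same call the other examples rely on.
for split in splits:
    split_ref_ids = refer.getRefIds(split=split)
    print('%s refs are in split [%s].' % (len(split_ref_ids), split))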
Example #8

(Identical to Example #6.)
Example #9
import cv2
import numpy as np
from pathlib import Path
from torch.utils.data import Dataset


class RefCOCODataset(Dataset):
    def __init__(self, refcoco_dir, refcoco_images_dir, coco_dir, split='val'):

        self.image_dir = refcoco_images_dir

        # coco_train_annFile = coco_dir.joinpath('annotations/instances_train2014.json')
        # self.coco = COCO(coco_train_annFile)

        assert split in ['train', 'val', 'test']

        workspace_dir = Path(__file__).resolve().parent.parent
        refcoco_util_dir = workspace_dir.joinpath('refcoco_utils')
        import sys
        sys.path.append(str(refcoco_util_dir))
        from refer import REFER
        self.refer = REFER('refcocog', 'umd')

        ref_ids = self.refer.getRefIds(split=split)

        id2dets = {}
        img_ids = []
        image_fns = []
        for ref_id in ref_ids:
            ref = self.refer.Refs[ref_id]
            img_id = ref['image_id']

            if img_id not in img_ids:
                img_ids.append(img_id)

                fn_ann = ref['file_name']

                # COCO_train2014_000000419645_398406.jpg
                # COCO_train2014_000000419645.jpg

                suffix = fn_ann.split('.')[-1]

                fname = '_'.join(fn_ann.split('_')[:-1]) + '.' + suffix

                image_fns.append(fname)

                detections = self.refer.imgToAnns[img_id]

                id2dets[img_id] = detections

        self.image_ids = img_ids
        self.image_fns = image_fns
        self.id2dets = id2dets

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):

        image_id = self.image_ids[idx]
        image_fn = self.image_fns[idx]
        image_path = self.image_dir.joinpath(image_fn)

        assert Path(image_path).exists(), image_path

        img = cv2.imread(str(image_path))

        H, W, C = img.shape

        dets = self.id2dets[image_id]
        # cat_names = [det['category_name'] for det in dets]

        boxes = []
        for i, region in enumerate([det['bbox'] for det in dets]):
            # convert COCO (x, y, w, h) boxes to (x1, y1, x2, y2)
            x, y, w, h = region[:4]
            x1, y1, x2, y2 = x, y, x + w, y + h

            # x1, y1, x2, y2 = region[:4]

            assert x2 <= W, (image_id, i, region)
            assert y2 <= H, (image_id, i, region)

            box = [x1, y1, x2, y2]
            boxes.append(box)

        boxes = np.array(boxes)

        return {
            'img_id': str(image_id),
            'img_fn': image_fn,
            'img': img,
            'boxes': boxes,
            # 'captions': cat_names
        }
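
A minimal usage sketch for RefCOCODataset above (the directory paths are assumptions):

from pathlib import Path

dataset = RefCOCODataset(
    refcoco_dir=Path('data/refcoco'),
    refcoco_images_dir=Path('data/images/mscoco/images/train2014'),
    coco_dir=Path('data/coco'),
    split='val')
sample = dataset[0]
print(sample['img_id'], sample['boxes'].shape)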
Example #10

class RefCOCO(Dataset):
    def __init__(self, data_dir="/projectnb/llamagrp/shawnlin/ref-exp-gen/dataset/refer2/refer/data",
                 dataset="refcoco", split="train", splitBy="google", transforms=None):
        assert split in ["train", "val", "test"]
        assert os.path.exists(data_dir), \
            "cannot find folder {}, please download refcoco data into this folder".format(data_dir)

        self.data_dir = data_dir
        self.dataset = dataset
        self.transforms = transforms
        self.split = split
        self.splitBy = splitBy

        self.refer = REFER(self.data_dir, self.dataset, self.splitBy)
        self.ref_ids = self.refer.getRefIds(split=self.split)[:]

        #self.filter_non_overlap = filter_non_overlap
        #self.filter_duplicate_rels = filter_duplicate_rels and self.split == 'train'

        # read in dataset from a h5 file and a dict (json) file
        #self.im_h5 = h5py.File(self.image_file, 'r')
        with open("/projectnb/llamagrp/shawnlin/ref-exp-gen/graph-rcnn.pytorch/datasets/vg_bm/VG-SGG-dicts.json") as f:
            self.info = json.load(f)
        #self.im_refs = self.im_h5['images'] # image data reference
        #im_scale = self.im_refs.shape[2]

        # add background class
        self.info['label_to_idx']['__background__'] = 0
        self.class_to_ind = self.info['label_to_idx']
        self.ind_to_classes = sorted(self.class_to_ind, key=lambda k:
                               self.class_to_ind[k])
        #cfg.ind_to_class = self.ind_to_classes

        self.predicate_to_ind = self.info['predicate_to_idx']
        self.predicate_to_ind['__background__'] = 0
        self.ind_to_predicates = sorted(self.predicate_to_ind, key=lambda k:
                                  self.predicate_to_ind[k])
        #cfg.ind_to_predicate = self.ind_to_predicates

        #self.split_mask, self.image_index, self.im_sizes, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
        #    self.roidb_file, self.image_file,
        #    self.split, num_im, num_val_im=num_val_im,
        #    filter_empty_rels=filter_empty_rels,
        #    filter_non_overlap=filter_non_overlap and split == "train",
        #)

        #self.json_category_id_to_contiguous_id = self.class_to_ind

        #self.contiguous_category_id_to_json_id = {
        #    v: k for k, v in self.json_category_id_to_contiguous_id.items()
        #}

    #@property
    #def coco(self):
    #    """
    #    :return: a Coco-like object that we can use to evaluate detection!
    #    """
    #    anns = []
    #    for i, (cls_array, box_array) in enumerate(zip(self.gt_classes, self.gt_boxes)):
    #        for cls, box in zip(cls_array.tolist(), box_array.tolist()):
    #            anns.append({
    #                'area': (box[3] - box[1] + 1) * (box[2] - box[0] + 1),
    #                'bbox': [box[0], box[1], box[2] - box[0] + 1, box[3] - box[1] + 1],
    #                'category_id': cls,
    #                'id': len(anns),
    #                'image_id': i,
    #                'iscrowd': 0,
    #            })
    #    fauxcoco = COCO()
    #    fauxcoco.dataset = {
    #        'info': {'description': 'ayy lmao'},
    #        'images': [{'id': i} for i in range(self.__len__())],
    #        'categories': [{'supercategory': 'person',
    #                           'id': i, 'name': name} for i, name in enumerate(self.ind_to_classes) if name != '__background__'],
    #        'annotations': anns,
    #    }
    #    fauxcoco.createIndex()
    #    return fauxcoco

    #def _im_getter(self, idx):
    #    w, h = self.im_sizes[idx, :]
    #    ridx = self.image_index[idx]
    #    im = self.im_refs[ridx]
    #    im = im[:, :h, :w] # crop out
    #    im = im.transpose((1,2,0)) # c h w -> h w c
    #    return im

    def __len__(self):
        return len(self.ref_ids)

    def __getitem__(self, index):
        """
        get dataset item
        """
        # get image
        ref = self.refer.Refs[self.ref_ids[index]]
        img_id = ref["image_id"]
        ann_id = ref["ann_id"]
        img_path = os.path.join(self.refer.IMAGE_DIR, self.refer.Imgs[img_id]["file_name"])
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        #print("img original size", img.shape)
        height, width = img.shape[0], img.shape[1]  # img.shape is (H, W, C)
        #print("img after size", img.shape)
        ref_expr = "\n".join([s["raw"] for s in ref["sentences"]])

        # get object bounding boxes, labels and relations
        #obj_boxes = [[34.79, 272.54, 106.72, 80.43]] # dummy target bbox
        referent_box = [self.refer.Anns[ann_id]["bbox"]]  # COCO format: [x, y, w, h]
        target_raw = BoxList(referent_box, (width, height), mode="xywh").convert("xyxy")
        if self.transforms is not None:
            img, target = self.transforms(img, target_raw)
        else:
            img, target = img, target_raw
        target.add_field("ref_sents", [s["raw"] for s in ref["sentences"]])
        target.add_field("label", self.refer.Anns[ann_id]["category_id"])
        #target.add_field("labels", torch.from_numpy(obj_labels))
        #target.add_field("pred_labels", torch.from_numpy(obj_relations))
        #target.add_field("relation_labels", torch.from_numpy(obj_relation_triplets))
        target = target.clip_to_image(remove_empty=False)

        info = {"img_id":img_id, "ann_id":ann_id, "ref_id": self.ref_ids[index], "ref_sents": [s["raw"] for s in ref["sentences"]]}

        return img, target, index, info

    #def get_groundtruth(self, index):
    #    width, height = self.im_sizes[index, :]
    #    # get object bounding boxes, labels and relations

    #    obj_boxes = self.gt_boxes[index].copy()
    #    obj_labels = self.gt_classes[index].copy()
    #    obj_relation_triplets = self.relationships[index].copy()

    #    if self.filter_duplicate_rels:
    #        # Filter out dupes!
    #        assert self.split == 'train'
    #        old_size = obj_relation_triplets.shape[0]
    #        all_rel_sets = defaultdict(list)
    #        for (o0, o1, r) in obj_relation_triplets:
    #            all_rel_sets[(o0, o1)].append(r)
    #        obj_relation_triplets = [(k[0], k[1], np.random.choice(v)) for k,v in all_rel_sets.items()]
    #        obj_relation_triplets = np.array(obj_relation_triplets)

    #    obj_relations = np.zeros((obj_boxes.shape[0], obj_boxes.shape[0]))

    #    for i in range(obj_relation_triplets.shape[0]):
    #        subj_id = obj_relation_triplets[i][0]
    #        obj_id = obj_relation_triplets[i][1]
    #        pred = obj_relation_triplets[i][2]
    #        obj_relations[subj_id, obj_id] = pred

    #    target = BoxList(obj_boxes, (width, height), mode="xyxy")
    #    target.add_field("labels", torch.from_numpy(obj_labels))
    #    target.add_field("pred_labels", torch.from_numpy(obj_relations))
    #    target.add_field("relation_labels", torch.from_numpy(obj_relation_triplets))
    #    target.add_field("difficult", torch.from_numpy(obj_labels).clone().fill_(0))
    #    return target

    def get_img_info(self, index):

        ref = self.refer.Refs[self.ref_ids[index]]
        img_id = ref["image_id"]
        w, h = self.refer.Imgs[img_id]["width"], self.refer.Imgs[img_id]["height"]
        return {"height": h, "width": w}
Example #11
import json

import cv2
import numpy as np
from pathlib import Path
from torch.utils.data import Dataset


class RefCOCODataset(Dataset):
    def __init__(self, refcoco_dir, refcoco_images_dir, split='val'):

        self.image_dir = refcoco_images_dir

        mattnet_maskrcnn_detections_path = refcoco_dir.joinpath(
            'detections/refcocog_umd/res101_coco_minus_refer_notime_dets.json')
        with open(mattnet_maskrcnn_detections_path) as f:
            mattnet_maskrcnn_detections = json.load(f)

        id2dets = {}
        for det in mattnet_maskrcnn_detections:
            image_id = det['image_id']
            if image_id not in id2dets:
                id2dets[image_id] = []
            id2dets[image_id].append(det)
        self.id2dets = id2dets

        print('Load mattnet detections from', mattnet_maskrcnn_detections_path)

        assert split in ['train', 'val', 'test']

        workspace_dir = Path(__file__).resolve().parent.parent
        refcoco_util_dir = workspace_dir.joinpath('refcoco_utils')
        import sys
        sys.path.append(str(refcoco_util_dir))
        from refer import REFER
        self.refer = REFER('refcocog', 'umd')

        ref_ids = self.refer.getRefIds(split=split)
        img_ids = []
        image_fns = []
        for ref_id in ref_ids:
            ref = self.refer.Refs[ref_id]
            img_id = ref['image_id']

            if img_id not in img_ids:
                img_ids.append(img_id)

                fn_ann = ref['file_name']

                # COCO_train2014_000000419645_398406.jpg
                # COCO_train2014_000000419645.jpg

                suffix = fn_ann.split('.')[-1]

                fname = '_'.join(fn_ann.split('_')[:-1]) + '.' + suffix

                image_fns.append(fname)

        self.image_ids = img_ids
        self.image_fns = image_fns

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):

        image_id = self.image_ids[idx]
        image_fn = self.image_fns[idx]
        image_path = self.image_dir.joinpath(image_fn)

        assert Path(image_path).exists(), image_path

        img = cv2.imread(str(image_path))

        H, W, C = img.shape

        dets = self.id2dets[image_id]
        cat_names = [det['category_name'] for det in dets]

        boxes = []
        for i, region in enumerate([det['box'] for det in dets]):
            # convert (x, y, w, h) detection boxes to (x1, y1, x2, y2)
            x, y, w, h = region[:4]

            x1, y1, x2, y2 = x, y, x + w, y + h

            assert x2 <= W, (image_id, i, region)
            assert y2 <= H, (image_id, i, region)

            box = [x1, y1, x2, y2]
            boxes.append(box)

        boxes = np.array(boxes)

        return {
            'img_id': str(image_id),
            'img_fn': image_fn,
            'img': img,
            'boxes': boxes,
            'captions': cat_names
        }
Example #12

import scipy.misc
from tqdm import tqdm

from refer import REFER


def bb_intersection_over_union(boxA, boxB):
    # head assumed: boxes in (x1, y1, x2, y2) format
    xA, yA = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    xB, yB = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
    interArea = max(0, xB - xA) * max(0, yB - yA)
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the intersection area
    iou = interArea / float(boxAArea + boxBArea - interArea)

    # return the intersection over union value
    return iou
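
# Quick sanity check of the helper above: two 10x10 boxes overlapping in a
# 5x5 region give 25 / (100 + 100 - 25) ~= 0.143.
print(bb_intersection_over_union([0, 0, 10, 10], [5, 5, 15, 15]))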


data_root = '../refer/data'
dataset = 'refcoco'
splitBy = 'unc'
refer = REFER(data_root, dataset, splitBy)

ref_ids = refer.getRefIds(split='testB')
images_dir = '/root/refer/data/images/mscoco/images/train2014/'

hyp = open("hyp.txt", "w")
ref1 = open("ref1.txt", "w")
ref2 = open("ref2.txt", "w")
ref3 = open("ref3.txt", "w")
ref4 = open("ref4.txt", "w")

for ref_id in tqdm(ref_ids):
    ref = refer.Refs[ref_id]
    x, y, w, h = refer.getRefBox(ref_id)  # [x, y, w, h]
    x1, y1, x2, y2 = x, y, x + w, y + h
    image_path = images_dir + refer.Imgs[ref['image_id']]['file_name']

    image = scipy.misc.imread(image_path)  # removed in SciPy >= 1.2; imageio.imread is the usual replacement
Example #13
import os
import os.path as osp
import sys
from time import time

import torch

from mrcn import inference  # import path assumed; provides the Mask R-CNN wrapper


def main(args):

  # Image Directory
  params = vars(args)
  dataset_splitBy = params['dataset'] + '_' + params['splitBy']
  if 'coco' in dataset_splitBy or 'combined' in dataset_splitBy:
    IMAGE_DIR = 'data/images/mscoco/images/train2014'
  elif 'clef' in dataset_splitBy:
    IMAGE_DIR = 'data/images/saiapr_tc-12'
  else:
    print('No image directory prepared for ', args.dataset)
    sys.exit(0)

  # make save dir
  save_dir = osp.join('cache/detections', dataset_splitBy)
  if not osp.isdir(save_dir):
    os.makedirs(save_dir)
  print(save_dir)

  # get mrcn instance
  mrcn = inference.Inference(args)
  imdb = mrcn.imdb

  # import refer
  from refer import REFER
  data_root, dataset, splitBy = params['data_root'], params['dataset'], params['splitBy']
  refer = REFER(data_root, dataset, splitBy)
  cat_name_to_cat_ix = {category_name: category_id for category_id, category_name in refer.Cats.items()}

  # detect and prepare dets.json
  proposals = []
  det_id = 0
  cnt = 0

  # # TEMPS DEBUG
  # # os.makedirs('cache/old_internals')
  # img_path = '/home/mwb/Datasets/mscoco/images/train2014/COCO_train2014_000000581857.jpg'
  # scores, boxes = mrcn.predict(img_path)
  # image_feat = mrcn.net._predictions['__temp_net_conv'].data.cpu().numpy()
  # roi_feats = mrcn.net._predictions['__temp_pool5'].data.cpu().numpy()
  # rois = mrcn.net._predictions['__temp_rois'].data.cpu().numpy()[:,1:]
  # head_feats = mrcn.net._predictions['__temp_head_feats'].data.cpu().numpy()
  # head_pool = mrcn.net._predictions['__temp_head_pool'].data.cpu().numpy()
  # print(image_feat.shape, roi_feats.shape, rois.shape, head_feats.shape, head_pool.shape)
  # np.save('cache/old_internals/image_feat.npy', image_feat)
  # np.save('cache/old_internals/roi_feats.npy', roi_feats)
  # np.save('cache/old_internals/rois.npy', rois)
  # np.save('cache/old_internals/head_feats.npy', head_feats)
  # np.save('cache/old_internals/head_pool.npy', head_pool)

  val_image_ids = {refer.Refs[ref_id]['image_id'] for ref_id in refer.getRefIds(split='val')
                   if refer.Refs[ref_id]['split'] == 'val'}
  print('val image num:', len(val_image_ids))


  start = time()
  # for image_id, image in refer.Imgs.items():
  for image_id in val_image_ids:
    image = refer.Imgs[image_id]
    file_name = image['file_name']
    img_path = osp.join(IMAGE_DIR, file_name)

    # predict
    scores, boxes = mrcn.predict(img_path)

    rois = mrcn.net._predictions['rois'].data.cpu().numpy()[:,1:] / mrcn._scale
    cnt += 1
    # print('%s/%s done.' % (cnt, len(refer.Imgs)))
    print('%s/%s done.' % (cnt, len(val_image_ids)))

    # info = {
    #   'image_id': image_id,
    #   'rois': rois,
    #   'scores': scores, 
    #   'boxes': boxes,
    #   'roi_scores': mrcn.net._predictions['__roi_scores'].data.cpu().numpy()
    # }
    torch.cuda.empty_cache()

    # proposals.append(info)
  
  total_t = time() - start
  avg_t = total_t / len(val_image_ids)
  print('time: %.6f / %.6f = %.6f' % (total_t, len(val_image_ids), avg_t))