Example #1
def vis(model, loader, save_dir, rank=None, world_size=1):
    attention_dir = os.path.join(save_dir, 'attention_probs')
    hidden_dir = os.path.join(save_dir, 'hidden_states')
    cos_dir = os.path.join(save_dir, 'cos_similarity')
    # if not os.path.exists(hidden_dir):
    #     makedirsExist(hidden_dir)
    # if not os.path.exists(cos_dir):
    #     makedirsExist(cos_dir)
    if not os.path.exists(attention_dir):
        makedirsExist(attention_dir)
    # offset = 0
    # if rank is not None:
    #     num_samples = int(math.ceil(len(loader.dataset) * 1.0 / world_size))
    #     offset = num_samples * rank
    # index = offset
    model.eval()
    for i, data in zip(trange(len(loader)), loader):
    # for i, data in enumerate(loader):
        data = to_cuda(data)
        output = model(*data)
        for _i, (attention_probs, hidden_states) in enumerate(zip(output['attention_probs'], output['hidden_states'])):
            index = int(data[2][_i][-1])
            if hasattr(loader.dataset, 'ids'):
                image_id = loader.dataset.ids[index]
            else:
                image_id = loader.dataset.database[index]['image'].split('/')[1].split('.')[0]
            attention_probs_arr = attention_probs.detach().cpu().numpy()
            hidden_states_arr = hidden_states.detach().cpu().numpy()
            cos_similarity_arr = (hidden_states @ hidden_states.transpose(1, 2)).detach().cpu().numpy()
            # NOTE: only the attention maps are written out; the hidden-state and
            # cosine-similarity arrays are computed but their save dirs are commented out above.
            np.save(os.path.join(attention_dir, '{}.npy'.format(image_id)), attention_probs_arr)
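
The arrays written by vis can be inspected offline. A minimal sketch, assuming only numpy and the directory layout produced above (save_dir is an illustrative path):

import glob
import os

import numpy as np

save_dir = './vis_out'  # hypothetical; whatever was passed to vis()
for path in sorted(glob.glob(os.path.join(save_dir, 'attention_probs', '*.npy'))):
    image_id = os.path.splitext(os.path.basename(path))[0]
    attn = np.load(path)  # one attention tensor saved per image id
    print(image_id, attn.shape)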
Example #2
    def __init__(self, image_set, root_path, data_path, boxes='gt', proposal_source='official',
                 transform=None, test_mode=False,
                 zip_mode=False, cache_mode=False, cache_db=False, ignore_db_cache=True,
                 tokenizer=None, pretrained_model_name=None,
                 add_image_as_a_box=False, mask_size=(14, 14),
                 aspect_grouping=False, **kwargs):
        """
        VREP Dataset

        :param image_set: image folder name
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to dataset
        :param boxes: boxes to use, 'gt' or 'proposal'
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param mask_size: size of instance mask of each object
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(VRep, self).__init__()

        assert not cache_mode, 'cache mode is not currently supported!'
        self.data_json = 'obj_det_res.json'  # alternative: 'image_seg_test.json'
        self.ref_json = 'ref_annotations.json'
        self.boxes = boxes
        self.refer = Refer()
        self.test_mode = test_mode
        self.data_path = data_path
        self.root_path = root_path
        self.transform = transform
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        self.mask_size = mask_size
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        if zip_mode:
            self.zipreader = ZipReader()

        self.database = self.load_annotations()
        if self.aspect_grouping:
            self.group_ids = self.group_aspect(self.database)
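
The cache-dir-plus-BertTokenizer fallback in this constructor recurs in nearly every example below. A standalone sketch of the same pattern (the helper name is illustrative), assuming pytorch_pretrained_bert is installed:

import os

from pytorch_pretrained_bert import BertTokenizer


def build_tokenizer(tokenizer=None, pretrained_model_name=None, cache_dir='./cache'):
    # Reuse a caller-supplied tokenizer; otherwise load a BERT vocab,
    # defaulting to 'bert-base-uncased' exactly as the constructors here do.
    if tokenizer is not None:
        return tokenizer
    os.makedirs(cache_dir, exist_ok=True)
    name = pretrained_model_name if pretrained_model_name is not None else 'bert-base-uncased'
    return BertTokenizer.from_pretrained(name, cache_dir=cache_dir)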
Example #3
    def __init__(self, split, cfg, transform):
        super().__init__()
        self.split = split
        self.cfg = cfg
        self.transform = transform

        self.annotations = []
        n_img = 0
        split = split + 'id' if split == 'val' else split  # 'val' -> 'valid'
        for img in json.load(open(self.cfg.DATAPATH)):
            if img['split'] in split.split('_'):  # compound splits such as 'train_valid' match too
                n_img += 1
                for annot in img['annotations']:
                    if cfg.TEST.EXCL_LEFT_RIGHT and (
                            annot['predicate'] == 'to the left of'
                            or annot['predicate'] == 'to the right of'):
                        continue

                    annot['url'] = img['url']
                    annot['height'] = img['height']
                    annot['width'] = img['width']
                    annot['subject']['bbox'] = self.fix_bbox(
                        annot['subject']['bbox'], img['height'], img['width'])
                    annot['object']['bbox'] = self.fix_bbox(
                        annot['object']['bbox'], img['height'], img['width'])
                    self.annotations.append(annot)

        print('%d relations in %s' % (len(self.annotations), split))
        print('%d imgs in %s' % (n_img, split))

        self.cache_dir = os.path.join(cfg.DATASET.ROOT_PATH, 'cache')
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        if cfg.NETWORK.BERT_MODEL_NAME:
            print('Initializing BERT tokenizer from',
                  cfg.NETWORK.BERT_MODEL_NAME)
        self.tokenizer = BertTokenizer.from_pretrained(
            'bert-base-uncased' if cfg.NETWORK.BERT_MODEL_NAME is None else
            cfg.NETWORK.BERT_MODEL_NAME,
            cache_dir=self.cache_dir)
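
The split handling above turns 'val' into 'valid' and then matches each image against the (possibly compound, underscore-joined) split name. A toy illustration of that membership test:

imgs = [{'split': 'train'}, {'split': 'valid'}, {'split': 'test'}]

split = 'val'
split = split + 'id' if split == 'val' else split  # 'val' -> 'valid'
kept = [img for img in imgs if img['split'] in split.split('_')]
print(len(kept))  # 1: only the 'valid' image survives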
Example #4
    def load_annotations(self, ann_file):
        tic = time.time()
        database = []
        db_cache_name = 'vcr_nometa_{}_{}_{}'.format(self.task, self.image_set, os.path.basename(ann_file)[:-len('.jsonl')])
        if self.only_use_relevant_dets:
            db_cache_name = db_cache_name + '_only_relevant_dets'
        if self.zip_mode:
            db_cache_name = db_cache_name + '_zipped'
        db_cache_root = os.path.join(self.root_path, 'cache')
        db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))

        if os.path.exists(db_cache_path):
            if not self.ignore_db_cache:
                # reading cached database
                print('cached database found in {}.'.format(db_cache_path))
                with open(db_cache_path, 'rb') as f:
                    print('loading cached database from {}...'.format(db_cache_path))
                    tic = time.time()
                    database = cPickle.load(f)
                    print('Done (t={:.2f}s)'.format(time.time() - tic))
                    return database
            else:
                print('cached database ignored.')

        # cache ignored or not found; reload the database from the annotation file
        print('loading database from {}...'.format(ann_file))
        tic = time.time()

        with jsonlines.open(ann_file) as reader:
            for ann in reader:
                if self.zip_mode:
                    img_fn = os.path.join(self.data_path, self.image_set + '.zip@/' + self.image_set, ann['img_fn'])
                    metadata_fn = os.path.join(self.data_path, self.image_set + '.zip@/' + self.image_set, ann['metadata_fn'])
                else:
                    img_fn = os.path.join(self.data_path, self.image_set, ann['img_fn'])
                    metadata_fn = os.path.join(self.data_path, self.image_set, ann['metadata_fn'])

                db_i = {
                    'annot_id': ann['annot_id'],
                    'objects': ann['objects'],
                    'img_fn': img_fn,
                    'metadata_fn': metadata_fn,
                    'question': ann['question'],
                    'answer_choices': ann['answer_choices'],
                    'answer_label': ann['answer_label'] if not self.test_mode else None,
                    'rationale_choices': ann['rationale_choices'],
                    'rationale_label': ann['rationale_label'] if not self.test_mode else None,
                }
                database.append(db_i)
        print('Done (t={:.2f}s)'.format(time.time() - tic))

        # cache database via cPickle
        if self.cache_db:
            print('caching database to {}...'.format(db_cache_path))
            tic = time.time()
            if not os.path.exists(db_cache_root):
                makedirsExist(db_cache_root)
            with open(db_cache_path, 'wb') as f:
                cPickle.dump(database, f)
            print('Done (t={:.2f}s)'.format(time.time() - tic))

        return database
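
This load-or-build-then-pickle flow repeats, with different cache names, in Examples 5, 9, 11, and 16. A condensed sketch of the shared pattern (names are illustrative; the standard pickle module stands in for cPickle):

import os
import pickle
import time


def load_or_build(cache_path, build_fn, ignore_cache=False, cache_db=True):
    # Return the cached database if present, otherwise rebuild it and optionally re-cache.
    if os.path.exists(cache_path) and not ignore_cache:
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    tic = time.time()
    database = build_fn()
    print('Done (t={:.2f}s)'.format(time.time() - tic))
    if cache_db:
        os.makedirs(os.path.dirname(cache_path) or '.', exist_ok=True)
        with open(cache_path, 'wb') as f:
            pickle.dump(database, f)
    return database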
Example #5
    def load_annotations(self):
        tic = time.time()
        database = []
        db_cache_name = 'refcoco+_boxes_{}_{}'.format(
            self.boxes, '+'.join(self.image_sets))
        if self.zip_mode:
            db_cache_name = db_cache_name + '_zipmode'
        if self.test_mode:
            db_cache_name = db_cache_name + '_testmode'
        db_cache_root = os.path.join(self.root_path, 'cache')
        db_cache_path = os.path.join(db_cache_root,
                                     '{}.pkl'.format(db_cache_name))

        if os.path.exists(db_cache_path):
            if not self.ignore_db_cache:
                # reading cached database
                print('cached database found in {}.'.format(db_cache_path))
                with open(db_cache_path, 'rb') as f:
                    print('loading cached database from {}...'.format(
                        db_cache_path))
                    tic = time.time()
                    database = cPickle.load(f)
                    print('Done (t={:.2f}s)'.format(time.time() - tic))
                    return database
            else:
                print('cached database ignored.')

        # cache ignored or not found; reload the database from the annotation file
        print('loading database of split {}...'.format('+'.join(
            self.image_sets)))
        tic = time.time()

        for ref_id, ref in zip(self.refer_ids, self.refs):
            iset = 'train2014'
            if not self.test_mode:
                gt_x, gt_y, gt_w, gt_h = self.refer.getRefBox(ref_id=ref_id)
            if self.zip_mode:
                image_fn = os.path.join(
                    self.data_path, iset + '.zip@/' + iset,
                    'COCO_{}_{:012d}.jpg'.format(iset, ref['image_id']))
            else:
                image_fn = os.path.join(
                    self.data_path, iset,
                    'COCO_{}_{:012d}.jpg'.format(iset, ref['image_id']))
            for sent in ref['sentences']:
                idb = {
                    'sent_id': sent['sent_id'],
                    'ann_id': ref['ann_id'],
                    'ref_id': ref['ref_id'],
                    'image_id': ref['image_id'],
                    'image_fn': image_fn,
                    'width': self.coco.imgs[ref['image_id']]['width'],
                    'height': self.coco.imgs[ref['image_id']]['height'],
                    'raw': sent['raw'],
                    'sent': sent['sent'],
                    'tokens': sent['tokens'],
                    'category_id': ref['category_id'],
                    'gt_box': [gt_x, gt_y, gt_x + gt_w, gt_y + gt_h] if not self.test_mode else None,
                }
                database.append(idb)

        print('Done (t={:.2f}s)'.format(time.time() - tic))

        # cache database via cPickle
        if self.cache_db:
            print('caching database to {}...'.format(db_cache_path))
            tic = time.time()
            if not os.path.exists(db_cache_root):
                makedirsExist(db_cache_root)
            with open(db_cache_path, 'wb') as f:
                cPickle.dump(database, f)
            print('Done (t={:.2f}s)'.format(time.time() - tic))

        return database
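
The 'COCO_{}_{:012d}.jpg' pattern zero-pads the image id to twelve digits, which matches the official COCO 2014 file naming. For example:

iset, image_id = 'train2014', 9
print('COCO_{}_{:012d}.jpg'.format(iset, image_id))  # COCO_train2014_000000000009.jpg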
Example #6
    def __init__(self, split, cfg, transform):
        super().__init__()
        self.split = split
        self.cfg = cfg
        self.transform = transform

        self.all_proposals_test = False
        if cfg.DATASET.ALL_PROPOSALS_TEST:
            self.all_proposals_test = True

        self.annotations = []

        # Load images
        self.path = self.cfg.TEST_PATH if split == 'test' else self.cfg.TRAIN_VAL_PATH
        imgs = json.load(open(self.path))

        skipped_count = 0
        for img in imgs:
            if img['path'].endswith('.png'):
                img['path'] = '.'.join([img['path'].split('.')[0], 'jpg'])

            rels_cand = None
            if self.all_proposals_test and split != 'train':
                rels_cand = []
                nb_of_objs = len(img['objects'])
                if nb_of_objs > cfg.DATASET.MAX_NB_OF_OBJ:
                    nb_of_objs = cfg.DATASET.MAX_NB_OF_OBJ
                    skipped_count += 1
                for sub_id in range(0, nb_of_objs):
                    for obj_id in range(0, nb_of_objs):
                        if sub_id == obj_id: continue
                        rels_cand.append((sub_id, obj_id))

            annot = {
                'img_path': img['path'],
                'annot': img['relationships'],
                'objects': img['objects'],
                'rels_cand': rels_cand,
            }

            self.annotations.append(annot)

        print(
            f'number of imgs with skipped objs (skipped_count): {skipped_count}'
        )
        print('%d imgs in %s' % (len(self.annotations), split))

        # categories
        self.num_object_classes = len(self.cfg.OBJECT_CATEGORIES)
        self._object_class_to_ind = dict(
            zip(self.cfg.OBJECT_CATEGORIES, range(self.num_object_classes)))
        self.num_predicate_classes = len(self.cfg.PREDICATE_CATEGORIES)
        self._predicate_class_to_ind = dict(
            zip(self.cfg.PREDICATE_CATEGORIES,
                range(self.num_predicate_classes)))

        self.cache_dir = os.path.join(cfg.DATASET.ROOT_PATH, 'cache')
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = BertTokenizer.from_pretrained(
            'bert-base-uncased' if cfg.NETWORK.BERT_MODEL_NAME is None else
            cfg.NETWORK.BERT_MODEL_NAME,
            cache_dir=self.cache_dir)

        self.sample_rels = cfg.TRAIN.SAMPLE_RELS
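
The nested loops that fill rels_cand enumerate every ordered (subject, object) pair with distinct indices; itertools.permutations expresses the same enumeration directly:

from itertools import permutations

nb_of_objs = 4
rels_cand = list(permutations(range(nb_of_objs), 2))
print(len(rels_cand))  # 12 = 4 * 3 ordered pairs with sub_id != obj_id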
Example #7
    def __init__(self,
                 ann_file,
                 image_set,
                 root_path,
                 data_path,
                 seq_len=64,
                 with_precomputed_visual_feat=False,
                 mask_raw_pixels=True,
                 with_rel_task=True,
                 with_mlm_task=True,
                 with_mvrc_task=True,
                 transform=None,
                 test_mode=False,
                 zip_mode=False,
                 cache_mode=False,
                 cache_db=False,
                 ignore_db_cache=True,
                 tokenizer=None,
                 pretrained_model_name=None,
                 add_image_as_a_box=False,
                 aspect_grouping=False,
                 **kwargs):
        """
        COCO Captions Dataset

        :param ann_file: annotation jsonl file
        :param image_set: image set name, e.g., 'train' or 'val'
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to the COCO dataset
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(COCOCaptionsDataset, self).__init__()

        assert not cache_mode, 'cache mode is not currently supported!'
        assert not test_mode

        annot = {
            'train': 'annotations/captions_train2017.json',
            'val': 'annotations/captions_val2017.json'
        }
        annot_inst = {
            'train': 'annotations/instances_train2017.json',
            'val': 'annotations/instances_val2017.json'
        }
        if zip_mode:
            self.root = os.path.join(data_path,
                                     '{0}2017.zip@/{0}2017'.format(image_set))
        else:
            self.root = os.path.join(data_path, '{}2017'.format(image_set))

        self.seq_len = seq_len
        self.with_rel_task = with_rel_task
        self.with_mlm_task = with_mlm_task
        self.with_mvrc_task = with_mvrc_task
        self.data_path = data_path
        self.root_path = root_path
        self.ann_file = os.path.join(data_path, annot[image_set])
        self.ann_file_inst = os.path.join(data_path, annot_inst[image_set])
        self.with_precomputed_visual_feat = with_precomputed_visual_feat
        self.mask_raw_pixels = mask_raw_pixels
        self.image_set = image_set
        self.transform = transform
        self.test_mode = test_mode
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        if self.zip_mode:
            self.zipreader = ZipReader()

        self.coco = COCO(self.ann_file)
        self.coco_inst = COCO(self.ann_file_inst)
        self.ids = list(sorted(self.coco.imgs.keys()))
        # filter images without detection annotations
        self.ids = [
            img_id for img_id in self.ids
            if len(self.coco_inst.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
        ]

        self.json_category_id_to_contiguous_id = {
            v: i + 1
            for i, v in enumerate(self.coco_inst.getCatIds())
        }
        self.contiguous_category_id_to_json_id = {
            v: k
            for k, v in self.json_category_id_to_contiguous_id.items()
        }
        self.id_to_img_map = {k: v for k, v in enumerate(self.ids)}

        if self.aspect_grouping:
            assert False, "aspect grouping is not currently supported!"
            # self.group_ids = self.group_aspect(self.database)

        print('mask_raw_pixels: ', self.mask_raw_pixels)
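
COCO category ids are not contiguous (80 classes spread over ids 1..90), which is why the constructor remaps them to dense labels. A toy version of the same remapping (the id list here is illustrative, not the real COCO one):

cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11]  # toy list with a gap, like real COCO ids
json_to_contiguous = {v: i + 1 for i, v in enumerate(cat_ids)}
contiguous_to_json = {v: k for k, v in json_to_contiguous.items()}
print(json_to_contiguous[11], contiguous_to_json[10])  # 10 11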
Example #8
    def __init__(self,
                 image_set,
                 root_path,
                 data_path,
                 boxes='gt',
                 proposal_source='official',
                 transform=None,
                 test_mode=False,
                 zip_mode=False,
                 cache_mode=False,
                 cache_db=False,
                 ignore_db_cache=True,
                 tokenizer=None,
                 pretrained_model_name=None,
                 add_image_as_a_box=False,
                 mask_size=(14, 14),
                 aspect_grouping=False,
                 **kwargs):
        """
        RefCOCO+ Dataset

        :param image_set: image folder name
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to dataset
        :param boxes: boxes to use, 'gt' or 'proposal'
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param mask_size: size of instance mask of each object
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(RefCOCO, self).__init__()

        assert not cache_mode, 'cache mode is not currently supported!'

        categories = [
            '__background__', 'person', 'bicycle', 'car', 'motorcycle',
            'airplane', 'bus', 'train', 'truck', 'boat', 'trafficlight',
            'firehydrant', 'stopsign', 'parkingmeter', 'bench', 'bird', 'cat',
            'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
            'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
            'frisbee', 'skis', 'snowboard', 'sportsball', 'kite',
            'baseballbat', 'baseballglove', 'skateboard', 'surfboard',
            'tennisracket', 'bottle', 'wineglass', 'cup', 'fork', 'knife',
            'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
            'broccoli', 'carrot', 'hotdog', 'pizza', 'donut', 'cake', 'chair',
            'couch', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tv',
            'laptop', 'mouse', 'remote', 'keyboard', 'cellphone', 'microwave',
            'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddybear', 'hairdrier', 'toothbrush'
        ]

        coco_annot_files = {
            "train2014": "annotations/instances_train2014.json",
            "val2014": "annotations/instances_val2014.json",
            "test2015": "annotations/image_info_test2015.json",
        }
        proposal_dets = 'refcoco+/proposal/res101_coco_minus_refer_notime_dets.json'
        proposal_masks = 'refcoco+/proposal/res101_coco_minus_refer_notime_masks.json'
        self.vg_proposal = ("vgbua_res101_precomputed",
                            "trainval2014_resnet101_faster_rcnn_genome")
        self.proposal_source = proposal_source
        self.boxes = boxes
        self.test_mode = test_mode
        self.category_to_idx = {c: i for i, c in enumerate(categories)}
        self.data_path = data_path
        self.root_path = root_path
        self.transform = transform
        self.image_sets = [iset.strip() for iset in image_set.split('+')]
        self.coco = COCO(annotation_file=os.path.join(
            data_path, coco_annot_files['train2014']))
        self.refer = REFER(data_path, dataset='refcoco+', splitBy='unc')
        self.refer_ids = []
        for iset in self.image_sets:
            self.refer_ids.extend(self.refer.getRefIds(split=iset))
        self.refs = self.refer.loadRefs(ref_ids=self.refer_ids)
        if 'proposal' in boxes:
            with open(os.path.join(data_path, proposal_dets), 'r') as f:
                proposal_list = json.load(f)
            self.proposals = {}
            for proposal in proposal_list:
                image_id = proposal['image_id']
                if image_id in self.proposals:
                    self.proposals[image_id].append(proposal['box'])
                else:
                    self.proposals[image_id] = [proposal['box']]
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        self.mask_size = mask_size
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        if zip_mode:
            self.zipreader = ZipReader()

        self.database = self.load_annotations()
        if self.aspect_grouping:
            self.group_ids = self.group_aspect(self.database)
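
The proposal-grouping loop near the end of this constructor is an append-or-create pattern; collections.defaultdict gives the same grouping with less branching (a sketch with made-up boxes, assuming each proposal dict has 'image_id' and 'box' keys as above):

from collections import defaultdict

proposal_list = [
    {'image_id': 1, 'box': [0, 0, 10, 10]},
    {'image_id': 1, 'box': [5, 5, 20, 20]},
    {'image_id': 2, 'box': [1, 1, 8, 8]},
]
proposals = defaultdict(list)
for proposal in proposal_list:
    proposals[proposal['image_id']].append(proposal['box'])
print(len(proposals[1]))  # 2 boxes grouped under image 1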
Example #9
    def load_annotations(self):
        tic = time.time()
        database = []
        db_cache_name = 'vrep_boxes'  # suffix '_{}_{}'.format(self.boxes, '+'.join(self.image_sets)) disabled
        if self.zip_mode:
            db_cache_name = db_cache_name + '_zipmode'
        if self.test_mode:
            db_cache_name = db_cache_name + '_testmode'
        db_cache_root = os.path.join(self.root_path, 'cache')
        db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))
        dataset = self._load_json(os.path.join(self.data_path, self.data_json))
        ref = self._load_json(os.path.join(self.data_path, self.ref_json))
        if os.path.exists(db_cache_path):
            if not self.ignore_db_cache:
                # reading cached database
                print('cached database found in {}.'.format(db_cache_path))
                with open(db_cache_path, 'rb') as f:
                    print('loading cached database from {}...'.format(db_cache_path))
                    tic = time.time()
                    database = cPickle.load(f)
                    print('Done (t={:.2f}s)'.format(time.time() - tic))
                    return database
            else:
                print('cached database ignored.')

        # cache ignored or not found; reload the database from the annotation file
        #print('loading database of split {}...'.format('+'.join(self.image_sets)))
        tic = time.time()

        refer_id = 0

        for data_point in dataset['images']:
            iset = 'full_images'
            image_name = data_point['file_name'].split('/')[3]
            for anno in data_point['annotations']:
                if anno['id'] == data_point['ground_truth']:
                    gt_x, gt_y, gt_w, gt_h = anno['bbox']
            if self.zip_mode:
                image_fn = os.path.join(self.data_path, iset + '.zip@/' + iset, image_name)
            else:
                image_fn = os.path.join(self.data_path, iset, image_name)
            for sent in ref[image_name]:
                idb = {
                    #'sent_id': sent['sent_id'],
                    #'ann_id': ref['ann_id'],
                    'ref_id': refer_id,
                    'image_id': image_name,
                    'image_fn': image_fn,
                    'width': 1024,
                    'height': 576,
                    'raw': sent,
                    'sent': sent,
                    'tokens': self.tokenizer.tokenize(sent),
                    #'category_id': ref['category_id'],
                    'gt_box': [gt_x, gt_y, gt_x + gt_w, gt_y + gt_h] if not self.test_mode else None
                }
                self.refer.ref_id_to_box[refer_id] = [image_name, [gt_x, gt_y, gt_w, gt_h], sent]
                database.append(idb)
                refer_id += 1

        with open('./final_refer_testset', 'w') as f:
            json.dump(self.refer.ref_id_to_box, f)

        print('Done (t={:.2f}s)'.format(time.time() - tic))

        # cache database via cPickle
        if self.cache_db:
            print('caching database to {}...'.format(db_cache_path))
            tic = time.time()
            if not os.path.exists(db_cache_root):
                makedirsExist(db_cache_root)
            with open(db_cache_path, 'wb') as f:
                cPickle.dump(database, f)
            print('Done (t={:.2f}s)'.format(time.time() - tic))

        return database
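
The gt_box expression converts a COCO-style [x, y, w, h] annotation into [x1, y1, x2, y2] corners, the convention these datasets feed to the model. As a standalone helper (the name is illustrative):

def xywh_to_xyxy(box):
    # COCO annotations store (x, y, width, height); the model wants corner coordinates.
    x, y, w, h = box
    return [x, y, x + w, y + h]


print(xywh_to_xyxy([10, 20, 30, 40]))  # [10, 20, 40, 60]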
Example #10
    def load_annotations(self):
        tic = time.time()
        entries = []
        imgid2psid = {}
        count = 0

        if self.image_sets[0] == 'test':
            split_value = 'val'
        else:
            split_value = self.image_sets[0]

        # image to ps id
        index_i = 0
        for i, annotation_ps in enumerate(self.setting):
            if annotation_ps['split'] == split_value:
                self.trainval_index_to_id[index_i] = annotation_ps['id']
                index_i += 1
                if annotation_ps['id'] in self.ps_map:
                    self.ps_map[annotation_ps['id']].append(i)
                else:
                    self.ps_map[annotation_ps['id']] = []
                    self.ps_map[annotation_ps['id']].append(i)
        # for cls, id_each in enumerate(self.ps_map):
        #     self.trainval_id_to_cls[id_each] = cls
        cls_id = 0
        if self.image_sets[0] == 'train':
            self.setting = self.setting[:34054]
        if self.image_sets[0] == 'val':
            self.setting = self.setting[34054:37132]
        if self.image_sets[0] == 'test':
            self.setting = self.setting[37132:]

        for annotation in self.setting:  #[34054:37132]: #[:1000]
            if annotation['split'] != '':  # split_value:
                self.image_nums += 1
                image_id = annotation['file_path']
                imgid2psid[image_id] = annotation['id']

                self.imgid2entry[image_id] = []

                if split_value == 'train':
                    for sentences in annotation['captions']:
                        for i in self.ps_map[annotation['id']]:
                            annotation_sameid = self.setting[i]
                            entries.append({
                                "caption": sentences.split(),
                                'image_id': self.data_path + "/imgs/" + annotation_sameid['file_path'],
                                'id': annotation['id'],
                            })
                else:
                    image_id = annotation['file_path']
                    for sentences in annotation['captions']:
                        entries.append({
                            "caption": sentences.split(),
                            'image_id': self.data_path + "/imgs/" + image_id,
                            'id': annotation['id']
                        })
                        count += 1
                if annotation['id'] not in self.trainval_id_to_cls:
                    self.trainval_id_to_cls[annotation['id']] = torch.tensor(cls_id).long()
                    cls_id += 1

        return entries

Example #11
    def load_annotations(self):
        tic = time.time()
        database = []
        db_cache_name = 'vqa_cp2_boxes{}_{}'.format(self.boxes, '+'.join(self.image_sets))
        if self.with_precomputed_visual_feat:
            db_cache_name += 'visualprecomp'
        if self.zip_mode:
            db_cache_name = db_cache_name + '_zipmode'
        if self.test_mode:
            db_cache_name = db_cache_name + '_testmode'
        db_cache_root = os.path.join(self.root_path, 'cache')
        db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))

        if os.path.exists(db_cache_path):
            if not self.ignore_db_cache:
                # reading cached database
                print('cached database found in {}.'.format(db_cache_path))
                with open(db_cache_path, 'rb') as f:
                    print('loading cached database from {}...'.format(db_cache_path))
                    tic = time.time()
                    database = cPickle.load(f)
                    print('Done (t={:.2f}s)'.format(time.time() - tic))
                    return database
            else:
                print('cached database ignored.')

        # cache ignored or not found; reload the database from the annotation file
        print('loading database of split {}...'.format('+'.join(self.image_sets)))
        tic = time.time()

        for ann_file, q_file, coco_path, box_file \
                in zip(self.ann_files, self.q_files, self.coco_datasets, self.precomputed_box_files):
            qs = self._load_json(q_file)
            anns = self._load_json(ann_file) if not self.test_mode else ([None] * len(qs))

            # three COCO objects, one per split (note: rebuilt on every pass of this loop)
            coco_train2014 = COCO(self.coco_dataset['train2014'])
            coco_val2014 = COCO(self.coco_dataset['val2014'])
            coco_test2015 = COCO(self.coco_dataset['test2015'])
            for ann, q in zip(anns, qs):
                if q['coco_split'] == 'train2014':
                    coco_obj = coco_train2014
                    box_dir = 'trainval2014'
                elif q['coco_split'] == 'val2014':
                    coco_obj = coco_val2014
                    box_dir = 'trainval2014'
                elif q['coco_split'] == 'test2015':
                    coco_obj = coco_test2015
                    box_dir = 'test2015'
                else:
                    raise ValueError("COCO split in question : {} not supported".format(q['coco_split']))

                idb = {'image_id': q['image_id'],
                       'image_fn': coco_path.format(q['coco_split'], q['coco_split'], q['image_id']),
                       'width': coco_obj.imgs[q['image_id']]['width'],
                       'height': coco_obj.imgs[q['image_id']]['height'],
                       'box_fn': os.path.join(box_file.format(box_dir), '{}.json'.format(q['image_id'])),
                       'question_id': q['question_id'],
                       'question': q['question'],
                       'answers': [a['answer'] for a in ann['answers']] if not self.test_mode else None,
                       'multiple_choice_answer': ann['multiple_choice_answer'] if not self.test_mode else None,
                       "question_type": ann['question_type'] if not self.test_mode else None,
                       "answer_type": ann['answer_type'] if not self.test_mode else None,
                       }
                database.append(idb)

        print('Done (t={:.2f}s)'.format(time.time() - tic))

        # cache database via cPickle
        if self.cache_db:
            print('caching database to {}...'.format(db_cache_path))
            tic = time.time()
            if not os.path.exists(db_cache_root):
                makedirsExist(db_cache_root)
            with open(db_cache_path, 'wb') as f:
                cPickle.dump(database, f)
            print('Done (t={:.2f}s)'.format(time.time() - tic))

        return database
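
Routing each question to the right COCO object and box directory is a three-way dispatch; a table-driven sketch of the same routing (only the box_dir half is shown):

SPLIT_TO_BOX_DIR = {
    'train2014': 'trainval2014',
    'val2014': 'trainval2014',
    'test2015': 'test2015',
}

coco_split = 'val2014'
try:
    box_dir = SPLIT_TO_BOX_DIR[coco_split]
except KeyError:
    raise ValueError("COCO split in question : {} not supported".format(coco_split))
print(box_dir)  # trainval2014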
Example #12
    def __init__(self,
                 image_set,
                 root_path,
                 data_path,
                 boxes='gt',
                 proposal_source='official',
                 transform=None,
                 test_mode=False,
                 zip_mode=False,
                 cache_mode=False,
                 cache_db=False,
                 ignore_db_cache=True,
                 tokenizer=None,
                 pretrained_model_name=None,
                 add_image_as_a_box=False,
                 mask_size=(14, 14),
                 aspect_grouping=False,
                 parts=1,
                 number_sep=1,
                 part_methods='VS',
                 **kwargs):
        """
        Pedes (person search) Dataset

        :param image_set: image folder name
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to dataset
        :param boxes: boxes to use, 'gt' or 'proposal'
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param mask_size: size of instance mask of each object
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(Pedes, self).__init__()

        assert not cache_mode, 'cache mode is not currently supported!'

        self.pedes_annot_files = {
            "trainval": "trainval.json",
        }

        self.vg_proposal = ("vgbua_res101_precomputed",
                            "trainval2014_resnet101_faster_rcnn_genome")
        self.proposal_source = proposal_source
        self.boxes = boxes
        self.test_mode = test_mode

        self.data_path = data_path
        self.root_path = root_path
        self.transform = transform
        self.image_sets = [iset.strip() for iset in image_set.split('+')]
        # self.coco = COCO(annotation_file=os.path.join(data_path, coco_annot_files['train2014']))
        # self.refer = REFER(data_path, dataset='refcoco+', splitBy='unc')
        # self.refer_ids = []
        # for iset in self.image_sets:
        #     self.refer_ids.extend(self.refer.getRefIds(split=iset))
        # self.refs = self.refer.loadRefs(ref_ids=self.refer_ids)

        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        self.mask_size = mask_size
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        self.trainval_id_to_cls = {}
        self.image_nums = 0
        self.imgid2entry = {}
        self.ps_map = {}
        self.imgid2psid = {}
        self.trainval_index_to_id = {}
        with open(os.path.join(self.data_path, self.pedes_annot_files['trainval'])) as f:
            self.setting = json.load(f)
        self.database = self.load_annotations()
        # if self.aspect_grouping:
        #     self.group_ids = self.group_aspect(self.database)
        self.part = parts
        self.max_word = 50

        self.val_images = []
        self.val_boxes = []
        self.val_im_info = []
        self.val_ids = []
        self.val_feat = []
        self.diff = 7

        self.use_JPP = (part_methods == 'KS')

        self.number_sep = number_sep
        self.number_parts = self.number_sep * self.part - self.number_sep + 1

        if self.use_JPP:
            with open(os.path.join(self.data_path, 'result.json')) as f_box:  # box_frcnn.json
                self.JPP_boxes = json.load(f_box)
Example #13
    def __init__(self,
                 captions_set,
                 ann_file,
                 roi_set,
                 image_set,
                 root_path,
                 data_path,
                 small_version=False,
                 negative_sampling='hard',
                 phrase_cls=True,
                 transform=None,
                 test_mode=False,
                 zip_mode=False,
                 cache_mode=False,
                 cache_db=False,
                 ignore_db_cache=True,
                 basic_tokenizer=None,
                 tokenizer=None,
                 pretrained_model_name=None,
                 add_image_as_a_box=True,
                 on_memory=False,
                 **kwargs):
        """
        Visual Grounded Paraphrase Dataset

        :param ann_file: annotation csv file
        :param image_set: image folder name, e.g., 'vcr1images'
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to vcr dataset
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param kwargs:
        """
        super(VGPDataset, self).__init__()

        # temporarily enable cache mode and see if it works
        # assert not cache_mode, 'cache mode is not currently supported!'

        self.data_path = data_path
        self.root_path = root_path
        self.captions_set = os.path.join(data_path, captions_set)
        self.ann_file = os.path.join(data_path, ann_file)
        self.roi_set = os.path.join(data_path, roi_set)
        self.image_set = os.path.join(self.data_path, image_set)
        self.small = small_version
        self.neg_sampling = negative_sampling
        self.phrase_cls = phrase_cls
        self.transform = transform
        self.test_mode = test_mode
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        self.on_memory = False  # on_memory=True doesn't work, so it is forced off here
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.basic_tokenizer = basic_tokenizer if basic_tokenizer is not None \
            else BasicTokenizer(do_lower_case=True)
        if tokenizer is None:
            if pretrained_model_name is None:
                pretrained_model_name = 'bert-base-uncased'
            if 'roberta' in pretrained_model_name:
                tokenizer = RobertaTokenizer.from_pretrained(
                    pretrained_model_name)
            else:
                tokenizer = BertTokenizer.from_pretrained(
                    pretrained_model_name)
        self.tokenizer = tokenizer

        if zip_mode:
            self.zipreader = ZipReader()

        self.database = self.load_captions(self.captions_set)
Example #14
    def __init__(self,
                 ann_file,
                 image_set,
                 root_path,
                 data_path,
                 seq_len=64,
                 with_precomputed_visual_feat=False,
                 mask_raw_pixels=True,
                 with_rel_task=True,
                 with_mlm_task=True,
                 with_mvrc_task=True,
                 transform=None,
                 test_mode=False,
                 zip_mode=False,
                 cache_mode=False,
                 cache_db=False,
                 ignore_db_cache=True,
                 tokenizer=None,
                 pretrained_model_name=None,
                 add_image_as_a_box=False,
                 aspect_grouping=False,
                 **kwargs):
        """
        Parallel Text Dataset (customised for Multi30k)

        :param ann_file: annotation jsonl file
        :param image_set: dataset split name: 'train', 'val' or 'test'
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to the dataset
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(ParallelTextDataset, self).__init__()

        assert not cache_mode, 'cache mode is not currently supported!'
        assert not test_mode

        annot = {
            'train': 'train.json',
            'val': 'test.json',
            'test': 'test.json'
        }

        self.seq_len = seq_len
        self.with_rel_task = with_rel_task
        self.with_mlm_task = with_mlm_task
        self.with_mvrc_task = with_mvrc_task
        self.data_path = data_path
        self.root_path = root_path
        self.ann_file = os.path.join(data_path, annot[image_set])
        self.with_precomputed_visual_feat = with_precomputed_visual_feat
        self.mask_raw_pixels = mask_raw_pixels
        self.image_set = image_set
        self.transform = transform
        self.test_mode = test_mode
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        self.zipreader = ZipReader()

        # FM: Customise for multi30k dataset
        self.database = list(jsonlines.open(self.ann_file))

        if self.aspect_grouping:
            assert False, "aspect grouping is not currently supported!"
            self.group_ids = self.group_aspect(self.database)

        print('mask_raw_pixels: ', self.mask_raw_pixels)
Example #15
    def load_captions(self, captions_set):
        database = []
        db_cache_name = 'vgp_nometa'
        db_cache_root = os.path.join(self.root_path, 'cache')
        db_cache_path = os.path.join(db_cache_root,
                                     '{}.pkl'.format(db_cache_name))
        if os.path.exists(db_cache_path):
            if not self.ignore_db_cache:
                # reading cached database
                print('cached database found in {}.'.format(db_cache_path))
                with open(db_cache_path, 'rb') as f:
                    print('loading cached database from {}...'.format(
                        db_cache_path))
                    tic = time.time()
                    database = cPickle.load(f)
                    print('Done (t={:.2f}s)'.format(time.time() - tic))
                    return database
            else:
                print('cached database ignored.')

        # cache ignored or not found; reload the database from the annotation file
        print('loading database from {} and creating pairs...'.format(
            captions_set))
        tic = time.time()
        if self.neg_sampling == "hard":
            path_similarities = os.path.join(self.captions_set,
                                             "similarities.csv")
            if not os.path.exists(path_similarities):
                print("Hard negative mining has not been run for this set of "
                      "captions; running it now")
                model_path = os.path.join(
                    os.getcwd(),
                    "model/pretrained_model/resnet101-pt-vgbua-0000.model")
                main(self.captions_set,
                     self.image_set,
                     model_path,
                     batch_size=4,
                     n_neighbors=20,
                     use_saved=True)
            similarities_df = pd.read_csv(path_similarities)
        if self.phrase_cls:
            phrases_df = pd.read_csv(self.ann_file)
        img_id_list = np.array(os.listdir(captions_set))
        for k, folder in enumerate(img_id_list):
            if folder.endswith(".txt"):
                img_id = folder[:-4]
                path = os.path.join(captions_set, folder)
                # Avoid ascii errors for some captions
                try:
                    with open(path) as f:
                        list_captions = f.read().split("\n")[:-1]
                except UnicodeDecodeError:
                    with open(path, 'r', encoding="utf-8") as f:
                        list_captions = f.read().split("\n")[:-1]

                if self.small:
                    positive_captions = np.random.choice(list_captions,
                                                         2,
                                                         replace=False)
                    n_negative = 1
                else:
                    positive_captions = list_captions
                    n_negative = 2
                # Create pairs of captions that describe the same image
                for i in range(len(positive_captions)):
                    for j in range(i):
                        # create a unique id for each instance in the data set
                        pair_id = "{}_{}_{}".format(str(k), str(i), str(j))
                        db_i = {
                            'pair_id': pair_id,
                            'img_id': img_id,
                            'caption1': list_captions[i],
                            'caption2': list_captions[j],
                            'label': 0
                        }
                        if self.phrase_cls:
                            db_i["phrases_1"], db_i["phrases_2"], \
                            db_i["phrase_labels"] = get_clean_phrases(phrases_df, img_id, list_captions[i],
                                                                      list_captions[j])
                        if self.on_memory:
                            # db_i["image"] = open(os.path.join(self.image_set, img_id + ".jpg"), "rb")
                            image = Image.open(
                                os.path.join(self.image_set, img_id + ".jpg"))
                            db_i["image"] = image.copy()
                            image.close()

                        database.append(db_i)

                # Select one or two negative captions
                if self.neg_sampling == 'random':
                    other_imgs = img_id_list[img_id_list != folder]
                    # Fix the seed to have data set reproducibility
                    np.random.seed(k)
                    neg_image = np.random.choice(other_imgs, size=1)[0]
                    neg_path = os.path.join(captions_set, neg_image)
                else:
                    if self.neg_sampling != "hard":
                        print(
                            "{} negative sampling is not supported, hard negative sampling will "
                            "be used".format(self.neg_sampling))
                    similar_img_idx = similarities_df[
                        similarities_df["img_id"] == int(
                            img_id)]["2"].values[0]
                    neg_img = similarities_df.iloc[similar_img_idx]["img_id"]
                    neg_path = os.path.join(captions_set,
                                            str(neg_img) + ".txt")

                # Create negative pairs
                # Avoid ascii errors for some captions
                try:
                    with open(neg_path) as f:
                        neg_captions = f.read().split("\n")[:-1]
                except UnicodeDecodeError:
                    with open(neg_path, 'r', encoding="utf-8") as f:
                        neg_captions = f.read().split("\n")[:-1]
                neg_captions = np.random.choice(neg_captions,
                                                size=n_negative,
                                                replace=False)
                for idx, caption in enumerate(positive_captions):
                    # if we want the small data set only create one negative pair
                    if self.small and idx > 0:
                        break
                    else:
                        for idx_bis, wrong_caption in enumerate(neg_captions):
                            # Randomly flip whether the wrong caption comes first or second, fix the seed for every image
                            np.random.seed(k + idx + idx_bis)
                            flip = np.random.randint(2, size=1).astype(bool)[0]
                            pair_id = "{}_{}_{}".format(
                                str(k), str(idx),
                                str(idx_bis + len(positive_captions)))
                            db_i = {
                                'pair_id': pair_id,
                                'img_id': img_id,
                                'label': 1 + flip
                            }
                            if flip:
                                db_i['caption1'] = wrong_caption
                                db_i['caption2'] = caption
                            else:
                                db_i['caption1'] = caption
                                db_i['caption2'] = wrong_caption

                            if self.on_memory:
                                # db_i["image"] = open(os.path.join(self.image_set, img_id + ".jpg"), "rb")
                                image = Image.open(
                                    os.path.join(self.image_set,
                                                 img_id + ".jpg"))
                                db_i["image"] = image.copy()
                                image.close()
                            database.append(db_i)
            else:
                continue
        print('Done (t={:.2f}s)'.format(time.time() - tic))

        # cache database via cPickle
        if self.cache_db:
            print('caching database to {}...'.format(db_cache_path))
            tic = time.time()
            if not os.path.exists(db_cache_root):
                makedirsExist(db_cache_root)
            with open(db_cache_path, 'wb') as f:
                cPickle.dump(database, f)
            print('Done (t={:.2f}s)'.format(time.time() - tic))

        return database
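
The negative pairs above encode the position of the wrong caption in the label: 1 if the wrong caption comes second, 2 if it comes first. A toy rendering of that flip logic:

import numpy as np

caption, wrong_caption = 'a dog on the grass', 'a red bus'
np.random.seed(0)
flip = np.random.randint(2, size=1).astype(bool)[0]
pair = {'label': 1 + flip}
pair['caption1'], pair['caption2'] = (
    (wrong_caption, caption) if flip else (caption, wrong_caption))
print(pair)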
Example #16
    def load_annotations(self):
        tic = time.time()
        database = []
        if self.use_imdb:
            db_cache_name = 'vqa2_imdb_boxes{}_{}'.format(self.boxes, '+'.join(self.image_sets))
        else:
            db_cache_name = 'vqa2_nonimdb_boxes{}_{}'.format(self.boxes, '+'.join(self.image_sets))
        if self.with_precomputed_visual_feat:
            db_cache_name += 'visualprecomp'
        if self.zip_mode:
            db_cache_name = db_cache_name + '_zipmode'
        if self.test_mode:
            db_cache_name = db_cache_name + '_testmode'
        db_cache_root = os.path.join(self.root_path, 'cache')
        db_cache_path = os.path.join(db_cache_root, '{}.pkl'.format(db_cache_name))

        if os.path.exists(db_cache_path):
            if not self.ignore_db_cache:
                # reading cached database
                print('cached database found in {}.'.format(db_cache_path))
                with open(db_cache_path, 'rb') as f:
                    print('loading cached database from {}...'.format(db_cache_path))
                    tic = time.time()
                    database = cPickle.load(f)
                    print('Done (t={:.2f}s)'.format(time.time() - tic))
                    return database
            else:
                print('cached database ignored.')

        # cache ignored or not found; rebuild the database from the annotation files
        print('loading database of split {}...'.format('+'.join(self.image_sets)))
        tic = time.time()

        if self.use_imdb:
            for imdb_file, (coco_path, coco_annot), box_file \
                    in zip(self.imdb_files, self.coco_datasets, self.precomputed_box_files):
                print("loading imdb: {}".format(imdb_file))
                imdb = np.load(imdb_file, allow_pickle=True)
                print("imdb info:")
                pprint.pprint(imdb[0])

                coco = COCO(coco_annot)
                for item in imdb[1:]:
                    idb = {'image_id': item['image_id'],
                           'image_fn': coco_path.format(item['image_id']),
                           'width': coco.imgs[item['image_id']]['width'],
                           'height': coco.imgs[item['image_id']]['height'],
                           'box_fn': os.path.join(box_file, '{}.json'.format(item['image_id'])),
                           'question_id': item['question_id'],
                           'question_tokens': item['question_tokens'],
                           'answers': item['answers'] if not self.test_mode else None,
                           }
                    database.append(idb)
        else:
            for ann_file, q_file, (coco_path, coco_annot), box_file \
                    in zip(self.ann_files, self.q_files, self.coco_datasets, self.precomputed_box_files):
                qs = self._load_json(q_file)['questions']
                anns = self._load_json(ann_file)['annotations'] if not self.test_mode else ([None] * len(qs))
                coco = COCO(coco_annot)
                for ann, q in zip(anns, qs):
                    idb = {'image_id': q['image_id'],
                           'image_fn': coco_path.format(q['image_id']),
                           'width': coco.imgs[q['image_id']]['width'],
                           'height': coco.imgs[q['image_id']]['height'],
                           'box_fn': os.path.join(box_file, '{}.json'.format(q['image_id'])),
                           'question_id': q['question_id'],
                           'question': q['question'],
                           'answers': [a['answer'] for a in ann['answers']] if not self.test_mode else None,
                           'multiple_choice_answer': ann['multiple_choice_answer'] if not self.test_mode else None,
                           "question_type": ann['question_type'] if not self.test_mode else None,
                           "answer_type": ann['answer_type'] if not self.test_mode else None,
                           }
                    database.append(idb)

        print('Done (t={:.2f}s)'.format(time.time() - tic))

        # cache database via cPickle
        if self.cache_db:
            print('caching database to {}...'.format(db_cache_path))
            tic = time.time()
            if not os.path.exists(db_cache_root):
                makedirsExist(db_cache_root)
            with open(db_cache_path, 'wb') as f:
                cPickle.dump(database, f)
            print('Done (t={:.2f}s)'.format(time.time() - tic))

        return database
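
Every load_annotations in these examples wraps its parsing in the same load-or-rebuild pickle cache. A condensed sketch of that shared pattern, with hypothetical names (build_fn stands in for the per-dataset annotation parsing):

import os
import pickle

def load_or_build(cache_path, build_fn, ignore_cache=False):
    # Reuse the cached database unless the caller asks to ignore it
    if os.path.exists(cache_path) and not ignore_cache:
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    # Rebuild from the annotation files and refresh the cache
    database = build_fn()
    os.makedirs(os.path.dirname(cache_path) or '.', exist_ok=True)
    with open(cache_path, 'wb') as f:
        pickle.dump(database, f)
    return database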
Example #17
    def __init__(self, ann_file, image_set, root_path, data_path, seq_len=64,
                 with_precomputed_visual_feat=False, mask_raw_pixels=True,
                 with_rel_task=True, with_mlm_task=False, with_mvrc_task=False,
                 transform=None, test_mode=False,
                 zip_mode=False, cache_mode=False, cache_db=False, ignore_db_cache=True,
                 tokenizer=None, pretrained_model_name=None,
                 add_image_as_a_box=False,
                 aspect_grouping=False, languages_used='first', **kwargs):
        """
        Conceptual Captions Dataset

        :param ann_file: annotation jsonl file
        :param image_set: image folder name, e.g., 'vcr1images'
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to vcr dataset
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(Multi30kDataset_5x_Mixed, self).__init__()

        assert not cache_mode, 'cache mode is currently not supported!'
        # TODO: remove this to allow testing
        # assert not test_mode

        annot = {'train': 'train_frcnn_5captions_both.json',
                 'val': 'val_frcnn.json',
                 'test2015': 'test_frcnn.json'}

        self.seq_len = seq_len
        self.with_rel_task = with_rel_task
        self.with_mlm_task = with_mlm_task
        self.with_mvrc_task = with_mvrc_task
        self.data_path = data_path
        self.root_path = root_path
        self.ann_file = os.path.join(data_path, annot[image_set])
        self.with_precomputed_visual_feat = with_precomputed_visual_feat
        self.mask_raw_pixels = mask_raw_pixels
        self.image_set = image_set
        self.transform = transform
        self.test_mode = test_mode
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        # FM edit: added option selecting which language(s) are used
        self.languages_used = languages_used
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        self.zipreader = ZipReader()

        # FM: Customise for multi30k dataset
        if not self.test_mode:
            self.database = list(jsonlines.open(self.ann_file))
            db_size = len(self.database)   
            print('**************')         
            print('Size before: ', db_size)         
            if not self.zip_mode:
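                # remove the '.zip@' marker and the '.0'-'.3' suffixes so the
                # paths refer to plain files on disk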
                for i, idb in enumerate(self.database):
                    self.database[i]['frcnn'] = idb['frcnn'].replace('.zip@', '')\
                        .replace('.0', '').replace('.1', '').replace('.2', '').replace('.3', '')
                    self.database[i]['image'] = idb['image'].replace('.zip@', '')

            # duplicate the database: the first copy is used for English, the second for German
            database_2 = copy.deepcopy(self.database)
            self.database = self.database + database_2
            print('**************')         
            print('Size after: ', len(self.database)) 
            for i, idb in enumerate(self.database):
                if i < db_size:
                    self.database[i]['lang'] = 'first'
                else:
                    self.database[i]['lang'] = 'second'
        # FM edit: create dataset for test mode 
        else:
            self.simple_database = list(jsonlines.open(self.ann_file))
            if not self.zip_mode:
                for i, idb in enumerate(self.simple_database):
                    self.simple_database[i]['frcnn'] = idb['frcnn'].replace('.zip@', '')\
                        .replace('.0', '').replace('.1', '').replace('.2', '').replace('.3', '')
                    self.simple_database[i]['image'] = idb['image'].replace('.zip@', '')
            # create database cross-coupling each caption with all images
            self.database = []
            db_index = 0
            for x, idb_x in enumerate(self.simple_database):
                for y, idb_y in enumerate(self.simple_database):                    
                    self.database.append({})
                    self.database[db_index]['label'] = 1.0 if x == y else 0.0
                    self.database[db_index]['caption_en'] = self.simple_database[x]['caption_en']
                    self.database[db_index]['caption_de'] = self.simple_database[x]['caption_de']
                    self.database[db_index]['image'] = self.simple_database[y]['image']
                    self.database[db_index]['frcnn'] = self.simple_database[y]['frcnn']
                    self.database[db_index]['caption_index'] = x
                    self.database[db_index]['image_index'] = y
                    db_index += 1

        if self.aspect_grouping:
            assert False, "aspect grouping is currently not supported!"
            self.group_ids = self.group_aspect(self.database)

        print('mask_raw_pixels: ', self.mask_raw_pixels)
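
In test mode the database is the full Cartesian product of captions and images: N annotations become N*N pairs, and only the N diagonal (x == y) pairs are labelled 1.0. The cross-coupling in isolation (function name hypothetical; only a subset of the copied fields is shown):

import itertools

def cross_couple(entries):
    # Pair every caption with every image; only the diagonal is positive
    database = []
    for x, y in itertools.product(range(len(entries)), repeat=2):
        database.append({
            'label': 1.0 if x == y else 0.0,
            'caption_en': entries[x]['caption_en'],
            'image': entries[y]['image'],
            'caption_index': x,
            'image_index': y,
        })
    return database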
Example #18
    def load_annotations(self):
        tic = time.time()
        database = []
        db_cache_name = 'foil_{}'.format(self.image_set)
        if self.zip_mode:
            db_cache_name = db_cache_name + '_zipmode'
        if self.test_mode:
            db_cache_name = db_cache_name + '_testmode'
        db_cache_root = os.path.join(self.root_path, 'cache')
        db_cache_path = os.path.join(db_cache_root,
                                     '{}.pkl'.format(db_cache_name))

        if os.path.exists(db_cache_path):
            if not self.ignore_db_cache:
                # reading cached database
                print('cached database found in {}.'.format(db_cache_path))
                with open(db_cache_path, 'rb') as f:
                    print('loading cached database from {}...'.format(
                        db_cache_path))
                    tic = time.time()
                    database = cPickle.load(f)
                    print('Done (t={:.2f}s)'.format(time.time() - tic))
                    return database
            else:
                print('cached database ignored.')

        # cache ignored or not found; rebuild the database from the annotation files
        print('loading database of split {}...'.format(self.image_set))
        tic = time.time()

        for foil_id, foil in zip(self.foil_ids, self.foils):
            iset = 'train2014'
            if self.zip_mode:
                image_fn = os.path.join(
                    self.data_path, iset + '.zip@/' + iset,
                    'COCO_{}_{:012d}.jpg'.format(iset, foil['image_id']))
            else:
                image_fn = os.path.join(
                    self.root_path, self.data_path, iset,
                    'COCO_{}_{:012d}.jpg'.format(iset, foil['image_id']))

            expression_tokens = self.tokenizer.basic_tokenizer.tokenize(
                foil['caption'])
            expression_wps = []
            for token in expression_tokens:
                expression_wps.extend(
                    self.tokenizer.wordpiece_tokenizer.tokenize(token))

            word_offsets = [0]

            for i, wp in enumerate(expression_wps):
                if wp[0] == '#':
                    # continuation wordpiece ('##...'): still inside a single word
                    continue
                else:
                    # beginning of a new word
                    word_offsets.append(i)

            word_offsets.append(len(expression_wps))

            target_word = foil['target_word']
            foil_word = foil['foil_word']
            target_wps = None
            target_pos = -1
            if foil['foil']:
                foil_wps = self.tokenizer.wordpiece_tokenizer.tokenize(
                    foil_word)
                twps_len = len(foil_wps)
                # +1 so a foil word ending at the last wordpiece is still found
                for i in range(len(expression_wps) - twps_len + 1):
                    if expression_wps[i:i + twps_len] == foil_wps:
                        target_pos = i
                        break
            else:
                twps_len = 1
            idb = {
                'ann_id': foil['id'],
                'foil_id': foil['foil_id'],
                'image_id': foil['image_id'],
                'image_fn': image_fn,
                'width': self.coco.imgs[foil['image_id']]['width'],
                'height': self.coco.imgs[foil['image_id']]['height'],
                'caption': foil['caption'].strip(),
                'caption_tokens': expression_wps,
                'target_word': foil['target_word'],
                'target': self.stoi.get(foil['target_word'], 0),
                'foil_word': foil['foil_word'],
                'label': foil['foil'],
                'pos': target_pos,
                'mask': twps_len
            }
            database.append(idb)

        print('Done (t={:.2f}s)'.format(time.time() - tic))

        # cache database via cPickle
        if self.cache_db:
            print('caching database to {}...'.format(db_cache_path))
            tic = time.time()
            if not os.path.exists(db_cache_root):
                makedirsExist(db_cache_root)
            with open(db_cache_path, 'wb') as f:
                cPickle.dump(database, f)
            print('Done (t={:.2f}s)'.format(time.time() - tic))

        return database
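
The foil-localisation loop above is a plain subsequence search over wordpieces. The same search factored out (helper name hypothetical), keeping the inclusive upper bound so a foil word at the very end of the caption is found:

def find_subsequence(wordpieces, target_wps):
    # Return the first index where target_wps occurs inside wordpieces, else -1
    n = len(target_wps)
    for i in range(len(wordpieces) - n + 1):
        if wordpieces[i:i + n] == target_wps:
            return i
    return -1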
Example #19
    def __init__(self,
                 root_path,
                 data_path,
                 boxes='gt',
                 proposal_source='official',
                 transform=None,
                 test_mode=False,
                 zip_mode=False,
                 cache_mode=False,
                 cache_db=False,
                 ignore_db_cache=True,
                 tokenizer=None,
                 pretrained_model_name=None,
                 add_image_as_a_box=False,
                 mask_size=(14, 14),
                 aspect_grouping=False,
                 **kwargs):
        """
        Foil Dataset

        :param image_set: image folder name
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to dataset
        :param boxes: boxes to use, 'gt' or 'proposal'
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param mask_size: size of instance mask of each object
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(Foil, self).__init__()

        assert not cache_mode, 'cache mode is currently not supported!'

        coco_annot_files = {
            "train2014": "annotations/instances_train2014.json",
            "val2014": "annotations/instances_val2014.json",
            "test2015": "annotations/image_info_test2015.json",
        }

        foil_annot_files = {
            "train": "foil/foilv1.0_train_2017.json",
            "test": "foil/foilv1.0_test_2017.json"
        }

        foil_vocab_file = "foil/vocab.txt"

        self.vg_proposal = ("vgbua_res101_precomputed",
                            "trainval2014_resnet101_faster_rcnn_genome")

        self.test_mode = test_mode
        self.data_path = data_path
        self.root_path = root_path
        self.transform = transform

        with open(os.path.join(data_path, foil_vocab_file), 'r') as vocab_file:
            vocab_lines = [v.strip() for v in vocab_file.readlines()]
        self.itos = vocab_lines
        self.stoi = {token: i for i, token in enumerate(vocab_lines)}

        if self.test_mode:
            self.image_set = "val2014"
            coco_annot_file = coco_annot_files["val2014"]
        else:
            self.image_set = "train2014"
            coco_annot_file = coco_annot_files["train2014"]

        self.coco = COCO(
            annotation_file=os.path.join(data_path, coco_annot_file))
        self.foil = FOIL(data_path, 'train' if not test_mode else 'test')
        self.foil_ids = list(self.foil.Foils.keys())
        self.foils = self.foil.loadFoils(foil_ids=self.foil_ids)
        if 'proposal' in boxes:
            with open(os.path.join(data_path, proposal_dets), 'r') as f:
                proposal_list = json.load(f)
            self.proposals = {}
            for proposal in proposal_list:
                image_id = proposal['image_id']
                if image_id in self.proposals:
                    self.proposals[image_id].append(proposal['box'])
                else:
                    self.proposals[image_id] = [proposal['box']]
        self.boxes = boxes
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        self.mask_size = mask_size
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        if zip_mode:
            self.zipreader = ZipReader()

        self.database = self.load_annotations()
        if self.aspect_grouping:
            self.group_ids = self.group_aspect(self.database)
Example #20
    def __init__(self, image_set, root_path, data_path, answer_vocab_file, use_imdb=True,
                 with_precomputed_visual_feat=False, boxes="36",
                 transform=None, test_mode=False,
                 zip_mode=False, cache_mode=False, cache_db=True, ignore_db_cache=True,
                 tokenizer=None, pretrained_model_name=None,
                 add_image_as_a_box=False, mask_size=(14, 14),
                 aspect_grouping=False, toy_dataset=False, toy_samples=128, **kwargs):
        """
        Visual Question Answering Dataset

        :param image_set: image folder name
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to vcr dataset
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param mask_size: size of instance mask of each object
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(VQA_CP, self).__init__()

        assert not cache_mode, 'cache mode is currently not supported!'

        categories = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
                      'boat',
                      'trafficlight', 'firehydrant', 'stopsign', 'parkingmeter', 'bench', 'bird', 'cat', 'dog', 'horse',
                      'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
                      'suitcase', 'frisbee', 'skis', 'snowboard', 'sportsball', 'kite', 'baseballbat', 'baseballglove',
                      'skateboard', 'surfboard', 'tennisracket', 'bottle', 'wineglass', 'cup', 'fork', 'knife', 'spoon',
                      'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hotdog', 'pizza', 'donut',
                      'cake', 'chair', 'couch', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tv', 'laptop', 'mouse',
                      'remote', 'keyboard', 'cellphone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
                      'clock', 'vase', 'scissors', 'teddybear', 'hairdrier', 'toothbrush']
        vqa_question = {
            "train": "vqa/vqacp_v2_train_questions.json",
            "val": "vqa/vqacp_v2_test_questions.json",
        }
        vqa_annot = {
            "train": "vqa/vqacp_v2_train_annotations.json",
            "val": "vqa/vqacp_v2_test_annotations.json",
        }
        
        if boxes == "36":
            precomputed_boxes = {
                'train': ("vgbua_res101_precomputed", "{}_resnet101_faster_rcnn_genome_36"),
                'val': ("vgbua_res101_precomputed", "{}_resnet101_faster_rcnn_genome_36"),
            }
        elif boxes == "10-100ada":
            precomputed_boxes = {
                'train': ("vgbua_res101_precomputed", "{}_resnet101_faster_rcnn_genome"),
                'val': ("vgbua_res101_precomputed", "{}_resnet101_faster_rcnn_genome"),
            }
        else:
            raise ValueError("Unsupported boxes setting: {}!".format(boxes))

        self.coco_dataset = {
            "train2014": os.path.join(data_path, "annotations", "instances_train2014.json"),
            "val2014": os.path.join(data_path, "annotations", "instances_val2014.json"),
            "test-dev2015": os.path.join(data_path, "annotations", "image_info_test-dev2015.json"),
            "test2015": os.path.join(data_path, "annotations", "image_info_test2015.json"),
        }

        self.periodStrip = re.compile(r"(?<!\d)(\.)(?!\d)")
        self.commaStrip = re.compile(r"(\d)(\,)(\d)")
        self.punct = [';', r"/", '[', ']', '"', '{', '}',
                      '(', ')', '=', '+', '\\', '_', '-',
                      '>', '<', '@', '`', ',', '?', '!']

        self.boxes = boxes
        self.test_mode = test_mode
        self.with_precomputed_visual_feat = with_precomputed_visual_feat
        self.category_to_idx = {c: i for i, c in enumerate(categories)}
        self.data_path = data_path
        self.root_path = root_path

        # load the answer vocab file: same as vqav2 dataset
        with open(answer_vocab_file, 'r', encoding='utf8') as f:
            self.answer_vocab = [w.lower().strip() for w in f.readlines()]
            self.answer_vocab = list(filter(lambda x: x != '', self.answer_vocab))
            self.answer_vocab = [self.processPunctuation(w) for w in self.answer_vocab]

        # config.DATA.TRAIN_IMAGE_SET and config.DATA.VAL_IMAGE_SET are used a
        # little differently here: they indicate the mode, 'train' or 'val'
        self.image_sets = [iset.strip() for iset in image_set.split('+')]
        self.ann_files = [os.path.join(data_path, vqa_annot[iset]) for iset in self.image_sets] \
            if not self.test_mode else [None for iset in self.image_sets]
        self.q_files = [os.path.join(data_path, vqa_question[iset]) for iset in self.image_sets]

        self.precomputed_box_files = [
            os.path.join(data_path, precomputed_boxes[iset][0], precomputed_boxes[iset][1]) for iset in self.image_sets]

        self.box_bank = {}
        self.coco_datasets = [os.path.join(data_path, '{}', 'COCO_{}_{{:012d}}.jpg') for iset in self.image_sets]

        self.transform = transform
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        self.mask_size = mask_size

        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        if zip_mode:
            self.zipreader = ZipReader()

        self.database = self.load_annotations()
        if self.aspect_grouping:
            self.group_ids = self.group_aspect(self.database)

        # toy dataset
        if toy_dataset:
            print(f"Using the toy dataset!! Total samples = {toy_samples}")
            self.database = self.database[:toy_samples]
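
processPunctuation is called but not defined in this snippet; the compiled periodStrip/commaStrip patterns and the punct list match the standard VQA evaluation code, so a sketch consistent with that normalisation (an assumption, since the method body is not shown):

import re

def processPunctuation(self, in_text):
    # Assumed sketch of the standard VQA answer normalisation
    out_text = in_text
    for p in self.punct:
        # Remove the symbol outright when it borders a space or the text has a
        # digit,digit pattern; otherwise replace it with a space
        if (p + ' ' in in_text or ' ' + p in in_text) \
                or re.search(self.commaStrip, in_text) is not None:
            out_text = out_text.replace(p, '')
        else:
            out_text = out_text.replace(p, ' ')
    out_text = self.periodStrip.sub('', out_text)
    return out_text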
Example #21
    def __init__(self, ann_file, image_set, root_path, data_path, transform=None, task='Q2A', test_mode=False,
                 zip_mode=False, cache_mode=False, cache_db=False, ignore_db_cache=True,
                 basic_tokenizer=None, tokenizer=None, pretrained_model_name=None,
                 only_use_relevant_dets=False, add_image_as_a_box=False, mask_size=(14, 14),
                 aspect_grouping=False, basic_align=False, qa2r_noq=False, qa2r_aug=False,
                 seq_len=64,
                 **kwargs):
        """
        Visual Commonsense Reasoning Dataset

        :param ann_file: annotation jsonl file
        :param image_set: image folder name, e.g., 'vcr1images'
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to vcr dataset
        :param transform: transform
        :param task: 'Q2A' means question to answer, 'QA2R' means question and answer to rationale,
                     'Q2AR' means question to answer and rationale
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param only_use_relevant_dets: filter out detections not used in query and response
        :param add_image_as_a_box: add whole image as a box
        :param mask_size: size of instance mask of each object
        :param aspect_grouping: whether to group images via their aspect
        :param basic_align: align to tokens retokenized by basic_tokenizer
        :param qa2r_noq: in QA->R, the query contains only the correct answer, without question
        :param qa2r_aug: in QA->R, whether to augment choices to include those with wrong answer in query
        :param kwargs:
        """
        super(VCRDataset, self).__init__()

        assert not cache_mode, 'cache mode is currently not supported!'
        assert task in ['Q2A', 'QA2R', 'Q2AR'], 'unsupported task: {}'.format(task)
        assert not qa2r_aug, "Not implemented!"

        self.qa2r_noq = qa2r_noq
        self.qa2r_aug = qa2r_aug

        self.seq_len = seq_len

        categories = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
                      'trafficlight', 'firehydrant', 'stopsign', 'parkingmeter', 'bench', 'bird', 'cat', 'dog', 'horse',
                      'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
                      'suitcase', 'frisbee', 'skis', 'snowboard', 'sportsball', 'kite', 'baseballbat', 'baseballglove',
                      'skateboard', 'surfboard', 'tennisracket', 'bottle', 'wineglass', 'cup', 'fork', 'knife', 'spoon',
                      'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hotdog', 'pizza', 'donut',
                      'cake', 'chair', 'couch', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tv', 'laptop', 'mouse',
                      'remote', 'keyboard', 'cellphone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
                      'clock', 'vase', 'scissors', 'teddybear', 'hairdrier', 'toothbrush']
        self.category_to_idx = {c: i for i, c in enumerate(categories)}
        self.data_path = data_path
        self.root_path = root_path
        self.ann_file = os.path.join(data_path, ann_file)
        self.image_set = image_set
        self.transform = transform
        self.task = task
        self.test_mode = test_mode
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        self.basic_align = basic_align
        print('Dataset Basic Align: {}'.format(self.basic_align))
        self.cache_dir = os.path.join(root_path, 'cache')
        self.only_use_relevant_dets = only_use_relevant_dets
        self.add_image_as_a_box = add_image_as_a_box
        self.mask_size = mask_size
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.basic_tokenizer = basic_tokenizer if basic_tokenizer is not None \
            else BasicTokenizer(do_lower_case=True)
        if tokenizer is None:
            if pretrained_model_name is None:
                pretrained_model_name = 'bert-base-uncased'
            if 'roberta' in pretrained_model_name:
                tokenizer = RobertaTokenizer.from_pretrained(pretrained_model_name, cache_dir=self.cache_dir)
            else:
                tokenizer = BertTokenizer.from_pretrained(pretrained_model_name, cache_dir=self.cache_dir)
        self.tokenizer = tokenizer

        if zip_mode:
            self.zipreader = ZipReader()

        self.database = self.load_annotations(self.ann_file)
        if self.aspect_grouping:
            assert False, "aspect grouping is currently not supported!"
            self.group_ids = self.group_aspect(self.database)

        self.person_name_id = 0
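
The tokenizer dispatch above selects RobertaTokenizer whenever the checkpoint name contains 'roberta' and falls back to BertTokenizer otherwise. The same logic as a standalone helper (a sketch; it assumes the tokenizer classes this module already imports):

def build_tokenizer(pretrained_model_name=None, cache_dir=None):
    # 'roberta' in the checkpoint name selects the RoBERTa BPE tokenizer;
    # anything else gets the BERT wordpiece tokenizer
    name = pretrained_model_name or 'bert-base-uncased'
    if 'roberta' in name:
        return RobertaTokenizer.from_pretrained(name, cache_dir=cache_dir)
    return BertTokenizer.from_pretrained(name, cache_dir=cache_dir)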
Example #22
    def __init__(self, ann_file, image_set, root_path, data_path, seq_len=64,
                 with_precomputed_visual_feat=False, mask_raw_pixels=True,
                 with_rel_task=True, with_mlm_task=False, with_mvrc_task=False,
                 transform=None, test_mode=False,
                 zip_mode=False, cache_mode=False, cache_db=False, ignore_db_cache=True,
                 tokenizer=None, pretrained_model_name=None,
                 add_image_as_a_box=False,
                 aspect_grouping=False, languages_used='first', MLT_vocab='bert-base-german-cased-vocab.txt', **kwargs):
        """
        Conceptual Captions Dataset

        :param ann_file: annotation jsonl file
        :param image_set: image folder name, e.g., 'vcr1images'
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to vcr dataset
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(Multi30kDataset2018, self).__init__()

        assert not cache_mode, 'cache mode is currently not supported!'
        # TODO: remove this to allow testing
        # assert not test_mode

        annot = {'train': 'train_MLT_frcnn.json',
                 'val': 'val_MLT_frcnn.json',
                 'test2015': 'test_MLT_2018_renamed_frcnn.json'}

        self.seq_len = seq_len
        self.with_rel_task = with_rel_task
        self.with_mlm_task = with_mlm_task
        self.with_mvrc_task = with_mvrc_task
        self.data_path = data_path
        self.root_path = root_path
        self.ann_file = os.path.join(data_path, annot[image_set])
        self.with_precomputed_visual_feat = with_precomputed_visual_feat
        self.mask_raw_pixels = mask_raw_pixels
        self.image_set = image_set
        self.transform = transform
        self.test_mode = test_mode
        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        # FM edit: added option selecting which language(s) are used
        self.languages_used = languages_used
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        self.zipreader = ZipReader()

        # FM: Customise for multi30k dataset
        self.database = list(jsonlines.open(self.ann_file))
        if not self.zip_mode:
            for i, idb in enumerate(self.database):
                self.database[i]['frcnn'] = idb['frcnn'].replace('.zip@', '')\
                    .replace('.0', '').replace('.1', '').replace('.2', '').replace('.3', '')
                self.database[i]['image'] = idb['image'].replace('.zip@', '')


        if self.aspect_grouping:
            assert False, "aspect grouping is currently not supported!"
            self.group_ids = self.group_aspect(self.database)

        print('mask_raw_pixels: ', self.mask_raw_pixels)

        #FM: initialise vocabulary for output
        self.MLT_vocab_path = os.path.join(root_path, 'model/pretrained_model', MLT_vocab)
        self.MLT_vocab = []
        with open(self.MLT_vocab_path) as fp:
            for line in fp:
                self.MLT_vocab.append(line.strip())
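
When zip_mode is off, these loaders rewrite annotation paths before touching the filesystem: the '.zip@' marker is removed, and frcnn paths additionally lose their '.0' through '.3' suffixes. The rewrite as a standalone helper (name hypothetical):

def to_plain_path(path):
    # Strip the zip-archive marker and shard suffixes so the path refers to
    # files extracted on disk rather than entries inside a zip archive
    for marker in ('.zip@', '.0', '.1', '.2', '.3'):
        path = path.replace(marker, '')
    return path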
Example #23
    def __init__(self, image_set, root_path, data_path, boxes='gt', proposal_source='official',
                 transform=None, test_mode=False,
                 zip_mode=False, cache_mode=False, cache_db=False, ignore_db_cache=True,
                 tokenizer=None, pretrained_model_name=None,
                 add_image_as_a_box=False, mask_size=(14, 14),
                 aspect_grouping=False, **kwargs):
        """
        Market1501 Dataset

        :param image_set: image folder name
        :param root_path: root path to cache database loaded from annotation file
        :param data_path: path to dataset
        :param boxes: boxes to use, 'gt' or 'proposal'
        :param transform: transform
        :param test_mode: test mode means no labels available
        :param zip_mode: reading images and metadata in zip archive
        :param cache_mode: cache whole dataset to RAM first, then __getitem__ read them from RAM
        :param ignore_db_cache: ignore previous cached database, reload it from annotation file
        :param tokenizer: default is BertTokenizer from pytorch_pretrained_bert
        :param add_image_as_a_box: add whole image as a box
        :param mask_size: size of instance mask of each object
        :param aspect_grouping: whether to group images via their aspect
        :param kwargs:
        """
        super(PA100K, self).__init__()

        assert not cache_mode, 'cache mode is currently not supported!'
        self.vg_proposal = ("vgbua_res101_precomputed", "trainval2014_resnet101_faster_rcnn_genome")
        self.proposal_source = proposal_source
        self.boxes = boxes
        self.test_mode = test_mode

        self.data_path = data_path
        self.root_path = root_path
        self.transform = transform
        self.image_sets = [iset.strip() for iset in image_set.split('+')]

        self.zip_mode = zip_mode
        self.cache_mode = cache_mode
        self.cache_db = cache_db
        self.ignore_db_cache = ignore_db_cache
        self.aspect_grouping = aspect_grouping
        self.cache_dir = os.path.join(root_path, 'cache')
        self.add_image_as_a_box = add_image_as_a_box
        self.mask_size = mask_size
        if not os.path.exists(self.cache_dir):
            makedirsExist(self.cache_dir)
        self.tokenizer = tokenizer if tokenizer is not None \
            else BertTokenizer.from_pretrained(
            'bert-base-uncased' if pretrained_model_name is None else pretrained_model_name,
            cache_dir=self.cache_dir)

        self.trainval_id_to_cls = {}
        self.image_nums = 0
        # self.imgid2entry = {}
        self.ps_map = {}
        self.imgid2psid = {}
        self.trainval_index_to_id = {}

        self.image_entries = []
        self.pa100k_attribute = self.generate_data_description()
        self.database = self.load_annotations(self.pa100k_attribute)
        # if self.aspect_grouping:
        #     self.group_ids = self.group_aspect(self.database)
        self.part = 7
        self.max_boxes = 7

        self.max_word = 26

        self.val_images = []
        self.val_boxes = []
        self.val_im_info = []
        self.val_ids = []
        self.val_feat = []

        self.diff = 2