Example #1
    def __init__(self, mode):
        """
        :param mode: train2014 or val2014
        """
        self.mode = mode
        self.root = os.path.join(COCO_PATH, mode)
        self.ann_file = os.path.join(COCO_PATH, 'annotations', 'instances_{}.json'.format(mode))
        self.coco = COCO(self.ann_file)
        self.ids = [k for k in self.coco.imgs.keys() if len(self.coco.imgToAnns[k]) > 0]


        tform = []
        if self.is_train:
            tform.append(RandomOrder([
                Grayscale(),
                Brightness(),
                Contrast(),
                Sharpness(),
                Hue(),
            ]))

        tform += [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]

        self.transform_pipeline = Compose(tform)
        self.ind_to_classes = ['__background__'] + [v['name'] for k, v in self.coco.cats.items()]
        # COCO category ids are not contiguous (several ids are skipped), so remap them to contiguous 1-based indices
        self.id_to_ind = {coco_id:(ind+1) for ind, coco_id in enumerate(self.coco.cats.keys())}
        self.id_to_ind[0] = 0

        self.ind_to_id = {x:y for y,x in self.id_to_ind.items()}
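This example (and Examples #13 and #14 below) reads self.is_train without defining it in the snippet. A minimal property along these lines is an assumption, since the full class body is not shown, but it would make the augmentation branch work for both 'train2014'-style and plain 'train' mode strings:

    @property
    def is_train(self):
        # Assumed helper: treat any mode whose name starts with 'train'
        # (e.g. 'train' or 'train2014') as the training split.
        return self.mode.startswith('train')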
Example #2
    def __init__(self,
                 mode,
                 dict_file=CF_DICT_FN,
                 data_file=CF_IM_DATA_FN,
                 num_im=-1,
                 num_val_im=5000):
        """
        Torch dataset for VisualGenome
        :param mode: Must be train, test, or val
        :param roidb_file:  HDF5 containing the GT boxes, classes, and relationships
        :param dict_file: JSON Contains mapping of classes/relationships to words
        :param image_file: HDF5 containing image filenames
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        :param num_im: Number of images in the entire dataset. -1 for all images.
        :param num_val_im: Number of images in the validation set (must be less than num_im
               unless num_im is -1.)
        :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN
            proposals
        """
        if mode not in ('test', 'train', 'val'):
            raise ValueError(
                "Mode must be in test, train, or val. Supplied {}".format(
                    mode))
        self.mode = mode

        # Initialize
        self.dict_file = dict_file
        self.data_file = data_file

        self.filenames, self.gt_classes = load_data(data_file,
                                                    CF_IMAGES,
                                                    self.mode,
                                                    num_im,
                                                    num_val_im=num_val_im)

        self.ind_to_classes = load_info(dict_file)

        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
Example #3
    def __init__(self):
        '''
        self.filenames = ['*****@*****.**',
                          '*****@*****.**',
                          '*****@*****.**',
                          '*****@*****.**',
                          '*****@*****.**']
        '''
        self.filenames = glob.glob(
            '/home/suji/spring20/vilbert_beta/data/VCR/vcr1images/*/*.jpg'
        )[:100]
        print("VCRDataset filenames:", self.filenames)
        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
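Only the constructor is shown; a hypothetical __getitem__/__len__ pair for this VCRDataset could feed the globbed JPEGs through the shared pipeline like this (PIL's Image is assumed to be imported by the surrounding module):

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, index):
        # Hypothetical: load one image and apply the SquarePad/Resize/
        # ToTensor/Normalize pipeline built in __init__.
        img = Image.open(self.filenames[index]).convert('RGB')
        return self.transform_pipeline(img)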
Example #4
def custom(filename):

    IM_SCALE = 592

    image_unpadded = Image.open(filename)

    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]

    result = Compose(tform)

    # print(image_unpadded.size)
    a = result(image_unpadded)
    b = F.to_pil_image(a)
    c = Variable(a.view(-1, 3, 592, 592))
    w, h = image_unpadded.size
    img_scale_factor = IM_SCALE / max(w, h)
    im_size = (IM_SCALE, int(w * img_scale_factor), img_scale_factor)
    return c, np.asarray([im_size])
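A brief, hypothetical call of custom() (the filename is a placeholder; the function itself assumes Image, Variable, F and the transform classes are already imported):

batch, im_sizes = custom('example.jpg')   # 'example.jpg' is an illustrative path
print(batch.shape)      # expected: torch.Size([1, 3, 592, 592])
print(im_sizes.shape)   # (1, 3): one row of (IM_SCALE, scaled width, scale factor)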
Example #5
    def __init__(self,
                 mode,
                 filter_duplicate_rels=True,
                 filter_non_overlap=True):
        """
        Torch dataset for VRD
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        """
        if mode not in ('test', 'train'):
            raise ValueError(
                "Mode must be in test, train, or val. Supplied {}".format(
                    mode))
        self.mode = mode

        # Initialize
        self.anno_file = VRD_TRAIN if self.mode == 'train' else VRD_TEST
        self.image_root = VRD_TRAIN_IMAGES if self.mode == 'train' else VRD_TEST_IMAGES
        self.filter_non_overlap = (filter_non_overlap and self.mode == 'train')
        self.filter_duplicate_rels = (filter_duplicate_rels
                                      and self.mode == 'train')

        self.gt_boxes, self.gt_classes, self.relationships, self.filenames = load_graphs(
            self.anno_file,
            filter_non_overlap=self.filter_non_overlap and self.is_train,
        )

        self.ind_to_classes, self.ind_to_predicates = load_info(VRD_LABELS)
        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
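filter_duplicate_rels is stored here but typically applied later, when an image's relationships are fetched. A self-contained sketch of the usual dedup-and-sample step (the helper name and the (subject, object, predicate) row layout are assumptions):

from collections import defaultdict

import numpy as np


def dedup_relationships(rels):
    # Hypothetical helper: collapse duplicate (subject, object) pairs by
    # sampling one predicate per pair, mirroring what the docstring describes.
    grouped = defaultdict(list)
    for subj, obj, pred in rels:
        grouped[(subj, obj)].append(pred)
    return np.array([(subj, obj, np.random.choice(preds))
                     for (subj, obj), preds in grouped.items()], dtype=np.int64)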
Example #6
    def __init__(self,
                 mode,
                 roidb_file=VG_SGG_FN,
                 dict_file=VG_SGG_DICT_FN,
                 image_file=IM_DATA_FN,
                 filter_empty_rels=True,
                 num_im=-1,
                 num_val_im=5000,
                 filter_duplicate_rels=True,
                 filter_non_overlap=True,
                 use_proposals=False):
        """
        Torch dataset for VisualGenome
        :param mode: Must be train, test, or val
        :param roidb_file:  HDF5 containing the GT boxes, classes, and relationships
        :param dict_file: JSON Contains mapping of classes/relationships to words
        :param image_file: HDF5 containing image filenames
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        :param num_im: Number of images in the entire dataset. -1 for all images.
        :param num_val_im: Number of images in the validation set (must be less than num_im
               unless num_im is -1.)
        :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN
            proposals
        """
        if mode not in ('test', 'train', 'val'):
            raise ValueError(
                "Mode must be in test, train, or val. Supplied {}".format(
                    mode))
        self.mode = mode

        # Initialize
        self.roidb_file = roidb_file
        self.dict_file = dict_file
        self.image_file = image_file
        self.filter_non_overlap = filter_non_overlap
        self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

        self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
            self.roidb_file,
            self.mode,
            num_im,
            num_val_im=num_val_im,
            filter_empty_rels=filter_empty_rels,
            filter_non_overlap=self.filter_non_overlap and self.is_train,
        )

        self.filenames = load_image_filenames(image_file)
        # CHANGE
        index_list = np.where(self.split_mask)[0]
        trimmed_list = list(
            filter(lambda x: x < len(self.filenames), index_list))
        print("num files, num of files that will be used", len(self.filenames),
              len(trimmed_list))
        # END
        self.filenames = [self.filenames[i] for i in trimmed_list]

        self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)

        if use_proposals:
            print("Loading proposals", flush=True)
            p_h5 = h5py.File(PROPOSAL_FN, 'r')
            rpn_rois = p_h5['rpn_rois']
            rpn_scores = p_h5['rpn_scores']
            rpn_im_to_roi_idx = np.array(
                p_h5['im_to_roi_idx'][self.split_mask])
            rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

            self.rpn_rois = []
            for i in range(len(self.filenames)):
                rpn_i = np.column_stack((
                    rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                               rpn_num_rois[i]],
                    rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                             rpn_num_rois[i]],
                ))
                self.rpn_rois.append(rpn_i)
        else:
            self.rpn_rois = None

        # You could add data augmentation here. But we didn't.
        # tform = []
        # if self.is_train:
        #     tform.append(RandomOrder([
        #         Grayscale(),
        #         Brightness(),
        #         Contrast(),
        #         Sharpness(),
        #         Hue(),
        #     ]))

        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
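The snippet omits the class statement; a hypothetical instantiation (borrowing the name VG, which Example #9 uses for class attributes such as VG.split) might look like:

# Hypothetical usage; VG is an assumed class name for the __init__ above.
train = VG('train', num_val_im=5000, use_proposals=False)
val = VG('val', num_val_im=5000)
print(len(train.filenames), 'train images /', len(val.filenames), 'val images')
print(len(train.ind_to_classes), 'object classes,',
      len(train.ind_to_predicates), 'predicates')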
Example #7
    def __init__(
            self,
            mode,
            roidb_file=VG_SGG_FN,
            dict_file=VG_SGG_DICT_FN,
            image_file=IM_DATA_FN,
            filter_empty_rels=True,
            num_im=-1,
            num_val_im=5000,
            filter_duplicate_rels=True,
            filter_non_overlap=True,  #num_im=-1
            use_proposals=False):
        """
        Torch dataset for VisualGenome
        :param mode: Must be train, test, or val
        :param roidb_file:  HDF5 containing the GT boxes, classes, and relationships
        :param dict_file: JSON Contains mapping of classes/relationships to words
        :param image_file: HDF5 containing image filenames
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        :param num_im: Number of images in the entire dataset. -1 for all images.
        :param num_val_im: Number of images in the validation set (must be less than num_im
               unless num_im is -1.)
        :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN
            proposals
        """
        if mode not in ('test', 'train', 'val'):
            raise ValueError(
                "Mode must be in test, train, or val. Supplied {}".format(
                    mode))
        self.mode = mode
        self.mode = 'val'
        print('mode', self.mode)

        # Initialize
        self.roidb_file = roidb_file
        self.dict_file = dict_file
        self.image_file = image_file
        self.filter_non_overlap = filter_non_overlap
        self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

        # self.split_mask = load_split(
        #     self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
        #     filter_empty_rels=filter_empty_rels,
        # )

        # self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)
        self.ind_to_classes = {}
        self.ind_to_predicates = {}

        # Load classes
        self._classes = ['__background__']
        self._class_to_ind = {}
        self._class_to_ind[self._classes[0]] = 0
        self.ind_to_classes = {}
        self.ind_to_classes[self._classes[0]] = 0
        with open(os.path.join(VOCAB_DIR, 'objects_vocab.txt')) as f:
            count = 1
            for object in f.readlines():
                names = [n.lower().strip() for n in object.split(',')]
                self._classes.append(names[0])
                for n in names:
                    self._class_to_ind[n] = count
                self.ind_to_classes[names[0]] = count
                if count == 151: break
                count += 1

        # print('num_classes',len(self.ind_to_classes))
        # Load attributes
        self._attributes = ['__no_attribute__']
        self._attribute_to_ind = {}
        self._attribute_to_ind[self._attributes[0]] = 0
        with open(os.path.join(VOCAB_DIR, 'attributes_vocab.txt')) as f:
            count = 1
            for att in f.readlines():
                names = [n.lower().strip() for n in att.split(',')]
                self._attributes.append(names[0])
                for n in names:
                    self._attribute_to_ind[n] = count
                count += 1

        # Load relations
        self._relations = ['__no_relation__']
        self._relation_to_ind = {}
        self._relation_to_ind[self._relations[0]] = 0
        self.ind_to_predicates = {}
        self.ind_to_predicates[self._relations[0]] = 0

        with open('/share/yutong/projects/neural-motifs/data/VG-SGG-dicts.json'
                  ) as f:
            dictionary = json.load(f)['idx_to_predicate']
            for key in dictionary.keys():
                self.ind_to_predicates[dictionary[key]] = int(key)

        with open(os.path.join(VOCAB_DIR, 'relations_vocab.txt')) as f:
            count = 1
            for rel in f.readlines():
                names = [n.lower().strip() for n in rel.split(',')]
                self._relations.append(names[0])
                for n in names:
                    self._relation_to_ind[n] = count
                self.ind_to_predicates[names[0]] = count
                count += 1

        # print('num relations',len(self.ind_to_predicates))
        self.ind_to_classes = sorted(self.ind_to_classes,
                                     key=lambda k: self.ind_to_classes[k])
        self.ind_to_predicates = sorted(
            self.ind_to_predicates, key=lambda k: self.ind_to_predicates[k])

        self.filenames, keep = load_image_filenames(image_file)

        # self.roidb_file, self.mode, num_im, num_val_im = num_val_im,
        #     filter_empty_rels=filter_empty_rels,

        self.split_mask = \
            load_graphs(keep, self.roidb_file, self.filenames, self.num_classes,
                        self._class_to_ind, self._relation_to_ind, self._attribute_to_ind,
                        filter_empty_rels=filter_empty_rels, filter_non_overlap=filter_non_overlap,
                        mode =self.mode, num_im=num_im, num_val_im = num_val_im)
        self.split_mask = self.split_mask[keep]
        # self.split_mask = self.split_mask[:len(self.filenames)]
        # print('filename num', len(self.filenames))
        # print('split num', len(self.split_mask))
        self.filenames = [
            self.filenames[i] for i in np.where(self.split_mask)[0]
        ]

        # graphs_file, filenames, num_classes, _class_to_ind, _relation_to_ind, _attribute_to_ind,
        #                 filter_empty_rels=True, filter_non_overlap=False , mode='train', num_im=-1, num_val_im=0

        if use_proposals:
            print("Loading proposals", flush=True)
            p_h5 = h5py.File(PROPOSAL_FN, 'r')
            rpn_rois = p_h5['rpn_rois']
            rpn_scores = p_h5['rpn_scores']
            rpn_im_to_roi_idx = np.array(
                p_h5['im_to_roi_idx'][self.split_mask])
            rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

            self.rpn_rois = []
            for i in range(len(self.filenames)):
                rpn_i = np.column_stack((
                    rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                               rpn_num_rois[i]],
                    rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                             rpn_num_rois[i]],
                ))
                self.rpn_rois.append(rpn_i)
        else:
            self.rpn_rois = None

        # You could add data augmentation here. But we didn't.
        # tform = []
        # if self.is_train:
        #     tform.append(RandomOrder([
        #         Grayscale(),
        #         Brightness(),
        #         Contrast(),
        #         Sharpness(),
        #         Hue(),
        #     ]))

        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
Example #8
    def __init__(self,
                 mode,
                 roidb_file=VG_SGG_FN,
                 dict_file=VG_SGG_DICT_FN,
                 image_file=IM_DATA_FN,
                 filter_empty_rels=True,
                 num_im=-1,
                 num_val_im=5000,
                 filter_duplicate_rels=True,
                 filter_non_overlap=True,
                 use_proposals=False):
        """
        Torch dataset for VisualGenome
        :param mode: Must be train, test, or val
        :param roidb_file: VG-SGG.h5
        :param dict_file: VG-SGG-dicts.json
        :param image_file: image_data.json
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        :param num_im: Number of images in the entire dataset. -1 for all images.
        :param num_val_im: Number of images in the validation set (must be less than num_im
               unless num_im is -1.)
        :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN
            proposals
        """
        #ipdb.set_trace()
        if mode not in ('test', 'train', 'val'):
            raise ValueError(
                "Mode must be in test, train, or val. Supplied {}".format(
                    mode))
        self.mode = mode

        # Initialize
        self.roidb_file = roidb_file  # HDF5 containing the GT boxes, classes, and relationships
        self.dict_file = dict_file  # JSON Contains mapping of classes/relationships to words
        self.image_file = image_file  # JSON containing image filenames

        self.filter_non_overlap = filter_non_overlap
        self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

        # (ndarray, list, list, list)
        self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
            self.roidb_file,
            self.mode,
            num_im,
            num_val_im=num_val_im,
            filter_empty_rels=filter_empty_rels,
            filter_non_overlap=self.filter_non_overlap and self.is_train,
        )

        # list
        self.filenames = load_image_filenames(image_file)
        # get the train list if 'train'; get the 'test' list if 'test'; get the 'val' list if 'val'
        self.filenames = [
            self.filenames[i] for i in np.where(self.split_mask)[0]
        ]
        """
        if self.mode == 'train':
            num_for_train = 1000
            self.gt_boxes = self.gt_boxes[:num_for_train]
            self.gt_classes = self.gt_classes[:num_for_train]
            self.relationships = self.relationships[:num_for_train]
            self.filenames = self.filenames[:num_for_train]
        elif self.mode == 'val':
            num_for_val = 100
            self.gt_boxes = self.gt_boxes[:num_for_val]
            self.gt_classes = self.gt_classes[:num_for_val]
            self.relationships = self.relationships[:num_for_val]
            self.filenames = self.filenames[:num_for_val]
        elif self.mode == 'test':
            num_for_test = 4000
            self.gt_boxes = self.gt_boxes[:num_for_test]
            self.gt_classes = self.gt_classes[:num_for_test]
            self.relationships = self.relationships[:num_for_test]
            self.filenames = self.filenames[:num_for_test]
        else:
            pass
        """
        '''        
        # 15 15 15
        if self.mode == 'test':
            txtfile = open('test_img.txt', 'w')
            for one_dict in self.filenames:
                txtfile.write(one_dict)
                txtfile.write('\n')
            txtfile.close()
        '''
        print("From visual_genome.py: the total number of ", self.mode,
              " images is")
        print(len(self.filenames))

        self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)

        # whether use RPN proposals of Xu et als; will correspond to self.filenames
        if use_proposals:
            print("Loading proposals", flush=True)
            p_h5 = h5py.File(PROPOSAL_FN, 'r')
            rpn_rois = p_h5['rpn_rois']
            rpn_scores = p_h5['rpn_scores']
            rpn_im_to_roi_idx = np.array(
                p_h5['im_to_roi_idx'][self.split_mask])
            rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

            self.rpn_rois = []
            for i in range(len(self.filenames)):
                rpn_i = np.column_stack((
                    rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                               rpn_num_rois[i]],
                    rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                             rpn_num_rois[i]],
                ))
                self.rpn_rois.append(rpn_i)
        else:
            self.rpn_rois = None

        # You could add data augmentation here. But we didn't.
        # tform = []
        # if self.is_train:
        #     tform.append(RandomOrder([
        #         Grayscale(),
        #         Brightness(),
        #         Contrast(),
        #         Sharpness(),
        #         Hue(),
        #     ]))

        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
Example #9
    def __init__(self,
                 mode,
                 data_dir,
                 filter_empty_rels=True,
                 num_im=-1,
                 num_val_im=5000,
                 filter_duplicate_rels=True,
                 filter_non_overlap=True,
                 min_graph_size=-1,
                 max_graph_size=-1,
                 torch_detector=False,
                 n_shots=-1,
                 square_pad=True,
                 training_triplets=None,
                 exclude_left_right=False):
        """0
        Torch dataset for VisualGenome
        :param mode: Must be train, test, or val
        :param roidb_file:  HDF5 containing the GT boxes, classes, and relationships
        :param dict_file: JSON Contains mapping of classes/relationships to words
        :param image_file: HDF5 containing image filenames
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        :param num_im: Number of images in the entire dataset. -1 for all images.
        :param num_val_im: Number of images in the validation set (must be less than num_im
               unless num_im is -1.)
        :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN
            proposals
        """

        # print(mode, VG.split, data_dir, num_im, num_val_im, torch_detector, min_graph_size, max_graph_size, n_shots)

        assert mode in ('test', 'train',
                        'val'), '%s mode not recognized' % mode
        self.mode = mode
        self.max_graph_size = max_graph_size if mode == 'train' else -1
        self.min_graph_size = min_graph_size if mode == 'train' else -1
        self.filter_non_overlap = filter_non_overlap
        self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'
        self.n_shots = n_shots
        assert VG.split in ['stanford', 'vte',
                            'gqa'], ('invalid split', VG.split)

        if training_triplets:
            assert mode in ['val', 'test'], mode

        if VG.split == 'stanford':
            data_name = 'VG'
            self.roidb_file = os.path.join(data_dir, data_name,
                                           'stanford_filtered', 'VG-SGG.h5')
            self.dict_file = os.path.join(data_dir, data_name,
                                          'stanford_filtered',
                                          'VG-SGG-dicts.json')
            self.image_file = os.path.join(data_dir, data_name,
                                           'stanford_filtered',
                                           'image_data.json')
            self.images_dir = os.path.join(data_dir, data_name, 'VG_100K')
            self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
                self.roidb_file,
                self.mode,
                num_im,
                num_val_im=num_val_im,
                filter_empty_rels=filter_empty_rels,
                min_graph_size=self.min_graph_size,
                max_graph_size=self.max_graph_size,
                filter_non_overlap=self.filter_non_overlap and self.is_train,
                training_triplets=training_triplets,
                random_subset=False,
                filter_zeroshots=True,
                n_shots=n_shots)
        elif VG.split == 'vte':
            data_name = 'VG'
            self.images_dir = os.path.join(data_dir, data_name, 'VG_100K')
            vte = VTESplit(os.path.join(data_dir, data_name, 'vtranse',
                                        'vg1_2_meta.h5'),
                           mode=self.mode)
            self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = vte.load_graphs(
                num_im,
                num_val_im=num_val_im,
                filter_empty_rels=filter_empty_rels,
                min_graph_size=self.min_graph_size,
                max_graph_size=self.max_graph_size,
                training_triplets=training_triplets,
                random_subset=False,
                filter_zeroshots=True)

        elif VG.split == 'gqa':
            data_name = 'GQA'
            self.images_dir = os.path.join(data_dir, 'VG/VG_100K')
            # Load the JSON containing the SGs
            f_mode = mode
            if mode == 'val':
                f_mode = 'train'  # we are using the last 5k training SGs for validation
            elif mode == 'test':
                f_mode = 'val'  # GQA has no public test SGs, so use the val set instead

            img_list_file = os.path.join(data_dir, data_name,
                                         '%s_images.json' % f_mode)

            if os.path.isfile(img_list_file):
                print('Loading GQA-%s image ids...' % mode)
                with open(img_list_file, 'r') as f:
                    self.image_ids = json.load(f)
            else:
                # Use only images having question-answer pairs in the balanced split
                print('Loading GQA-%s questions...' % mode)
                with open(
                        os.path.join(data_dir, data_name,
                                     '%s_balanced_questions.json' % f_mode),
                        'rb') as f:
                    Q_dict = json.load(f)
                self.image_ids = set()
                for v in Q_dict.values():
                    self.image_ids.add(v['imageId'])
                with open(img_list_file, 'w') as f:
                    json.dump(list(self.image_ids), f)

                del Q_dict

            self.image_ids = sorted(
                list(self.image_ids
                     ))  # sort to make it consistent for different runs

            self.filenames = gqa.load_image_filenames(self.image_ids, mode,
                                                      self.images_dir)

            if VG.train_sgs is None:
                print('Loading GQA-%s scene graphs...' % mode)
                with open(
                        os.path.join(data_dir, data_name,
                                     'sceneGraphs/train_sceneGraphs.json'),
                        'rb') as f:
                    VG.train_sgs = json.load(f)
                with open(
                        os.path.join(data_dir, data_name,
                                     'sceneGraphs/val_sceneGraphs.json'),
                        'rb') as f:
                    VG.val_sgs = json.load(f)
            train_sgs, val_sgs = VG.train_sgs, VG.val_sgs

            (self.ind_to_classes, self.ind_to_predicates, self.classes_to_ind,
             self.predicates_to_ind) = gqa.load_info(train_sgs, val_sgs)

            (self.split_mask, self.gt_boxes, self.gt_classes,
             self.relationships) = gqa.load_graphs(
                 train_sgs if f_mode == 'train' else val_sgs,
                 self.image_ids,
                 self.classes_to_ind,
                 self.predicates_to_ind,
                 num_val_im=num_val_im,
                 mode=mode,
                 training_triplets=training_triplets,
                 min_graph_size=self.min_graph_size,
                 max_graph_size=self.max_graph_size,
                 random_subset=False,
                 filter_empty_rels=filter_empty_rels,
                 filter_zeroshots=True,
                 exclude_left_right=exclude_left_right)

            del train_sgs, val_sgs, self.image_ids  # force to clean RAM

        else:
            raise NotImplementedError(VG.split)

        self.root = os.path.join(data_dir, data_name)

        if VG.split == 'stanford':
            self.filenames = load_image_filenames(
                self.image_file,
                self.images_dir) if VG.filenames is None else VG.filenames
            self.ind_to_classes, self.ind_to_predicates = load_info(
                self.dict_file)
        elif VG.split == 'vte':
            self.filenames = vte.load_image_filenames(self.images_dir)
            self.ind_to_classes, self.ind_to_predicates = vte.load_info()
            vte.close()

        if VG.filenames is None:
            VG.filenames = self.filenames

        # if self.mode == 'train':
        #     print('\nind_to_classes', len(self.ind_to_classes), self.ind_to_classes)
        #     print('\nind_to_predicates', len(self.ind_to_predicates), self.ind_to_predicates, '\n')

        self.triplet_counts = {}
        # c = 0
        N_total, M_FG_total, M_BG_total, n_obj_lst, fg_lst, sp_lst  = 0, 0, 0, [], [], []
        for im in range(len(self.gt_classes)):
            n_obj = len(self.gt_classes[im])
            n_obj_lst.append(n_obj)
            fg_lst.append(len(filter_dups(self.relationships[im])))
            sp_lst.append(100 * float(fg_lst[-1]) / (n_obj * (n_obj - 1)))
            N_total += n_obj
            M_FG_total += fg_lst[-1]
            M_BG_total += n_obj * (n_obj - 1)
            for rel_ind, tri in enumerate(self.relationships[im]):
                o1, o2, R = tri
                tri_str = '{}_{}_{}'.format(self.gt_classes[im][o1], R,
                                            self.gt_classes[im][o2])

                if training_triplets:
                    if isinstance(training_triplets, dict):
                        assert tri_str in training_triplets and training_triplets[
                            tri_str] <= self.n_shots, (
                                mode, len(training_triplets), tri_str,
                                training_triplets[tri_str], self.n_shots)

                if tri_str not in self.triplet_counts:
                    self.triplet_counts[tri_str] = 0

                self.triplet_counts[tri_str] += 1

        n_samples = len(self.gt_classes)
        # self.triplets = list(self.triplet_counts.keys())
        counts = list(self.triplet_counts.values())

        if mode == 'train':
            self.subj_pred_pairs, self.pred_obj_pairs = {}, {}
            for im in range(len(self.gt_classes)):
                for rel_ind, tri in enumerate(self.relationships[im]):
                    o1, o2, R = tri
                    tri_str = '{}_{}_{}'.format(self.gt_classes[im][o1], R,
                                                self.gt_classes[im][o2])
                    pair = '{}_{}'.format(self.gt_classes[im][o1],
                                          R)  # man wearing
                    if pair not in self.subj_pred_pairs:
                        self.subj_pred_pairs[pair] = {}
                    self.subj_pred_pairs[pair][
                        self.gt_classes[im][o2]] = self.triplet_counts[tri_str]

                    pair = '{}_{}'.format(
                        R, self.gt_classes[im][o2])  # on surfboard
                    if pair not in self.pred_obj_pairs:
                        self.pred_obj_pairs[pair] = {}
                    self.pred_obj_pairs[pair][
                        self.gt_classes[im][o1]] = self.triplet_counts[tri_str]

            print('subj_pred_pairs, pred_obj_pairs', len(self.subj_pred_pairs),
                  len(self.pred_obj_pairs))

        print('{} images, {} triplets ({} unique triplets)'.format(
            # VG.split,
            # mode,
            len(self.gt_classes),
            # np.sum(self.split_mask),
            # len(self.split_mask),
            np.sum(counts),  # total count
            len(self.triplet_counts)))  # unique

        # np.min(counts), np.max(counts)))

        def stats(x):
            return 'min={:.1f}, max={:.1f}, mean={:.1f}, std={:.1f}'.format(
                np.min(x), np.max(x), np.mean(x), np.std(x))

        print(
            'Stats: {} objects ({}), {} FG edges ({}), {} BG edges ({:.2f} avg), graph density {}'
            .format(N_total, str(stats(n_obj_lst)), M_FG_total,
                    str(stats(fg_lst)), M_BG_total, M_BG_total / n_samples,
                    str(stats(sp_lst))))

        assert len(self.split_mask) == len(self.filenames), (len(
            self.split_mask), len(self.filenames))

        self.filenames = [
            self.filenames[i] for i in np.where(self.split_mask)[0]
        ]

        # if self.mode == 'train':
        #     print('example of triplets')
        #     for tri in list(self.triplet_counts.keys())[:5]:
        #         print(tri, self.triplet2str(tri), self.triplet_counts[tri])

        self.rpn_rois = None

        self.torch_detector = torch_detector

        tform = [SquarePad()] if square_pad else []
        if torch_detector:
            tform += [ToTensor()]
        else:
            tform += [
                Resize(IM_SCALE),
                ToTensor(),
                Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
            ]

        self.transform_pipeline = Compose(tform)
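The commented-out block near the end calls self.triplet2str, which is not shown. A plausible sketch of such a decoder (an assumption, based on how the 'subj_pred_obj' count keys are built above):

    def triplet2str(self, tri_str):
        # Hypothetical decoder for keys like '5_31_57' produced above:
        # class index _ predicate index _ class index -> human-readable names.
        subj, pred, obj = (int(x) for x in tri_str.split('_'))
        return '{}_{}_{}'.format(self.ind_to_classes[subj],
                                 self.ind_to_predicates[pred],
                                 self.ind_to_classes[obj])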
Example #10
    def __init__(
            self,
            mode,
            # image_dir, instances_json, stuff_json=None,
            stuff_only=True,
            image_size=(64, 64),
            mask_size=16,
            normalize_images=True,
            max_samples=None,
            include_relationships=True,
            min_object_size=0.02,
            min_objects_per_image=3,
            max_objects_per_image=8,
            include_other=False,
            instance_whitelist=None,
            stuff_whitelist=None):
        """
    A PyTorch Dataset for loading Coco and Coco-Stuff annotations and converting
    them to scene graphs on the fly.

    Inputs:
    - image_dir: Path to a directory where images are held
    - instances_json: Path to a JSON file giving COCO annotations
    - stuff_json: (optional) Path to a JSON file giving COCO-Stuff annotations
    - stuff_only: (optional, default True) If True then only iterate over
      images which appear in stuff_json; if False then iterate over all images
      in instances_json.
    - image_size: Size (H, W) at which to load images. Default (64, 64).
    - mask_size: Size M for object segmentation masks; default 16.
    - normalize_images: If True then normalize images by subtracting ImageNet
      mean pixel and dividing by ImageNet std pixel.
    - max_samples: If None use all images. Otherwise only use images in the
      range [0, max_samples). Default None.
    - include_relationships: If True then include spatial relationships; if
      False then only include the trivial __in_image__ relationship.
    - min_object_size: Ignore objects whose bounding box takes up less than
      this fraction of the image.
    - min_objects_per_image: Ignore images which have fewer than this many
      object annotations.
    - max_objects_per_image: Ignore images which have more than this many
      object annotations.
    - include_other: If True, include COCO-Stuff annotations which have category
      "other". Default is False, because I found that these were really noisy
      and pretty much impossible for the system to model.
    - instance_whitelist: None means use all instance categories. Otherwise a
      list giving a whitelist of instance category names to use.
    - stuff_whitelist: None means use all stuff categories. Otherwise a list
      giving a whitelist of stuff category names to use.
    """
        super(Dataset, self).__init__()
        self.mode = mode

        image_dir = join(COCO_PATH, "images", "%s2017" % mode)
        instances_json = join(COCO_PATH, "annotations",
                              "instances_%s2017.json" % mode)
        stuff_json = join(COCO_PATH, "annotations", "stuff_%s2017.json" % mode)

        if stuff_only and stuff_json is None:
            print('WARNING: Got stuff_only=True but stuff_json=None.')
            print('Falling back to stuff_only=False.')

        self.image_dir = image_dir
        self.mask_size = mask_size
        self.max_samples = max_samples
        self.normalize_images = normalize_images
        self.include_relationships = include_relationships
        # self.set_image_size(image_size)

        tform = []
        if self.is_train:
            tform.append(
                RandomOrder([
                    Grayscale(),
                    Brightness(),
                    Contrast(),
                    Sharpness(),
                    Hue(),
                ]))

        tform += [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]

        self.transform_pipeline = Compose(tform)

        with open(instances_json, 'r') as f:
            instances_data = json.load(f)

        stuff_data = None
        if stuff_json is not None and stuff_json != '':
            with open(stuff_json, 'r') as f:
                stuff_data = json.load(f)

        self.image_ids = []
        self.image_id_to_filename = {}
        self.image_id_to_size = {}
        for image_data in instances_data['images']:
            image_id = image_data['id']
            filename = image_data['file_name']
            width = image_data['width']
            height = image_data['height']
            self.image_ids.append(image_id)
            self.image_id_to_filename[image_id] = filename
            self.image_id_to_size[image_id] = (width, height)

        self.vocab = {
            'object_name_to_idx': {},
            'pred_name_to_idx': {},
        }
        object_idx_to_name = {}
        all_instance_categories = []
        for category_data in instances_data['categories']:
            category_id = category_data['id']
            category_name = category_data['name']
            all_instance_categories.append(category_name)
            object_idx_to_name[category_id] = category_name
            self.vocab['object_name_to_idx'][category_name] = category_id
        all_stuff_categories = []
        if stuff_data:
            for category_data in stuff_data['categories']:
                category_name = category_data['name']
                category_id = category_data['id']
                all_stuff_categories.append(category_name)
                object_idx_to_name[category_id] = category_name
                self.vocab['object_name_to_idx'][category_name] = category_id

        if instance_whitelist is None:
            instance_whitelist = all_instance_categories
        if stuff_whitelist is None:
            stuff_whitelist = all_stuff_categories
        category_whitelist = set(instance_whitelist) | set(stuff_whitelist)

        # Add object data from instances
        self.image_id_to_objects = defaultdict(list)
        for object_data in instances_data['annotations']:
            image_id = object_data['image_id']
            _, _, w, h = object_data['bbox']
            W, H = self.image_id_to_size[image_id]
            box_area = (w * h) / (W * H)
            box_ok = box_area > min_object_size
            object_name = object_idx_to_name[object_data['category_id']]
            category_ok = object_name in category_whitelist
            other_ok = object_name != 'other' or include_other
            if box_ok and category_ok and other_ok:
                self.image_id_to_objects[image_id].append(object_data)

        # Add object data from stuff
        if stuff_data:
            image_ids_with_stuff = set()
            for object_data in stuff_data['annotations']:
                image_id = object_data['image_id']
                image_ids_with_stuff.add(image_id)
                _, _, w, h = object_data['bbox']
                W, H = self.image_id_to_size[image_id]
                box_area = (w * h) / (W * H)
                box_ok = box_area > min_object_size
                object_name = object_idx_to_name[object_data['category_id']]
                category_ok = object_name in category_whitelist
                other_ok = object_name != 'other' or include_other
                if box_ok and category_ok and other_ok:
                    self.image_id_to_objects[image_id].append(object_data)
            if stuff_only:
                new_image_ids = []
                for image_id in self.image_ids:
                    if image_id in image_ids_with_stuff:
                        new_image_ids.append(image_id)
                self.image_ids = new_image_ids

                all_image_ids = set(self.image_id_to_filename.keys())
                image_ids_to_remove = all_image_ids - image_ids_with_stuff
                for image_id in image_ids_to_remove:
                    self.image_id_to_filename.pop(image_id, None)
                    self.image_id_to_size.pop(image_id, None)
                    self.image_id_to_objects.pop(image_id, None)

        # COCO category labels start at 1, so use 0 for __image__
        self.vocab['object_name_to_idx']['__image__'] = 0

        # Build object_idx_to_name
        name_to_idx = self.vocab['object_name_to_idx']
        assert len(name_to_idx) == len(set(name_to_idx.values()))
        max_object_idx = max(name_to_idx.values())
        idx_to_name = ['NONE'] * (1 + max_object_idx)
        for name, idx in self.vocab['object_name_to_idx'].items():
            idx_to_name[idx] = name
        self.vocab['object_idx_to_name'] = idx_to_name

        # Prune images that have too few or too many objects
        new_image_ids = []
        total_objs = 0
        for image_id in self.image_ids:
            num_objs = len(self.image_id_to_objects[image_id])
            total_objs += num_objs
            if min_objects_per_image <= num_objs <= max_objects_per_image:
                new_image_ids.append(image_id)
        # self.image_ids = new_image_ids
        self.ids = new_image_ids

        self.vocab['pred_idx_to_name'] = [
            '__in_image__',
            'left of',
            'right of',
            'above',
            'below',
            'inside',
            'surrounding',
        ]
        self.vocab['pred_name_to_idx'] = {}
        for idx, name in enumerate(self.vocab['pred_idx_to_name']):
            self.vocab['pred_name_to_idx'][name] = idx

        # for object detection model to get number of class
        self.ind_to_classes = self.vocab['object_idx_to_name']
        self.ind_to_id = {i: i for i in range(len(self.ind_to_classes))}
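pred_idx_to_name lists purely geometric predicates, so the scene graph can be derived from box positions at load time. A rough, hypothetical rule for the four directional predicates (ignoring 'inside', 'surrounding' and '__in_image__', which need the actual boxes or masks):

def spatial_predicate(subj_center, obj_center):
    # Hypothetical heuristic: compare centroid offsets in image coordinates
    # (y grows downward) and pick one of the directional predicates above.
    dx = subj_center[0] - obj_center[0]
    dy = subj_center[1] - obj_center[1]
    if abs(dx) > abs(dy):
        return 'left of' if dx < 0 else 'right of'
    return 'above' if dy < 0 else 'below'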
Example #11
    def __init__(self,
                 mode,
                 roidb_file=VG200_SGG_FN,
                 dict_file=VG200_SGG_DICT_FN,
                 image_file=IM_DATA_FN,
                 saliency_file=SALIENCY_FN,
                 depth_file=DEPTH_FN,
                 filter_empty_rels=True,
                 num_im=-1,
                 num_val_im=5000,
                 filter_duplicate_rels=True,
                 filter_non_overlap=True,
                 captions_info_file=CAPTIONS_INFO,
                 captions_file=CAPTIONS_FN,
                 seq_per_img=5,
                 use_proposals=False):
        """
        Torch dataset for VisualGenome
        :param mode: Must be train, test, or val
        :param roidb_file:  HDF5 containing the GT boxes, classes, and relationships
        :param dict_file: JSON Contains mapping of classes/relationships to words
        :param image_file: HDF5 containing image filenames
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        :param num_im: Number of images in the entire dataset. -1 for all images.
        :param num_val_im: Number of images in the validation set (must be less than num_im
               unless num_im is -1.)
        :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN
            proposals
        """
        if mode not in ('test', 'train', 'val'):
            raise ValueError(
                "Mode must be in test, train, or val. Supplied {}".format(
                    mode))
        self.mode = mode

        # Initialize
        self.roidb_file = roidb_file
        self.dict_file = dict_file
        self.image_file = image_file
        self.saliency_file = saliency_file
        self.depth_file = depth_file
        self.captions_info = captions_info_file
        self.captions_file = captions_file

        self.filter_non_overlap = filter_non_overlap
        self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

        self.split_mask, self.gt_boxes, self.gt_classes, self.relationships, self.key_rel_idxes = load_graphs(
            self.roidb_file,
            self.mode,
            num_im,
            num_val_im=num_val_im,
            filter_empty_rels=filter_empty_rels,
            filter_non_overlap=self.filter_non_overlap and self.is_train,
        )
        self.image_index = np.where(self.split_mask)[0]
        self.sal_h5 = h5py.File(self.saliency_file, 'r')['images']  # 108073
        self.depth_h5 = h5py.File(self.depth_file, 'r')['images']  # 108073

        self.ind_to_classes, self.ind_to_predicates, self.subset_dbidxes = load_info(
            dict_file)
        self.filenames, self.coco_ids = load_image_filenames(image_file)
        self.filenames = [self.filenames[i] for i in self.subset_dbidxes
                          ]  # rearrange the filenames
        self.filenames = [
            self.filenames[i] for i in np.where(self.split_mask)[0]
        ]
        self.coco_ids = [self.coco_ids[i]
                         for i in self.subset_dbidxes]  # type: int
        self.coco_ids = [
            self.coco_ids[i] for i in np.where(self.split_mask)[0]
        ]

        # load captions, already in vg200_keyrel order
        info = json.load(open(self.captions_info))
        self.ix_to_word = info['ix_to_word']
        self.vocab_size = len(self.ix_to_word)
        self.seq_per_img = seq_per_img

        print('vocab size is ', self.vocab_size)

        # open the hdf5 file
        print('DataLoader loading h5 file')
        self.h5_label_file = h5py.File(captions_file, 'r', driver='core')

        # load in the sequence data
        self.seq_size = self.h5_label_file['labels'].shape
        self.seq_length = self.seq_size[1]
        print('max sequence length in data is ', self.seq_length)
        # load the pointers in full to RAM (should be small enough)

        self.label_start_ix = self.h5_label_file['label_start_ix'][:]
        self.label_end_ix = self.h5_label_file['label_end_ix'][:]

        if mode == 'test':
            self.label_start_ix = self.label_start_ix[18720:]
            self.label_end_ix = self.label_end_ix[18720:]
        else:
            if num_val_im != -1:
                if mode == 'train':
                    self.label_start_ix = self.label_start_ix[num_val_im:18720]
                    self.label_end_ix = self.label_end_ix[num_val_im:18720]
                elif mode == 'val':
                    self.label_start_ix = self.label_start_ix[:num_val_im]
                    self.label_end_ix = self.label_end_ix[:num_val_im]
            else:
                self.label_start_ix = self.label_start_ix[:18720]
                self.label_end_ix = self.label_end_ix[:18720]

        if use_proposals:
            print("Loading proposals", flush=True)
            p_h5 = h5py.File(PROPOSAL_FN, 'r')
            rpn_rois = p_h5['rpn_rois']
            rpn_scores = p_h5['rpn_scores']
            rpn_im_to_roi_idx = np.array(
                p_h5['im_to_roi_idx'][self.split_mask])
            rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

            self.rpn_rois = []
            for i in range(len(self.filenames)):
                rpn_i = np.column_stack((
                    rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                               rpn_num_rois[i]],
                    rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                             rpn_num_rois[i]],
                ))
                self.rpn_rois.append(rpn_i)
        else:
            self.rpn_rois = None

        # You could add data augmentation here. But we didn't.
        # tform = []
        # if self.is_train:
        #     tform.append(RandomOrder([
        #         Grayscale(),
        #         Brightness(),
        #         Contrast(),
        #         Sharpness(),
        #         Hue(),
        #     ]))

        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
        self.sal_transform_pipeline = Compose(
            [Resize(int(IM_SCALE / 16)),
             ToTensor()])
        self.depth_transform_pipeline = Compose(
            [Resize(int(IM_SCALE)), ToTensor()])
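Fetching the captions for one image from the pointers loaded above might look like the following sketch (a hypothetical helper; it assumes the common convention that label_start_ix/label_end_ix are 1-indexed, inclusive ranges into the 'labels' table):

    def get_captions(self, ix):
        # Hypothetical: return all caption rows for image ix, converting the
        # 1-indexed, inclusive HDF5 pointers to a Python slice.
        start = int(self.label_start_ix[ix]) - 1
        end = int(self.label_end_ix[ix])
        return self.h5_label_file['labels'][start:end]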
Example #12
    def __init__(self, mode, num_im=-1, num_val_im=5000):
        """
        Torch dataset for VisualGenome
        :param mode: Must be train, test, or val
        :param roidb_file:  HDF5 containing the GT boxes, classes, and relationships
        :param dict_file: JSON Contains mapping of classes/relationships to words
        :param image_file: HDF5 containing image filenames
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        :param num_im: Number of images in the entire dataset. -1 for all images.
        :param num_val_im: Number of images in the validation set (must be less than num_im
               unless num_im is -1.)
        :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN
            proposals
        """
        if mode not in ('test', 'train', 'val'):
            raise ValueError(
                "Mode must be in test, train, or val. Supplied {}".format(
                    mode))
        self.mode = mode

        # Initialize
        with open(os.path.join(DATA_PATH, "hico_" + mode + ".pkl"), 'rb') as f:
            self.roidb_file = pickle.load(f)

        # self.dict_file = dict_file
        # self.image_file = image_file
        # self.filter_non_overlap = filter_non_overlap
        # self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'
        #
        # self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
        #     self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
        #     filter_empty_rels=filter_empty_rels,
        #     filter_non_overlap=self.filter_non_overlap and self.is_train,
        # )

        self.filenames = load_image_filenames(
            self.roidb_file, os.path.join(DATA_PATH, 'hico/images/' + mode))
        # self.filenames = [self.filenames[i] for i in np.where(self.split_mask)[0]]
        self.gt_boxes,self.gt_classes,self.gt_human_boxes,self.gt_human_classes,self.gt_hoi_classes\
            =load_boxes(self.roidb_file)

        self.ind_to_classes, self.ind_to_predicates = load_info(
            self.roidb_file)

        # if use_proposals:
        #     print("Loading proposals", flush=True)
        #     p_h5 = h5py.File(PROPOSAL_FN, 'r')
        #     rpn_rois = p_h5['rpn_rois']
        #     rpn_scores = p_h5['rpn_scores']
        #     rpn_im_to_roi_idx = np.array(p_h5['im_to_roi_idx'][self.split_mask])
        #     rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])
        #
        #     self.rpn_rois = []
        #     for i in range(len(self.filenames)):
        #         rpn_i = np.column_stack((
        #             rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
        #             rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
        #         ))
        #         self.rpn_rois.append(rpn_i)
        # else:
        #     self.rpn_rois = None

        # You could add data augmentation here. But we didn't.
        # tform = []
        # if self.is_train:
        #     tform.append(RandomOrder([
        #         Grayscale(),
        #         Brightness(),
        #         Contrast(),
        #         Sharpness(),
        #         Hue(),
        #     ]))

        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
Example #13
    def __init__(self, mode, filter_duplicate_rels=True, mask_resolution=28, use_for_bias=False):
        """
        :param mode: train, val, or test
        """
        self.mask_resolution = mask_resolution
        self.mode = mode
        self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'
        tform = []
        if self.is_train:
            tform.append(RandomOrder([
                Grayscale(),
                Brightness(),
                Contrast(),
                Sharpness(),
                Hue(),
            ]))
        tform += [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
        #image_names = [name[:-4] for name in os.listdir(os.path.join(PIC_PATH, 'image/'+mode)) if name.endswith('.jpg')]
        error_list = [line.strip()[:-4] for line in open(os.path.join(PIC_PATH, mode+'_error_list.txt'))]
        image_names = [name[:-4] for name in os.listdir(os.path.join(PIC_PATH, 'image/' + mode)) if
                       name.endswith('.jpg') and name[:-4] not in error_list]
        image_names.sort(key=str.lower)
        if self.mode != 'test':
            semantic_names = [name[:-4] for name in os.listdir(os.path.join(PIC_PATH, 'segmentation/'+mode+'/semantic'))
                              if name.endswith('.png') and name[:-4] not in error_list]
            instance_names = [name[:-4] for name in os.listdir(os.path.join(PIC_PATH, 'segmentation/'+mode+'/instance'))
                              if name.endswith('.png') and name[:-4] not in error_list]
            semantic_names.sort(key=str.lower)
            instance_names.sort(key=str.lower)
            assert image_names == semantic_names
            assert image_names == instance_names
        # image_names = [name[:-4] for name in os.listdir(os.path.join(PIC_OFFLINE_PATH, 'val/obj_feat/')) if
        #                name.endswith('.npy')]
        # image_names.sort(key=str.lower)
        self.img_names = image_names
        print(len(self.img_names))
        rel_cats = json.load(open(os.path.join(PIC_PATH,'categories_list/relation_categories.json')))
        self.ind_to_predicates = [rel_cat['name'] for rel_cat in rel_cats]
        cls_cats = json.load(open(os.path.join(PIC_PATH, 'categories_list/label_categories.json')))
        self.ind_to_classes = [cls_cat['name'] for cls_cat in cls_cats]
        if self.mode != 'test':
            self.img2rels = dict()
            img_relations = json.load(open(os.path.join(PIC_PATH, 'relation/relations_'+self.mode+'.json')))
            for img_relation in img_relations:
                rels = []
                for index, rel in enumerate(img_relation['relations']):
                    temp = np.array([[rel['subject']-1, rel['object']-1, rel['relation']]], dtype=np.int32)
                    rels.append(temp)
                rels = np.concatenate(rels, axis=0)
                self.img2rels[img_relation['name'][:-4]] = rels
        print('====================')
        print(self.ind_to_classes)
        print(self.ind_to_predicates)

        self.id_to_ind = {ind: ind for ind, name in enumerate(self.ind_to_classes)}
        self.ind_to_id = {x: y for y, x in self.id_to_ind.items()}
        #self.create_coco_format()
        if use_for_bias:
            dataset = json.load(open(os.path.join(PIC_PATH, 'train.json')))
            anns, imgs = {}, {}
            imgToAnns = defaultdict(list)
            for img in dataset['images']:
                imgs[img['id']] = img['file_name'][:-4]
            for ann in dataset['annotations']:
                imgToAnns[imgs[ann['image_id']]].append(ann)
            self.img2boxes = dict()
            self.img2classes = dict()
            for img_name, anns in imgToAnns.items():
                gt_box = []
                gt_class = []
                for ann in anns:
                    gt_box.append(np.array([ann['bbox']]))
                    gt_class.append(np.array([ann['category_id']]))
                gt_box = np.concatenate(gt_box, axis=0)
                gt_class = np.concatenate(gt_class, axis=0)
                self.img2boxes[img_name] = gt_box
                self.img2classes[img_name] = gt_class
Example #14
    def __init__(self, mode, filter_duplicate_rels=True, mask_resolution=28):
        """
        :param mode: train, val, or test
        """
        self.mask_resolution = mask_resolution
        self.mode = mode
        self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'
        tform = []
        if self.is_train:
            tform.append(
                RandomOrder([
                    Grayscale(),
                    Brightness(),
                    Contrast(),
                    Sharpness(),
                    Hue(),
                ]))
        tform += [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
        image_names = [
            name[:-4]
            for name in os.listdir(os.path.join(PIC_PATH, 'image/' + mode))
            if name.endswith('.jpg')
        ]
        if self.mode != 'test':
            semantic_names = [
                name[:-4] for name in os.listdir(
                    os.path.join(PIC_PATH, 'segmentation/' + mode +
                                 '/semantic')) if name.endswith('.png')
            ]
            instance_names = [
                name[:-4] for name in os.listdir(
                    os.path.join(PIC_PATH, 'segmentation/' + mode +
                                 '/instance')) if name.endswith('.png')
            ]
            image_names.sort(key=str.lower)
            semantic_names.sort(key=str.lower)
            instance_names.sort(key=str.lower)
            assert image_names == semantic_names
            assert image_names == instance_names
        # image_names = [name[:-4] for name in os.listdir(os.path.join(PIC_OFFLINE_PATH, 'val/obj_feat/')) if
        #                name.endswith('.npy')]
        # image_names.sort(key=str.lower)
        self.img_names = image_names
        rel_cats = json.load(
            open(
                os.path.join(PIC_PATH,
                             'categories_list/relation_categories.json')))
        self.ind_to_predicates = [rel_cat['name'] for rel_cat in rel_cats]
        cls_cats = json.load(
            open(
                os.path.join(PIC_PATH,
                             'categories_list/label_categories.json')))
        self.ind_to_classes = [cls_cat['name'] for cls_cat in cls_cats]
        if self.mode != 'test':
            self.img2rels = dict()
            img_relations = json.load(
                open(
                    os.path.join(PIC_PATH,
                                 'relation/relations_' + self.mode + '.json')))
            for img_relation in img_relations:
                rels = []
                for index, rel in enumerate(img_relation['relations']):
                    temp = np.array([[
                        rel['subject'] - 1, rel['object'] - 1, rel['relation']
                    ]],
                                    dtype=np.int32)
                    rels.append(temp)
                rels = np.concatenate(rels, axis=0)
                self.img2rels[img_relation['name'][:-4]] = rels
        print('====================')
        print(self.ind_to_classes)
        print(self.ind_to_predicates)

        self.id_to_ind = {
            ind: ind
            for ind, name in enumerate(self.ind_to_classes)
        }
        self.ind_to_id = {x: y for y, x in self.id_to_ind.items()}
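Every example above ends with the same SquarePad / Resize(IM_SCALE) / ToTensor / Normalize pipeline. For reference, here is a self-contained approximation using only stock torchvision transforms; SquarePad comes from the surrounding projects, so a simple pad-to-square Lambda stands in for it, and IM_SCALE = 592 is taken from Example #4:

from PIL import Image
import torchvision.transforms as T
import torchvision.transforms.functional as TF

IM_SCALE = 592  # value used in Example #4; the other examples import it from a config


def make_pipeline():
    # Rough stand-in for the shared pipeline: pad the short side so the image
    # is square, resize to IM_SCALE, then convert to a tensor and normalize.
    return T.Compose([
        T.Lambda(lambda im: TF.pad(
            im, [0, 0, max(im.size) - im.size[0], max(im.size) - im.size[1]])),
        T.Resize(IM_SCALE),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])


if __name__ == '__main__':
    tensor = make_pipeline()(Image.new('RGB', (640, 480)))
    print(tensor.shape)  # expected: torch.Size([3, 592, 592])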