def __init__(self, mode): """ :param mode: train2014 or val2014 """ self.mode = mode self.root = os.path.join(COCO_PATH, mode) self.ann_file = os.path.join(COCO_PATH, 'annotations', 'instances_{}.json'.format(mode)) self.coco = COCO(self.ann_file) self.ids = [k for k in self.coco.imgs.keys() if len(self.coco.imgToAnns[k]) > 0] tform = [] if self.is_train: tform.append(RandomOrder([ Grayscale(), Brightness(), Contrast(), Sharpness(), Hue(), ])) tform += [ SquarePad(), Resize(IM_SCALE), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ] self.transform_pipeline = Compose(tform) self.ind_to_classes = ['__background__'] + [v['name'] for k, v in self.coco.cats.items()] # COCO inds are weird (84 inds in total but a bunch of numbers are skipped) self.id_to_ind = {coco_id:(ind+1) for ind, coco_id in enumerate(self.coco.cats.keys())} self.id_to_ind[0] = 0 self.ind_to_id = {x:y for y,x in self.id_to_ind.items()}
def __init__(self, mode, dict_file=CF_DICT_FN, data_file=CF_IM_DATA_FN, num_im=-1, num_val_im=5000):
    """
    Torch dataset for VisualGenome
    :param mode: Must be train, test, or val
    :param roidb_file: HDF5 containing the GT boxes, classes, and relationships
    :param dict_file: JSON Contains mapping of classes/relationships to words
    :param image_file: HDF5 containing image filenames
    :param filter_empty_rels: True if we filter out images without relationships between
                              boxes. One might want to set this to false if training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
    :param num_im: Number of images in the entire dataset. -1 for all images.
    :param num_val_im: Number of images in the validation set (must be less than num_im
                       unless num_im is -1.)
    :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN proposals
    """
    if mode not in ('test', 'train', 'val'):
        raise ValueError(
            "Mode must be in test, train, or val. Supplied {}".format(mode))
    self.mode = mode

    # Initialize
    self.dict_file = dict_file
    self.data_file = data_file
    self.filenames, self.gt_classes = load_data(data_file, CF_IMAGES, self.mode,
                                                num_im, num_val_im=num_val_im)
    self.ind_to_classes = load_info(dict_file)

    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
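# --- Illustrative sketch (not part of the dataset above) ---
# Nearly every loader in this section builds the same preprocessing pipeline, so here
# is a minimal standalone example of applying one with plain torchvision transforms.
# Assumptions: SquarePad is repo-specific and omitted here; IM_SCALE is taken to be 592
# (matching custom() further down); 'example.jpg' is a hypothetical path; torchvision's
# Resize(int) matches the shorter edge, which may differ from the repo's own Resize.
from PIL import Image
from torchvision.transforms import Compose, Resize, ToTensor, Normalize

pipeline = Compose([
    Resize(592),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
img = Image.open('example.jpg').convert('RGB')
x = pipeline(img)  # float tensor of shape (3, H, W), ImageNet-normalized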
def __init__(self):
    '''
    self.filenames = ['*****@*****.**', '*****@*****.**', '*****@*****.**',
                      '*****@*****.**', '*****@*****.**']
    '''
    self.filenames = glob.glob(
        '/home/suji/spring20/vilbert_beta/data/VCR/vcr1images/*/*.jpg')[:100]
    print("VCRDataset filenames:", self.filenames)

    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
def custom(filename):
    IM_SCALE = 592
    image_unpadded = Image.open(filename)
    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    result = Compose(tform)
    # print(image_unpadded.size)
    a = result(image_unpadded)
    b = F.to_pil_image(a)
    c = Variable(a.view(-1, 3, 592, 592))
    w, h = image_unpadded.size
    img_scale_factor = IM_SCALE / max(w, h)
    im_size = (IM_SCALE, int(w * img_scale_factor), img_scale_factor)
    return c, np.asarray([im_size])
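# --- Hedged usage sketch for custom() above ---
# The path is hypothetical, and the meaning of the returned im_size entries
# (padded scale, scaled width, scale factor) is inferred from the code rather
# than documented anywhere in this section.
im_tensor, im_sizes = custom('demo/example.jpg')
print(im_tensor.size())  # torch.Size([1, 3, 592, 592]) after SquarePad + Resize
print(im_sizes)          # one row of (IM_SCALE, scaled_width, scale_factor)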
def __init__(self, mode, filter_duplicate_rels=True, filter_non_overlap=True):
    """
    Torch dataset for VRD
    :param filter_empty_rels: True if we filter out images without relationships between
                              boxes. One might want to set this to false if training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
    """
    if mode not in ('test', 'train'):
        raise ValueError(
            "Mode must be test or train. Supplied {}".format(mode))
    self.mode = mode

    # Initialize
    self.anno_file = VRD_TRAIN if self.mode == 'train' else VRD_TEST
    self.image_root = VRD_TRAIN_IMAGES if self.mode == 'train' else VRD_TEST_IMAGES
    self.filter_non_overlap = (filter_non_overlap and self.mode == 'train')
    self.filter_duplicate_rels = (filter_duplicate_rels and self.mode == 'train')

    self.gt_boxes, self.gt_classes, self.relationships, self.filenames = load_graphs(
        self.anno_file,
        filter_non_overlap=self.filter_non_overlap and self.is_train,
    )
    self.ind_to_classes, self.ind_to_predicates = load_info(VRD_LABELS)

    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
def __init__(self, mode, roidb_file=VG_SGG_FN, dict_file=VG_SGG_DICT_FN,
             image_file=IM_DATA_FN, filter_empty_rels=True, num_im=-1, num_val_im=5000,
             filter_duplicate_rels=True, filter_non_overlap=True, use_proposals=False):
    """
    Torch dataset for VisualGenome
    :param mode: Must be train, test, or val
    :param roidb_file: HDF5 containing the GT boxes, classes, and relationships
    :param dict_file: JSON Contains mapping of classes/relationships to words
    :param image_file: HDF5 containing image filenames
    :param filter_empty_rels: True if we filter out images without relationships between
                              boxes. One might want to set this to false if training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
    :param num_im: Number of images in the entire dataset. -1 for all images.
    :param num_val_im: Number of images in the validation set (must be less than num_im
                       unless num_im is -1.)
    :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN proposals
    """
    if mode not in ('test', 'train', 'val'):
        raise ValueError(
            "Mode must be in test, train, or val. Supplied {}".format(mode))
    self.mode = mode

    # Initialize
    self.roidb_file = roidb_file
    self.dict_file = dict_file
    self.image_file = image_file
    self.filter_non_overlap = filter_non_overlap
    self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

    self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
        self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
        filter_empty_rels=filter_empty_rels,
        filter_non_overlap=self.filter_non_overlap and self.is_train,
    )

    self.filenames = load_image_filenames(image_file)
    # CHANGE
    index_list = np.where(self.split_mask)[0]
    trimmed_list = list(filter(lambda x: x < len(self.filenames), index_list))
    print("num files, num of files that will be used",
          len(self.filenames), len(trimmed_list))
    # END
    self.filenames = [self.filenames[i] for i in trimmed_list]

    self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)

    if use_proposals:
        print("Loading proposals", flush=True)
        p_h5 = h5py.File(PROPOSAL_FN, 'r')
        rpn_rois = p_h5['rpn_rois']
        rpn_scores = p_h5['rpn_scores']
        rpn_im_to_roi_idx = np.array(p_h5['im_to_roi_idx'][self.split_mask])
        rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

        self.rpn_rois = []
        for i in range(len(self.filenames)):
            rpn_i = np.column_stack((
                rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
                rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
            ))
            self.rpn_rois.append(rpn_i)
    else:
        self.rpn_rois = None

    # You could add data augmentation here. But we didn't.
    # tform = []
    # if self.is_train:
    #     tform.append(RandomOrder([
    #         Grayscale(),
    #         Brightness(),
    #         Contrast(),
    #         Sharpness(),
    #         Hue(),
    #     ]))
    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
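# --- Illustrative sketch of the proposal layout built above ---
# Each self.rpn_rois[i] stacks the RPN score in column 0 with the four box
# coordinates in columns 1-4, giving an array of shape (num_rois_i, 5).
# The class name VG and the constructor call below are hypothetical.
dataset = VG('train', use_proposals=True)
rois_0 = dataset.rpn_rois[0]                  # ndarray, shape (num_rois_0, 5)
scores, boxes = rois_0[:, 0], rois_0[:, 1:5]  # objectness scores, box coordinates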
def __init__(self,
             mode,
             roidb_file=VG_SGG_FN,
             dict_file=VG_SGG_DICT_FN,
             image_file=IM_DATA_FN,
             filter_empty_rels=True,
             num_im=-1,
             num_val_im=5000,
             filter_duplicate_rels=True,
             filter_non_overlap=True,  # num_im=-1
             use_proposals=False):
    """
    Torch dataset for VisualGenome
    :param mode: Must be train, test, or val
    :param roidb_file: HDF5 containing the GT boxes, classes, and relationships
    :param dict_file: JSON Contains mapping of classes/relationships to words
    :param image_file: HDF5 containing image filenames
    :param filter_empty_rels: True if we filter out images without relationships between
                              boxes. One might want to set this to false if training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
    :param num_im: Number of images in the entire dataset. -1 for all images.
    :param num_val_im: Number of images in the validation set (must be less than num_im
                       unless num_im is -1.)
    :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN proposals
    """
    if mode not in ('test', 'train', 'val'):
        raise ValueError(
            "Mode must be in test, train, or val. Supplied {}".format(mode))
    self.mode = mode
    self.mode = 'val'  # note: this immediately overrides the mode argument above
    print('mode', self.mode)

    # Initialize
    self.roidb_file = roidb_file
    self.dict_file = dict_file
    self.image_file = image_file
    self.filter_non_overlap = filter_non_overlap
    self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

    # self.split_mask = load_split(
    #     self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
    #     filter_empty_rels=filter_empty_rels,
    # )
    # self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)
    self.ind_to_classes = {}
    self.ind_to_predicates = {}

    # Load classes
    self._classes = ['__background__']
    self._class_to_ind = {}
    self._class_to_ind[self._classes[0]] = 0
    self.ind_to_classes = {}
    self.ind_to_classes[self._classes[0]] = 0
    with open(os.path.join(VOCAB_DIR, 'objects_vocab.txt')) as f:
        count = 1
        for object in f.readlines():
            names = [n.lower().strip() for n in object.split(',')]
            self._classes.append(names[0])
            for n in names:
                self._class_to_ind[n] = count
            self.ind_to_classes[names[0]] = count
            if count == 151:
                break
            count += 1
    # print('num_classes', len(self.ind_to_classes))

    # Load attributes
    self._attributes = ['__no_attribute__']
    self._attribute_to_ind = {}
    self._attribute_to_ind[self._attributes[0]] = 0
    with open(os.path.join(VOCAB_DIR, 'attributes_vocab.txt')) as f:
        count = 1
        for att in f.readlines():
            names = [n.lower().strip() for n in att.split(',')]
            self._attributes.append(names[0])
            for n in names:
                self._attribute_to_ind[n] = count
            count += 1

    # Load relations
    self._relations = ['__no_relation__']
    self._relation_to_ind = {}
    self._relation_to_ind[self._relations[0]] = 0
    self.ind_to_predicates = {}
    self.ind_to_predicates[self._relations[0]] = 0
    with open('/share/yutong/projects/neural-motifs/data/VG-SGG-dicts.json') as f:
        dictionary = json.load(f)['idx_to_predicate']
        for key in dictionary.keys():
            self.ind_to_predicates[dictionary[key]] = int(key)
    with open(os.path.join(VOCAB_DIR, 'relations_vocab.txt')) as f:
        count = 1
        for rel in f.readlines():
            names = [n.lower().strip() for n in rel.split(',')]
            self._relations.append(names[0])
            for n in names:
                self._relation_to_ind[n] = count
            self.ind_to_predicates[names[0]] = count
            count += 1
    # print('num relations', len(self.ind_to_predicates))

    self.ind_to_classes = sorted(self.ind_to_classes,
                                 key=lambda k: self.ind_to_classes[k])
    self.ind_to_predicates = sorted(self.ind_to_predicates,
                                    key=lambda k: self.ind_to_predicates[k])

    self.filenames, keep = load_image_filenames(image_file)
    # self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
    # filter_empty_rels=filter_empty_rels,
    self.split_mask = \
        load_graphs(keep, self.roidb_file, self.filenames, self.num_classes,
                    self._class_to_ind, self._relation_to_ind, self._attribute_to_ind,
                    filter_empty_rels=filter_empty_rels,
                    filter_non_overlap=filter_non_overlap,
                    mode=self.mode, num_im=num_im, num_val_im=num_val_im)
    self.split_mask = self.split_mask[keep]
    # self.split_mask = self.split_mask[:len(self.filenames)]
    # print('filename num', len(self.filenames))
    # print('split num', len(self.split_mask))
    self.filenames = [self.filenames[i] for i in np.where(self.split_mask)[0]]
    # graphs_file, filenames, num_classes, _class_to_ind, _relation_to_ind, _attribute_to_ind,
    # filter_empty_rels=True, filter_non_overlap=False, mode='train', num_im=-1, num_val_im=0

    if use_proposals:
        print("Loading proposals", flush=True)
        p_h5 = h5py.File(PROPOSAL_FN, 'r')
        rpn_rois = p_h5['rpn_rois']
        rpn_scores = p_h5['rpn_scores']
        rpn_im_to_roi_idx = np.array(p_h5['im_to_roi_idx'][self.split_mask])
        rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

        self.rpn_rois = []
        for i in range(len(self.filenames)):
            rpn_i = np.column_stack((
                rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
                rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
            ))
            self.rpn_rois.append(rpn_i)
    else:
        self.rpn_rois = None

    # You could add data augmentation here. But we didn't.
    # tform = []
    # if self.is_train:
    #     tform.append(RandomOrder([
    #         Grayscale(),
    #         Brightness(),
    #         Contrast(),
    #         Sharpness(),
    #         Hue(),
    #     ]))
    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
def __init__(self, mode, roidb_file=VG_SGG_FN, dict_file=VG_SGG_DICT_FN,
             image_file=IM_DATA_FN, filter_empty_rels=True, num_im=-1, num_val_im=5000,
             filter_duplicate_rels=True, filter_non_overlap=True, use_proposals=False):
    """
    Torch dataset for VisualGenome
    :param mode: Must be train, test, or val
    :param roidb_file: VG-SGG.h5
    :param dict_file: VG-SGG-dicts.json
    :param image_file: image_data.json
    :param filter_empty_rels: True if we filter out images without relationships between
                              boxes. One might want to set this to false if training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
    :param num_im: Number of images in the entire dataset. -1 for all images.
    :param num_val_im: Number of images in the validation set (must be less than num_im
                       unless num_im is -1.)
    :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN proposals
    """
    # ipdb.set_trace()
    if mode not in ('test', 'train', 'val'):
        raise ValueError(
            "Mode must be in test, train, or val. Supplied {}".format(mode))
    self.mode = mode

    # Initialize
    self.roidb_file = roidb_file    # HDF5 containing the GT boxes, classes, and relationships
    self.dict_file = dict_file      # JSON Contains mapping of classes/relationships to words
    self.image_file = image_file    # JSON containing image filenames
    self.filter_non_overlap = filter_non_overlap
    self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

    # (ndarray, list, list, list)
    self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
        self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
        filter_empty_rels=filter_empty_rels,
        filter_non_overlap=self.filter_non_overlap and self.is_train,
    )

    # list
    self.filenames = load_image_filenames(image_file)
    # get the train list if 'train'; get the 'test' list if 'test'; get the 'val' list if 'val'
    self.filenames = [self.filenames[i] for i in np.where(self.split_mask)[0]]

    """
    if self.mode == 'train':
        num_for_train = 1000
        self.gt_boxes = self.gt_boxes[:num_for_train]
        self.gt_classes = self.gt_classes[:num_for_train]
        self.relationships = self.relationships[:num_for_train]
        self.filenames = self.filenames[:num_for_train]
    elif self.mode == 'val':
        num_for_val = 100
        self.gt_boxes = self.gt_boxes[:num_for_val]
        self.gt_classes = self.gt_classes[:num_for_val]
        self.relationships = self.relationships[:num_for_val]
        self.filenames = self.filenames[:num_for_val]
    elif self.mode == 'test':
        num_for_test = 4000
        self.gt_boxes = self.gt_boxes[:num_for_test]
        self.gt_classes = self.gt_classes[:num_for_test]
        self.relationships = self.relationships[:num_for_test]
        self.filenames = self.filenames[:num_for_test]
    else:
        pass
    """
    '''
    # 15 15 15
    if self.mode == 'test':
        txtfile = open('test_img.txt', 'w')
        for one_dict in self.filenames:
            txtfile.write(one_dict)
            txtfile.write('\n')
        txtfile.close()
    '''
    print("From visual_genome.py: the total number of ", self.mode, " images is")
    print(len(self.filenames))

    self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)

    # whether use RPN proposals of Xu et als; will correspond to self.filenames
    if use_proposals:
        print("Loading proposals", flush=True)
        p_h5 = h5py.File(PROPOSAL_FN, 'r')
        rpn_rois = p_h5['rpn_rois']
        rpn_scores = p_h5['rpn_scores']
        rpn_im_to_roi_idx = np.array(p_h5['im_to_roi_idx'][self.split_mask])
        rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

        self.rpn_rois = []
        for i in range(len(self.filenames)):
            rpn_i = np.column_stack((
                rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
                rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
            ))
            self.rpn_rois.append(rpn_i)
    else:
        self.rpn_rois = None

    # You could add data augmentation here. But we didn't.
    # tform = []
    # if self.is_train:
    #     tform.append(RandomOrder([
    #         Grayscale(),
    #         Brightness(),
    #         Contrast(),
    #         Sharpness(),
    #         Hue(),
    #     ]))
    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
def __init__(self, mode, data_dir, filter_empty_rels=True, num_im=-1, num_val_im=5000,
             filter_duplicate_rels=True, filter_non_overlap=True,
             min_graph_size=-1, max_graph_size=-1, torch_detector=False,
             n_shots=-1, square_pad=True, training_triplets=None,
             exclude_left_right=False):
    """
    Torch dataset for VisualGenome
    :param mode: Must be train, test, or val
    :param roidb_file: HDF5 containing the GT boxes, classes, and relationships
    :param dict_file: JSON Contains mapping of classes/relationships to words
    :param image_file: HDF5 containing image filenames
    :param filter_empty_rels: True if we filter out images without relationships between
                              boxes. One might want to set this to false if training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
    :param num_im: Number of images in the entire dataset. -1 for all images.
    :param num_val_im: Number of images in the validation set (must be less than num_im
                       unless num_im is -1.)
    :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN proposals
    """
    # print(mode, VG.split, data_dir, num_im, num_val_im, torch_detector, min_graph_size, max_graph_size, n_shots)
    assert mode in ('test', 'train', 'val'), '%s mode not recognized' % mode
    self.mode = mode
    self.max_graph_size = max_graph_size if mode == 'train' else -1
    self.min_graph_size = min_graph_size if mode == 'train' else -1
    self.filter_non_overlap = filter_non_overlap
    self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'
    self.n_shots = n_shots

    assert VG.split in ['stanford', 'vte', 'gqa'], ('invalid split', VG.split)
    if training_triplets:
        assert mode in ['val', 'test'], mode

    if VG.split == 'stanford':
        data_name = 'VG'
        self.roidb_file = os.path.join(data_dir, data_name, 'stanford_filtered', 'VG-SGG.h5')
        self.dict_file = os.path.join(data_dir, data_name, 'stanford_filtered', 'VG-SGG-dicts.json')
        self.image_file = os.path.join(data_dir, data_name, 'stanford_filtered', 'image_data.json')
        self.images_dir = os.path.join(data_dir, data_name, 'VG_100K')

        self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
            self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
            filter_empty_rels=filter_empty_rels,
            min_graph_size=self.min_graph_size,
            max_graph_size=self.max_graph_size,
            filter_non_overlap=self.filter_non_overlap and self.is_train,
            training_triplets=training_triplets,
            random_subset=False,
            filter_zeroshots=True,
            n_shots=n_shots)

    elif VG.split == 'vte':
        data_name = 'VG'
        self.images_dir = os.path.join(data_dir, data_name, 'VG_100K')
        vte = VTESplit(os.path.join(data_dir, data_name, 'vtranse', 'vg1_2_meta.h5'),
                       mode=self.mode)
        self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = vte.load_graphs(
            num_im, num_val_im=num_val_im,
            filter_empty_rels=filter_empty_rels,
            min_graph_size=self.min_graph_size,
            max_graph_size=self.max_graph_size,
            training_triplets=training_triplets,
            random_subset=False,
            filter_zeroshots=True)

    elif VG.split == 'gqa':
        data_name = 'GQA'
        self.images_dir = os.path.join(data_dir, 'VG/VG_100K')

        # Load the JSON containing the SGs
        f_mode = mode
        if mode == 'val':
            f_mode = 'train'  # we are using the last 5k training SGs for validation
        elif mode == 'test':
            f_mode = 'val'    # GQA has no public test SGs, so use the val set instead

        img_list_file = os.path.join(data_dir, data_name, '%s_images.json' % f_mode)
        if os.path.isfile(img_list_file):
            print('Loading GQA-%s image ids...' % mode)
            with open(img_list_file, 'r') as f:
                self.image_ids = json.load(f)
        else:
            # Use only images having question-answer pairs in the balanced split
            print('Loading GQA-%s questions...' % mode)
            with open(os.path.join(data_dir, data_name,
                                   '%s_balanced_questions.json' % f_mode), 'rb') as f:
                Q_dict = json.load(f)
            self.image_ids = set()
            for v in Q_dict.values():
                self.image_ids.add(v['imageId'])
            with open(img_list_file, 'w') as f:
                json.dump(list(self.image_ids), f)
            del Q_dict

        self.image_ids = sorted(list(self.image_ids))  # sort to make it consistent for different runs
        self.filenames = gqa.load_image_filenames(self.image_ids, mode, self.images_dir)

        if VG.train_sgs is None:
            print('Loading GQA-%s scene graphs...' % mode)
            with open(os.path.join(data_dir, data_name,
                                   'sceneGraphs/train_sceneGraphs.json'), 'rb') as f:
                VG.train_sgs = json.load(f)
            with open(os.path.join(data_dir, data_name,
                                   'sceneGraphs/val_sceneGraphs.json'), 'rb') as f:
                VG.val_sgs = json.load(f)

        train_sgs, val_sgs = VG.train_sgs, VG.val_sgs
        (self.ind_to_classes, self.ind_to_predicates,
         self.classes_to_ind, self.predicates_to_ind) = gqa.load_info(train_sgs, val_sgs)
        (self.split_mask, self.gt_boxes,
         self.gt_classes, self.relationships) = gqa.load_graphs(
            train_sgs if f_mode == 'train' else val_sgs,
            self.image_ids,
            self.classes_to_ind,
            self.predicates_to_ind,
            num_val_im=num_val_im,
            mode=mode,
            training_triplets=training_triplets,
            min_graph_size=self.min_graph_size,
            max_graph_size=self.max_graph_size,
            random_subset=False,
            filter_empty_rels=filter_empty_rels,
            filter_zeroshots=True,
            exclude_left_right=exclude_left_right)
        del train_sgs, val_sgs, self.image_ids  # force to clean RAM
    else:
        raise NotImplementedError(VG.split)

    self.root = os.path.join(data_dir, data_name)

    if VG.split == 'stanford':
        self.filenames = load_image_filenames(
            self.image_file, self.images_dir) if VG.filenames is None else VG.filenames
        self.ind_to_classes, self.ind_to_predicates = load_info(self.dict_file)
    elif VG.split == 'vte':
        self.filenames = vte.load_image_filenames(self.images_dir)
        self.ind_to_classes, self.ind_to_predicates = vte.load_info()
        vte.close()

    if VG.filenames is None:
        VG.filenames = self.filenames

    # if self.mode == 'train':
    #     print('\nind_to_classes', len(self.ind_to_classes), self.ind_to_classes)
    #     print('\nind_to_predicates', len(self.ind_to_predicates), self.ind_to_predicates, '\n')

    self.triplet_counts = {}
    # c = 0
    N_total, M_FG_total, M_BG_total, n_obj_lst, fg_lst, sp_lst = 0, 0, 0, [], [], []
    for im in range(len(self.gt_classes)):
        n_obj = len(self.gt_classes[im])
        n_obj_lst.append(n_obj)
        fg_lst.append(len(filter_dups(self.relationships[im])))
        sp_lst.append(100 * float(fg_lst[-1]) / (n_obj * (n_obj - 1)))
        N_total += n_obj
        M_FG_total += fg_lst[-1]
        M_BG_total += n_obj * (n_obj - 1)
        for rel_ind, tri in enumerate(self.relationships[im]):
            o1, o2, R = tri
            tri_str = '{}_{}_{}'.format(self.gt_classes[im][o1], R, self.gt_classes[im][o2])
            if training_triplets:
                if isinstance(training_triplets, dict):
                    assert tri_str in training_triplets and training_triplets[tri_str] <= self.n_shots, (
                        mode, len(training_triplets), tri_str,
                        training_triplets[tri_str], self.n_shots)
            if tri_str not in self.triplet_counts:
                self.triplet_counts[tri_str] = 0
            self.triplet_counts[tri_str] += 1

    n_samples = len(self.gt_classes)
    # self.triplets = list(self.triplet_counts.keys())
    counts = list(self.triplet_counts.values())

    if mode == 'train':
        self.subj_pred_pairs, self.pred_obj_pairs = {}, {}
        for im in range(len(self.gt_classes)):
            for rel_ind, tri in enumerate(self.relationships[im]):
                o1, o2, R = tri
                tri_str = '{}_{}_{}'.format(self.gt_classes[im][o1], R, self.gt_classes[im][o2])

                pair = '{}_{}'.format(self.gt_classes[im][o1], R)  # man wearing
                if pair not in self.subj_pred_pairs:
                    self.subj_pred_pairs[pair] = {}
                self.subj_pred_pairs[pair][self.gt_classes[im][o2]] = self.triplet_counts[tri_str]

                pair = '{}_{}'.format(R, self.gt_classes[im][o2])  # on surfboard
                if pair not in self.pred_obj_pairs:
                    self.pred_obj_pairs[pair] = {}
                self.pred_obj_pairs[pair][self.gt_classes[im][o1]] = self.triplet_counts[tri_str]

        print('subj_pred_pairs, pred_obj_pairs', len(self.subj_pred_pairs), len(self.pred_obj_pairs))

    print('{} images, {} triplets ({} unique triplets)'.format(
        # VG.split,
        # mode,
        len(self.gt_classes),
        # np.sum(self.split_mask),
        # len(self.split_mask),
        np.sum(counts),             # total count
        len(self.triplet_counts)))  # unique
        # np.min(counts), np.max(counts)))

    def stats(x):
        return 'min={:.1f}, max={:.1f}, mean={:.1f}, std={:.1f}'.format(
            np.min(x), np.max(x), np.mean(x), np.std(x))

    print('Stats: {} objects ({}), {} FG edges ({}), {} BG edges ({:.2f} avg), graph density {}'
          .format(N_total, str(stats(n_obj_lst)), M_FG_total, str(stats(fg_lst)),
                  M_BG_total, M_BG_total / n_samples, str(stats(sp_lst))))

    assert len(self.split_mask) == len(self.filenames), (len(self.split_mask), len(self.filenames))
    self.filenames = [self.filenames[i] for i in np.where(self.split_mask)[0]]

    # if self.mode == 'train':
    #     print('example of triplets')
    #     for tri in list(self.triplet_counts.keys())[:5]:
    #         print(tri, self.triplet2str(tri), self.triplet_counts[tri])

    self.rpn_rois = None
    self.torch_detector = torch_detector

    tform = [SquarePad()] if square_pad else []
    if torch_detector:
        tform += [ToTensor()]
    else:
        tform += [
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]
    self.transform_pipeline = Compose(tform)
def __init__(
        self,
        mode,
        # image_dir, instances_json,
        stuff_json=None,
        stuff_only=True,
        image_size=(64, 64),
        mask_size=16,
        normalize_images=True,
        max_samples=None,
        include_relationships=True,
        min_object_size=0.02,
        min_objects_per_image=3,
        max_objects_per_image=8,
        include_other=False,
        instance_whitelist=None,
        stuff_whitelist=None):
    """
    A PyTorch Dataset for loading Coco and Coco-Stuff annotations and converting
    them to scene graphs on the fly.

    Inputs:
    - image_dir: Path to a directory where images are held
    - instances_json: Path to a JSON file giving COCO annotations
    - stuff_json: (optional) Path to a JSON file giving COCO-Stuff annotations
    - stuff_only: (optional, default True) If True then only iterate over
      images which appear in stuff_json; if False then iterate over all images
      in instances_json.
    - image_size: Size (H, W) at which to load images. Default (64, 64).
    - mask_size: Size M for object segmentation masks; default 16.
    - normalize_image: If True then normalize images by subtracting ImageNet
      mean pixel and dividing by ImageNet std pixel.
    - max_samples: If None use all images. Otherwise only use images in the
      range [0, max_samples). Default None.
    - include_relationships: If True then include spatial relationships; if
      False then only include the trivial __in_image__ relationship.
    - min_object_size: Ignore objects whose bounding box takes up less than
      this fraction of the image.
    - min_objects_per_image: Ignore images which have fewer than this many
      object annotations.
    - max_objects_per_image: Ignore images which have more than this many
      object annotations.
    - include_other: If True, include COCO-Stuff annotations which have category
      "other". Default is False, because I found that these were really noisy
      and pretty much impossible for the system to model.
    - instance_whitelist: None means use all instance categories. Otherwise a
      list giving a whitelist of instance category names to use.
    - stuff_whitelist: None means use all stuff categories. Otherwise a list
      giving a whitelist of stuff category names to use.
    """
    super(Dataset, self).__init__()
    self.mode = mode
    image_dir = join(COCO_PATH, "images", "%s2017" % mode)
    instances_json = join(COCO_PATH, "annotations", "instances_%s2017.json" % mode)
    stuff_json = join(COCO_PATH, "annotations", "stuff_%s2017.json" % mode)

    if stuff_only and stuff_json is None:
        print('WARNING: Got stuff_only=True but stuff_json=None.')
        print('Falling back to stuff_only=False.')

    self.image_dir = image_dir
    self.mask_size = mask_size
    self.max_samples = max_samples
    self.normalize_images = normalize_images
    self.include_relationships = include_relationships
    # self.set_image_size(image_size)

    tform = []
    if self.is_train:
        tform.append(RandomOrder([
            Grayscale(),
            Brightness(),
            Contrast(),
            Sharpness(),
            Hue(),
        ]))
    tform += [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)

    with open(instances_json, 'r') as f:
        instances_data = json.load(f)

    stuff_data = None
    if stuff_json is not None and stuff_json != '':
        with open(stuff_json, 'r') as f:
            stuff_data = json.load(f)

    self.image_ids = []
    self.image_id_to_filename = {}
    self.image_id_to_size = {}
    for image_data in instances_data['images']:
        image_id = image_data['id']
        filename = image_data['file_name']
        width = image_data['width']
        height = image_data['height']
        self.image_ids.append(image_id)
        self.image_id_to_filename[image_id] = filename
        self.image_id_to_size[image_id] = (width, height)

    self.vocab = {
        'object_name_to_idx': {},
        'pred_name_to_idx': {},
    }
    object_idx_to_name = {}
    all_instance_categories = []
    for category_data in instances_data['categories']:
        category_id = category_data['id']
        category_name = category_data['name']
        all_instance_categories.append(category_name)
        object_idx_to_name[category_id] = category_name
        self.vocab['object_name_to_idx'][category_name] = category_id
    all_stuff_categories = []
    if stuff_data:
        for category_data in stuff_data['categories']:
            category_name = category_data['name']
            category_id = category_data['id']
            all_stuff_categories.append(category_name)
            object_idx_to_name[category_id] = category_name
            self.vocab['object_name_to_idx'][category_name] = category_id

    if instance_whitelist is None:
        instance_whitelist = all_instance_categories
    if stuff_whitelist is None:
        stuff_whitelist = all_stuff_categories
    category_whitelist = set(instance_whitelist) | set(stuff_whitelist)

    # Add object data from instances
    self.image_id_to_objects = defaultdict(list)
    for object_data in instances_data['annotations']:
        image_id = object_data['image_id']
        _, _, w, h = object_data['bbox']
        W, H = self.image_id_to_size[image_id]
        box_area = (w * h) / (W * H)
        box_ok = box_area > min_object_size
        object_name = object_idx_to_name[object_data['category_id']]
        category_ok = object_name in category_whitelist
        other_ok = object_name != 'other' or include_other
        if box_ok and category_ok and other_ok:
            self.image_id_to_objects[image_id].append(object_data)

    # Add object data from stuff
    if stuff_data:
        image_ids_with_stuff = set()
        for object_data in stuff_data['annotations']:
            image_id = object_data['image_id']
            image_ids_with_stuff.add(image_id)
            _, _, w, h = object_data['bbox']
            W, H = self.image_id_to_size[image_id]
            box_area = (w * h) / (W * H)
            box_ok = box_area > min_object_size
            object_name = object_idx_to_name[object_data['category_id']]
            category_ok = object_name in category_whitelist
            other_ok = object_name != 'other' or include_other
            if box_ok and category_ok and other_ok:
                self.image_id_to_objects[image_id].append(object_data)

        if stuff_only:
            new_image_ids = []
            for image_id in self.image_ids:
                if image_id in image_ids_with_stuff:
                    new_image_ids.append(image_id)
            self.image_ids = new_image_ids

            all_image_ids = set(self.image_id_to_filename.keys())
            image_ids_to_remove = all_image_ids - image_ids_with_stuff
            for image_id in image_ids_to_remove:
                self.image_id_to_filename.pop(image_id, None)
                self.image_id_to_size.pop(image_id, None)
                self.image_id_to_objects.pop(image_id, None)

    # COCO category labels start at 1, so use 0 for __image__
    self.vocab['object_name_to_idx']['__image__'] = 0

    # Build object_idx_to_name
    name_to_idx = self.vocab['object_name_to_idx']
    assert len(name_to_idx) == len(set(name_to_idx.values()))
    max_object_idx = max(name_to_idx.values())
    idx_to_name = ['NONE'] * (1 + max_object_idx)
    for name, idx in self.vocab['object_name_to_idx'].items():
        idx_to_name[idx] = name
    self.vocab['object_idx_to_name'] = idx_to_name

    # Prune images that have too few or too many objects
    new_image_ids = []
    total_objs = 0
    for image_id in self.image_ids:
        num_objs = len(self.image_id_to_objects[image_id])
        total_objs += num_objs
        if min_objects_per_image <= num_objs <= max_objects_per_image:
            new_image_ids.append(image_id)
    # self.image_ids = new_image_ids
    self.ids = new_image_ids

    self.vocab['pred_idx_to_name'] = [
        '__in_image__',
        'left of',
        'right of',
        'above',
        'below',
        'inside',
        'surrounding',
    ]
    self.vocab['pred_name_to_idx'] = {}
    for idx, name in enumerate(self.vocab['pred_idx_to_name']):
        self.vocab['pred_name_to_idx'][name] = idx

    # for object detection model to get number of class
    self.ind_to_classes = self.vocab['object_idx_to_name']
    self.ind_to_id = {i: i for i in range(len(self.ind_to_classes))}
def __init__(self, mode, roidb_file=VG200_SGG_FN, dict_file=VG200_SGG_DICT_FN,
             image_file=IM_DATA_FN, saliency_file=SALIENCY_FN, depth_file=DEPTH_FN,
             filter_empty_rels=True, num_im=-1, num_val_im=5000,
             filter_duplicate_rels=True, filter_non_overlap=True,
             captions_info_file=CAPTIONS_INFO, captions_file=CAPTIONS_FN,
             seq_per_img=5, use_proposals=False):
    """
    Torch dataset for VisualGenome
    :param mode: Must be train, test, or val
    :param roidb_file: HDF5 containing the GT boxes, classes, and relationships
    :param dict_file: JSON Contains mapping of classes/relationships to words
    :param image_file: HDF5 containing image filenames
    :param filter_empty_rels: True if we filter out images without relationships between
                              boxes. One might want to set this to false if training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
    :param num_im: Number of images in the entire dataset. -1 for all images.
    :param num_val_im: Number of images in the validation set (must be less than num_im
                       unless num_im is -1.)
    :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN proposals
    """
    if mode not in ('test', 'train', 'val'):
        raise ValueError(
            "Mode must be in test, train, or val. Supplied {}".format(mode))
    self.mode = mode

    # Initialize
    self.roidb_file = roidb_file
    self.dict_file = dict_file
    self.image_file = image_file
    self.saliency_file = saliency_file
    self.depth_file = depth_file
    self.captions_info = captions_info_file
    self.captions_file = captions_file
    self.filter_non_overlap = filter_non_overlap
    self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

    self.split_mask, self.gt_boxes, self.gt_classes, self.relationships, self.key_rel_idxes = load_graphs(
        self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
        filter_empty_rels=filter_empty_rels,
        filter_non_overlap=self.filter_non_overlap and self.is_train,
    )
    self.image_index = np.where(self.split_mask)[0]

    self.sal_h5 = h5py.File(self.saliency_file, 'r')['images']  # 108073
    self.depth_h5 = h5py.File(self.depth_file, 'r')['images']   # 108073

    self.ind_to_classes, self.ind_to_predicates, self.subset_dbidxes = load_info(dict_file)

    self.filenames, self.coco_ids = load_image_filenames(image_file)
    self.filenames = [self.filenames[i] for i in self.subset_dbidxes]  # rearrange the filenames
    self.filenames = [self.filenames[i] for i in np.where(self.split_mask)[0]]
    self.coco_ids = [self.coco_ids[i] for i in self.subset_dbidxes]  # type: int
    self.coco_ids = [self.coco_ids[i] for i in np.where(self.split_mask)[0]]

    # load captions, already in vg200_keyrel order
    info = json.load(open(self.captions_info))
    self.ix_to_word = info['ix_to_word']
    self.vocab_size = len(self.ix_to_word)
    self.seq_per_img = seq_per_img
    print('vocab size is ', self.vocab_size)

    # open the hdf5 file
    print('DataLoader loading h5 file')
    self.h5_label_file = h5py.File(captions_file, 'r', driver='core')
    # load in the sequence data
    self.seq_size = self.h5_label_file['labels'].shape
    self.seq_length = self.seq_size[1]
    print('max sequence length in data is ', self.seq_length)
    # load the pointers in full to RAM (should be small enough)
    self.label_start_ix = self.h5_label_file['label_start_ix'][:]
    self.label_end_ix = self.h5_label_file['label_end_ix'][:]
    if mode == 'test':
        self.label_start_ix = self.label_start_ix[18720:]
        self.label_end_ix = self.label_end_ix[18720:]
    else:
        if num_val_im != -1:
            if mode == 'train':
                self.label_start_ix = self.label_start_ix[num_val_im:18720]
                self.label_end_ix = self.label_end_ix[num_val_im:18720]
            elif mode == 'val':
                self.label_start_ix = self.label_start_ix[:num_val_im]
                self.label_end_ix = self.label_end_ix[:num_val_im]
        else:
            self.label_start_ix = self.label_start_ix[:18720]
            self.label_end_ix = self.label_end_ix[:18720]

    if use_proposals:
        print("Loading proposals", flush=True)
        p_h5 = h5py.File(PROPOSAL_FN, 'r')
        rpn_rois = p_h5['rpn_rois']
        rpn_scores = p_h5['rpn_scores']
        rpn_im_to_roi_idx = np.array(p_h5['im_to_roi_idx'][self.split_mask])
        rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

        self.rpn_rois = []
        for i in range(len(self.filenames)):
            rpn_i = np.column_stack((
                rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
                rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
            ))
            self.rpn_rois.append(rpn_i)
    else:
        self.rpn_rois = None

    # You could add data augmentation here. But we didn't.
    # tform = []
    # if self.is_train:
    #     tform.append(RandomOrder([
    #         Grayscale(),
    #         Brightness(),
    #         Contrast(),
    #         Sharpness(),
    #         Hue(),
    #     ]))
    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
    self.sal_transform_pipeline = Compose([Resize(int(IM_SCALE / 16)), ToTensor()])
    self.depth_transform_pipeline = Compose([Resize(int(IM_SCALE)), ToTensor()])
def __init__(self, mode, num_im=-1, num_val_im=5000):
    """
    Torch dataset for VisualGenome
    :param mode: Must be train, test, or val
    :param roidb_file: HDF5 containing the GT boxes, classes, and relationships
    :param dict_file: JSON Contains mapping of classes/relationships to words
    :param image_file: HDF5 containing image filenames
    :param filter_empty_rels: True if we filter out images without relationships between
                              boxes. One might want to set this to false if training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
    :param num_im: Number of images in the entire dataset. -1 for all images.
    :param num_val_im: Number of images in the validation set (must be less than num_im
                       unless num_im is -1.)
    :param proposal_file: If None, we don't provide proposals. Otherwise file for where we get RPN proposals
    """
    if mode not in ('test', 'train', 'val'):
        raise ValueError(
            "Mode must be in test, train, or val. Supplied {}".format(mode))
    self.mode = mode

    # Initialize
    output = open(os.path.join(DATA_PATH, "hico_" + mode + ".pkl"), 'rb')
    self.roidb_file = pickle.load(output)
    output.close()
    # self.dict_file = dict_file
    # self.image_file = image_file
    # self.filter_non_overlap = filter_non_overlap
    # self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'
    #
    # self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
    #     self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
    #     filter_empty_rels=filter_empty_rels,
    #     filter_non_overlap=self.filter_non_overlap and self.is_train,
    # )

    self.filenames = load_image_filenames(
        self.roidb_file, os.path.join(DATA_PATH, 'hico/images/' + mode))
    # self.filenames = [self.filenames[i] for i in np.where(self.split_mask)[0]]
    self.gt_boxes, self.gt_classes, self.gt_human_boxes, self.gt_human_classes, self.gt_hoi_classes = \
        load_boxes(self.roidb_file)
    self.ind_to_classes, self.ind_to_predicates = load_info(self.roidb_file)

    # if use_proposals:
    #     print("Loading proposals", flush=True)
    #     p_h5 = h5py.File(PROPOSAL_FN, 'r')
    #     rpn_rois = p_h5['rpn_rois']
    #     rpn_scores = p_h5['rpn_scores']
    #     rpn_im_to_roi_idx = np.array(p_h5['im_to_roi_idx'][self.split_mask])
    #     rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])
    #
    #     self.rpn_rois = []
    #     for i in range(len(self.filenames)):
    #         rpn_i = np.column_stack((
    #             rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
    #             rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
    #         ))
    #         self.rpn_rois.append(rpn_i)
    # else:
    #     self.rpn_rois = None

    # You could add data augmentation here. But we didn't.
    # tform = []
    # if self.is_train:
    #     tform.append(RandomOrder([
    #         Grayscale(),
    #         Brightness(),
    #         Contrast(),
    #         Sharpness(),
    #         Hue(),
    #     ]))
    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
def __init__(self, mode, filter_duplicate_rels=True, mask_resolution=28, use_for_bias=False):
    """
    :param mode: train2014 or val2014
    """
    self.mask_resolution = mask_resolution
    self.mode = mode
    self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

    tform = []
    if self.is_train:
        tform.append(RandomOrder([
            Grayscale(),
            Brightness(),
            Contrast(),
            Sharpness(),
            Hue(),
        ]))
    tform += [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)

    # image_names = [name[:-4] for name in os.listdir(os.path.join(PIC_PATH, 'image/' + mode)) if name.endswith('.jpg')]
    error_list = [line.strip()[:-4]
                  for line in open(os.path.join(PIC_PATH, mode + '_error_list.txt'))]
    image_names = [name[:-4]
                   for name in os.listdir(os.path.join(PIC_PATH, 'image/' + mode))
                   if name.endswith('.jpg') and name[:-4] not in error_list]
    image_names.sort(key=str.lower)
    if self.mode != 'test':
        semantic_names = [name[:-4]
                          for name in os.listdir(os.path.join(PIC_PATH, 'segmentation/' + mode + '/semantic'))
                          if name.endswith('.png') and name[:-4] not in error_list]
        instance_names = [name[:-4]
                          for name in os.listdir(os.path.join(PIC_PATH, 'segmentation/' + mode + '/instance'))
                          if name.endswith('.png') and name[:-4] not in error_list]
        semantic_names.sort(key=str.lower)
        instance_names.sort(key=str.lower)
        assert image_names == semantic_names
        assert image_names == instance_names
    # image_names = [name[:-4] for name in os.listdir(os.path.join(PIC_OFFLINE_PATH, 'val/obj_feat/')) if
    #                name.endswith('.npy')]
    # image_names.sort(key=str.lower)
    self.img_names = image_names
    print(len(self.img_names))

    rel_cats = json.load(open(os.path.join(PIC_PATH, 'categories_list/relation_categories.json')))
    self.ind_to_predicates = [rel_cat['name'] for rel_cat in rel_cats]
    cls_cats = json.load(open(os.path.join(PIC_PATH, 'categories_list/label_categories.json')))
    self.ind_to_classes = [cls_cat['name'] for cls_cat in cls_cats]

    if self.mode != 'test':
        self.img2rels = dict()
        img_relations = json.load(open(os.path.join(PIC_PATH, 'relation/relations_' + self.mode + '.json')))
        for img_relation in img_relations:
            rels = []
            for index, rel in enumerate(img_relation['relations']):
                temp = np.array([[rel['subject'] - 1, rel['object'] - 1, rel['relation']]], dtype=np.int32)
                rels.append(temp)
            rels = np.concatenate(rels, axis=0)
            self.img2rels[img_relation['name'][:-4]] = rels

    print('====================')
    print(self.ind_to_classes)
    print(self.ind_to_predicates)

    self.id_to_ind = {ind: ind for ind, name in enumerate(self.ind_to_classes)}
    self.ind_to_id = {x: y for y, x in self.id_to_ind.items()}
    # self.create_coco_format()

    if use_for_bias:
        dataset = json.load(open(os.path.join(PIC_PATH, 'train.json')))
        anns, imgs = {}, {}
        imgToAnns = defaultdict(list)
        for img in dataset['images']:
            imgs[img['id']] = img['file_name'][:-4]
        for ann in dataset['annotations']:
            imgToAnns[imgs[ann['image_id']]].append(ann)
        self.img2boxes = dict()
        self.img2classes = dict()
        for img_name, anns in imgToAnns.items():
            gt_box = []
            gt_class = []
            for ann in anns:
                gt_box.append(np.array([ann['bbox']]))
                gt_class.append(np.array([ann['category_id']]))
            gt_box = np.concatenate(gt_box, axis=0)
            gt_class = np.concatenate(gt_class, axis=0)
            self.img2boxes[img_name] = gt_box
            self.img2classes[img_name] = gt_class
def __init__(self, mode, filter_duplicate_rels=True, mask_resolution=28):
    """
    :param mode: train2014 or val2014
    """
    self.mask_resolution = mask_resolution
    self.mode = mode
    self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

    tform = []
    if self.is_train:
        tform.append(RandomOrder([
            Grayscale(),
            Brightness(),
            Contrast(),
            Sharpness(),
            Hue(),
        ]))
    tform += [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)

    image_names = [name[:-4]
                   for name in os.listdir(os.path.join(PIC_PATH, 'image/' + mode))
                   if name.endswith('.jpg')]
    if self.mode != 'test':
        semantic_names = [name[:-4]
                          for name in os.listdir(os.path.join(PIC_PATH, 'segmentation/' + mode + '/semantic'))
                          if name.endswith('.png')]
        instance_names = [name[:-4]
                          for name in os.listdir(os.path.join(PIC_PATH, 'segmentation/' + mode + '/instance'))
                          if name.endswith('.png')]
        image_names.sort(key=str.lower)
        semantic_names.sort(key=str.lower)
        instance_names.sort(key=str.lower)
        assert image_names == semantic_names
        assert image_names == instance_names
    # image_names = [name[:-4] for name in os.listdir(os.path.join(PIC_OFFLINE_PATH, 'val/obj_feat/')) if
    #                name.endswith('.npy')]
    # image_names.sort(key=str.lower)
    self.img_names = image_names

    rel_cats = json.load(open(os.path.join(PIC_PATH, 'categories_list/relation_categories.json')))
    self.ind_to_predicates = [rel_cat['name'] for rel_cat in rel_cats]
    cls_cats = json.load(open(os.path.join(PIC_PATH, 'categories_list/label_categories.json')))
    self.ind_to_classes = [cls_cat['name'] for cls_cat in cls_cats]

    if self.mode != 'test':
        self.img2rels = dict()
        img_relations = json.load(open(os.path.join(PIC_PATH, 'relation/relations_' + self.mode + '.json')))
        for img_relation in img_relations:
            rels = []
            for index, rel in enumerate(img_relation['relations']):
                temp = np.array([[rel['subject'] - 1, rel['object'] - 1, rel['relation']]], dtype=np.int32)
                rels.append(temp)
            rels = np.concatenate(rels, axis=0)
            self.img2rels[img_relation['name'][:-4]] = rels

    print('====================')
    print(self.ind_to_classes)
    print(self.ind_to_predicates)

    self.id_to_ind = {ind: ind for ind, name in enumerate(self.ind_to_classes)}
    self.ind_to_id = {x: y for y, x in self.id_to_ind.items()}