def get_the_boxes(image_filename):
    """Return the ground-truth boxes of one database image in relative x1, y1, x2, y2.

    The per-image ``.txt`` annotation files store boxes as absolute x, y, w, h
    (unlike the matlab annotation file, which uses x1, y1, x2, y2), so the image
    is read to recover its size before converting the coordinates.

    When an INSTRE-M image is matched against an INSTRE-S1 query, only one of
    the two annotated objects is kept: per the INSTRE documentation the first
    annotation line is the object labeled [a] in INSTRE-S1 and the second line
    is the object labeled [b].
    """
    box_file = os.path.join(image_path, get_box_file_for_image_file(image_filename))
    raw_boxes = read_boxes_from(box_file)
    # Need the image size to turn absolute pixel coords into relative ones.
    img = read_image(os.path.join(image_path, image_filename))
    imsize = FeatureMapSize(img=img)

    query_parts = query_image_path_original.split("/")
    query_subset = query_parts[0].lower()
    image_subset = image_filename.split("/")[0].lower()

    if query_subset == "instre-s1" and image_subset == "instre-m":
        assert len(raw_boxes) == 2, f"INSTRE-M images should have exactly two boxes, but have {raw_boxes}"
        # Third character of the query class name encodes which of the two
        # objects this class refers to.
        variant = query_parts[1][2]
        assert variant in ["a", "b"]
        chosen = raw_boxes[0] if variant == "a" else raw_boxes[1]
        return [convert_the_box_from_xywh(chosen, imsize)]

    same_s1 = query_subset == "instre-s1" and image_subset == "instre-s1"
    same_s2 = query_subset == "instre-s2" and image_subset == "instre-s2"
    if same_s1 or same_s2:
        return [convert_the_box_from_xywh(b, imsize) for b in raw_boxes]

    raise RuntimeError(
        f"Should not be happening, query {query_image_path_original}, image {image_filename}, boxes {raw_boxes}"
    )
def __init__(self, dataset_src):
    """Wrap an OS2D-style dataset object into an ``imdb``-compatible dataset.

    Copies the annotation dataframe, paths and image lists from
    ``dataset_src``, builds the class list (with ``'__background__'`` at
    index 0, as the imdb framework expects), and reads every class GT image
    once to record its size in ``self.cat_data``.

    Args:
        dataset_src: source dataset object exposing ``name``, ``gtboxframe``
            (a dataframe with at least ``classid`` and ``classfilename``
            columns), ``image_size``, ``gt_path``, ``image_path``,
            ``image_ids`` and ``image_file_names``.

    Raises:
        AssertionError: if the GT or image path does not exist, or if a class
            is associated with more than one GT image file.
    """
    imdb.__init__(self, dataset_src.name)
    # Mirror the source dataset's annotation and bookkeeping attributes.
    self._gtboxframe = dataset_src.gtboxframe
    self._image_size = dataset_src.image_size
    self._gt_path = dataset_src.gt_path
    self._image_path = dataset_src.image_path
    self._image_ids = dataset_src.image_ids
    self._image_file_names = dataset_src.image_file_names
    self._num_images = len(self._image_ids)
    self._num_boxes = len(self._gtboxframe)
    self._image_index = list(range(self._num_images))
    # add background class
    # '__background__' - always index 0
    bkg_tag = '__background__'
    self._classes = (bkg_tag, ) + tuple(
        self._gtboxframe["classid"].unique())
    self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
    # Default to roidb handler
    self._roidb_handler = self.gt_roidb
    # Random salt keeps competition result files from colliding between runs.
    self._salt = str(uuid.uuid4())
    self._comp_id = 'comp4'
    # Standard PASCAL-VOC-style imdb evaluation config.
    self.config = {
        'cleanup': True,
        'use_salt': True,
        'use_diff': False,
        'matlab_eval': False,
        'rpn_file': None,
        'min_size': 2
    }
    assert os.path.exists(
        self._gt_path), 'GT path does not exist: {}'.format(self._gt_path)
    assert os.path.exists(
        self._image_path), 'Image path does not exist: {}'.format(
            self._image_path)
    # For each foreground class, record its single GT image path and a box
    # covering the whole GT image (the class images are pre-cropped crops).
    self.cat_data = {}
    for i in self._class_to_ind.values():
        class_id = self._classes[i]
        if class_id != bkg_tag:
            class_entries = self._gtboxframe[self._gtboxframe["classid"] ==
                                             class_id]
            gt_file = class_entries['classfilename'].unique()
            # Each class must map to exactly one GT image file.
            assert len(gt_file) == 1
            gt_file = gt_file[0]
            gt_file = os.path.join(self._gt_path, gt_file)
            curimage = read_image(gt_file)
            height, width = curimage.height, curimage.width
            self.cat_data[i] = [{
                'image_path': gt_file,
                'boxes': [0, 0, width, height]
            }]
def _read_dataset_gt_images(self):
    """Load one ground-truth (class reference) image per class id into memory."""
    self.gt_images_per_classid = OrderedDict()
    cache = self.gt_images_per_classid
    for _, row in self.gtboxframe.iterrows():
        class_id = row["classid"]
        # Many annotation rows share a class; read each class image only once.
        if class_id in cache:
            continue
        cache[class_id] = read_image(os.path.join(self.gt_path, row["classfilename"]))
    self.logger.info("Read {0} GT images".format(len(cache)))
def _get_dataset_image_by_id(self, image_id):
    """Return the dataset image for ``image_id``, resized if needed.

    The image is read from ``self.image_path_per_image_id`` and, when its
    longer side differs from ``self.image_size``, resized (preserving the
    aspect ratio) so that the longer side matches.  When
    ``self.cache_images`` is set, the loaded image is stored in
    ``self.image_per_image_id`` and served from there on later calls.

    Args:
        image_id: id of an image previously registered in
            ``self.image_path_per_image_id``.

    Returns:
        A PIL image.

    Raises:
        AssertionError: if ``image_id`` was never registered.
    """
    assert image_id in self.image_path_per_image_id, "Can work only with checked images"
    if image_id in self.image_per_image_id:
        return self.image_per_image_id[image_id]

    img_path = self.image_path_per_image_id[image_id]
    img = read_image(img_path)
    img_size = FeatureMapSize(img=img)
    if max(img_size.w, img_size.h) != self.image_size:
        # resize images in case they were not of the correct size on disk
        h, w = get_image_size_after_resize_preserving_aspect_ratio(img_size.h, img_size.w, self.image_size)
        # Image.ANTIALIAS was deprecated and removed in Pillow 10;
        # Image.LANCZOS is the same filter under its proper name.
        img = img.resize((w, h), resample=Image.LANCZOS)
    if self.cache_images:
        self.image_per_image_id[image_id] = img
    return img
def _load_annotation(self, index):
    """Build a PASCAL-VOC-style roidb entry for the image at position ``index``.

    Box coordinates in ``self._gtboxframe`` are stored relative to the image
    size, so the image is read to recover its width/height and the boxes are
    scaled back to absolute pixel coordinates.
    """
    gt_rows = self._gtboxframe[self._gtboxframe['imageid'] ==
                               self._image_ids[index]]
    # Read the image to get its absolute size.
    image_path = os.path.join(self._image_path, self._image_file_names[index])
    image = read_image(image_path)
    height, width = image.height, image.width

    boxes = []
    gt_classes = []
    difficult_flag = []
    seg_areas = []
    overlaps = np.zeros((len(gt_rows), self.num_classes), dtype=np.float32)
    for ix, (_, row) in enumerate(gt_rows.iterrows()):
        # Scale relative coords back to pixels.
        lx = np.int32(row['lx'] * width)
        rx = np.int32(row['rx'] * width)
        ty = np.int32(row['ty'] * height)
        by = np.int32(row['by'] * height)
        label = self._class_to_ind[row['classid']]
        boxes.append([lx, ty, rx, by])
        gt_classes.append(np.int32(label))
        seg_areas.append((rx - lx) * (by - ty))
        difficult_flag.append(row['difficult'])
        # One-hot GT overlap for the box's own class.
        overlaps[ix, label] = 1.0

    return {
        'boxes': np.array(boxes, dtype=np.int32),
        'height': height,
        'width': width,
        'flipped': False,
        'gt_classes': np.asarray(gt_classes, dtype=np.int32),
        'gt_ishard': np.asarray(difficult_flag, dtype=np.int32),
        'seg_areas': np.asarray(seg_areas, dtype=np.float32),
        'gt_overlaps': scipy.sparse.csr_matrix(overlaps),
    }
def build_instre_dataset(data_path, name, eval_scale=None, cache_images=False, no_image_reading=False, logger_prefix="OS2D"):
    """Build an INSTRE one-shot-detection dataset.

    On first use, converts the raw INSTRE annotations (matlab file + per-image
    ``.txt`` box files) into a single ``instre.csv`` dataframe and crops one GT
    image per query class; later calls reuse the cached CSV.  The subset
    encoded in ``name`` (e.g. ``"instre-s1-train"``) then selects which classes
    are kept.

    Args:
        data_path: root data folder; the dataset is expected in
            ``data_path/instre``.
        name: dataset name; must start with ``"instre"`` followed by one of
            ``-all``, ``-s1-train``, ``-s1-val``, ``-s1-test``, ``-s2-train``,
            ``-s2-val``, ``-s2-test``.
        eval_scale: evaluation scale passed through to
            ``DatasetOneShotDetection``.
        cache_images: whether the dataset should cache read images.
        no_image_reading: passed through to ``DatasetOneShotDetection``.
        logger_prefix: prefix for the logger name.

    Returns:
        A ``DatasetOneShotDetection`` instance for the requested subset.
    """
    logger = logging.getLogger(f"{logger_prefix}.dataset")
    logger.info(
        "Preparing the INSTRE dataset: version {0}, eval scale {1}, image caching {2}"
        .format(name, eval_scale, cache_images))
    # INSTRE dataset was downloaded from here: ftp://ftp.irisa.fr/local/texmex/corpus/instre/instre.tar.gz
    # Splits by Iscen et al. (2016) were downloaded from here: ftp://ftp.irisa.fr/local/texmex/corpus/instre/gnd_instre.mat
    image_size = 1000

    import scipy.io as sio
    dataset_path = os.path.join(data_path, "instre")
    annotation_file = os.path.join(dataset_path, "gnd_instre.mat")
    annotation_data = sio.loadmat(annotation_file)
    # Layout of the matlab annotation file:
    # annotation_data["qimlist"][0] - 1250 queries - each in annotation_data["qimlist"][0][i][0] file, root - os.path.join(data_path, "instre")
    # annotation_data["imlist"][0] - 27293 database images - each in annotation_data["imlist"][0][i][0] file, root - os.path.join(data_path, "instre")
    # annotation_data["gnd"][0] - 1250 annotations for all queries:
    # annotation_data["gnd"][0][i][0] - indices of positives in annotation_data["imlist"][0] (WARNING - 1-based)
    # annotation_data["gnd"][0][i][1] - bbox of the query object, one of the boxes from ent of *.txt
    # images in subsets INSTRE-S1 and INSTRE-S2 contain exactly one object
    # images in the subset INSTRE-M contain two objects each

    image_path = dataset_path
    gt_path = os.path.join(dataset_path, "classes")
    gt_image_path = os.path.join(gt_path, "images")
    mkdir(gt_image_path)
    classdatafile = os.path.join(gt_path, "instre.csv")
    if not os.path.isfile(classdatafile):
        logger.info(
            f"Did not find data file {classdatafile}, creating it from INSTRE source data"
        )
        # create the annotation file from the raw dataset
        annotation_data["qimlist"] = annotation_data["qimlist"].flatten()
        annotation_data["imlist"] = annotation_data["imlist"].flatten()
        annotation_data["gnd"] = annotation_data["gnd"].flatten()
        num_classes = len(annotation_data["qimlist"])
        gtboxframe = []  # will be creating dataframe from a list of dicts
        for i_class in range(num_classes):
            query_image_path_original = str(
                annotation_data["qimlist"][i_class][0])
            if query_image_path_original.split("/")[0].lower() == "instre-m":
                # Query boxes from subset "INSTRE-M" contain both objects, so it is not clear how to use them
                logger.info(
                    f"Skipping query {i_class}: {query_image_path_original}")
                continue
            logger.info(f"Adding query {i_class}: {query_image_path_original}")
            query_bbox = annotation_data["gnd"][i_class][1].flatten()
            # "-1" because of the original MATLAB indexing
            query_positives = annotation_data["gnd"][i_class][0].flatten(
            ) - 1
            classid = i_class
            classfilename = f"{i_class:05d}_{'_'.join(query_image_path_original.split('/'))}"
            # NOTE(review): this checks `classfilename` relative to the current
            # working directory, while the crop is saved under gt_image_path —
            # presumably os.path.join(gt_image_path, classfilename) was
            # intended, so the crop is likely recreated on every run; confirm
            # before changing.
            if not os.path.isfile(classfilename):
                query_img = read_image(
                    os.path.join(dataset_path, query_image_path_original))
                query_img_cropped_box = query_img.crop(query_bbox)
                query_img_cropped_box.save(
                    os.path.join(gt_image_path, classfilename))

            # Helper closures below capture image_path / query_image_path_original
            # from the enclosing loop iteration (they are re-created per class).
            def convert_the_box_from_xywh(box, imsize):
                # Convert an absolute x, y, w, h box to relative x1, y1, x2, y2.
                lx = float(box[0]) / imsize.w
                ty = float(box[1]) / imsize.h
                rx = lx + float(box[2]) / imsize.w
                by = ty + float(box[3]) / imsize.h
                return lx, ty, rx, by

            def read_boxes_from(file_with_boxes):
                # Each non-empty line holds one space-separated integer box.
                with open(file_with_boxes, "r") as fo:
                    lines = fo.readlines()
                boxes = [[int(s) for s in line.split(" ")] for line in lines
                         if line]
                return boxes

            def get_box_file_for_image_file(image_filename):
                # Annotation file sits next to the image with a .txt extension.
                return image_filename.split(".")[0] + ".txt"

            def get_the_boxes(image_filename):
                # Return the GT boxes of one database image in relative
                # x1, y1, x2, y2, picking the right object for INSTRE-M images.
                file_with_boxes = os.path.join(
                    image_path, get_box_file_for_image_file(image_filename))
                # get image size - recompute boxes
                boxes = read_boxes_from(file_with_boxes)
                img = read_image(os.path.join(image_path, image_filename))
                imsize = FeatureMapSize(img=img)
                # choose the correct box if have two of them
                # From INSTRE documentation:
                # Specially, for each tuple-class in INSTRE-M, there are two corresponding object classes in INSTRE-S1.
                # In each annotation file for a INSTRE-M image, the first line records the object labeled as [a] in INSTRE-S1
                # and the second line records the object labeled as [b] in INSTRE-S1.
                #
                # CAUTION! the matlab file has boxes in x1, y1, x2, y2, but the .txt files in x, y, w, h
                query_path_split = query_image_path_original.split("/")
                image_filename_split = image_filename.split("/")
                if query_path_split[0].lower(
                ) == "instre-s1" and image_filename_split[0].lower(
                ) == "instre-m":
                    assert len(
                        boxes
                    ) == 2, f"INSTRE-M images should have exactly two boxes, but have {boxes}"
                    # The third character of the query class name is 'a' or 'b'
                    # and selects which of the two annotated objects to keep.
                    assert query_path_split[1][2] in ["a", "b"]
                    i_box = 0 if query_path_split[1][2] == "a" else 1
                    boxes = [convert_the_box_from_xywh(boxes[i_box], imsize)]
                elif query_path_split[0].lower() == "instre-s1" and image_filename_split[0].lower() == "instre-s1" or \
                     query_path_split[0].lower() == "instre-s2" and image_filename_split[0].lower() == "instre-s2":
                    boxes = [
                        convert_the_box_from_xywh(box, imsize) for box in boxes
                    ]
                else:
                    raise RuntimeError(
                        f"Should not be happening, query {query_image_path_original}, image {image_filename}, boxes {boxes}"
                    )
                return boxes

            for image_id in query_positives:
                # add one bbox to the annotation
                # required_columns = ["imageid", "imagefilename", "classid", "classfilename", "gtbboxid", "difficult", "lx", "ty", "rx", "by"]
                image_file_name = str(annotation_data["imlist"][image_id][0])
                boxes = get_the_boxes(image_file_name)
                for box in boxes:
                    item = OrderedDict()
                    item["gtbboxid"] = len(gtboxframe)
                    item["classid"] = classid
                    item["classfilename"] = classfilename
                    item["imageid"] = image_id
                    assert annotation_data["imlist"][image_id].size == 1
                    item["imagefilename"] = image_file_name
                    item["difficult"] = 0
                    item["lx"], item["ty"], item["rx"], item["by"] = box
                    gtboxframe.append(item)

        gtboxframe = pd.DataFrame(gtboxframe)
        gtboxframe.to_csv(classdatafile)

    gtboxframe = read_annotation_file(classdatafile)

    # get these automatically from gtboxframe
    image_ids = None
    image_file_names = None

    # define a subset split (using closure)
    subset_name = name.lower()
    assert subset_name.startswith("instre"), ""
    subset_name = subset_name[len("instre"):]
    subsets = [
        "all", "s1-train", "s1-val", "s1-test", "s2-train", "s2-val", "s2-test"
    ]
    found_subset = False
    for subset in subsets:
        if subset_name == "-" + subset:
            found_subset = subset
            break
    assert found_subset, "Could not identify subset {}".format(subset_name)
    # Splits are made over *classes* (75% train / 5% val / 20% test), not over
    # individual images.
    if subset == "all":
        pass
    elif subset in ["s1-train", "s1-val", "s1-test"]:
        gtboxframe = gtboxframe[gtboxframe.classfilename.str.contains(
            "INSTRE-S1")]
        classes = gtboxframe.classfilename.drop_duplicates()
        if subset == "s1-train":
            classes = classes[:len(classes) * 75 // 100]  # first 75%
        elif subset == "s1-test":
            classes = classes[len(classes) * 8 // 10:]  # last 20%
        else:  # "s1-val"
            classes = classes[len(classes) * 75 // 100:len(classes) * 8 //
                              10]  # 5%
        gtboxframe = gtboxframe[gtboxframe.classfilename.isin(classes)]
    elif subset in ["s2-train", "s2-val", "s2-test"]:
        gtboxframe = gtboxframe[gtboxframe.classfilename.str.contains(
            "INSTRE-S2")]
        classes = gtboxframe.classfilename.drop_duplicates()
        if subset == "s2-train":
            classes = classes[:len(classes) * 75 // 100]  # first 75%
        elif subset == "s2-test":
            classes = classes[len(classes) * 8 // 10:]  # last 20%
        else:  # "s2-val"
            classes = classes[len(classes) * 75 // 100:len(classes) * 8 //
                              10]  # 5%
        gtboxframe = gtboxframe[gtboxframe.classfilename.isin(classes)]
    else:
        raise (RuntimeError("Unknown subset {0}".format(subset)))

    dataset = DatasetOneShotDetection(gtboxframe,
                                      gt_image_path,
                                      image_path,
                                      name,
                                      image_size,
                                      eval_scale,
                                      image_ids=image_ids,
                                      image_file_names=image_file_names,
                                      cache_images=cache_images,
                                      no_image_reading=no_image_reading,
                                      logger_prefix=logger_prefix)
    return dataset
def build_imagenet_test_episodes(subset_name, data_path, logger):
    """Build annotation data for one ImageNet-RepMet test episode.

    Loads the RepMet roidb and episode pickles, crops one GT image per episode
    class (cached on disk per episode), and converts the episode's query-image
    boxes into a dataframe with relative coordinates.  The dataframe itself is
    cached as a CSV per episode.

    Args:
        subset_name: subset string ending in ``-<episode_id>``; only the
            trailing integer is used.
        data_path: root data folder; the dataset is expected in
            ``data_path/ImageNet-RepMet``.
        logger: logger used for progress and warnings.

    Returns:
        Tuple ``(gtboxframe, gt_image_path, dataset_image_path, image_size)``.
    """
    episode_id = int(subset_name.split('-')[-1])
    epi_data_name = "epi_inloc_in_domain_1_5_10_500"
    image_size = 1000

    dataset_path = os.path.join(data_path, "ImageNet-RepMet")
    roidb_path = os.path.join(dataset_path, "RepMet_CVPR2019_data", "data",
                              "Imagenet_LOC", "voc_inloc_roidb.pkl")
    # NOTE(review): pickle.load on these files assumes they come from the
    # trusted RepMet release; do not point this at untrusted data.
    with open(roidb_path, 'rb') as fid:
        roidb = pickle.load(fid, encoding='latin1')
    episodes_path = os.path.join(dataset_path, "RepMet_CVPR2019_data", "data",
                                 "Imagenet_LOC", "episodes",
                                 f"{epi_data_name}.pkl")
    with open(episodes_path, 'rb') as fid:
        episode_data = pickle.load(fid, encoding='latin1')

    logger.info(f"Extracting episode {episode_id} out of {len(episode_data)}")
    episode = episode_data[episode_id]

    dataset_image_path = os.path.join(data_path, "ImageNet-RepMet", "ILSVRC")
    # Paths inside the pickles are absolute paths from the original RepMet
    # machine; strip this prefix to make them relative.
    SWAP_IMG_PATH_SRC = "/dccstor/leonidka1/data/imagenet/ILSVRC/"

    def _get_image_path(image_path):
        # Make a pickled absolute path relative to dataset_image_path.
        image_path = image_path.replace(SWAP_IMG_PATH_SRC, "")
        return image_path

    # episode["epi_cats"] - list of class ids
    # episode["query_images"] - list of path to the episode images
    # episode["epi_cats_names"] - list of names of the episode classes
    # episode["train_boxes"] - list of box data about class boxes
    num_classes = len(episode["epi_cats"])

    gt_path = os.path.join(dataset_path, epi_data_name)
    gt_path = os.path.join(gt_path, f"classes_episode_{episode_id}")
    gt_image_path = os.path.join(gt_path, "images")
    mkdir(gt_image_path)
    classdatafile = os.path.join(
        gt_path, f"classes_{epi_data_name}_episode_{episode_id}.csv")
    if not os.path.isfile(classdatafile):
        logger.info(
            f"Did not find data file {classdatafile}, creating it from the RepMet source data"
        )
        # create the annotation file from the raw dataset
        gtboxframe = []  # will be creating dataframe from a list of dicts
        gt_filename_by_id = {}
        for i_class in range(len(episode["train_boxes"])):
            train_boxes_data = episode["train_boxes"][i_class]
            class_id = train_boxes_data[0]
            assert class_id in episode[
                "epi_cats"], f"class_id={class_id} should be listed in episode['epi_cats']={episode['epi_cats']}"
            query_image_path_original = _get_image_path(train_boxes_data[2])
            query_bbox = train_boxes_data[3]
            query_bbox = query_bbox.flatten()
            classfilename = f"{class_id:05d}_{'_'.join(query_image_path_original.split('/'))}"
            if class_id not in gt_filename_by_id:
                logger.info(
                    f"Adding query #{len(gt_filename_by_id)} - {class_id}: {query_image_path_original}"
                )
                # NOTE(review): "or True" forces the GT crop to be recreated on
                # every run (debug leftover?), and the isfile check looks at
                # `classfilename` relative to the cwd rather than
                # gt_image_path — confirm before changing.
                if not os.path.isfile(classfilename) or True:
                    query_img = read_image(
                        os.path.join(dataset_image_path,
                                     query_image_path_original))
                    query_img_cropped_box = query_img.crop(query_bbox)
                    query_img_cropped_box.save(
                        os.path.join(gt_image_path, classfilename))
                gt_filename_by_id[class_id] = classfilename
            else:
                logger.info(
                    f"WARNING: class {class_id} has multiple entries in GT image {query_image_path_original}, using the first box as GT"
                )
        for class_id in episode["epi_cats"]:
            if class_id not in gt_filename_by_id:
                logger.info(
                    f"WARNING: ground truth for class {class_id} not found in episode {episode_id}"
                )

        def convert_the_box_to_relative(box, imsize):
            # Convert an absolute x1, y1, x2, y2 box to relative coordinates.
            lx = float(box[0]) / imsize.w
            ty = float(box[1]) / imsize.h
            rx = float(box[2]) / imsize.w
            by = float(box[3]) / imsize.h
            return lx, ty, rx, by

        def find_image_path_in_roidb(image_file_name, roidb):
            # Linear scan of the roidb for the entry matching this file name;
            # returns its index or None if absent.
            for i_image, im_data in enumerate(roidb["roidb"]):
                if im_data["flipped"]:
                    raise RuntimeError(
                        f"Image {i_image} data {im_data} has flipped flag on")
                if im_data["image"] == image_file_name:
                    return i_image
            return None

        for image_file_name in episode["query_images"]:
            # add one bbox to the annotation
            # required_columns = ["imageid", "imagefilename", "classid", "classfilename", "gtbboxid", "difficult", "lx", "ty", "rx", "by"]
            image_id = find_image_path_in_roidb(image_file_name, roidb)
            im_data = roidb["roidb"][image_id]
            image_file_name = _get_image_path(image_file_name)
            imsize = FeatureMapSize(w=int(im_data["width"]),
                                    h=int(im_data["height"]))

            boxes_xyxy = im_data["boxes"]
            classes = im_data["gt_classes"]
            for box, class_id in zip(boxes_xyxy, classes):
                # Keep only boxes of classes that belong to this episode.
                if class_id in gt_filename_by_id:
                    item = OrderedDict()
                    item["imageid"] = int(image_id)
                    item["imagefilename"] = image_file_name
                    item["classid"] = int(class_id)
                    item["classfilename"] = gt_filename_by_id[class_id]
                    item["gtbboxid"] = len(gtboxframe)
                    item["difficult"] = 0
                    item["lx"], item["ty"], item["rx"], item[
                        "by"] = convert_the_box_to_relative(box, imsize)
                    gtboxframe.append(item)

        gtboxframe = pd.DataFrame(gtboxframe)
        gtboxframe.to_csv(classdatafile)

    gtboxframe = pd.read_csv(classdatafile)

    return gtboxframe, gt_image_path, dataset_image_path, image_size
def visualize_detections(self, i_image, gt=None, dets=None, num_dets_to_show=30):
    """Show image ``i_image`` overlaid with GT boxes (yellow) and/or the
    top-``num_dets_to_show`` detections (magenta).

    Either of ``gt`` / ``dets`` may be None, in which case that overlay is
    simply omitted.
    """
    # Locate the image file through the annotation dataframe.
    rows = self._gtboxframe[self._gtboxframe['imageid'] ==
                            self._image_ids[i_image]]
    filenames = rows['imagefilename'].unique()
    assert len(filenames) == 1
    image = read_image(os.path.join(self._image_path, filenames[0]))

    # Collect (boxes, labels, scores, colors) per available source.
    overlays = []
    if gt is not None:
        gt_boxes = gt.bbox_xyxy
        n_gt = gt_boxes.shape[0]
        gt_labels = [str(self._classes[lb]) for lb in gt.get_field("labels")]
        overlays.append((gt_boxes, gt_labels, [np.nan] * n_gt, ['y'] * n_gt))
    if dets is not None:
        # Keep only the highest-scoring detections.
        top_ids = dets.get_field("scores").topk(num_dets_to_show)[1]
        top_dets = dets[top_ids]
        det_boxes = top_dets.bbox_xyxy
        n_det = det_boxes.shape[0]
        det_labels = [
            str(self._classes[lb]) for lb in top_dets.get_field("labels")
        ]
        det_scores = [float(s) for s in top_dets.get_field("scores")]
        overlays.append((det_boxes, det_labels, det_scores, ['m'] * n_det))

    if overlays:
        box_tensors = [o[0] for o in overlays]
        vis_boxes = box_tensors[0] if len(box_tensors) == 1 else torch.cat(
            box_tensors, 0)
        vis_labels = sum((o[1] for o in overlays), [])
        vis_scores = sum((o[2] for o in overlays), [])
        vis_colors = sum((o[3] for o in overlays), [])
    else:
        vis_boxes = None
        vis_labels = None
        vis_scores = None
        vis_colors = None

    vis_image(image,
              boxes=vis_boxes,
              label_names=vis_labels,
              scores=vis_scores,
              colors=vis_colors,
              showfig=True)