def get_meta_from_filename(self, filename, name):
    coco = self.get_entry_from_filename(filename)
    imid = int(filename.split('_')[-1])
    catid = coco.getCatIds(catNms=name)[0]
    return OrderedDict([
        ('id', str(filename)),
        ('imid', imid),
        ('category_id', catid),
        ('category', str(name)),
        ('supercategory', coco.cats[catid]['supercategory']),
        # ('filename', os.path.join(self.home(self.SUBDIR, self.IMAGEDIR),
        #                           filename + '.jpg'))
    ])
def coco(writer, name_index, profile, row, verify=False):
    root = os.path.expanduser(os.path.expandvars(row['root']))
    year = str(row['year'])
    name = profile + year
    path = os.path.join(root, 'annotations', 'instances_%s.json' % name)
    if not os.path.exists(path):
        tf.logging.warn(path + ' does not exist')
        return False
    import pycocotools.coco
    coco = pycocotools.coco.COCO(path)
    catIds = coco.getCatIds(catNms=list(name_index.keys()))
    cats = coco.loadCats(catIds)
    id_index = dict((cat['id'], name_index[cat['name']]) for cat in cats)
    imgIds = coco.getImgIds()
    path = os.path.join(root, name)
    imgs = coco.loadImgs(imgIds)
    # Keep only the images that are actually present on disk.
    _imgs = list(filter(lambda img: os.path.exists(os.path.join(path, img['file_name'])), imgs))
    if len(imgs) > len(_imgs):
        tf.logging.warn('%d of %d images do not exist' % (len(imgs) - len(_imgs), len(imgs)))
    cnt_noobj = 0
    for img in tqdm.tqdm(_imgs):
        annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
        anns = coco.loadAnns(annIds)
        if len(anns) <= 0:
            cnt_noobj += 1
            continue
        imagepath = os.path.join(path, img['file_name'])
        width, height = img['width'], img['height']
        imageshape = [height, width, 3]
        objects_class = np.array([id_index[ann['category_id']] for ann in anns], dtype=np.int64)
        # COCO boxes are (x, y, w, h); convert to (xmin, ymin, xmax, ymax).
        objects_coord = [ann['bbox'] for ann in anns]
        objects_coord = [(x, y, x + w, y + h) for x, y, w, h in objects_coord]
        objects_coord = np.array(objects_coord, dtype=np.float32)
        if verify:
            if not verify_coords(objects_coord, imageshape):
                tf.logging.error('failed to verify coordinates of ' + imagepath)
                continue
            if not verify_image_jpeg(imagepath, imageshape):
                tf.logging.error('failed to decode ' + imagepath)
                continue
        assert len(objects_class) == len(objects_coord)
        example = tf.train.Example(features=tf.train.Features(feature={
            'imagepath': tf.train.Feature(bytes_list=tf.train.BytesList(value=[tf.compat.as_bytes(imagepath)])),
            'imageshape': tf.train.Feature(int64_list=tf.train.Int64List(value=imageshape)),
            'objects': tf.train.Feature(bytes_list=tf.train.BytesList(value=[objects_class.tostring(), objects_coord.tostring()])),
        }))
        writer.write(example.SerializeToString())
    if cnt_noobj > 0:
        tf.logging.warn('%d of %d images have no object' % (cnt_noobj, len(_imgs)))
    return True
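# A minimal driver sketch for the writer above. The TSV layout (a 'root'
# and 'year' column per row), the file names, and the name_index mapping
# are assumptions for illustration, not part of the original.
import pandas as pd
import tensorflow as tf

name_index = {'person': 0, 'car': 1}  # class name -> contiguous label index
with tf.python_io.TFRecordWriter('coco_train.tfrecord') as writer:
    for _, row in pd.read_csv('coco.tsv', sep='\t').iterrows():
        coco(writer, name_index, 'train', row, verify=True)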
def get_category_info(category_names, coco):
    """
    Get the COCO category info and image ids for given category names.

    :param category_names: List of category names.
    :param coco: Pycocotools COCO instance.
    :return: Dict with:
        category_info['person'] = (id, [image_ids, ...])
    """
    # Look the ids up one name at a time: getCatIds() returns ids in dataset
    # order, not query order, so zipping names against it can mispair them.
    info = {}
    for category_name in category_names:
        category_id = coco.getCatIds(catNms=[category_name])[0]
        image_ids = coco.getImgIds(catIds=category_id)
        info[category_name] = (category_id, image_ids)
    return info
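# Hypothetical usage of get_category_info; the annotation path is an
# assumption for illustration.
from pycocotools.coco import COCO

coco = COCO('annotations/instances_val2017.json')
info = get_category_info(['person', 'dog'], coco)
person_id, person_image_ids = info['person']
print(person_id, len(person_image_ids))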
def get_keypoint_names(parameters):
    """
    Return the keypoint names for the person category.

    :param parameters: Dict with:
        parameters = {
            'coco-data-dir': pathlib.Path(...),
            'annotations': Path to annotations file with keypoints
        }
    :return: List of keypoint names.
    """
    coco = pycocotools.coco.COCO(
        str(parameters['coco-data-dir'] / parameters['annotations']))
    category_id = coco.getCatIds(catNms=['person'])
    category_info = coco.loadCats(category_id)[0]
    keypoint_names = category_info['keypoints']
    return keypoint_names
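# Hypothetical usage of get_keypoint_names; the paths are assumptions.
import pathlib

parameters = {
    'coco-data-dir': pathlib.Path('~/coco').expanduser(),
    'annotations': 'annotations/person_keypoints_val2017.json',
}
print(get_keypoint_names(parameters))
# expected output starts with: ['nose', 'left_eye', 'right_eye', ...]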
dataType = 'val2014'
dirImg = '%s/%s' % (dataDir, dataType)
if not os.path.isdir(dirImg):
    raise Exception('Cannot find directory with images [%s]' % dirImg)
dirOut = '%s/%s-food2' % (dataDir, dataType)
makeDirIfNotExists(pathToDir=dirOut, isCleanIfExists=False)
#
annFile = '%s/annotations/instances_%s.json' % (dataDir, dataType)
imgDir = '%s/%s' % (dataDir, dataType)
if not os.path.isdir(imgDir):
    raise Exception('Cannot find directory with MS-COCO images [%s]' % dataDir)
#
coco = COCO(annFile)
#
listCatsFoodIdx = coco.getCatIds(supNms=['food'])
assert set(listCatsFoodIdx) == set(listSortedFoodIds)
for ii, idx in enumerate(listCatsFoodIdx):
    tmpCat = coco.loadCats(ids=idx)[0]
    print('%d [%d] : %s (%s)' % (ii, idx, tmpCat['name'], tmpCat['supercategory']))
#
# Collect, per image, the list of food category ids it contains.
tmpDictFoodImgIds = {}
for ii, idx in enumerate(listSortedFoodIds):
    tmpImgIds = coco.getImgIds(catIds=idx)
    for timgId in tmpImgIds:
        if timgId in tmpDictFoodImgIds:
            tmpDictFoodImgIds[timgId].append(idx)
        else:
            tmpDictFoodImgIds[timgId] = [idx]
setAllFoodImgIds = sorted(tmpDictFoodImgIds.keys())
STD = np.array([0.289, 0.274, 0.278]).astype(np.float32)
max_objs = 32
_valid_ids = [1]
class_name = ['__background__', 'person']
_data_rng = np.random.RandomState(123)
_eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32)
_eig_vec = np.array([[-0.58752847, -0.69563484, 0.41340352],
                     [-0.5832747, 0.00994535, -0.81221408],
                     [-0.56089297, 0.71832671, 0.41158938]], dtype=np.float32)

coco = coco.COCO(anno_path)
images = coco.getImgIds()
catIds = coco.getCatIds(class_name[-1])
assert catIds == _valid_ids
images = coco.getImgIds(images, catIds)
num_samples = len(images)

# Draw a random image that contains at least one person.
index = np.random.randint(num_samples)
img_id = images[index]
file_name = coco.loadImgs(ids=[img_id])[0]['file_name']
img_path = os.path.join(img_dir, file_name)
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
# Keep only non-crowd person annotations.
anns = list(filter(lambda x: x['category_id'] in _valid_ids and x['iscrowd'] != 1, anns))
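# A sketch of how the PCA statistics above (_data_rng, _eig_val, _eig_vec)
# are typically used: AlexNet-style lighting augmentation. The helper name
# and the 0.1 scale are assumptions, not taken from the snippet above.
def lighting_(data_rng, image, alphastd, eigval, eigvec):
    # Add per-image noise along the RGB principal components, in place.
    alpha = data_rng.normal(scale=alphastd, size=(3,))
    image += np.dot(eigvec, eigval * alpha)

img = np.zeros((4, 4, 3), dtype=np.float32)  # stand-in for a normalized image
lighting_(_data_rng, img, 0.1, _eig_val, _eig_vec)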
def cache(config, path, category_index):
    phase = os.path.splitext(os.path.basename(path))[0]
    data = []
    for i, row in pd.read_csv(os.path.splitext(__file__)[0] + '.tsv', sep='\t').iterrows():
        logging.info('loading data %d (%s)' % (i, ', '.join([k + '=' + str(v) for k, v in row.items()])))
        root = os.path.expanduser(os.path.expandvars(row['root']))
        year = str(row['year'])
        suffix = phase + year
        path = os.path.join(root, 'annotations', 'instances_%s.json' % suffix)
        if not os.path.exists(path):
            logging.warning(path + ' does not exist')
            continue
        coco = pycocotools.coco.COCO(path)
        catIds = coco.getCatIds(catNms=list(category_index.keys()))
        cats = coco.loadCats(catIds)
        id_index = dict((cat['id'], category_index[cat['name']]) for cat in cats)
        imgIds = coco.getImgIds()
        path = os.path.join(root, suffix)
        imgs = coco.loadImgs(imgIds)
        _imgs = list(filter(lambda img: os.path.exists(os.path.join(path, img['file_name'])), imgs))
        if len(imgs) > len(_imgs):
            logging.warning('%d of %d images do not exist' % (len(imgs) - len(_imgs), len(imgs)))
        for img in tqdm.tqdm(_imgs):
            annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
            anns = coco.loadAnns(annIds)
            if len(anns) <= 0:
                continue
            # Use a separate variable so the directory path is not clobbered
            # for the remaining images of this dataset.
            imagepath = os.path.join(path, img['file_name'])
            width, height = img['width'], img['height']
            # COCO boxes are (x, y, w, h); slice out (y, x) mins and (h, w).
            bbox = np.array([ann['bbox'] for ann in anns], dtype=np.float32)
            yx_min = bbox[:, 1::-1]
            hw = bbox[:, -1:1:-1]
            yx_max = yx_min + hw
            cls = np.array([id_index[ann['category_id']] for ann in anns], dtype=np.int64)
            difficult = np.zeros(cls.shape, dtype=np.uint8)
            try:
                if config.getboolean('cache', 'verify'):
                    size = (height, width)
                    image = cv2.imread(imagepath)
                    assert image is not None
                    assert image.shape[:2] == size[:2]
                    utils.cache.verify_coords(yx_min, yx_max, size[:2])
            except configparser.NoOptionError:
                pass
            assert len(yx_min) == len(cls)
            assert yx_min.shape == yx_max.shape
            assert len(yx_min.shape) == 2 and yx_min.shape[-1] == 2
            data.append(dict(path=imagepath, yx_min=yx_min, yx_max=yx_max, cls=cls, difficult=difficult))
        logging.warning('%d of %d images are saved' % (len(data), len(_imgs)))
    return data
def main(input_json: str, image_dir: str, coco_output_dir: Optional[str],
         tfrecords_output_dir: Optional[str], detection_output: str,
         detection_input: Optional[str], split_by: str,
         exclude_categories: List[str], detection_threshold: float,
         padding_factor: float, test_fraction: float,
         ims_per_record: int) -> None:
    """
    Args:
        input_json: str, path to JSON file with COCO-style dataset annotations
        image_dir: str, path to root folder of images
        coco_output_dir: str, path to output directory for a dataset in COCO format
        tfrecords_output_dir: str, path to output directory for a dataset in TFRecords format
        detection_output: str, path to pickle file for saving detections
        detection_input: str, path to pickle file of existing detections
            generated by this script, used to continue a partially processed dataset
        split_by: str, key in image-level annotations that specifies the splitting criteria
        exclude_categories: list of str, names of categories to ignore during detection
        detection_threshold: float, in [0, 1]
        padding_factor: float, padding around detected objects when cropping
        test_fraction: float, in [0, 1]
        ims_per_record: int
    """
    # `args` is the module-level argparse namespace.
    graph = load_frozen_graph(args.frozen_graph)

    # Load COCO-style annotations from the input dataset
    coco = pycocotools.coco.COCO(input_json)

    # Get all categories, their names, and create updated IDs for the json file
    categories = coco.loadCats(coco.getCatIds())
    cat_id_to_names = {cat['id']: cat['name'] for cat in categories}
    cat_id_to_new_id = {old_key: idx for idx, old_key in enumerate(cat_id_to_names.keys())}

    print('All categories:', list(cat_id_to_names.values()))
    for ignore_cat in exclude_categories:
        if ignore_cat not in cat_id_to_names.values():
            raise ValueError(f'Category {ignore_cat} does not exist in dataset')

    # Prepare the COCO-style json files
    train_json = dict(images=[], categories=[], annotations=[])
    test_json = dict(images=[], categories=[], annotations=[])
    for old_cat_id in cat_id_to_names.keys():
        train_json['categories'].append(dict(
            id=cat_id_to_new_id[old_cat_id],
            name=cat_id_to_names[old_cat_id],
            supercategory='entity'))
    test_json['categories'] = train_json['categories']

    # Split the dataset by locations
    random.seed(0)
    print('Example of the annotation of a single image:')
    print(list(coco.imgs.items())[0])
    print('The corresponding category annotation:')
    print(coco.imgToAnns[list(coco.imgs.items())[0][0]])
    locations = sorted(set(ann[split_by] for ann in coco.imgs.values()))
    test_locations = sorted(
        random.sample(locations, max(1, int(test_fraction * len(locations)))))
    train_locations = sorted(set(locations) - set(test_locations))
    print('{} locations in total, {} for training, {} for testing'.format(
        len(locations), len(train_locations), len(test_locations)))
    print('Training uses locations ', train_locations)
    print('Testing uses locations ', test_locations)

    # Load detections
    if detection_input is not None:
        print(f'Loading existing detections from {detection_input}')
        with open(detection_input, 'rb') as f:
            detections = pickle.load(f)
    else:
        detections = dict()

    train_tfr_writer = None
    test_tfr_writer = None
    if tfrecords_output_dir is not None:
        train_tfr_writer = TFRecordsWriter(
            os.path.join(tfrecords_output_dir, 'train-{:05d}'), ims_per_record)
        test_tfr_writer = TFRecordsWriter(
            os.path.join(tfrecords_output_dir, 'test-{:05d}'), ims_per_record)

    with graph.as_default():
        with tf.Session() as sess:
            run_detection(
                sess, coco, cat_id_to_names, cat_id_to_new_id, detections,
                train_locations, train_json, test_json, train_tfr_writer,
                test_tfr_writer, image_dir, coco_output_dir,
                split_by, exclude_categories, detection_threshold,
                padding_factor)

    if tfrecords_output_dir is not None:
        train_tfr_writer.close()
        test_tfr_writer.close()
        label_map = [
            'item {{name: "{}" id: {}}}\n'.format(cat['name'], cat['id'])
            for cat in train_json['categories']]
        pbtxt_path = os.path.join(tfrecords_output_dir, 'label_map.pbtxt')
        with open(pbtxt_path, 'w') as f:
            f.write(''.join(label_map))

    if coco_output_dir is not None:
        # Write out COCO-style json files to the output directory
        with open(os.path.join(coco_output_dir, 'train.json'), 'wt') as fi:
            json.dump(train_json, fi)
        with open(os.path.join(coco_output_dir, 'test.json'), 'wt') as fi:
            json.dump(test_json, fi)

    # Write detections to file with pickle
    with open(detection_output, 'wb') as f:
        pickle.dump(detections, f, pickle.HIGHEST_PROTOCOL)
def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
              class_map=None, return_coco=False, auto_download=False):
    """Load a subset of the COCO dataset.
    dataset_dir: The root directory of the COCO dataset.
    subset: What to load (train, val, minival, valminusminival)
    year: What dataset year to load (2014, 2017) as a string, not an integer
    class_ids: If provided, only loads images that have the given classes.
    class_map: TODO: Not implemented yet. Supports mapping classes from
        different datasets to the same class ID.
    return_coco: If True, returns the COCO object.
    auto_download: Automatically download and unzip MS-COCO images and annotations
    """
    if auto_download is True:
        self.auto_download(dataset_dir, subset, year)

    coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
    if subset == "minival" or subset == "valminusminival":
        subset = "val"
    image_dir = "{}/{}{}".format(dataset_dir, subset, year)

    # Load all classes or a subset?
    if not class_ids:
        # All classes
        class_ids = sorted(coco.getCatIds())

    # All images or a subset?
    if class_ids:
        image_ids = []
        for id in class_ids:
            image_ids.extend(list(coco.getImgIds(catIds=[id])))
        # Remove duplicates
        image_ids = list(set(image_ids))
    else:
        # All images
        image_ids = list(coco.imgs.keys())

    # Add classes
    for i in class_ids:
        self.add_class("coco", i, coco.loadCats(i)[0]["name"])

    # Add images
    for i in image_ids:
        self.add_image(
            "coco", image_id=i,
            path=os.path.join(image_dir, coco.imgs[i]['file_name']),
            width=coco.imgs[i]["width"],
            height=coco.imgs[i]["height"],
            annotations=coco.loadAnns(coco.getAnnIds(imgIds=[i], catIds=class_ids, iscrowd=None)))
    if return_coco:
        return coco
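# Hypothetical usage, assuming the Mask R-CNN-style CocoDataset subclass this
# method belongs to; the dataset root is an assumption.
dataset = CocoDataset()
coco = dataset.load_coco('/data/coco', 'val', year='2017', return_coco=True)
dataset.prepare()
print(dataset.num_classes, len(dataset.image_ids))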
from pycocotools import coco
import matplotlib.pyplot as plt
import skimage.io as io
import time

time1 = time.time()
annFile = '/media/zhuzhu/ec114170-f406-444f-bee7-a3dc0a86cfa2/dataset/coco/annotations/person_keypoints_val2017.json'
dataDir = '/media/zhuzhu/ec114170-f406-444f-bee7-a3dc0a86cfa2/dataset/coco/images/val2017'
saveDir = '/media/zhuzhu/ec114170-f406-444f-bee7-a3dc0a86cfa2/coco_ground/val2017'

coco = coco.COCO(annFile)
catIds = coco.getCatIds(catNms=['person'])
imgIds = coco.getImgIds(catIds=catIds)
for idx in imgIds:
    img = coco.loadImgs(ids=idx)[0]
    annIds = coco.getAnnIds(imgIds=idx, catIds=catIds)
    anns = coco.loadAnns(ids=annIds)
    plt.figure(idx)
    I = io.imread('%s/%s' % (dataDir, img['file_name']))
    plt.imshow(I)
    plt.axis('off')
    coco.showAnns(anns=anns)
    plt.savefig('%s/%s' % (saveDir, img['file_name']))
    plt.close()
time2 = time.time()
print('spent t = %.2f min' % ((time2 - time1) / 60))
num_classes = 3
_valid_ids = [3, 6, 8]
_classes = {ind + 1: cat_id for ind, cat_id in enumerate(_valid_ids)}
_to_order = {cat_id: ind for ind, cat_id in enumerate(_valid_ids)}

coco = coco.COCO(ANN_PATH)
CAT_NAMES = [coco.loadCats([_classes[i + 1]])[0]['name']
             for i in range(num_classes)]
COLORS = [((np.random.random((3,)) * 0.6 + 0.4) * 255).astype(np.uint8)
          for _ in range(num_classes)]

# self.images = self.coco.getImgIds()
catIds = coco.getCatIds(catNms=['car', 'bus', 'truck'])
img_ids = coco.getImgIds(catIds=catIds)
num_samples = len(img_ids)
print(num_samples)

save_dir = '/kaggle/working/cnn/exp/ctdet/coco_dla_test/'
dets = coco.loadRes('{}/results.json'.format(save_dir))
imgs = []
for i, img_id in enumerate(img_ids):
    print(img_id)
    img_info = coco.loadImgs(ids=[img_id])[0]
    img_path = IMG_PATH + img_info['file_name']
    img = cv2.imread(img_path)
    gt_ids = coco.getAnnIds(imgIds=[img_id], catIds=catIds)
    gts = coco.loadAnns(gt_ids)
    gt_img = img.copy()
    pred_img = img.copy()
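    # Hypothetical continuation of the loop body above, not part of the
    # original snippet: draw ground-truth boxes (green) on gt_img and
    # detections (red) on pred_img; the 0.3 score threshold is an assumption.
    for gt in gts:
        x, y, w, h = map(int, gt['bbox'])
        cv2.rectangle(gt_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    det_ids = dets.getAnnIds(imgIds=[img_id], catIds=catIds)
    for det in dets.loadAnns(det_ids):
        if det['score'] >= 0.3:
            x, y, w, h = map(int, det['bbox'])
            cv2.rectangle(pred_img, (x, y), (x + w, y + h), (0, 0, 255), 2)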