def _load_trainvalsplit(self, ):
    """Index the 'trainval2014' split: all of train2014 plus val2014 minus minival.

    Reads ``instances_minival2014.json`` into ``self._minival`` and loads the
    train2014 and val2014 annotation files through ``COCO``. It then fills
    ``self._imgs`` with ``(img_id, image_info, source)`` tuples, where
    ``source`` is 0 for entries taken from the train annotations and 1 for
    entries taken from the val annotations (matching the order of
    ``self._cocos``). Image id 320612 is always left out, and val images for
    which ``self._is_in_minival`` is true are left out as well.

    Also sets ``self._data_size``, ``self._cocos`` and ``self.classes``
    (``u'background'`` followed by the category names of the train
    annotations).

    Raises:
        AssertionError: if ``self._split`` is not 'trainval2014' or the
            minival annotation file is missing.
    """
    assert self._split == 'trainval2014'

    ann_root = os.path.join(self._data_dir, 'annotations')
    minival_json = os.path.join(ann_root, 'instances_minival2014.json')
    minival2014_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0'
    assert os.path.exists(minival_json), \
        'need to download %s minival split to %s' % (minival2014_url,
                                                     minival_json)

    import ujson
    with open(minival_json, 'r') as handle:
        self._minival = ujson.load(handle)

    coco_train = COCO(os.path.join(ann_root, 'instances_train2014.json'))
    coco_val = COCO(os.path.join(ann_root, 'instances_val2014.json'))

    entries = []
    # Train images: everything except the excluded id.
    for img_id, info in coco_train.imgs.items():
        if img_id != 320612:
            entries.append((img_id, info, 0))
    # Val images: drop the minival subset, then the excluded id.
    for img_id, info in coco_val.imgs.items():
        if self._is_in_minival(img_id):
            continue
        if img_id == 320612:
            continue
        entries.append((img_id, info, 1))

    self._data_size = len(entries)
    self._imgs = entries
    self._cocos = (coco_train, coco_val)

    class_names = [u'background']
    for category in coco_train.loadCats(coco_train.getCatIds()):
        class_names.append(category['name'])
    self.classes = class_names
def _load_trainvalsplit(self, ):
    """Index the 'trainval2014' split from one merged annotation file.

    Loads ``annotations/instances_trainval2014.json`` under
    ``self._data_dir`` through ``COCO`` and stores one
    ``(img_id, image_info, 0)`` tuple per image in ``self._imgs``, leaving
    out image id 320612. Also sets ``self._data_size``, ``self._cocos``
    (a one-element tuple) and ``self.classes`` (``u'background'`` followed
    by the category names).

    Raises:
        AssertionError: if ``self._split`` is not 'trainval2014'.
    """
    assert self._split == 'trainval2014'

    ann_path = os.path.join(self._data_dir, 'annotations',
                            'instances_trainval2014.json')
    coco_train = COCO(ann_path)

    entries = []
    for img_id, info in coco_train.imgs.items():
        if img_id == 320612:
            continue
        entries.append((img_id, info, 0))

    self._data_size = len(entries)
    self._imgs = entries
    self._cocos = (coco_train, )

    class_names = [u'background']
    class_names.extend(
        category['name']
        for category in coco_train.loadCats(coco_train.getCatIds()))
    self.classes = class_names
def _load(self, ):
    """Index a single COCO 2014 split (train, val or minival).

    Loads ``annotations/instances_<split>.json`` under ``self._data_dir``
    through ``COCO`` and stores one ``(img_id, image_info, 0)`` tuple per
    image in ``self._imgs``, leaving out image id 320612. Also sets
    ``self._data_size``, ``self._cocos`` (a one-element tuple) and
    ``self.classes`` (``u'background'`` followed by the category names).

    Raises:
        AssertionError: if ``self._split`` is not one of 'train2014',
            'val2014' or 'minival2014'.
    """
    assert self._split in ('train2014', 'val2014', 'minival2014')

    ann_path = os.path.join(self._data_dir, 'annotations',
                            'instances_%s.json' % (self._split))
    dataset = COCO(ann_path)

    entries = [(img_id, info, 0)
               for img_id, info in dataset.imgs.items()
               if img_id != 320612]

    self._data_size = len(entries)
    self._imgs = entries
    self._cocos = (dataset, )

    category_names = [
        category['name']
        for category in dataset.loadCats(dataset.getCatIds())
    ]
    self.classes = [u'background'] + category_names
def _add_to_tfrecord_trainvalsplit(record_dir, image_dir, annotation_dir,
                                   split_name):
    """Loads image files and writes files to a TFRecord.

    Walks the train2014 images followed by the val2014 images and writes the
    ones belonging to ``split_name`` into ZLIB-compressed TFRecord shards of
    up to 2500 examples each. 'trainval2014' keeps every image that is not
    listed in the minival file; 'minival2014' keeps only the listed ones.
    Image id 320612 is always skipped.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: forwarded to ``_get_dataset_filename`` to name each shard.
        image_dir: image root; each file is read from
            ``image_dir/<second '_'-separated token of file_name>/<file_name>``.
        annotation_dir: directory holding ``instances_train2014.json``,
            ``instances_val2014.json`` and ``instances_minival2014.json``.
        split_name: 'trainval2014' or 'minival2014'.

    Raises:
        AssertionError: on an unknown split, a missing minival file, or an
            image that does not end up with ``height * width * 3`` values.
    """
    assert split_name in ['trainval2014', 'minival2014']

    # NOTE: this instances_minival2014.json file cannot be processed by the
    # official COCO API, so only its id list (['images'][...]['id']) is used.
    minival_path = os.path.join(annotation_dir, 'instances_minival2014.json')
    minival2014_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0'
    assert os.path.exists(
        minival_path), 'need to download %s minival split to %s' % (
            minival2014_url, minival_path)

    import ujson as json
    with open(minival_path, 'r') as f:
        minival = json.load(f)
    # Build the id set once: membership is tested for every image below, and
    # a linear scan per image made the conversion quadratic.
    minival_ids = {img['id'] for img in minival['images']}

    annFile = os.path.join(annotation_dir, 'instances_train2014.json')
    coco_train = COCO(annFile)
    annFile = os.path.join(annotation_dir, 'instances_val2014.json')
    coco_val = COCO(annFile)

    imgs1 = [(img_id, coco_train.imgs[img_id]) for img_id in coco_train.imgs]
    imgs2 = [(img_id, coco_val.imgs[img_id]) for img_id in coco_val.imgs]
    imgs = imgs1 + imgs2
    # Entries with index < num_of_train come from the train annotations.
    num_of_train = len(coco_train.imgs)

    num_per_shard = 2500
    num_shards = int(
        np.ceil((len(imgs) + 0.0 - len(minival['images'])) / num_per_shard))
    if split_name == 'minival2014':
        num_shards = int(
            np.ceil((len(minival['images']) + 0.0) / num_per_shard))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): the reader, placeholder, png op and session are not
        # referenced by the loop below; they are kept so the graph/session
        # set-up is unchanged.
        image_reader = ImageReader()

        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)

        with tf.Session('') as sess:
            cnt = 0
            shard_id = -1
            tfrecord_writer = None
            try:
                for i, (img_id, img_info) in enumerate(imgs):
                    img_name = img_info['file_name']
                    split = img_name.split('_')[1]
                    img_name = os.path.join(image_dir, split, img_name)

                    if str(img_id) == '320612':
                        continue
                    is_minival = img_id in minival_ids
                    if split_name == 'trainval2014' and is_minival:
                        continue
                    if split_name == 'minival2014' and not is_minival:
                        continue

                    cnt += 1
                    # First example of a shard: open the next output file.
                    if cnt % num_per_shard == 1:
                        shard_id += 1
                        record_filename = _get_dataset_filename(
                            record_dir, split_name, shard_id, num_shards)
                        options = tf.python_io.TFRecordOptions(
                            TFRecordCompressionType.ZLIB)
                        tfrecord_writer = tf.python_io.TFRecordWriter(
                            record_filename, options=options)

                    if cnt % 100 == 1:
                        print(
                            '%d (image_id: %d) of %d, split: %s, shard_id: %d'
                            % (i, img_id, len(imgs), split_name, shard_id))

                    # process anns
                    height, width = img_info['height'], img_info['width']
                    coco = coco_train if i < num_of_train else coco_val
                    gt_boxes, masks, mask = _get_coco_masks(
                        coco, img_id, height, width, img_name)

                    # read image as RGB numpy
                    img = np.array(Image.open(img_name))
                    if img.size == height * width:
                        # Single-channel image: replicate it to 3 channels.
                        print('Gray Image %s' % str(img_id))
                        im = np.empty((height, width, 3), dtype=np.uint8)
                        im[:, :, :] = img[:, :, np.newaxis]
                        img = im

                    img = img.astype(np.uint8)
                    assert img.size == width * height * 3, '%s' % str(img_id)

                    # tobytes() is the non-deprecated spelling of tostring().
                    img_raw = img.tobytes()
                    mask_raw = mask.tobytes()

                    example = _to_tfexample_coco_raw(
                        img_id, img_raw, mask_raw, height, width,
                        gt_boxes.shape[0], gt_boxes.tobytes(),
                        masks.tobytes())
                    tfrecord_writer.write(example.SerializeToString())

                    # Shard is full: close it now.
                    if cnt % num_per_shard == 0:
                        tfrecord_writer.close()
                        tfrecord_writer = None
            finally:
                # The last shard is usually only partly filled, and the final
                # entries of `imgs` may all be skipped by the filters above,
                # so the open writer must be closed here rather than on the
                # last loop index.
                if tfrecord_writer is not None:
                    tfrecord_writer.close()
def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):
    """Loads image files and writes files to a TFRecord.

    Reads ``instances_<split_name>.json`` through ``COCO`` and writes every
    image except id 320612 into ZLIB-compressed TFRecord shards, storing the
    raw uint8 image, the mask, and the ground-truth boxes and instance masks
    as byte strings.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: forwarded to ``_get_dataset_filename`` to name each shard.
        image_dir: image root; each file is read from
            ``image_dir/<second '_'-separated token of file_name>/<file_name>``.
        annotation_dir: directory holding ``instances_<split_name>.json``.
        split_name: one of 'train2014', 'val2014', 'valminusminival2014',
            'minival2014'.

    Raises:
        AssertionError: on an unknown split, or an image that does not end
            up with ``height * width * 3`` values.
    """
    assert split_name in [
        'train2014', 'val2014', 'valminusminival2014', 'minival2014'
    ]

    annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))
    coco = COCO(annFile)
    print('%s has %d images' % (split_name, len(coco.imgs)))
    imgs = [(img_id, coco.imgs[img_id]) for img_id in coco.imgs]

    # Roughly 2500 images per shard, but never fewer than one shard: with
    # less than 2500 images the plain division gave 0 shards and the
    # per-shard computation below divided by zero.
    num_shards = max(1, len(imgs) // 2500)
    num_per_shard = int(math.ceil(len(imgs) / float(num_shards)))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): the reader, placeholder, png op and session are not
        # referenced by the loop below; they are kept so the graph/session
        # set-up is unchanged.
        image_reader = ImageReader()

        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)

        with tf.Session('') as sess:
            for shard_id in range(num_shards):
                record_filename = _get_dataset_filename(
                    record_dir, split_name, shard_id, num_shards)
                options = tf.python_io.TFRecordOptions(
                    TFRecordCompressionType.ZLIB)
                with tf.python_io.TFRecordWriter(
                        record_filename, options=options) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(imgs))
                    for i in range(start_ndx, end_ndx):
                        if i % 50 == 0:
                            sys.stdout.write(
                                '\r>> Converting image %d/%d shard %d\n' %
                                (i + 1, len(imgs), shard_id))
                            sys.stdout.flush()

                        # image id and path
                        img_id = imgs[i][0]
                        img_name = imgs[i][1]['file_name']
                        split = img_name.split('_')[1]
                        img_name = os.path.join(image_dir, split, img_name)

                        if FLAGS.vis:
                            # Debug aid: dump the image and draw it.
                            im = Image.open(img_name)
                            im.save('img.png')
                            plt.figure(0)
                            plt.axis('off')
                            plt.imshow(im)

                        # jump over the damaged images
                        if str(img_id) == '320612':
                            continue

                        # process anns
                        height, width = imgs[i][1]['height'], imgs[i][1][
                            'width']
                        gt_boxes, masks, mask = _get_coco_masks(
                            coco, img_id, height, width, img_name)

                        # read image as RGB numpy
                        img = np.array(Image.open(img_name))
                        if img.size == height * width:
                            # Single-channel image: replicate to 3 channels.
                            print('Gray Image %s' % str(img_id))
                            im = np.empty((height, width, 3), dtype=np.uint8)
                            im[:, :, :] = img[:, :, np.newaxis]
                            img = im

                        img = img.astype(np.uint8)
                        assert img.size == width * height * 3, '%s' % str(
                            img_id)

                        # tobytes() is the non-deprecated spelling of
                        # tostring().
                        img_raw = img.tobytes()
                        mask_raw = mask.tobytes()

                        example = _to_tfexample_coco_raw(
                            img_id, img_raw, mask_raw, height, width,
                            gt_boxes.shape[0], gt_boxes.tobytes(),
                            masks.tobytes())
                        tfrecord_writer.write(example.SerializeToString())

    sys.stdout.write('\n')
    sys.stdout.flush()
def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):
    """Loads image files and writes files to a TFRecord.

    Reads ``instances_<split_name>.json`` through ``COCO`` and writes the
    images into 40 (train2014) or 20 (val2014) ZLIB-compressed TFRecord
    shards. Each example carries the original JPEG bytes, the mask encoded
    as PNG, the class list, and the boxes and instance masks as byte strings.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: forwarded to ``_get_dataset_filename`` to name each shard.
        image_dir: image root; each file is read from
            ``image_dir/<split_name>/<file_name>``.
        annotation_dir: directory holding ``instances_<split_name>.json``.
        split_name: 'train2014' or 'val2014'.

    Raises:
        AssertionError: on an unknown split, a mismatch between the number
            of classes, boxes and masks, or a missing image file.
    """
    assert split_name in ['train2014', 'val2014']

    annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))
    coco = COCO(annFile)
    print('%s has %d images' % (split_name, len(coco.imgs)))
    imgs = [(img_id, coco.imgs[img_id]) for img_id in coco.imgs]

    num_shards = 40 if split_name == 'train2014' else 20
    num_per_shard = int(math.ceil(len(imgs) / float(num_shards)))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        image_reader = ImageReader()

        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)

        with tf.Session('') as sess:
            for shard_id in range(num_shards):
                record_filename = _get_dataset_filename(
                    record_dir, split_name, shard_id, num_shards)
                options = tf.python_io.TFRecordOptions(
                    TFRecordCompressionType.ZLIB)
                with tf.python_io.TFRecordWriter(
                        record_filename, options=options) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(imgs))
                    for i in range(start_ndx, end_ndx):
                        sys.stdout.write(
                            '\r>> Converting image %d/%d shard %d\n' %
                            (i + 1, len(imgs), shard_id))
                        sys.stdout.flush()

                        # image id and path
                        img_id = imgs[i][0]
                        img_name = imgs[i][1]['file_name']
                        img_name = os.path.join(image_dir, split_name,
                                                img_name)

                        if FLAGS.vis:
                            # Debug aid: dump the image and draw it.
                            im = Image.open(img_name)
                            im.save('img.png')
                            plt.figure(0)
                            plt.axis('off')
                            plt.imshow(im)

                        # jump over the damaged images
                        if split_name == 'val2014' and str(
                                img_id) == '320612':
                            continue

                        # process anns
                        h, w = imgs[i][1]['height'], imgs[i][1]['width']
                        classes, bboxes, masks, mask = _get_coco_masks(
                            coco, img_id, h, w)
                        assert classes.shape[0] == bboxes.shape[0] == masks.shape[0], \
                            'Check number of instances for %s' % (img_name)

                        # this encodes the mask matrix into a png format
                        # string buffer
                        label_data = sess.run(
                            encoded_image,
                            feed_dict={
                                mask_placeholder: np.expand_dims(mask, axis=2)
                            })

                        # read image
                        assert os.path.exists(
                            img_name), '%s dont exists' % img_name
                        # JPEG data is binary: open with 'rb' (text mode
                        # tries to decode the bytes on Python 3) and close
                        # the handle instead of leaking one per image.
                        with tf.gfile.FastGFile(img_name,
                                                'rb') as image_file:
                            image_data = image_file.read()
                        height, width, _ = image_reader.read_jpeg_dims(
                            sess, image_data)

                        # to tf-record; tobytes() is the non-deprecated
                        # spelling of tostring().
                        example = _to_tfexample_v2(
                            image_data, 'jpg', label_data, 'png', height,
                            width, classes.shape[0], classes.tolist(),
                            bboxes.tobytes(), masks.tobytes())
                        tfrecord_writer.write(example.SerializeToString())

    sys.stdout.write('\n')
    sys.stdout.flush()
def _add_to_tfrecord_trainvalsplit(record_dir, image_dir, annotation_dir,
                                   split_name):
    """Loads image files and writes files to a TFRecord.

    Walks the train2014 images followed by the val2014 images and writes the
    ones belonging to ``split_name`` into ZLIB-compressed TFRecord shards of
    up to 2500 examples each. 'trainval2014' keeps every image that is not
    listed in the minival file; 'minival2014' keeps only the listed ones.
    Image id 320612 is always skipped.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: forwarded to ``_get_dataset_filename`` to name each shard.
        image_dir: image root; each file is read from
            ``image_dir/<second '_'-separated token of file_name>/<file_name>``.
        annotation_dir: directory holding ``instances_train2014.json``,
            ``instances_val2014.json`` and ``instances_minival2014.json``.
        split_name: 'trainval2014' or 'minival2014'.

    Raises:
        AssertionError: on an unknown split, a missing minival file, or an
            image that does not end up with ``height * width * 3`` values.
    """
    assert split_name in ['trainval2014', 'minival2014']

    # NOTE: this instances_minival2014.json file cannot be processed by the
    # official COCO API, so only its id list (['images'][...]['id']) is used.
    minival_path = os.path.join(annotation_dir, 'instances_minival2014.json')
    minival2014_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0'
    assert os.path.exists(
        minival_path), 'need to download %s minival split to %s' % (
            minival2014_url, minival_path)

    import ujson as json
    with open(minival_path, 'r') as f:
        minival = json.load(f)
    # Build the id set once: membership is tested for every image below, and
    # a linear scan per image made the conversion quadratic.
    minival_ids = {img['id'] for img in minival['images']}

    annFile = os.path.join(annotation_dir, 'instances_train2014.json')
    coco_train = COCO(annFile)
    annFile = os.path.join(annotation_dir, 'instances_val2014.json')
    coco_val = COCO(annFile)

    imgs1 = [(img_id, coco_train.imgs[img_id]) for img_id in coco_train.imgs]
    imgs2 = [(img_id, coco_val.imgs[img_id]) for img_id in coco_val.imgs]
    imgs = imgs1 + imgs2
    # Entries with index < num_of_train come from the train annotations.
    num_of_train = len(coco_train.imgs)

    num_per_shard = 2500
    num_shards = int(
        np.ceil((len(imgs) + 0.0 - len(minival['images'])) / num_per_shard))
    if split_name == 'minival2014':
        num_shards = int(
            np.ceil((len(minival['images']) + 0.0) / num_per_shard))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): the reader, placeholder, png op and session are not
        # referenced by the loop below; they are kept so the graph/session
        # set-up is unchanged.
        image_reader = ImageReader()

        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)

        with tf.Session('') as sess:
            cnt = 0
            shard_id = -1
            tfrecord_writer = None
            try:
                for i, (img_id, img_info) in enumerate(imgs):
                    img_name = img_info['file_name']
                    split = img_name.split('_')[1]
                    img_name = os.path.join(image_dir, split, img_name)

                    if str(img_id) == '320612':
                        continue
                    is_minival = img_id in minival_ids
                    if split_name == 'trainval2014' and is_minival:
                        continue
                    if split_name == 'minival2014' and not is_minival:
                        continue

                    cnt += 1
                    # First example of a shard: open the next output file.
                    if cnt % num_per_shard == 1:
                        shard_id += 1
                        record_filename = _get_dataset_filename(
                            record_dir, split_name, shard_id, num_shards)
                        options = tf.python_io.TFRecordOptions(
                            TFRecordCompressionType.ZLIB)
                        tfrecord_writer = tf.python_io.TFRecordWriter(
                            record_filename, options=options)

                    if cnt % 100 == 1:
                        print(
                            '%d (image_id: %d) of %d, split: %s, shard_id: %d'
                            % (i, img_id, len(imgs), split_name, shard_id))

                    # process anns
                    height, width = img_info['height'], img_info['width']
                    coco = coco_train if i < num_of_train else coco_val
                    gt_boxes, masks, mask = _get_coco_masks(
                        coco, img_id, height, width, img_name)

                    # read image as RGB numpy
                    img = np.array(Image.open(img_name))
                    if img.size == height * width:
                        # Single-channel image: replicate it to 3 channels.
                        print('Gray Image %s' % str(img_id))
                        im = np.empty((height, width, 3), dtype=np.uint8)
                        im[:, :, :] = img[:, :, np.newaxis]
                        img = im

                    img = img.astype(np.uint8)
                    assert img.size == width * height * 3, '%s' % str(img_id)

                    # tobytes() is the non-deprecated spelling of tostring().
                    img_raw = img.tobytes()
                    mask_raw = mask.tobytes()

                    example = _to_tfexample_coco_raw(
                        img_id, img_raw, mask_raw, height, width,
                        gt_boxes.shape[0], gt_boxes.tobytes(),
                        masks.tobytes())
                    tfrecord_writer.write(example.SerializeToString())

                    # Shard is full: close it now.
                    if cnt % num_per_shard == 0:
                        tfrecord_writer.close()
                        tfrecord_writer = None
            finally:
                # The last shard is usually only partly filled, and the final
                # entries of `imgs` may all be skipped by the filters above,
                # so the open writer must be closed here rather than on the
                # last loop index.
                if tfrecord_writer is not None:
                    tfrecord_writer.close()
def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):
    """Loads image files and writes files to a TFRecord.

    Reads ``instances_<split_name>.json`` through ``COCO`` and writes every
    image except id 320612 into ZLIB-compressed TFRecord shards, storing the
    raw uint8 image, the mask, and the ground-truth boxes and instance masks
    as byte strings.

    Note: masks and bboxes will lose shape info after converting to string.

    Args:
        record_dir: forwarded to ``_get_dataset_filename`` to name each shard.
        image_dir: image root; each file is read from
            ``image_dir/<second '_'-separated token of file_name>/<file_name>``.
        annotation_dir: directory holding ``instances_<split_name>.json``.
        split_name: one of 'train2014', 'val2014', 'valminusminival2014',
            'minival2014'.

    Raises:
        AssertionError: on an unknown split, or an image that does not end
            up with ``height * width * 3`` values.
    """
    assert split_name in [
        'train2014', 'val2014', 'valminusminival2014', 'minival2014'
    ]

    annFile = os.path.join(annotation_dir, 'instances_%s.json' % (split_name))
    coco = COCO(annFile)
    print('%s has %d images' % (split_name, len(coco.imgs)))
    imgs = [(img_id, coco.imgs[img_id]) for img_id in coco.imgs]

    # Roughly 2500 images per shard, but never fewer than one shard: with
    # less than 2500 images the plain division gave 0 shards and the
    # per-shard computation below divided by zero.
    num_shards = max(1, len(imgs) // 2500)
    num_per_shard = int(math.ceil(len(imgs) / float(num_shards)))

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # NOTE(review): the reader, placeholder, png op and session are not
        # referenced by the loop below; they are kept so the graph/session
        # set-up is unchanged.
        image_reader = ImageReader()

        # encode mask to png_string
        mask_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(mask_placeholder)

        with tf.Session('') as sess:
            for shard_id in range(num_shards):
                record_filename = _get_dataset_filename(
                    record_dir, split_name, shard_id, num_shards)
                options = tf.python_io.TFRecordOptions(
                    TFRecordCompressionType.ZLIB)
                with tf.python_io.TFRecordWriter(
                        record_filename, options=options) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(imgs))
                    for i in range(start_ndx, end_ndx):
                        if i % 50 == 0:
                            sys.stdout.write(
                                '\r>> Converting image %d/%d shard %d\n' %
                                (i + 1, len(imgs), shard_id))
                            sys.stdout.flush()

                        # image id and path
                        img_id = imgs[i][0]
                        img_name = imgs[i][1]['file_name']
                        split = img_name.split('_')[1]
                        img_name = os.path.join(image_dir, split, img_name)

                        if FLAGS.vis:
                            # Debug aid: dump the image and draw it.
                            im = Image.open(img_name)
                            im.save('img.png')
                            plt.figure(0)
                            plt.axis('off')
                            plt.imshow(im)

                        # jump over the damaged images
                        if str(img_id) == '320612':
                            continue

                        # process anns
                        height, width = imgs[i][1]['height'], imgs[i][1][
                            'width']
                        gt_boxes, masks, mask = _get_coco_masks(
                            coco, img_id, height, width, img_name)

                        # read image as RGB numpy
                        img = np.array(Image.open(img_name))
                        if img.size == height * width:
                            # Single-channel image: replicate to 3 channels.
                            print('Gray Image %s' % str(img_id))
                            im = np.empty((height, width, 3), dtype=np.uint8)
                            im[:, :, :] = img[:, :, np.newaxis]
                            img = im

                        img = img.astype(np.uint8)
                        assert img.size == width * height * 3, '%s' % str(
                            img_id)

                        # tobytes() is the non-deprecated spelling of
                        # tostring().
                        img_raw = img.tobytes()
                        mask_raw = mask.tobytes()

                        example = _to_tfexample_coco_raw(
                            img_id, img_raw, mask_raw, height, width,
                            gt_boxes.shape[0], gt_boxes.tobytes(),
                            masks.tobytes())
                        tfrecord_writer.write(example.SerializeToString())

    sys.stdout.write('\n')
    sys.stdout.flush()
import skimage.io as io
import pylab
import json

# Default figure size for any plots drawn later in the script.
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

annType = ['segm', 'bbox', 'keypoints']
annType = annType[0]  # specify type here
prefix = 'person_keypoints' if annType == 'keypoints' else 'instances'
# Call-style print: valid on Python 2 and 3, and consistent with the rest of
# the file.
print('Running demo for *%s* results.' % (annType))

# initialize COCO ground truth api
dataDir = 'data/coco/'
dataType = 'train2014'  # val2014
annFile = '%s/annotations/%s_%s.json' % (dataDir, prefix, dataType)
cocoGt = COCO(annFile)

# initialize COCO detections api
resFile = 'output/mask_rcnn/results.json'
cocoDt = cocoGt.loadRes(resFile)

# Collect the image id of every result entry (one per detection, so ids may
# repeat).
with open(resFile) as results:
    res = json.load(results)
imgIds = [inst['image_id'] for inst in res]
def mat2coco_annots(annot_path):
    """Build an in-memory COCO index from the CityPersons ``.mat`` annotations.

    Loads the annotation array ``anno_<split>_aligned`` from the hard-coded
    CityPersons ``.mat`` file, registers four categories (pedestrian, rider,
    sitting_person, other), adds one image record per entry (fixed size
    1024 x 2048) and one annotation per box, then calls ``createIndex()``.

    Columns 1:5 of each box row are treated as (x, y, w, h): the area and
    'height' fields are taken from them, after which they are converted in
    place to (x1, y1, x2, y2). The stored 'bbox' is therefore corner-format.

    Args:
        annot_path: not used. The ``.mat`` paths are hard-coded below (the
            original loop variable shadowed this parameter before it was
            ever read).

    Returns:
        The ``COCO`` object built for the first split ('train').
    """
    # NOTE(review): the original indentation of `return coco` was lost; it is
    # placed inside the split loop here, so only the 'train' file is ever
    # converted. Confirm against the callers whether the 'val' result was
    # intended instead.
    for split, mat_path in zip(['train', 'val'], [
            'data/citypersons/annotations/anno_train.mat',
            'data/citypersons/annotations/anno_val.mat'
    ]):
        annots = sio.loadmat(mat_path)
        annots = annots['anno_' + split + '_aligned'].reshape([-1])

        coco = COCO()
        coco.dataset['images'] = []
        coco.dataset['categories'] = []
        coco.dataset['annotations'] = []
        for cat_id, cat_name in enumerate(
                ['pedestrian', 'rider', 'sitting_person', 'other'], 1):
            coco.dataset['categories'].append({
                'name': cat_name,
                'id': cat_id,
                'supercategory': 'person',
            })

        # Running annotation counter; ids start at 1 and must be unique
        # across the whole dataset, otherwise createIndex() keeps only one
        # annotation per id.
        annid = 0
        for i, ann in enumerate(annots):
            ann = ann.reshape([-1])
            image_name, bbs = ann[0][1][0], ann[0][2]
            coco.dataset['images'].append({
                'id': i,
                'file_name': image_name,
                'height': 1024,
                'width': 2048,
                'url': 'citypersons',
            })

            gt_classes, gt_boxes = bbs[:, 0], bbs[:, 1:5]
            # Computed before the in-place corner conversion below.
            areas = (gt_boxes[:, 2] + 1) * (gt_boxes[:, 3] + 1)
            heights = gt_boxes[:, 3].copy()
            gt_boxes[:, 2:4] += gt_boxes[:, 0:2]

            for j in range(gt_classes.size):
                x1, y1, x2, y2 = gt_boxes[j, :4]
                annid += 1
                coco.dataset['annotations'].append({
                    'image_id': i,
                    'bbox': gt_boxes[j, :],
                    'category_id': gt_classes[j],
                    'id': annid,
                    'iscrowd': 0,
                    'area': areas[j],
                    # Box outline as a 4-point polygon.
                    'segmentation': [[x1, y1, x1, y2, x2, y2, x2, y1]],
                    'height': heights[j],
                })

        coco.createIndex()
        return coco
gt_classes[j], 'id': annid + 1, 'iscrowd': 0, 'area': areas[j], 'segmentation': [[x1, y1, x1, y2, x2, y2, x2, y1]], 'height': heights[j], }) coco.createIndex() return coco if __name__ == '__main__': d = json.load(open(annot_path, 'r')) print(d.keys()) print(d['annotations'][0].keys(), d['annotations'][0]) print(d['categories'][0].keys(), d['categories'][0], len(d['categories'])) print(d['images'][0], d['images'][0]['id']) annot_path_trainval = './data/coco/annotations/instances_trainval_minus2014.json' if not os.path.exists(annot_path_trainval): merge_trainval(annot_path_trainval) with open(annot_path_new, 'w') as f: json.dump(d, f, indent=1, separators=(',', ': ')) train_annots = COCO(annot_path_trainval)