def test_can_save_and_load_image_with_arbitrary_extension(self):
    dataset = Dataset.from_iterable([
        DatasetItem('no_label/q/1', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem('a/b/c/2', image=Image(
            path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))),
            annotations=[
                Bbox(0, 2, 4, 2, label=0),
                Points([
                    4.23, 4.32, 5.34, 4.45, 3.54,
                    3.56, 4.52, 3.51, 4.78, 3.34
                ], label=0),
            ]),
    ], categories=['a'])

    with TestDir() as test_dir:
        VggFace2Converter.convert(dataset, test_dir, save_images=True)
        parsed_dataset = Dataset.import_from(test_dir, 'vgg_face2')

        compare_datasets(self, dataset, parsed_dataset, require_images=True)
def _image_converter(image):
    # Normalize the supported image inputs to an Image instance: a loader
    # callable or a raw array becomes in-memory data, a string is treated
    # as a path to an image file. None is passed through unchanged.
    if callable(image) or isinstance(image, np.ndarray):
        image = Image(data=image)
    elif isinstance(image, str):
        image = Image(path=image)

    assert image is None or isinstance(image, Image), type(image)
    return image
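# A minimal usage sketch (illustrative; the file path below is hypothetical)
# of the inputs normalized above:
image_from_array = _image_converter(np.zeros((4, 3, 3)))          # ndarray -> Image(data=...)
image_from_path = _image_converter('images/sample.png')           # str -> Image(path=...)
image_from_loader = _image_converter(lambda p: np.zeros((4, 3)))  # callable -> lazily-loaded Image
assert _image_converter(None) is None                             # None passes through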
def test_ctors(self):
    with TestDir() as test_dir:
        path = osp.join(test_dir, 'path.png')
        image = np.ones([2, 4, 3])
        save_image(path, image)

        for args in [
            { 'data': image },
            { 'data': image, 'path': path },
            { 'data': image, 'path': path, 'size': (2, 4) },
            { 'data': image, 'ext': 'png' },
            { 'data': image, 'ext': 'png', 'size': (2, 4) },
            { 'data': lambda p: image },
            { 'data': lambda p: image, 'path': 'somepath' },
            { 'data': lambda p: image, 'ext': 'jpg' },
            { 'path': path },
            { 'path': path, 'data': load_image },
            { 'path': path, 'data': load_image, 'size': (2, 4) },
            { 'path': path, 'size': (2, 4) },
        ]:
            with self.subTest(**args):
                img = Image(**args)

                self.assertTrue(img.has_data)
                np.testing.assert_array_equal(img.data, image)
                self.assertEqual(img.size, tuple(image.shape[:2]))

        with self.subTest():
            img = Image(size=(2, 4))
            self.assertEqual(img.size, (2, 4))
def __iter__(self):
    return iter([
        DatasetItem(id='q/1', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='a/b/c/2', image=Image(
            path='a/b/c/2.bmp', data=np.zeros((3, 4, 3)))),
    ])
def test_can_save_and_load_image_with_arbitrary_extension(self):
    expected = Dataset.from_iterable([
        DatasetItem('1', image=Image(
            path='1.JPEG', data=np.zeros((4, 3, 3))),
            annotations=[
                Bbox(0, 4, 4, 8, label=0, attributes={
                    'occluded': True,
                    'visibility': 0.0,
                    'ignored': False,
                }),
            ]),
        DatasetItem('2', image=Image(
            path='2.bmp', data=np.zeros((3, 4, 3)))),
    ], categories=['a'])

    with TestDir() as test_dir:
        self._test_save_and_load(expected,
            partial(MotSeqGtConverter.convert, save_images=True),
            test_dir, require_images=True)
def test_ctor_errors(self):
    with self.subTest('no data specified'):
        with self.assertRaisesRegex(Exception, "can not be empty"):
            Image(ext='jpg')

    with self.subTest('either path or ext'):
        with self.assertRaisesRegex(Exception, "both 'path' and 'ext'"):
            Image(path='somepath', ext='someext')
def test_can_keep_image_ext_on_resize(self):
    expected = Image(np.ones((8, 4)), ext='jpg')

    dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=Image(np.ones((4, 2)), ext='jpg'))
    ])

    dataset.transform('resize', width=4, height=8)

    actual = dataset.get('1').image
    self.assertEqual(actual.ext, expected.ext)
    self.assertTrue(np.array_equal(actual.data, expected.data))
def test_can_save_and_load_image_with_arbitrary_extension(self):
    dataset = Dataset.from_iterable([
        DatasetItem(id='a/1', image=Image(
            path='a/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='b/c/d/2', image=Image(
            path='b/c/d/2.bmp', data=np.zeros((3, 4, 3)))),
    ], categories=[])

    with TestDir() as test_dir:
        self._test_save_and_load(dataset,
            partial(LabelMeConverter.convert, save_images=True),
            test_dir, require_images=True)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    dataset = Dataset.from_iterable([
        DatasetItem('q/1', subset='train', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem('a/b/c/2', subset='valid', image=Image(
            path='a/b/c/2.bmp', data=np.zeros((3, 4, 3)))),
    ], categories=[])

    with TestDir() as test_dir:
        YoloConverter.convert(dataset, test_dir, save_images=True)
        parsed_dataset = Dataset.import_from(test_dir, 'yolo')

        compare_datasets(self, dataset, parsed_dataset, require_images=True)
def __iter__(self):
    return iter([
        DatasetItem(id='q/1', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='a/b/c/2', image=Image(
            path='a/b/c/2.bmp', data=np.ones((1, 5, 3))),
            annotations=[
                Mask(image=np.array([[1, 0, 0, 1, 0]]), label=0, id=0,
                    attributes={'is_crowd': True}),
                Mask(image=np.array([[0, 1, 1, 0, 1]]), label=1, id=0,
                    attributes={'is_crowd': True}),
            ]),
    ])
def _load_items(self, parsed):
    items = []
    for item_desc in parsed['items']:
        item_id = item_desc['id']

        image = None
        image_info = item_desc.get('image')
        if image_info:
            image_filename = image_info.get('path') or \
                item_id + DatumaroPath.IMAGE_EXT
            image_path = osp.join(self._images_dir, self._subset,
                image_filename)
            if not osp.isfile(image_path):
                # backward compatibility
                old_image_path = osp.join(self._images_dir, image_filename)
                if osp.isfile(old_image_path):
                    image_path = old_image_path

            image = Image(path=image_path, size=image_info.get('size'))

        point_cloud = None
        pcd_info = item_desc.get('point_cloud')
        if pcd_info:
            pcd_path = pcd_info.get('path')
            point_cloud = osp.join(self._pcd_dir, self._subset, pcd_path)

        related_images = None
        ri_info = item_desc.get('related_images')
        if ri_info:
            related_images = [
                Image(size=ri.get('size'),
                    path=osp.join(self._related_images_dir,
                        self._subset, item_id, ri.get('path')))
                for ri in ri_info
            ]

        annotations = self._load_annotations(item_desc)

        item = DatasetItem(id=item_id, subset=self._subset,
            annotations=annotations, image=image,
            point_cloud=point_cloud, related_images=related_images,
            attributes=item_desc.get('attr'))

        items.append(item)

    return items
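# For reference, a minimal item description this parser accepts, inferred
# from the keys read above ('id', 'image', 'point_cloud', 'related_images',
# 'attr'); values are illustrative, not taken from a real dataset:
#
# {
#     "id": "a/b/c/2",
#     "image": {"path": "a/b/c/2.bmp", "size": [3, 4]},
#     "point_cloud": {"path": "2.pcd"},
#     "related_images": [{"path": "image_0.jpg", "size": [3, 4]}],
#     "attr": {"frame": 2}
# }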
def test_can_save_and_load_image_with_arbitrary_extension(self):
    expected = Dataset.from_iterable([
        DatasetItem(id='q/1', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3))),
            attributes={'frame': 1}),
        DatasetItem(id='a/b/c/2', image=Image(
            path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))),
            attributes={'frame': 2}),
    ])

    with TestDir() as test_dir:
        self._test_save_and_load(expected,
            partial(DatumaroConverter.convert, save_images=True),
            test_dir)
def __iter__(self):
    return iter([
        DatasetItem(id='q/1', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='a/b/c/2', image=Image(
            path='a/b/c/2.bmp', data=np.ones((1, 5, 3))),
            annotations=[
                Mask(np.array([[0, 0, 0, 1, 0]]), label=self._label('a')),
                Mask(np.array([[0, 1, 1, 0, 0]]), label=self._label('b')),
            ]),
    ])
def test_can_save_and_load_image_with_arbitrary_extension(self):
    dataset = Dataset.from_iterable([
        DatasetItem(id='no_label/a', image=Image(
            path='a.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='no_label/b', image=Image(
            path='b.bmp', data=np.zeros((3, 4, 3)))),
    ], categories=[])

    with TestDir() as test_dir:
        ImagenetConverter.convert(dataset, test_dir, save_images=True)
        parsed_dataset = Dataset.import_from(test_dir, 'imagenet')

        compare_datasets(self, dataset, parsed_dataset, require_images=True)
def test_inplace_save_writes_only_updated_data(self):
    expected = Dataset.from_iterable([
        DatasetItem(1, subset='train', image=np.ones((2, 4, 3))),
        DatasetItem(2, subset='train', image=np.ones((3, 2, 3))),
    ], categories=[])

    with TestDir() as path:
        dataset = Dataset.from_iterable([
            DatasetItem(1, subset='train', image=np.ones((2, 4, 3))),
            DatasetItem(2, subset='train',
                image=Image(path='2.jpg', size=(3, 2))),
            DatasetItem(3, subset='valid', image=np.ones((2, 2, 3))),
        ], categories=[])
        dataset.export(path, 'yolo', save_images=True)

        dataset.put(DatasetItem(2, subset='train',
            image=np.ones((3, 2, 3))))
        dataset.remove(3, 'valid')
        dataset.save(save_images=True)

        self.assertEqual({'1.txt', '2.txt', '1.jpg', '2.jpg'},
            set(os.listdir(osp.join(path, 'obj_train_data'))))
        self.assertEqual(set(),
            set(os.listdir(osp.join(path, 'obj_valid_data'))))
        compare_datasets(self, expected,
            Dataset.import_from(path, 'yolo'), require_images=True)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    expected = Dataset.from_iterable([
        DatasetItem('q/1', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3))),
            attributes={'frame': 1}),
        DatasetItem('a/b/c/2', image=Image(
            path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))),
            attributes={'frame': 2}),
    ], categories=[])

    with TestDir() as test_dir:
        self._test_save_and_load(expected,
            partial(CvatConverter.convert, save_images=True),
            test_dir, require_images=True)

        self.assertTrue(osp.isfile(
            osp.join(test_dir, 'images', 'q', '1.JPEG')))
        self.assertTrue(osp.isfile(
            osp.join(test_dir, 'images', 'a', 'b', 'c', '2.bmp')))
def test_inplace_save_writes_only_updated_data(self):
    expected = Dataset.from_iterable([
        DatasetItem(1, subset='train', image=np.ones((2, 4, 3))),
        DatasetItem(2, subset='train', image=np.ones((3, 2, 3))),
    ], categories=[])

    with TestDir() as path:
        dataset = Dataset.from_iterable([
            DatasetItem(1, subset='train', image=np.ones((2, 4, 3))),
            DatasetItem(2, subset='train',
                image=Image(path='2.jpg', size=(3, 2))),
            DatasetItem(3, subset='valid', image=np.ones((2, 2, 3))),
        ], categories=[])
        dataset.export(path, 'wider_face', save_images=True)

        dataset.put(DatasetItem(2, subset='train',
            image=np.ones((3, 2, 3))))
        dataset.remove(3, 'valid')
        dataset.save(save_images=True)

        self.assertEqual({'1.jpg', '2.jpg'},
            set(os.listdir(osp.join(path,
                'WIDER_train', 'images', 'no_label'))))
        self.assertEqual({'wider_face_train_bbx_gt.txt'},
            set(os.listdir(osp.join(path, 'wider_face_split'))))
        compare_datasets(self, expected,
            Dataset.import_from(path, 'wider_face'),
            require_images=True, ignored_attrs=IGNORE_ALL)
def _load_polygons(self, items):
    polygons_dir = osp.join(self._annotations_dir,
        MapillaryVistasPath.POLYGON_DIR)
    for item_path in glob.glob(
            osp.join(polygons_dir, '**', '*.json'), recursive=True):
        item_id = osp.splitext(osp.relpath(item_path, polygons_dir))[0]
        item = items.get(item_id)
        item_info = parse_json_file(item_path)

        # The item may be absent if no image or mask produced it earlier,
        # so guard the image-size update against a None item.
        image_size = self._get_image_size(item_info)
        if image_size and item is not None and item.has_image:
            item.image = Image(path=item.image.path, size=image_size)

        polygons = item_info['objects']
        annotations = []
        for polygon in polygons:
            label = polygon['label']
            label_id = self._categories[AnnotationType.label].find(label)[0]
            if label_id is None:
                label_id = self._categories[AnnotationType.label].add(label)

            points = [coord for point in polygon['polygon']
                for coord in point]
            annotations.append(Polygon(label=label_id, points=points))

        if item is None:
            items[item_id] = DatasetItem(id=item_id, subset=self._subset,
                annotations=annotations)
        else:
            item.annotations.extend(annotations)
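# For reference, the polygon JSON shape this loader consumes, inferred from
# the keys accessed above ('objects', 'label', 'polygon'); the size fields
# are an assumption about what _get_image_size() (defined elsewhere) reads,
# and all values are illustrative:
#
# {
#     "width": 4, "height": 3,
#     "objects": [
#         {"label": "a", "polygon": [[0.0, 0.0], [4.0, 0.0], [4.0, 3.0]]}
#     ]
# }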
def test_can_save_and_load_with_arbitrary_extensions(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='subset/1', image=Image(
            data=np.ones((10, 10, 3)), path='subset/1.png')),
        DatasetItem(id='2', image=Image(
            data=np.ones((4, 5, 3)), path='2.jpg')),
    ])

    with TestDir() as test_dir:
        save_image(osp.join(test_dir, '2.jpg'),
            source_dataset.get('2').image.data)
        save_image(osp.join(test_dir, 'subset', '1.png'),
            source_dataset.get('subset/1').image.data,
            create_dir=True)

        self._test_can_save_and_load(source_dataset, test_dir)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    dataset = Dataset.from_iterable([
        DatasetItem(id='q/1', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='a/b/c/2', image=Image(
            path='a/b/c/2.bmp', data=np.zeros((3, 4, 3)))),
    ])

    with TestDir() as test_dir:
        check_save_and_load(self, dataset, ImageDirConverter.convert,
            test_dir, importer='image_dir', require_images=True)
def test_inplace_save_writes_only_updated_data_with_transforms(self):
    with TestDir() as path:
        expected = Dataset.from_iterable([
            DatasetItem(2, subset='test'),
            DatasetItem(3, subset='train', image=np.ones((2, 2, 3))),
            DatasetItem(4, subset='train', image=np.ones((2, 3, 3))),
            DatasetItem(5, subset='test',
                point_cloud=osp.join(path, 'point_clouds', 'test', '5.pcd'),
                related_images=[
                    Image(data=np.ones((3, 4, 3)),
                        path=osp.join(path, 'test', '5', 'image_0.jpg')),
                    osp.join(path, 'test', '5', 'a', '5.png'),
                ]),
        ])
        dataset = Dataset.from_iterable([
            DatasetItem(1, subset='a'),
            DatasetItem(2, subset='b'),
            DatasetItem(3, subset='c', image=np.ones((2, 2, 3))),
            DatasetItem(4, subset='d', image=np.ones((2, 3, 3))),
            DatasetItem(5, subset='e', point_cloud='5.pcd',
                related_images=[
                    np.ones((3, 4, 3)),
                    'a/5.png',
                ]),
        ])
        dataset.save(path, save_images=True)

        dataset.filter('/item[id >= 2]')
        dataset.transform('random_split',
            splits=(('train', 0.5), ('test', 0.5)), seed=42)
        dataset.save(save_images=True)

        self.assertEqual(
            {'images', 'annotations', 'point_clouds', 'related_images'},
            set(os.listdir(path)))
        self.assertEqual({'train.json', 'test.json'},
            set(os.listdir(osp.join(path, 'annotations'))))
        self.assertEqual({'3.jpg', '4.jpg'},
            set(os.listdir(osp.join(path, 'images', 'train'))))
        self.assertEqual({'train', 'c', 'd'},
            set(os.listdir(osp.join(path, 'images'))))
        self.assertEqual(set(),
            set(os.listdir(osp.join(path, 'images', 'c'))))
        self.assertEqual(set(),
            set(os.listdir(osp.join(path, 'images', 'd'))))
        self.assertEqual({'image_0.jpg'},
            set(os.listdir(osp.join(path,
                'related_images', 'test', '5'))))
        compare_datasets_strict(self, expected, Dataset.load(path))
def test_can_save_and_load_image_with_arbitrary_extension(self):
    dataset = Dataset.from_iterable([
        DatasetItem(id='a/1', image=Image(
            path='a/1.JPEG', data=np.zeros((4, 3, 3))),
            annotations=[Label(0)]),
        DatasetItem(id='b/c/d/2', image=Image(
            path='b/c/d/2.bmp', data=np.zeros((3, 4, 3))),
            annotations=[Label(1)]),
    ], categories=['name0', 'name1'])

    with TestDir() as test_dir:
        LfwConverter.convert(dataset, test_dir, save_images=True)
        parsed_dataset = Dataset.import_from(test_dir, 'lfw')

        compare_datasets(self, dataset, parsed_dataset, require_images=True)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    expected = Dataset.from_iterable([
        DatasetItem(id='c/0001_c1s1_000000_00', image=Image(
            path='c/0001_c1s1_0000_00.JPEG', data=np.zeros((4, 3, 3))),
            attributes={'camera_id': 0, 'person_id': '0001',
                'track_id': 1, 'frame_id': 0, 'bbox_id': 0,
                'query': False}),
        DatasetItem(id='a/b/0002_c2s2_000001_00', image=Image(
            path='a/b/0002_c2s2_0001_00.bmp', data=np.zeros((3, 4, 3))),
            attributes={'camera_id': 1, 'person_id': '0002',
                'track_id': 2, 'frame_id': 1, 'bbox_id': 0,
                'query': False}),
    ])

    with TestDir() as test_dir:
        Market1501Converter.convert(expected, test_dir, save_images=True)
        parsed_dataset = Dataset.import_from(test_dir, 'market1501')

        compare_datasets(self, expected, parsed_dataset,
            require_images=True)
def _load_instances_items(self):
    items = {}

    instances_dir = osp.join(self._annotations_dir,
        MapillaryVistasPath.INSTANCES_DIR)
    for instance_path in find_images(instances_dir, recursive=True):
        item_id = osp.splitext(
            osp.relpath(instance_path, instances_dir))[0]

        mask = load_image(instance_path, dtype=np.uint32)

        annotations = []
        for uval in np.unique(mask):
            # Each pixel value packs the label id into the high bits
            # and the instance id into the low byte.
            label_id, instance_id = uval >> 8, uval & 255
            annotations.append(Mask(
                image=self._lazy_extract_mask(mask, uval),
                label=label_id, id=instance_id))

        items[item_id] = DatasetItem(id=item_id, subset=self._subset,
            annotations=annotations)

    class_dir = osp.join(self._annotations_dir,
        MapillaryVistasPath.CLASS_DIR)
    for class_path in find_images(class_dir, recursive=True):
        item_id = osp.splitext(osp.relpath(class_path, class_dir))[0]
        if item_id in items:
            continue

        # Imported locally to avoid clashing with the datumaro Image class.
        from PIL import Image as PILImage
        class_mask = np.array(PILImage.open(class_path))
        classes = np.unique(class_mask)

        annotations = []
        for label_id in classes:
            annotations.append(Mask(label=label_id,
                image=self._lazy_extract_mask(class_mask, label_id)))

        items[item_id] = DatasetItem(id=item_id, subset=self._subset,
            annotations=annotations)

    for image_path in find_images(self._images_dir, recursive=True):
        item_id = osp.splitext(
            osp.relpath(image_path, self._images_dir))[0]
        image = Image(path=image_path)
        if item_id in items:
            items[item_id].image = image
        else:
            items[item_id] = DatasetItem(id=item_id, subset=self._subset,
                image=image)

    self._load_polygons(items)
    return items.values()
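# A worked example (illustrative, not from the original code) of the pixel
# encoding decoded above: unpacking with `uval >> 8, uval & 255` implies a
# pixel value of label_id * 256 + instance_id.
uval = 13 * 256 + 2        # a pixel of label 13, instance 2 -> 3330
assert uval >> 8 == 13     # label id recovered from the high bits
assert uval & 255 == 2     # instance id recovered from the low byte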
def test_can_save_and_load_image_with_arbitrary_extension(self):
    expected = Dataset.from_iterable([
        DatasetItem('q/1', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3))),
            annotations=[
                Mask(np.array([[0, 1, 0, 0, 0]]), label=0,
                    attributes={'track_id': 1}),
            ]),
        DatasetItem('a/b/c/2', image=Image(
            path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))),
            annotations=[
                Mask(np.array([[0, 1, 0, 0, 0]]), label=0,
                    attributes={'track_id': 1}),
            ]),
    ], categories=['a'])

    with TestDir() as test_dir:
        self._test_save_and_load(expected,
            partial(MotsPngConverter.convert, save_images=True),
            test_dir, require_images=True)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    expected = Dataset.from_iterable([
        DatasetItem(id='q/1', image=Image(
            path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='a/b/c/2', image=Image(
            path='a/b/c/2.bmp', data=np.zeros((3, 4, 3)))),
    ])

    for importer, converter in [
        ('icdar_word_recognition', IcdarWordRecognitionConverter),
        ('icdar_text_localization', IcdarTextLocalizationConverter),
        ('icdar_text_segmentation', IcdarTextSegmentationConverter),
    ]:
        with self.subTest(subformat=converter), TestDir() as test_dir:
            self._test_save_and_load(expected,
                partial(converter.convert, save_images=True),
                test_dir, importer, require_images=True)
def _add_item(self, item_id, subset):
    image_path = self._image_paths_by_id.get(item_id)
    image = None
    if image_path is None:
        log.warning("Can't find image for item: %s. "
            "It should be in the '%s' directory",
            item_id, OpenImagesPath.IMAGES_DIR)
    else:
        image = Image(path=image_path,
            size=self._image_meta.get(item_id))

    item = DatasetItem(id=item_id, image=image, subset=subset)
    self._items.append(item)
    return item
def _load_items(self, path):
    anno_dict = parse_json_file(path)

    label_categories = self._categories[AnnotationType.label]
    for label in anno_dict.get('tags', []):
        label_name = label.get('name')
        label_idx = label_categories.find(label_name)[0]
        if label_idx is None:
            label_idx = label_categories.add(label_name)

    items = {}
    for asset_id, asset in anno_dict.get('assets', {}).items():
        item_id = osp.splitext(asset.get('asset', {}).get('name'))[0]
        annotations = []
        for region in asset.get('regions', []):
            region_tags = region.get('tags', [])
            if not region_tags:
                bbox = region.get('boundingBox', {})
                if bbox:
                    annotations.append(
                        Bbox(float(bbox['left']), float(bbox['top']),
                            float(bbox['width']), float(bbox['height']),
                            attributes={'id': region.get('id')}))

            for tag in region_tags:
                label_idx = label_categories.find(tag)[0]
                if label_idx is None:
                    label_idx = label_categories.add(tag)

                bbox = region.get('boundingBox', {})
                if bbox:
                    annotations.append(
                        Bbox(float(bbox['left']), float(bbox['top']),
                            float(bbox['width']), float(bbox['height']),
                            label=label_idx,
                            attributes={'id': region.get('id')}))

        items[item_id] = DatasetItem(id=item_id, subset=self._subset,
            attributes={'id': asset_id},
            image=Image(path=osp.join(osp.dirname(path),
                asset.get('asset', {}).get('path'))),
            annotations=annotations)

    return items
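# For reference, the export fragment this parser reads, inferred from the
# keys accessed above; identifiers and values are illustrative only:
#
# {
#     "tags": [{"name": "some_label"}],
#     "assets": {
#         "<asset-id>": {
#             "asset": {"name": "0001.png", "path": "0001.png"},
#             "regions": [{
#                 "id": "<region-id>",
#                 "tags": ["some_label"],
#                 "boundingBox": {"left": 10.0, "top": 5.0,
#                                 "width": 30.0, "height": 40.0}
#             }]
#         }
#     }
# }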
def test_can_save_dataset_to_correct_dir_with_correct_filename(self):
    dataset = Dataset.from_iterable([
        DatasetItem(id='dir/a', image=Image(
            path='dir/a.JPEG', data=np.zeros((4, 3, 3)))),
    ], categories=[])

    with TestDir() as test_dir:
        self._test_save_and_load(dataset,
            partial(LabelMeConverter.convert, save_images=True),
            test_dir, require_images=True)

        xml_dirpath = osp.join(test_dir, 'default/dir')
        self.assertEqual(os.listdir(osp.join(test_dir, 'default')),
            ['dir'])
        self.assertEqual(set(os.listdir(xml_dirpath)),
            {'a.xml', 'a.JPEG'})
def test_can_save_and_load_with_pointcloud(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='test',
            point_cloud='1.pcd',
            related_images=[
                Image(data=np.ones((5, 5, 3)), path='1/a.jpg'),
                Image(data=np.ones((5, 4, 3)), path='1/b.jpg'),
                Image(size=(5, 3), path='1/c.jpg'),
                '1/d.jpg',
            ],
            annotations=[
                Cuboid3d([2, 2, 2], [1, 1, 1], [3, 3, 1],
                    id=1, group=1, label=0, attributes={'x': True})
            ]),
    ], categories=['label'])

    with TestDir() as test_dir:
        target_dataset = Dataset.from_iterable([
            DatasetItem(id=1, subset='test',
                point_cloud=osp.join(test_dir,
                    'point_clouds', 'test', '1.pcd'),
                related_images=[
                    Image(data=np.ones((5, 5, 3)),
                        path=osp.join(test_dir, 'related_images',
                            'test', '1', 'image_0.jpg')),
                    Image(data=np.ones((5, 4, 3)),
                        path=osp.join(test_dir, 'related_images',
                            'test', '1', 'image_1.jpg')),
                    Image(size=(5, 3),
                        path=osp.join(test_dir, 'related_images',
                            'test', '1', 'image_2.jpg')),
                    osp.join(test_dir, 'related_images',
                        'test', '1', 'image_3.jpg'),
                ],
                annotations=[
                    Cuboid3d([2, 2, 2], [1, 1, 1], [3, 3, 1],
                        id=1, group=1, label=0, attributes={'x': True})
                ]),
        ], categories=['label'])

        self._test_save_and_load(source_dataset,
            partial(DatumaroConverter.convert, save_images=True),
            test_dir, target_dataset, compare=None,
            dimension=Dimensions.dim_3d)