def _image_validator(self, attribute, image):
    """Normalize the ``image`` argument and store it on the instance.

    A string is treated as a file path; a callable or numpy array is
    treated as (lazy) pixel data. ``None`` is kept as-is; anything else
    must already be an ``Image``.
    """
    if isinstance(image, str):
        image = Image(path=image)
    elif callable(image) or isinstance(image, np.ndarray):
        image = Image(data=image)
    assert image is None or isinstance(image, Image)
    self.image = image
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """Non-default image extensions must survive a Market-1501 round-trip."""
    items = [
        DatasetItem(id='q/1',
            image=Image(path='q/1.JPEG', data=np.zeros((4, 3, 3))),
            attributes={'camera_id': 1, 'person_id': 1, 'query': False}),
        DatasetItem(id='a/b/c/2',
            image=Image(path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))),
            attributes={'camera_id': 1, 'person_id': 2, 'query': True}),
    ]
    expected = Dataset.from_iterable(items)

    with TestDir() as save_dir:
        Market1501Converter.convert(expected, save_dir, save_images=True)
        parsed = Dataset.import_from(save_dir, 'market1501')

        compare_datasets(self, expected, parsed, require_images=True)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """Non-default image extensions must survive a MOTS PNG round-trip."""
    expected = Dataset.from_iterable([
        DatasetItem('q/1',
            image=Image(path='q/1.JPEG', data=np.zeros((4, 3, 3))),
            annotations=[
                Mask(np.array([[0, 1, 0, 0, 0]]), label=0,
                    attributes={'track_id': 1}),
            ]),
        DatasetItem('a/b/c/2',
            image=Image(path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))),
            annotations=[
                Mask(np.array([[0, 1, 0, 0, 0]]), label=0,
                    attributes={'track_id': 1}),
            ]),
    ], categories=['a'])

    with TestDir() as save_dir:
        self._test_save_and_load(expected,
            partial(MotsPngConverter.convert, save_images=True),
            save_dir, require_images=True)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """Non-default image extensions must survive a MOT seq round-trip."""
    expected = Dataset.from_iterable([
        DatasetItem('1',
            image=Image(path='1.JPEG', data=np.zeros((4, 3, 3))),
            annotations=[
                Bbox(0, 4, 4, 8, label=0, attributes={
                    'occluded': True,
                    'visibility': 0.0,
                    'ignored': False,
                }),
            ]),
        DatasetItem('2',
            image=Image(path='2.bmp', data=np.zeros((3, 4, 3)))),
    ], categories=['a'])

    with TestDir() as save_dir:
        self._test_save_and_load(expected,
            partial(MotSeqGtConverter.convert, save_images=True),
            save_dir, require_images=True)
def __init__(self, id=None, annotations=None, subset=None, path=None,
        image=None):
    """Create a dataset item.

    ``id`` is mandatory and coerced to ``str``. Missing collections
    default to empty ones; the ``image`` argument accepts a path string,
    a callable/numpy array (pixel data), an ``Image``, or ``None``.
    """
    assert id is not None
    self._id = str(id)
    # Normalize optional fields to concrete empty values
    self._subset = str(subset) if subset is not None else ''
    self._path = list(path) if path is not None else []
    self._annotations = list(annotations) if annotations is not None else []

    if isinstance(image, str):
        image = Image(path=image)
    elif callable(image) or isinstance(image, np.ndarray):
        image = Image(data=image)
    assert image is None or isinstance(image, Image)
    self._image = image
def test_lazy_image_shape(self):
    """An explicitly provided size must win over the actual data shape."""
    data = np.ones((5, 6, 3))

    with_hint = Image(data=data, size=(2, 4))
    without_hint = Image(data=data)

    self.assertEqual((2, 4), with_hint.size)
    self.assertEqual((5, 6), without_hint.size)
def __iter__(self):
    """Fixture: two items whose image paths carry unusual extensions."""
    items = [
        DatasetItem(id='q/1',
            image=Image(path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='a/b/c/2',
            image=Image(path='a/b/c/2.bmp', data=np.zeros((3, 4, 3)))),
    ]
    return iter(items)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """The image_dir format must keep non-default image extensions."""
    items = [
        DatasetItem(id='q/1',
            image=Image(path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='a/b/c/2',
            image=Image(path='a/b/c/2.bmp', data=np.zeros((3, 4, 3)))),
    ]
    dataset = Dataset.from_iterable(items)

    with TestDir() as save_dir:
        test_save_and_load(self, dataset, ImageDirConverter.convert,
            save_dir, importer='image_dir', require_images=True)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """The ImageNet format must keep non-default image extensions."""
    dataset = Dataset.from_iterable([
        DatasetItem(id='a',
            image=Image(path='a.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='b',
            image=Image(path='b.bmp', data=np.zeros((3, 4, 3)))),
    ], categories=[])

    with TestDir() as save_dir:
        ImagenetConverter.convert(dataset, save_dir, save_images=True)
        parsed = Dataset.import_from(save_dir, 'imagenet')

        compare_datasets(self, dataset, parsed, require_images=True)
def __iter__(self):
    """Fixture: one plain item and one item with two masks."""
    items = [
        DatasetItem(id='q/1',
            image=Image(path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='a/b/c/2',
            image=Image(path='a/b/c/2.bmp', data=np.ones((1, 5, 3))),
            annotations=[
                Mask(np.array([[0, 0, 0, 1, 0]]), label=self._label('a')),
                Mask(np.array([[0, 1, 1, 0, 0]]), label=self._label('b')),
            ]),
    ]
    return iter(items)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """Datumaro native format must keep non-default image extensions."""
    expected = Dataset.from_iterable([
        DatasetItem(id='q/1',
            image=Image(path='q/1.JPEG', data=np.zeros((4, 3, 3))),
            attributes={'frame': 1}),
        DatasetItem(id='a/b/c/2',
            image=Image(path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))),
            attributes={'frame': 2}),
    ])

    with TestDir() as save_dir:
        self._test_save_and_load(expected,
            partial(DatumaroConverter.convert, save_images=True), save_dir)
def __iter__(self):
    """Yield DatasetItems, taking size/filename from VOC XML when present."""
    anno_dir = osp.join(self._dataset_dir, VocPath.ANNOTATIONS_DIR)

    for item_id in self._items:
        log.debug("Reading item '%s'" % item_id)
        # Default image name; may be overridden by the XML <filename> tag
        image = item_id + VocPath.IMAGE_EXT
        height, width = 0, 0

        anns = []
        ann_file = osp.join(anno_dir, item_id + '.xml')
        if osp.isfile(ann_file):
            root_elem = ElementTree.parse(ann_file)
            # Size tags are optional; 0 means "unknown"
            height = root_elem.find('size/height')
            if height is not None:
                height = int(height.text)
            width = root_elem.find('size/width')
            if width is not None:
                width = int(width.text)

            filename_elem = root_elem.find('filename')
            if filename_elem is not None:
                image = filename_elem.text

            anns = self._parse_annotations(root_elem)

        image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR, image)
        if height and width:
            # Attach the known size so the image need not be decoded for it
            image = Image(path=image, size=(height, width))

        yield DatasetItem(id=item_id, subset=self._subset,
            image=image, annotations=anns)
def _load_items(self, loader):
    """Build DatasetItems for every image id known to the COCO *loader*."""
    items = OrderedDict()

    for img_id in loader.getImgIds():
        info = loader.loadImgs(img_id)[0]
        file_name = info['file_name']

        path = osp.join(self._images_dir, file_name)
        # Use the declared size only when both dimensions are present
        height, width = info.get('height'), info.get('width')
        size = (int(height), int(width)) if height and width else None
        image = Image(path=path, size=size)

        annotations = []
        for ann in loader.loadAnns(loader.getAnnIds(imgIds=img_id)):
            annotations.extend(self._load_annotations(ann, info))

        items[img_id] = DatasetItem(id=osp.splitext(file_name)[0],
            subset=self._subset, image=image,
            annotations=annotations, attributes={'id': img_id})

    return items
def test_inplace_save_writes_only_updated_data(self):
    """An in-place save must rewrite changed items and drop removed ones."""
    with TestDir() as path:
        train_dir = osp.join(path, 'obj_train_data')
        valid_dir = osp.join(path, 'obj_valid_data')

        # generate initial dataset
        dataset = Dataset.from_iterable([
            DatasetItem(1, subset='train', image=np.ones((2, 4, 3))),
            DatasetItem(2, subset='train',
                image=Image(path='2.jpg', size=(3, 2))),
            DatasetItem(3, subset='valid', image=np.ones((2, 2, 3))),
        ], categories=[])
        dataset.export(path, 'yolo', save_images=True)

        os.unlink(osp.join(train_dir, '1.txt'))
        os.unlink(osp.join(train_dir, '2.txt'))
        os.unlink(osp.join(valid_dir, '3.txt'))
        # Item 2 has no pixel data, so no image file could be written
        self.assertFalse(osp.isfile(osp.join(train_dir, '2.jpg')))
        self.assertTrue(osp.isfile(osp.join(valid_dir, '3.jpg')))

        dataset.put(DatasetItem(2, subset='train', image=np.ones((3, 2, 3))))
        dataset.remove(3, 'valid')
        dataset.save(save_images=True)

        self.assertTrue(osp.isfile(osp.join(train_dir, '1.txt')))
        self.assertTrue(osp.isfile(osp.join(train_dir, '2.txt')))
        self.assertFalse(osp.isfile(osp.join(valid_dir, '3.txt')))
        self.assertTrue(osp.isfile(osp.join(train_dir, '2.jpg')))
        self.assertFalse(osp.isfile(osp.join(valid_dir, '3.jpg')))
def __init__(self, url):
    """Load a locally exported CVAT dataset rooted at directory *url*.

    Reads 'config.json' and 'images_meta.json', then builds items with
    lazily loaded image data, ordered by numeric item id.
    """
    super().__init__()

    local_dir = url
    self._local_dir = local_dir
    # Downloaded/decoded frames are cached under <url>/images
    self._cache_dir = osp.join(local_dir, 'images')

    with open(osp.join(url, 'config.json'), 'r') as config_file:
        config = json.load(config_file)
    config = Config(config, schema=CONFIG_SCHEMA)
    self._config = config

    with open(osp.join(url, 'images_meta.json'), 'r') as images_file:
        images_meta = json.load(images_file)
    image_list = images_meta['images']

    items = []
    for entry in image_list:
        item_id = entry['id']
        item_filename = entry.get('name', str(item_id))
        size = None
        # Only trust the size when both dimensions are present and non-zero
        if entry.get('height') and entry.get('width'):
            size = (entry['height'], entry['width'])
        # data is a callable -> the frame is fetched on first access
        image = Image(data=self._make_image_loader(item_id),
            path=item_filename, size=size)
        item = DatasetItem(id=item_id, image=image)
        items.append((item.id, item))
    # Ids are numeric strings; sort numerically, not lexicographically
    items = sorted(items, key=lambda e: int(e[0]))
    items = OrderedDict(items)
    self._items = items

    # Remote-session handles are created lazily elsewhere
    self._cvat_cli = None
    self._session = None
def test_can_save_dataset_with_image_info(self):
    """Saving with only image metadata must still round-trip via YOLO."""
    source = Dataset.from_iterable([
        DatasetItem(id=1, subset='train',
            image=Image(path='1.jpg', size=(10, 15)),
            annotations=[
                Bbox(0, 2, 4, 2, label=2),
                Bbox(3, 3, 2, 3, label=4),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(i) for i in range(10)),
    })

    with TestDir() as save_dir:
        YoloConverter.convert(source, save_dir)
        # put the image for dataset
        save_image(osp.join(save_dir, 'obj_train_data', '1.jpg'),
            np.ones((10, 15, 3)))

        parsed = Dataset.import_from(save_dir, 'yolo')

        compare_datasets(self, source, parsed)
def test_can_load_dataset_with_exact_image_info(self):
    """An explicit image_info hint must replace reading the image files."""
    source = Dataset.from_iterable([
        DatasetItem(id=1, subset='train',
            image=Image(path='1.jpg', size=(10, 15)),
            annotations=[
                Bbox(0, 2, 4, 2, label=2),
                Bbox(3, 3, 2, 3, label=4),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(i) for i in range(10)),
    })

    with TestDir() as save_dir:
        YoloConverter.convert(source, save_dir)

        parsed = Dataset.import_from(save_dir, 'yolo',
            image_info={'1': (10, 15)})

        compare_datasets(self, source, parsed)
def __init__(self, task_data, include_images=False):
    """Build Datumaro items from CVAT *task_data*.

    When *include_images* is set, each item carries a lazy loader that
    fetches the original-quality frame from the task's FrameProvider.
    """
    super().__init__()
    self._categories = self._load_categories(task_data)

    dm_items = []

    if include_images:
        frame_provider = FrameProvider(task_data.db_task.data)

    for frame_data in task_data.group_by_frame(include_empty=True):
        loader = None
        if include_images:
            # Bind the frame index through a default argument so every
            # lambda captures its own value (avoids late-binding closures)
            loader = lambda p, i=frame_data.idx: frame_provider.get_frame(
                i, quality=frame_provider.Quality.ORIGINAL,
                out_type=frame_provider.Type.NUMPY_ARRAY)[0]
        dm_image = Image(path=frame_data.name, loader=loader,
            size=(frame_data.height, frame_data.width))
        dm_anno = self._read_cvat_anno(frame_data, task_data)
        dm_item = datumaro.DatasetItem(
            id=osp.splitext(frame_data.name)[0],
            annotations=dm_anno, image=dm_image,
            attributes={'frame': frame_data.frame})
        dm_items.append(dm_item)

    self._items = dm_items
def _parse(self, path):
    """Parse every LabelMe XML file under *path* into (items, categories)."""
    categories = {
        AnnotationType.label:
            LabelCategories(attributes={'occluded', 'username'})
    }

    items = []
    for entry in os.listdir(path):
        if not entry.endswith('.xml'):
            continue
        root = ElementTree.parse(osp.join(path, entry))

        item_id = osp.join(root.find('folder').text or '',
            root.find('filename').text)

        image_path = osp.join(path, item_id)
        image_size = None
        imagesize_elem = root.find('imagesize')
        if imagesize_elem is not None:
            width_elem = imagesize_elem.find('ncols')
            height_elem = imagesize_elem.find('nrows')
            image_size = (int(height_elem.text), int(width_elem.text))
        image = Image(path=image_path, size=image_size)

        # Parsing annotations may also register new labels in `categories`
        annotations = self._parse_annotations(root, path, categories)

        items.append(DatasetItem(id=osp.splitext(item_id)[0],
            subset=self._subset, image=image, annotations=annotations))

    return items, categories
def _generate_classification_dataset(self, config, num_duplicate):
    """Build a synthetic classification dataset.

    *config* maps label name -> number of items per subset; item images
    are drawn round-robin from a pool of *num_duplicate* random pictures.
    """
    subsets = ("train", "val", "test")
    image_pool = [
        np.random.randint(0, 255, size=(224, 224, 3))
        for _ in range(num_duplicate)
    ]

    label_cat = LabelCategories()
    items = []
    idx = 0
    for label_id, label in enumerate(config.keys()):
        label_cat.add(label, attributes=None)
        for subset in subsets:
            for _ in range(config[label]):
                idx += 1
                items.append(DatasetItem(idx, subset=subset,
                    annotations=[Label(label_id)],
                    image=Image(data=image_pool[idx % num_duplicate])))

    return Dataset.from_iterable(items,
        {AnnotationType.label: label_cat})
def __iter__(self):
    """Fixture: one fully annotated VOC train item plus a bare test item."""
    return iter([
        DatasetItem(id='2007_000001', subset='train',
            image=Image(path='2007_000001.jpg', size=(10, 20)),
            annotations=[
                # Every odd-valued VOC label is present on this item
                Label(self._label(l.name))
                for l in VOC.VocLabel if l.value % 2 == 1
            ] + [
                Bbox(1, 2, 2, 2, label=self._label('cat'),
                    attributes={
                        'pose': VOC.VocPose(1).name,
                        'truncated': True,
                        'difficult': False,
                        'occluded': False,
                    },
                    id=1, group=1,
                ),
                Bbox(4, 5, 2, 2, label=self._label('person'),
                    attributes={
                        'truncated': False,
                        'difficult': False,
                        'occluded': False,
                        # Odd-valued actions are marked active
                        **{
                            a.name: a.value % 2 == 1
                            for a in VOC.VocAction
                        }
                    },
                    id=2, group=2,
                ),
                # Body part box belongs to the person above (same group)
                Bbox(5.5, 6, 2, 2,
                    label=self._label(VOC.VocBodyPart(1).name),
                    group=2),
                Mask(image=np.ones([5, 10]),
                    label=self._label(VOC.VocLabel(2).name),
                    group=1,
                ),
            ]),
        DatasetItem(id='2007_000002', subset='test',
            image=np.ones((10, 20, 3))),
    ])
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """LabelMe converter must keep non-default image extensions."""
    dataset = Dataset.from_iterable([
        DatasetItem(id='a/1',
            image=Image(path='a/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='b/c/d/2',
            image=Image(path='b/c/d/2.bmp', data=np.zeros((3, 4, 3)))),
    ], categories=[])

    with TestDir() as save_dir:
        self._test_save_and_load(dataset,
            partial(LabelMeConverter.convert, save_images=True),
            save_dir, require_images=True)
def __iter__(self):
    """Fixture: items across train/val/test covering all annotation types."""
    return iter([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Caption('hello', id=1),
                Caption('world', id=2, group=5),
                Label(2, id=3, attributes={
                    'x': 1,
                    'y': '2',
                }),
                Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
                    'score': 1.0,
                }),
                Bbox(5, 6, 7, 8, id=5, group=5),
                Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
                Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
            ]),
        # Item without an image but with annotations
        DatasetItem(id=21, subset='train', annotations=[
            Caption('test'),
            Label(2),
            Bbox(1, 2, 3, 4, 5, id=42, group=42)
        ]),
        DatasetItem(id=2, subset='val', annotations=[
            PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
            Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
        ]),
        # Item-level attributes only
        DatasetItem(id=42, subset='test', attributes={
            'a1': 5,
            'a2': '42'
        }),
        # Completely empty item in the default subset
        DatasetItem(id=42),
        # Image metadata only, no pixel data
        DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
    ])
def __iter__(self):
    """Fixture: a single train item with two boxes and size-only image."""
    item = DatasetItem(id=1, subset='train',
        image=Image(path='1.jpg', size=(10, 15)),
        annotations=[
            Bbox(0, 2, 4, 2, label=2),
            Bbox(3, 3, 2, 3, label=4),
        ])
    return iter([item])
def test_can_save_dataset_with_image_info(self):
    """Items carrying only image metadata must round-trip via COCO."""
    expected = Dataset.from_iterable([
        DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
            attributes={'id': 1}),
    ])

    with TestDir() as save_dir:
        self._test_save_and_load(expected,
            CocoImageInfoConverter.convert, save_dir)
def test_ctors_with_image():
    """DatasetItem must accept every supported form of the 'image' arg."""
    candidates = (
        None,                              # no image at all
        'path.jpg',                        # path string
        np.array([1, 2, 3]),               # raw pixel data
        lambda f: np.array([1, 2, 3]),     # lazy loader callable
        Image(data=np.array([1, 2, 3])),   # ready Image object
    )
    for img in candidates:
        DatasetItem(id=0, image=img)
def __iter__(self):
    """Yield one dataset item per original-quality frame (numpy arrays)."""
    frames = self._frame_provider.get_frames(
        self._frame_provider.Quality.ORIGINAL,
        self._frame_provider.Type.NUMPY_ARRAY)
    for idx, frame in enumerate(frames):
        yield datumaro.DatasetItem(id=idx, image=Image(frame))
def __iter__(self):
    """Fixture: items across three subsets exercising CVAT shape types."""
    return iter([
        DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
            annotations=[
                Polygon([0, 0, 4, 0, 4, 4], label=1, group=4,
                    attributes={'occluded': True}),
                Points([1, 1, 3, 2, 2, 3], label=2, attributes={
                    'occluded': False,
                    'a1': 'x',
                    'a2': 42
                }),
                Label(1),
                Label(2, attributes={
                    'a1': 'y',
                    'a2': 44
                }),
            ], attributes={'frame': 0}),
        # Item without image data
        DatasetItem(id=1, subset='s1',
            annotations=[
                PolyLine([0, 0, 4, 0, 4, 4], label=3, group=4,
                    attributes={'occluded': False}),
                Bbox(5, 0, 1, 9, label=3, group=4,
                    attributes={'occluded': False}),
            ], attributes={'frame': 1}),
        DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)),
            annotations=[
                Polygon([0, 0, 4, 0, 4, 4], z_order=1, label=3, group=4,
                    attributes={'occluded': False}),
            ], attributes={'frame': 0}),
        # Image metadata only (no pixel data), no annotations
        DatasetItem(id=3, subset='s3',
            image=Image(path='3.jpg', size=(2, 4)),
            attributes={'frame': 0}),
    ])
def test_can_import_image_info(self):
    """Importing a COCO image_info dump must restore image metadata."""
    expected = Dataset.from_iterable([
        DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)),
            attributes={'id': 1}),
    ])

    actual = Dataset.import_from(
        osp.join(DUMMY_DATASET_DIR, 'coco_image_info'), 'coco')

    compare_datasets(self, expected, actual)
def parse_image_dir(image_dir, subset):
    """Register every recognized media file in *image_dir* under *subset*.

    Writes into the enclosing scope's `items` mapping, keyed by
    (subset, item name).
    """
    for path in sorted(glob(image_dir), key=osp.basename):
        name, ext = osp.splitext(osp.basename(path))
        if ext.lower() not in CvatPath.MEDIA_EXTS:
            continue
        items[(subset, name)] = DatasetItem(
            id=name, annotations=[],
            image=Image(path=path),
            subset=subset or DEFAULT_SUBSET_NAME,
        )