def test_can_change_extension_for_images_in_zip(self):
    """Exporting an imported image_zip project with --image-ext must
    rewrite the archive with images converted to the new extension."""
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((5, 5, 3))),
        DatasetItem(id='2', image=np.ones((2, 8, 3)))
    ])

    with TestDir() as test_dir:
        # Build a source zip archive of .jpg images to import from.
        source_dataset.export(test_dir, format='image_dir',
            image_ext='.jpg')
        zip_path = osp.join(test_dir, 'images.zip')
        make_zip_archive(test_dir, zip_path)

        proj_dir = osp.join(test_dir, 'proj')
        run(self, 'create', '-o', proj_dir)
        run(self, 'import', '-p', proj_dir, '-f', 'image_zip', zip_path)

        export_path = osp.join(test_dir, 'export.zip')
        run(self, 'export', '-p', proj_dir, '-f', 'image_zip',
            '-o', test_dir, '--overwrite', '--',
            '--name', osp.basename(export_path), '--image-ext', '.png')

        self.assertTrue(osp.isfile(export_path))
        with ZipFile(export_path, 'r') as zf:
            images = {f.filename for f in zf.filelist}
            # assertEqual reports a useful set diff on failure, unlike
            # assertTrue(a == b).
            self.assertEqual(images, {'1.png', '2.png'})
def test_can_import_with_meta_file(self):
    """Import a Mapillary Vistas dataset whose label names and mask
    colors are defined by a dataset meta file, and compare the result
    against the expected items."""
    label_cat = LabelCategories.from_iterable(['animal--bird',
        'construction--barrier--curb', 'human--person'])
    # Label id -> RGB color, as declared in the meta file.
    mask_cat = MaskCategories({
        0: (10, 50, 90),
        1: (20, 30, 80),
        2: (30, 70, 40)
    })

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', annotations=[
            Mask(image=np.array([[1, 1, 0, 0, 0]] * 5), label=0, id=0),
            Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), label=0, id=1),
            Mask(image=np.array([[0, 0, 1, 1, 0]] * 5), label=1, id=0),
        ], image=np.ones((5, 5, 3))),
        DatasetItem(id='2', subset='train', annotations=[
            Mask(image=np.array([[1, 1, 0, 1, 1]] * 5), label=1),
            Mask(image=np.array([[0, 0, 1, 0, 0]] * 5), label=2),
        ], image=np.ones((5, 5, 3))),
    ], categories={
        AnnotationType.label: label_cat,
        AnnotationType.mask: mask_cat
    })

    imported_dataset = Dataset.import_from(DUMMY_DATASET_WITH_META_FILE,
        'mapillary_vistas')

    compare_datasets(self, expected_dataset, imported_dataset,
        require_images=True)
def test_can_import_v2_0_panoptic_with_keeping_category_ids(self):
    """Import a Mapillary Vistas v2.0 panoptic dataset with
    keep_original_category_ids=True: the original (sparse) label ids
    must be preserved, with placeholder classes filling the unused
    indices."""
    # 101 placeholder names; only ids 1, 10 and 100 are real categories.
    labels = [f'class-{i}' for i in range(101)]
    labels[1] = ('animal--bird', 'animal')
    labels[10] = ('construction--barrier--separator', 'construction')
    labels[100] = ('object--vehicle--bicycle', 'object')

    label_cat = LabelCategories.from_iterable(labels)
    # Colors are only defined for the real (non-placeholder) ids.
    mask_cat = MaskCategories({
        1: (165, 42, 42),
        10: (128, 128, 128),
        100: (119, 11, 32)
    })

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='0', subset='val', annotations=[
            Mask(image=np.array([[1, 1, 1, 0, 0]] * 5), id=1, group=1,
                label=1, attributes={'is_crowd': True}),
            Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), id=2, group=2,
                label=10, attributes={'is_crowd': False}),
            Polygon(points=[0, 0, 1, 0, 2, 0, 2, 4, 0, 4], label=1),
            Polygon(points=[3, 0, 4, 0, 4, 1, 4, 4, 3, 4], label=10),
        ], image=np.ones((5, 5, 3))),
        DatasetItem(id='1', subset='val', annotations=[
            Mask(image=np.array([[1, 1, 0, 0, 0]] * 5), id=1, group=1,
                label=100, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 0, 1, 0, 0]] * 5), id=2, group=2,
                label=10, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), id=3, group=3,
                label=100, attributes={'is_crowd': True}),
            Polygon(points=[2, 0, 2, 1, 2, 2, 2, 3, 2, 4], label=10),
            Polygon(points=[0, 0, 1, 0, 1, 4, 4, 0, 0, 0], label=100),
            Polygon(points=[3, 0, 4, 0, 4, 4, 3, 4, 3, 0], label=100),
        ], image=np.ones((5, 5, 3))),
        DatasetItem(id='2', subset='train', annotations=[
            Mask(image=np.array([[1, 0, 0, 0, 0]] * 5), id=1, group=1,
                label=1, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), id=2, group=2,
                label=10, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 0, 1, 0, 0]] * 5), id=3, group=3,
                label=1, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 0, 0, 1, 0]] * 5), id=4, group=4,
                label=10, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), id=5, group=5,
                label=1, attributes={'is_crowd': False}),
            Polygon(points=[0, 0, 0, 1, 0, 2, 0, 3, 0, 4], label=1),
            Polygon(points=[2, 0, 2, 1, 2, 2, 2, 3, 2, 4], label=1),
            Polygon(points=[4, 0, 4, 1, 4, 2, 4, 3, 4, 4], label=1),
            Polygon(points=[1, 0, 1, 1, 1, 2, 1, 3, 1, 4], label=10),
            Polygon(points=[3, 0, 3, 1, 3, 2, 3, 3, 3, 4], label=10),
        ], image=np.ones((5, 5, 3))),
    ], categories={
        AnnotationType.label: label_cat,
        AnnotationType.mask: mask_cat
    })

    imported_dataset = Dataset.import_from(DUMMY_DATASET_V2_0,
        'mapillary_vistas_panoptic', keep_original_category_ids=True)

    compare_datasets(self, expected_dataset, imported_dataset,
        require_images=True)
def test_convert_from_voc_format(self):
    """
    <b>Description:</b>
    Ensure that the dataset can be converted from VOC format with
    command `datum convert`.

    <b>Expected results:</b>
    An ImageNet dataset that matches the expected dataset.

    <b>Steps:</b>
    1. Get path to the source dataset from assets.
    2. Convert source dataset to ImageNet format, using the `convert`
       command.
    3. Verify that resulting dataset is equal to the expected dataset.
    """
    # Only the odd-valued VOC labels are present in the source asset.
    labels = sorted([l.name for l in VOC.VocLabel if l.value % 2 == 1])

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='/'.join([label, '2007_000001']),
            subset='default', annotations=[Label(i)])
        for i, label in enumerate(labels)
    ] + [
        DatasetItem(id='no_label/2007_000002', subset='default',
            image=np.ones((10, 20, 3)))
    ], categories=labels)

    voc_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1')
    with TestDir() as test_dir:
        imagenet_dir = osp.join(test_dir, 'imagenet')

        # Spell out '--save-images' (as the other tests do); the former
        # '--save-image' only worked through argparse prefix matching.
        run(self, 'convert', '-if', 'voc', '-i', voc_dir,
            '-f', 'imagenet', '-o', imagenet_dir, '--', '--save-images')

        target_dataset = Dataset.import_from(imagenet_dir,
            format='imagenet')
        compare_datasets(self, expected_dataset, target_dataset,
            require_images=True)
def test_can_save_and_load_voc_segmentation_dataset(self):
    """Round-trip the VOC segmentation asset through both the generic
    'voc' and the 'voc_segmentation' formats, with and without a subset
    filter."""
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='2007_000001', subset='train',
            image=np.ones((10, 20, 3)),
            annotations=[Mask(image=np.ones([10, 20]), label=2, group=1)]),
        DatasetItem(id='2007_000002', subset='test',
            image=np.ones((10, 20, 3))),
    ], categories=VOC.make_voc_categories())

    dataset_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1')
    rpath = osp.join('ImageSets', 'Segmentation', 'train.txt')

    # (format, subset filter, expected result file) combinations.
    matrix = [
        ('voc_segmentation', '', ''),
        ('voc_segmentation', 'train', rpath),
        ('voc', 'train', rpath),
    ]
    for fmt, subset_name, result_path in matrix:
        with self.subTest(format=fmt, subset=subset_name,
                path=result_path):
            expected = (expected_dataset.get_subset(subset_name)
                if subset_name else expected_dataset)

            with TestDir() as test_dir:
                self._test_can_save_and_load(test_dir, dataset_dir,
                    expected, fmt, result_path=result_path,
                    label_map='voc')
def _load_items(self, subset, path):
    """Collect dataset items from a MARS-style directory layout.

    Each label directory under *path* is scanned for images. Images that
    match the MARS file-name pattern get a Label annotation and the
    person/camera/track/frame attributes encoded in the name; other
    images are kept without annotations.
    """
    items = []

    # Hoisted: the category list is the same for every label.
    label_categories = self._categories[AnnotationType.label]
    for label_cat in label_categories:
        label = label_cat.name
        label_id = label_categories.find(label)[0]
        for image_path in find_images(osp.join(path, label)):
            image_name = osp.basename(image_path)
            item_id = osp.splitext(image_name)[0]
            pedestrian_id = image_name[0:4]

            # Files not following the MARS naming scheme are kept as
            # plain items, without annotations or attributes.
            if not fnmatch.fnmatch(image_name,
                    label + MarsPath.IMAGE_NAME_POSTFIX):
                items.append(DatasetItem(id=item_id, image=image_path))
                continue

            if pedestrian_id != label:
                # Fixed: the original implicit string concatenation was
                # missing separators, producing a garbled message.
                log.warning(f'The image {image_path} will be skipped '
                    'because the pedestrian id for it does not match '
                    f'the directory name: {label}')
                continue

            items.append(DatasetItem(id=item_id, image=image_path,
                subset=subset, annotations=[Label(label=label_id)],
                attributes={
                    'person_id': pedestrian_id,
                    'camera_id': int(image_name[5]),
                    'track_id': int(image_name[7:11]),
                    'frame_id': int(image_name[12:15])
                }))

    return items
def test_can_import(self):
    """Import an ADE20k-2020 dataset and compare it against the
    expected items, masks and part annotations."""
    expected_dataset = Dataset.from_iterable(
        [
            DatasetItem(id='street/1', subset='training',
                image=np.ones((5, 5, 3)),
                annotations=[
                    Polygon([1, 0, 1, 1, 1, 2, 1, 3, 1, 4], group=1,
                        z_order=0, id=1, label=1,
                        attributes={'walkin': True}),
                    Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
                        group=401, z_order=0, id=401),
                    Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
                        group=1831, z_order=0, id=1831),
                    Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
                        id=774, group=774, z_order=1),
                    Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
                        group=0, z_order=0, id=0),
                    Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
                        group=1, z_order=0, id=1,
                        attributes={'walkin': True}),
                    Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
                        group=2, z_order=1, id=2),
                ]),
            DatasetItem(id='2', subset='validation',
                image=np.ones((5, 5, 3)),
                annotations=[
                    Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
                        group=401, z_order=0, id=401),
                    Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
                        group=1831, z_order=0, id=1831),
                    Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), label=3,
                        group=2122, z_order=2, id=2122),
                    Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
                        group=774, z_order=1, id=774),
                    Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
                        group=0, z_order=0, id=0),
                    Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
                        group=1, z_order=0, id=1,
                        attributes={'walkin': True}),
                    Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
                        group=2, z_order=1, id=2),
                    Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), label=3,
                        group=3, z_order=2, id=3),
                ])
        ],
        categories={AnnotationType.label: LabelCategories.from_iterable([
            'car', 'person', 'door', 'rim'])
        }
    )

    imported_dataset = Dataset.import_from(DUMMY_DATASET_DIR,
        'ade20k2020')

    compare_datasets(self, expected_dataset, imported_dataset,
        require_images=True)
def test_can_import_mots_annotations_with_splited_masks(self):
    """A MOTS PNG mask that is split into several disconnected regions
    must import into a task as a single tracked object.

    Regression test for
    https://github.com/openvinotoolkit/cvat/issues/3360
    """
    format_name = 'MOTS PNG 1.0'
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='image_0', annotations=[
            # The mask has a hole in the middle column, i.e. two
            # disconnected components belonging to one track.
            Mask(np.array([[1, 1, 1, 0, 1, 1, 1]] * 5), label=0,
                attributes={'track_id': 0})
        ])
    ], categories=['label_0'])

    with tempfile.TemporaryDirectory() as temp_dir:
        dataset_dir = osp.join(temp_dir, 'dataset')
        source_dataset.export(dataset_dir, 'mots_png')
        dataset_path = osp.join(temp_dir, 'annotations.zip')
        make_zip_archive(dataset_dir, dataset_path)

        images = self._generate_task_images(1, size=(5, 7))
        task = {
            'name': 'test',
            'overlap': 0,
            'segment_size': 100,
            'labels': [{'name': 'label_0'}]
        }
        # Removed a no-op `task.update()` call: dict.update() with no
        # arguments does nothing.
        task = self._create_task(task, images)

        dm.task.import_task_annotations(task['id'], dataset_path,
            format_name)
        self._test_can_import_annotations(task, format_name)
def test_can_import_v2_0_instances(self):
    """Import a Mapillary Vistas v2.0 instances dataset and compare
    against the expected masks and polygons."""
    label_cat = LabelCategories.from_iterable(['animal--bird',
        'construction--barrier--separator', 'object--vehicle--bicycle'])
    # Label id -> RGB color for the three classes.
    mask_cat = MaskCategories({
        0: (165, 42, 42),
        1: (128, 128, 128),
        2: (119, 11, 32)
    })

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='0', subset='val', annotations=[
            Mask(image=np.array([[1, 1, 1, 0, 0]] * 5), id=0, label=0),
            Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), id=0, label=1),
            Polygon(points=[0, 0, 1, 0, 2, 0, 2, 4, 0, 4], label=0),
            Polygon(points=[3, 0, 4, 0, 4, 1, 4, 4, 3, 4], label=1),
        ], image=np.ones((5, 5, 3))),
        DatasetItem(id='1', subset='val', annotations=[
            Mask(image=np.array([[0, 0, 1, 0, 0]] * 5), id=0, label=1),
            Mask(image=np.array([[1, 1, 0, 0, 0]] * 5), id=0, label=2),
            Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), id=1, label=2),
            Polygon(points=[2, 0, 2, 1, 2, 2, 2, 3, 2, 4], label=1),
            Polygon(points=[0, 0, 1, 0, 1, 4, 4, 0, 0, 0], label=2),
            Polygon(points=[3, 0, 4, 0, 4, 4, 3, 4, 3, 0], label=2),
        ], image=np.ones((5, 5, 3))),
        DatasetItem(id='2', subset='train', annotations=[
            # Instance ids restart from 0 for each label.
            Mask(image=np.array([[1, 0, 0, 0, 0]] * 5), id=0, label=0),
            Mask(image=np.array([[0, 0, 1, 0, 0]] * 5), id=1, label=0),
            Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), id=2, label=0),
            Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), id=0, label=1),
            Mask(image=np.array([[0, 0, 0, 1, 0]] * 5), id=1, label=1),
            Polygon(points=[0, 0, 0, 1, 0, 2, 0, 3, 0, 4], label=0),
            Polygon(points=[2, 0, 2, 1, 2, 2, 2, 3, 2, 4], label=0),
            Polygon(points=[4, 0, 4, 1, 4, 2, 4, 3, 4, 4], label=0),
            Polygon(points=[1, 0, 1, 1, 1, 2, 1, 3, 1, 4], label=1),
            Polygon(points=[3, 0, 3, 1, 3, 2, 3, 3, 3, 4], label=1),
        ], image=np.ones((5, 5, 3))),
    ], categories={
        AnnotationType.label: label_cat,
        AnnotationType.mask: mask_cat
    })

    imported_dataset = Dataset.import_from(DUMMY_DATASET_V2_0,
        'mapillary_vistas_instances')

    compare_datasets(self, expected_dataset, imported_dataset,
        require_images=True)
def test_can_save_and_load_voc_detection_dataset(self):
    """Round-trip check for VOC detection data, with and without a
    subset filter."""
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='2007_000001', subset='train',
            image=np.ones((10, 20, 3)), annotations=[
                Bbox(4.0, 5.0, 2.0, 2.0, label=15, id=2, group=2,
                    attributes={
                        'difficult': False,
                        'truncated': False,
                        'occluded': False,
                        # Every odd-valued VOC action is marked present.
                        **{
                            a.name : a.value % 2 == 1
                            for a in VOC.VocAction
                        }
                    }
                ),
                Bbox(1.0, 2.0, 2.0, 2.0, label=8, id=1, group=1,
                    attributes={
                        'difficult': False,
                        'truncated': True,
                        'occluded': False,
                        'pose': 'Unspecified'
                    }
                )
            ]),
        DatasetItem(id='2007_000002', subset='test',
            image=np.ones((10, 20, 3))),
    ], categories=VOC.make_voc_categories())

    dataset_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1')
    rpath = osp.join('ImageSets', 'Main', 'train.txt')
    # (format, subset filter, expected result file) combinations.
    matrix = [
        ('voc_detection', '', ''),
        ('voc_detection', 'train', rpath),
    ]
    for format, subset, path in matrix:
        with self.subTest(format=format, subset=subset, path=path):
            if subset:
                expected = expected_dataset.get_subset(subset)
            else:
                expected = expected_dataset

            with TestDir() as test_dir:
                self._test_can_save_and_load(test_dir, dataset_dir,
                    expected, format, result_path=path, label_map='voc')
def parse_image_dir(image_dir, subset):
    # Register a DatasetItem for every recognized media file matched by
    # the image_dir glob, keyed by (subset, file stem) in the enclosing
    # `items` mapping. Files with unknown extensions are skipped.
    for path in sorted(glob(image_dir), key=osp.basename):
        stem, ext = osp.splitext(osp.basename(path))
        if ext.lower() not in CvatPath.MEDIA_EXTS:
            continue
        items[(subset, stem)] = DatasetItem(
            id=stem,
            annotations=[],
            image=Image(path=path),
            subset=subset or DEFAULT_SUBSET_NAME,
        )
def setUpClass(cls):
    """Build the shared fixture dataset: items with Label and Bbox
    annotations carrying 'a'/'b' (and sometimes 'c') attributes, plus
    one empty item (id=3) and one image-only variation."""
    # NOTE(review): annotations reference label index 2 (e.g. Label(2),
    # label=2) while only two categories are declared by range(2) —
    # confirm whether range(2) is intentional here.
    cls.dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.ones((5, 5, 3)), annotations=[
            Label(1, id=0, attributes={'a': 1, 'b': 7, }),
            Bbox(1, 2, 3, 4, id=1, label=0, attributes={
                'a': 1, 'b': 2,
            }),
        ]),
        DatasetItem(id=2, image=np.ones((2, 4, 3)), annotations=[
            Label(2, id=0, attributes={'a': 2, 'b': 2, }),
            Bbox(2, 3, 1, 4, id=1, label=0, attributes={
                'a': 1, 'b': 1,
            }),
        ]),
        # An item without image or annotations.
        DatasetItem(id=3),
        DatasetItem(id=4, image=np.ones((2, 4, 3)), annotations=[
            Label(0, id=0, attributes={'b': 4, }),
            Label(1, id=1, attributes={'a': 11, 'b': 7, }),
            Bbox(1, 3, 2, 4, id=2, label=0, attributes={
                'a': 2, 'b': 1,
            }),
            Bbox(3, 1, 4, 2, id=3, label=0, attributes={
                'a': 2, 'b': 2,
            }),
        ]),
        DatasetItem(id=5, image=np.ones((2, 4, 3)), annotations=[
            Label(0, id=0, attributes={'a': 20, 'b': 10, }),
            Bbox(1, 2, 3, 4, id=1, label=1, attributes={
                'a': 1, 'b': 1,
            }),
        ]),
        DatasetItem(id=6, image=np.ones((2, 4, 3)), annotations=[
            Label(1, id=0, attributes={'a': 11, 'b': 2, 'c': 3, }),
            Bbox(2, 3, 4, 1, id=1, label=1, attributes={
                'a': 2, 'b': 2,
            }),
        ]),
        DatasetItem(id=7, image=np.ones((2, 4, 3)), annotations=[
            Label(1, id=0, attributes={'a': 1, 'b': 2, 'c': 5, }),
            Bbox(1, 2, 3, 4, id=1, label=2, attributes={
                'a': 1, 'b': 2,
            }),
        ]),
        DatasetItem(id=8, image=np.ones((2, 4, 3)), annotations=[
            Label(2, id=0, attributes={'a': 7, 'b': 9, 'c': 5, }),
            Bbox(2, 1, 3, 4, id=1, label=2, attributes={
                'a': 2, 'b': 1,
            }),
        ]),
    ], categories=[[f'label_{i}', None, {'a', 'b', }]
        for i in range(2)])
def test_can_import(self):
    """Import a MARS dataset and verify that the person/camera/track/
    frame attributes are decoded from the image file names."""
    expected_dataset = Dataset.from_iterable(
        [
            DatasetItem(id='0001C1T0001F001',
                image=np.ones((10, 10, 3)), subset='train',
                annotations=[Label(label=2)],
                attributes={'person_id': '0001', 'camera_id': 1,
                    'track_id': 1, 'frame_id': 1}),
            DatasetItem(id='0000C6T0101F001',
                image=np.ones((10, 10, 3)), subset='train',
                annotations=[Label(label=1)],
                attributes={'person_id': '0000', 'camera_id': 6,
                    'track_id': 101, 'frame_id': 1}),
            # '00-1' is the MARS id for distractor images.
            DatasetItem(id='00-1C2T0081F201',
                image=np.ones((10, 10, 3)), subset='test',
                annotations=[Label(label=0)],
                attributes={'person_id': '00-1', 'camera_id': 2,
                    'track_id': 81, 'frame_id': 201}),
        ], categories=['00-1', '0000', '0001'])

    imported_dataset = Dataset.import_from(DUMMY_MARS_DATASET, 'mars')

    compare_datasets(self, expected_dataset, imported_dataset,
        require_images=True)
def test_label_projection_with_masks(self):
    """Apply the project_labels transform to a VOC project and check
    that the categories are remapped while annotations survive."""
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='2007_000001', subset='train',
            image=np.ones((10, 20, 3)),
            annotations=[
                Bbox(1, 2, 2, 2, label=3,
                    attributes={
                        'pose': VOC.VocPose(1).name,
                        'truncated': True,
                        'difficult': False,
                        'occluded': False,
                    },
                    id=1, group=1,
                ),
            ]
        ),
        DatasetItem(id='2007_000002', subset='test',
            image=np.ones((10, 20, 3))),
    ], categories=VOC.make_voc_categories({
        'background': [(0, 0, 0), [], []], # Added on export
        'a': [(128, 0, 0), [], []], # Generated by the transform
        'b': [(0, 128, 0), [], []], # Generated by the transform
        'cat': [(64, 0, 0), [], []] # Original
    }))

    dataset_path = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1')

    with TestDir() as test_dir:
        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'voc', dataset_path)

        run(self, 'transform', '-p', test_dir, '-t', 'project_labels',
            '--', '-l', 'a', '-l', 'b', '-l', 'cat')

        parsed_dataset = Dataset.import_from(
            osp.join(test_dir, 'source-1'), 'voc')
        compare_datasets(self, expected_dataset, parsed_dataset)
def test_can_save_and_load(self):
    """An image_zip project exported back to an archive must contain
    the same images that were originally imported."""
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((5, 5, 3))),
        DatasetItem(id='2', image=np.ones((2, 8, 3))),
    ])

    with TestDir() as test_dir:
        # Prepare a source zip archive from an image_dir export.
        source_dataset.export(test_dir, format='image_dir')
        archive_path = osp.join(test_dir, 'images.zip')
        make_zip_archive(test_dir, archive_path)

        project_dir = osp.join(test_dir, 'proj')
        run(self, 'create', '-o', project_dir)
        run(self, 'import', '-p', project_dir, '-f', 'image_zip',
            archive_path)

        result_dir = osp.join(test_dir, 'result')
        export_path = osp.join(result_dir, 'export.zip')
        run(self, 'export', '-p', project_dir, '-f', 'image_zip',
            '-o', result_dir, '--', '--name', osp.basename(export_path))

        parsed_dataset = Dataset.import_from(export_path,
            format='image_zip')
        compare_datasets(self, source_dataset, parsed_dataset)
def _import(src_file, instance_data, load_data_callback=None):
    """Import an Open Images archive into a task.

    The archive is extracted to a temporary directory, imported as an
    'open_images' dataset, masks are converted to polygons, and the
    resulting annotations are pushed into instance_data.
    """
    with TemporaryDirectory() as tmp_dir:
        Archive(src_file.name).extractall(tmp_dir)

        image_meta_path = osp.join(tmp_dir, OpenImagesPath.ANNOTATIONS_DIR,
            DEFAULT_IMAGE_META_FILE_NAME)
        # None means "let the importer find sizes itself" when the
        # archive ships its own image meta file.
        image_meta = None

        if not osp.isfile(image_meta_path):
            # No meta file in the archive: reconstruct image sizes from
            # the task's frame info so the importer does not need the
            # actual image files.
            image_meta = {}
            item_ids = list(find_item_ids(tmp_dir))

            root_hint = find_dataset_root(
                [DatasetItem(id=item_id) for item_id in item_ids],
                instance_data)

            for item_id in item_ids:
                frame_info = None
                try:
                    frame_id = match_dm_item(DatasetItem(id=item_id),
                        instance_data, root_hint)
                    frame_info = instance_data.frame_info[frame_id]
                except Exception: # nosec
                    # Deliberate best-effort: items that cannot be
                    # matched to a task frame just get no size hint.
                    pass
                if frame_info is not None:
                    image_meta[item_id] = (frame_info['height'],
                        frame_info['width'])

        dataset = Dataset.import_from(tmp_dir, 'open_images',
            image_meta=image_meta, env=dm_env)
        dataset.transform('masks_to_polygons')
        if load_data_callback is not None:
            load_data_callback(dataset, instance_data)
        import_dm_annotations(dataset, instance_data)
def test_can_save_and_load_voc_dataset(self):
    """Round-trip the full VOC asset (labels, bboxes, actions, parts
    and masks) through the generic 'voc' format."""
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='2007_000001', subset='train',
            image=np.ones((10, 20, 3)),
            # One classification Label per odd class id, plus the
            # detection/action/part annotations below.
            annotations=[Label(i) for i in range(22) if i % 2 == 1] + [
                Bbox(4.0, 5.0, 2.0, 2.0, label=15, id=1, group=1,
                    attributes={
                        'difficult': False,
                        'truncated': False,
                        'occluded': False,
                        **{
                            a.name : a.value % 2 == 1
                            for a in VOC.VocAction
                        }
                    },
                ),
                Bbox(1.0, 2.0, 2.0, 2.0, label=8, id=2, group=2,
                    attributes={
                        'difficult': False,
                        'truncated': True,
                        'occluded': False,
                        'pose': 'Unspecified'
                    }
                ),
                # A body part (label 22) grouped with the person bbox.
                Bbox(5.5, 6.0, 2.0, 2.0, label=22, id=0, group=1),
                Mask(image=np.ones([10, 20]), label=2, group=1),
            ]),
        DatasetItem(id='2007_000002', subset='test',
            image=np.ones((10, 20, 3)))
    ], categories=VOC.make_voc_categories())

    voc_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1')
    with TestDir() as test_dir:
        self._test_can_save_and_load(test_dir, voc_dir, source_dataset,
            'voc', label_map='voc')
def test_convert_to_voc_format(self):
    """
    <b>Description:</b>
    Ensure that the dataset can be converted to VOC format with
    command `datum convert`.

    <b>Expected results:</b>
    A VOC dataset that matches the expected dataset.

    <b>Steps:</b>
    1. Get path to the source dataset from assets.
    2. Convert source dataset to VOC format, using the `convert` command.
    3. Verify that resulting dataset is equal to the expected dataset.
    """
    # VOC needs an explicit 'background' label at index 0.
    label_map = OrderedDict(('label_' + str(i), [None, [], []])
        for i in range(10))
    label_map['background'] = [None, [], []]
    label_map.move_to_end('background', last=False)

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='default',
            image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0.0, 4.0, 4.0, 8.0,
                    attributes={
                        'difficult': False,
                        'truncated': False,
                        'occluded': False,
                        # MOT-specific attributes survive as strings.
                        'visibility': '1.0',
                        'ignored': 'False'
                    },
                    id=1, label=3, group=1
                )
            ]
        )
    ], categories=VOC.make_voc_categories(label_map))

    # Resolve the repository root (the part of __file__ before the
    # 'tests' directory) to locate the MOT asset.
    mot_dir = osp.join(__file__[:__file__.rfind(osp.join('tests', ''))],
        'tests', 'assets', 'mot_dataset')
    with TestDir() as test_dir:
        voc_dir = osp.join(test_dir, 'voc')
        run(self, 'convert', '-if', 'mot_seq', '-i', mot_dir,
            '-f', 'voc', '-o', voc_dir, '--', '--save-images')

        target_dataset = Dataset.import_from(voc_dir, format='voc')
        compare_datasets(self, expected_dataset, target_dataset,
            require_images=True)
def test_can_import_with_original_config(self):
    """Importing Mapillary Vistas v1.2 with use_original_config=True
    must apply the built-in v1.2 label map."""
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='0', subset='val', annotations=[
            Mask(image=np.array([[1, 1, 0, 0, 0]] * 5), label=0),
            Mask(image=np.array([[0, 0, 1, 1, 0]] * 5), label=1),
            Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), label=2),
        ], image=np.ones((5, 5, 3))),
    ], categories=make_mapillary_instance_categories(
        MapillaryVistasLabelMaps['v1.2']))

    imported_dataset = Dataset.import_from(
        osp.join(DUMMY_DATASET_V1_2, 'val'), 'mapillary_vistas',
        use_original_config=True)

    compare_datasets(self, expected_dataset, imported_dataset,
        require_images=True)
def test_can_import_v2_0_panoptic_wo_images(self):
    """A Mapillary Vistas v2.0 panoptic dataset must still import after
    its images directory has been removed."""
    label_cat = LabelCategories.from_iterable([
        ('animal--bird', 'animal'),
        ('construction--barrier--separator', 'construction'),
        ('object--vehicle--bicycle', 'object')
    ])
    # Label id -> RGB color for the three classes.
    mask_cat = MaskCategories({
        0: (165, 42, 42),
        1: (128, 128, 128),
        2: (119, 11, 32)
    })

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='2', subset='dataset', annotations=[
            Mask(image=np.array([[1, 0, 0, 0, 0]] * 5), id=1, group=1,
                label=0, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), id=2, group=2,
                label=1, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 0, 1, 0, 0]] * 5), id=3, group=3,
                label=0, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 0, 0, 1, 0]] * 5), id=4, group=4,
                label=1, attributes={'is_crowd': False}),
            Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), id=5, group=5,
                label=0, attributes={'is_crowd': False}),
            Polygon(points=[0, 0, 0, 1, 0, 2, 0, 3, 0, 4], label=0),
            Polygon(points=[2, 0, 2, 1, 2, 2, 2, 3, 2, 4], label=0),
            Polygon(points=[4, 0, 4, 1, 4, 2, 4, 3, 4, 4], label=0),
            Polygon(points=[1, 0, 1, 1, 1, 2, 1, 3, 1, 4], label=1),
            Polygon(points=[3, 0, 3, 1, 3, 2, 3, 3, 3, 4], label=1),
        ])
    ], categories={
        AnnotationType.label: label_cat,
        AnnotationType.mask: mask_cat
    })

    with TestDir() as test_dir:
        # Copy the asset and delete its images to simulate an
        # annotations-only dataset.
        dataset_path = osp.join(test_dir, 'dataset')
        shutil.copytree(osp.join(DUMMY_DATASET_V2_0, 'train'),
            dataset_path)
        shutil.rmtree(osp.join(dataset_path, 'images'))

        imported_dataset = Dataset.import_from(dataset_path,
            'mapillary_vistas_panoptic')

        compare_datasets(self, expected_dataset, imported_dataset,
            require_images=True)
def test_export_to_voc_format(self):
    """Export a YOLO project to VOC format and verify the result."""
    # VOC needs an explicit 'background' label at index 0.
    label_map = OrderedDict(('label_%s' % i, [None, [], []])
        for i in range(10))
    label_map['background'] = [None, [], []]
    label_map.move_to_end('background', last=False)

    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train',
            image=np.ones((10, 15, 3)),
            annotations=[
                Bbox(0.0, 2.0, 4.0, 2.0,
                    attributes={
                        'difficult': False,
                        'truncated': False,
                        'occluded': False
                    },
                    id=1, label=3, group=1
                ),
                Bbox(3.0, 3.0, 2.0, 3.0,
                    attributes={
                        'difficult': False,
                        'truncated': False,
                        'occluded': False
                    },
                    id=2, label=5, group=2
                )
            ]
        )
    ], categories=VOC.make_voc_categories(label_map))

    with TestDir() as test_dir:
        # Resolve the repository root (the part of __file__ before the
        # 'tests' directory) to locate the YOLO asset.
        yolo_dir = osp.join(
            __file__[:__file__.rfind(osp.join('tests', ''))],
            'tests', 'assets', 'yolo_dataset')

        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'yolo', yolo_dir)

        voc_export = osp.join(test_dir, 'voc_export')
        run(self, 'export', '-p', test_dir, '-f', 'voc',
            '-o', voc_export, '--', '--save-images')

        parsed_dataset = Dataset.import_from(voc_export, format='voc')
        compare_datasets(self, expected_dataset, parsed_dataset,
            require_images=True)
def _load_items(self, parsed, image_items):
    """Merge parsed per-frame annotation descriptors into image_items.

    Existing entries (keyed by (subset, file stem)) are updated in
    place; frames without a matching image get a new DatasetItem.
    Returns the updated image_items mapping.
    """
    for (subset, frame_id), item_desc in parsed.items():
        # Fall back to a generated frame name when none is recorded.
        # NOTE(review): the default extension is upper-case '.PNG' —
        # confirm this matches what the corresponding writer emits.
        name = item_desc.get('name', 'frame_%06d.PNG' % int(frame_id))
        image = osp.join(self._images_dir, subset, name) if subset else osp.join(
            self._images_dir, name)
        image_size = (item_desc.get('height'), item_desc.get('width'))
        # Only attach an explicit size when both dimensions are known.
        if all(image_size):
            image = Image(path=image, size=tuple(map(int, image_size)))

        di = image_items.get((subset, osp.splitext(name)[0]), DatasetItem(
            id=name, annotations=[],
        ))
        di.subset = subset or DEFAULT_SUBSET_NAME
        di.annotations = item_desc.get('annotations')
        di.attributes = {'frame': int(frame_id)}
        # Keep the item's existing image unless we built an Image with
        # an explicit size above.
        di.image = image if isinstance(image, Image) else di.image
        image_items[(subset, osp.splitext(name)[0])] = di
    return image_items
def test_can_import_v1_2_wo_images(self):
    """A Mapillary Vistas v1.2 dataset must still import after its
    images directory has been removed."""
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='0', subset='dataset', annotations=[
            Mask(image=np.array([[1, 1, 0, 0, 0]] * 5), label=0),
            Mask(image=np.array([[0, 0, 1, 1, 0]] * 5), label=1),
            Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), label=2),
        ]),
    ], categories=make_mapillary_instance_categories(
        MapillaryVistasLabelMaps['v1.2']))

    with TestDir() as test_dir:
        # Copy the asset and delete its images to simulate an
        # annotations-only dataset.
        dataset_path = osp.join(test_dir, 'dataset')
        shutil.copytree(osp.join(DUMMY_DATASET_V1_2, 'val'), dataset_path)
        shutil.rmtree(osp.join(dataset_path, 'images'))

        imported_dataset = Dataset.import_from(dataset_path,
            'mapillary_vistas', use_original_config=True)

        compare_datasets(self, expected_dataset, imported_dataset,
            require_images=True)
def test_can_import_with_meta_file(self):
    """Import an ADE20k-2017 dataset whose categories come from a
    dataset meta file."""
    expected_dataset = Dataset.from_iterable(
        [
            DatasetItem(id='street/1', subset='training',
                image=np.ones((3, 4, 3)),
                annotations=[
                    Mask(image=np.array([[0, 1, 0, 0]] * 3), label=0,
                        group=1, z_order=0, id=1),
                    Mask(image=np.array([[0, 0, 0, 1]] * 3), label=2,
                        group=1, z_order=1, id=1),
                    Mask(image=np.array([[0, 0, 1, 1]] * 3), group=2,
                        label=1, z_order=0, id=2,
                        attributes={'walkin': True})
                ])
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(
                ['sky', 'person', 'license plate', 'rim'])
        })

    imported_dataset = Dataset.import_from(DUMMY_DATASET_DIR_META_FILE,
        'ade20k2017')

    compare_datasets(self, expected_dataset, imported_dataset,
        require_images=True)
def test_preparing_dataset_for_train_model(self):
    """
    <b>Description:</b>
    Testing a particular example of working with VOC dataset.

    <b>Expected results:</b>
    A VOC dataset that matches the expected result.

    <b>Steps:</b>
    1. Get path to the source dataset from assets.
    2. Create a datumaro project and add source dataset to it.
    3. Leave only non-occluded annotations with `filter` command.
    4. Split the dataset into subsets with `transform` command.
    5. Export the project to a VOC dataset with `export` command.
    6. Verify that the resulting dataset is equal to the expected result.
    """
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='c', subset='train',
            annotations=[
                Bbox(3.0, 1.0, 8.0, 5.0,
                    attributes={
                        'truncated': False,
                        'occluded': False,
                        'difficult': False
                    },
                    id=1, label=2, group=1
                )
            ]
        ),
        DatasetItem(id='d', subset='test',
            annotations=[
                Bbox(4.0, 4.0, 4.0, 4.0,
                    attributes={
                        'truncated': False,
                        'occluded': False,
                        'difficult': False
                    },
                    id=1, label=3, group=1
                )
            ]
        )
    ], categories=VOC.make_voc_categories())

    dataset_path = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset2')

    with TestDir() as test_dir:
        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'voc', dataset_path)

        # Keep items and only their non-occluded annotations.
        run(self, 'filter', '-p', test_dir, '-m', 'i+a',
            '-e', "/item/annotation[occluded='False']")

        # Deterministic 50/50 split via a fixed seed.
        run(self, 'transform', '-p', test_dir, '-t', 'random_split',
            '--', '-s', 'test:.5', '-s', 'train:.5', '--seed', '1')

        export_path = osp.join(test_dir, 'dataset')
        run(self, 'export', '-p', test_dir, '-f', 'voc',
            '-o', export_path, '--', '--label-map', 'voc')

        parsed_dataset = Dataset.import_from(export_path, format='voc')
        compare_datasets(self, expected_dataset, parsed_dataset)
def load_anno(file_object, annotations):
    """Stream-parse a CVAT XML annotation file into `annotations`.

    Uses iterparse so whole documents never need to fit in memory.
    Handles three top-level annotation containers: <track> (interpolated
    shapes), <image> (per-frame shapes and tags) and plain tags.
    """
    supported_shapes = ('box', 'ellipse', 'polygon', 'polyline',
        'points', 'cuboid')
    context = ElementTree.iterparse(file_object, events=("start", "end"))
    context = iter(context)
    next(context)  # skip the root element's 'start' event

    # Parser state: the container currently being filled, if any.
    track = None
    shape = None
    tag = None
    image_is_opened = False
    attributes = None
    for ev, el in context:
        if ev == 'start':
            if el.tag == 'track':
                track = annotations.Track(
                    label=el.attrib['label'],
                    group=int(el.attrib.get('group_id', 0)),
                    source=el.attrib.get('source', 'manual'),
                    shapes=[],
                )
            elif el.tag == 'image':
                image_is_opened = True
                # Map the XML image back to a task frame number.
                frame_id = annotations.abs_frame_id(match_dm_item(
                    DatasetItem(id=osp.splitext(el.attrib['name'])[0],
                        attributes={'frame': el.attrib['id']},
                        image=el.attrib['name']),
                    task_data=annotations))
            elif el.tag in supported_shapes and (track is not None or
                    image_is_opened):
                attributes = []
                shape = {
                    'attributes': attributes,
                    'points': [],
                }
            elif el.tag == 'tag' and image_is_opened:
                attributes = []
                tag = {
                    'frame': frame_id,
                    'label': el.attrib['label'],
                    'group': int(el.attrib.get('group_id', 0)),
                    'attributes': attributes,
                    'source': str(el.attrib.get('source', 'manual'))
                }
        elif ev == 'end':
            if el.tag == 'attribute' and attributes is not None:
                attributes.append(annotations.Attribute(
                    name=el.attrib['name'],
                    value=el.text or "",
                ))
            if el.tag in supported_shapes:
                if track is not None:
                    # Tracked shapes carry their own frame/state flags.
                    shape['frame'] = el.attrib['frame']
                    shape['outside'] = el.attrib['outside'] == "1"
                    shape['keyframe'] = el.attrib['keyframe'] == "1"
                else:
                    shape['frame'] = frame_id
                    shape['label'] = el.attrib['label']
                    shape['group'] = int(el.attrib.get('group_id', 0))
                    shape['source'] = str(
                        el.attrib.get('source', 'manual'))

                shape['type'] = 'rectangle' if el.tag == 'box' else el.tag
                shape['occluded'] = el.attrib['occluded'] == '1'
                shape['z_order'] = int(el.attrib.get('z_order', 0))
                shape['rotation'] = float(el.attrib.get('rotation', 0))

                if el.tag == 'box':
                    shape['points'].append(el.attrib['xtl'])
                    shape['points'].append(el.attrib['ytl'])
                    shape['points'].append(el.attrib['xbr'])
                    shape['points'].append(el.attrib['ybr'])
                elif el.tag == 'ellipse':
                    # Stored as center plus a point offset by the radii.
                    shape['points'].append(el.attrib['cx'])
                    shape['points'].append(el.attrib['cy'])
                    shape['points'].append("{:.2f}".format(
                        float(el.attrib['cx']) + float(el.attrib['rx'])))
                    shape['points'].append("{:.2f}".format(
                        float(el.attrib['cy']) - float(el.attrib['ry'])))
                elif el.tag == 'cuboid':
                    shape['points'].append(el.attrib['xtl1'])
                    shape['points'].append(el.attrib['ytl1'])
                    shape['points'].append(el.attrib['xbl1'])
                    shape['points'].append(el.attrib['ybl1'])
                    shape['points'].append(el.attrib['xtr1'])
                    shape['points'].append(el.attrib['ytr1'])
                    shape['points'].append(el.attrib['xbr1'])
                    shape['points'].append(el.attrib['ybr1'])
                    shape['points'].append(el.attrib['xtl2'])
                    shape['points'].append(el.attrib['ytl2'])
                    shape['points'].append(el.attrib['xbl2'])
                    shape['points'].append(el.attrib['ybl2'])
                    shape['points'].append(el.attrib['xtr2'])
                    shape['points'].append(el.attrib['ytr2'])
                    shape['points'].append(el.attrib['xbr2'])
                    shape['points'].append(el.attrib['ybr2'])
                else:
                    # Polygon/polyline/points: "x1,y1;x2,y2;..."
                    for pair in el.attrib['points'].split(';'):
                        shape['points'].extend(map(float, pair.split(',')))

                if track is not None:
                    # Only keyframes are stored; the rest is interpolated.
                    if shape["keyframe"]:
                        track.shapes.append(
                            annotations.TrackedShape(**shape))
                else:
                    annotations.add_shape(
                        annotations.LabeledShape(**shape))
                shape = None

            elif el.tag == 'track':
                annotations.add_track(track)
                track = None
            elif el.tag == 'image':
                image_is_opened = False
            elif el.tag == 'tag':
                annotations.add_tag(annotations.Tag(**tag))
                tag = None
            # Free the element to keep memory bounded during iterparse.
            el.clear()