def test_flushes_changes_on_save(self):
    """Saving a dataset must clear its 'modified' flag."""
    ds = Dataset.from_iterable([])
    ds.put(DatasetItem(1))
    # Putting an item marks the dataset as modified.
    self.assertTrue(ds.is_modified)

    with TestDir() as save_dir:
        ds.save(save_dir)
        # A successful save flushes pending changes.
        self.assertFalse(ds.is_modified)
def setUp(self):
    """Build the shared fixture: two 'val' items with item-level and
    annotation-level attributes over a single-label category set."""
    shared_label_attrs = {'x': 1, 'y': 2}
    self.source = Dataset.from_iterable([
        DatasetItem(id='1', subset='val',
            attributes={'qq': 1, 'x': 2},
            annotations=[Label(0, attributes=dict(shared_label_attrs))]),
        DatasetItem(id='2', subset='val',
            attributes={'qq': 2},
            annotations=[Label(0, attributes=dict(shared_label_attrs))]),
    ], categories=['a'])
def _export(dst_file, instance_data, save_images=False):
    """Export instance data as ImageNet and pack it into a zip at dst_file.

    With images the full 'imagenet' layout is used; otherwise the
    annotations-only 'imagenet_txt' format is written.
    """
    extractor = GetCVATDataExtractor(instance_data, include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    export_format = 'imagenet' if save_images else 'imagenet_txt'
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, export_format, save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def test_remap_labels_delete_unspecified(self):
    """With default='delete', labels absent from the mapping are dropped."""
    src = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            Label(0, id=0),  # will be removed
            Label(1, id=1),
            Bbox(1, 2, 3, 4, label=None),
        ])
    ], categories=['label0', 'label1'])

    # Only 'label1' survives; it becomes index 0 in the new categories.
    expected = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            Label(0, id=1),
        ]),
    ], categories=['label1'])

    actual = transforms.RemapLabels(src,
        mapping={'label1': 'label1'}, default='delete')

    compare_datasets(self, expected, actual)
def test_project_labels(self):
    """ProjectLabels must remap kept labels, drop extra ones, and leave
    unlabeled annotations untouched."""
    src = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            Label(1),  # Label must be remapped
            Label(3),  # Must be removed (extra label)
            Bbox(1, 2, 3, 4, label=None),  # Must be kept (no label)
        ])
    ], categories=['a', 'b', 'c', 'd'])

    expected = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            Label(2),
            Bbox(1, 2, 3, 4, label=None),
        ]),
    ], categories=['c', 'a', 'b'])

    actual = transforms.ProjectLabels(src, dst_labels=['c', 'a', 'b'])

    compare_datasets(self, expected, actual)
def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
    """Item ids with non-ASCII characters and spaces must survive a
    save/load round trip."""
    expected = Dataset.from_iterable([
        DatasetItem(id='кириллица с пробелом', subset='train',
            attributes={'id': 1}),
    ])

    with TestDir() as test_dir:
        self._test_save_and_load(expected,
            CocoImageInfoConverter.convert, test_dir)
def test_can_save_dataset_with_image_info(self):
    """An image known only by path and size (no pixel data) must
    round-trip through the converter."""
    expected = Dataset.from_iterable([
        DatasetItem(id=1,
            image=Image(path='1.jpg', size=(10, 15)),
            attributes={'id': 1}),
    ])

    with TestDir() as test_dir:
        self._test_save_and_load(expected,
            CocoImageInfoConverter.convert, test_dir)
def test_can_import(self):
    """The ImageNet importer must read items, images and labels from the
    dummy dataset directory."""
    expected = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((8, 8, 3)),
            annotations=[Label(0), Label(1)]),
        DatasetItem(id='2', image=np.ones((10, 10, 3)),
            annotations=[Label(0)]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(i) for i in range(2)),
    })

    dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'imagenet')

    compare_datasets(self, expected, dataset, require_images=True)
def test_relative_paths(self):
    """Items whose ids contain subdirectories must round-trip through the
    YOLO converter, both with and without saved images."""
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', image=np.ones((4, 2, 3))),
        DatasetItem(id='subdir1/1', subset='train',
            image=np.ones((2, 6, 3))),
        DatasetItem(id='subdir2/1', subset='train',
            image=np.ones((5, 4, 3))),
    ], categories=[])

    # A tuple (not the set literal used before) gives a deterministic,
    # intention-revealing iteration order over the two modes.
    for save_images in (True, False):
        with self.subTest(save_images=save_images):
            with TestDir() as test_dir:
                YoloConverter.convert(source_dataset, test_dir,
                    save_images=save_images)
                parsed_dataset = Dataset.import_from(test_dir, 'yolo')

                compare_datasets(self, source_dataset, parsed_dataset)
def test_dataset_with_save_dataset_meta_file(self):
    """save_dataset_meta=True must write dataset_meta.json and still
    produce a loadable dataset."""
    src = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', annotations=[Label(0)]),
        DatasetItem(id='2', subset='train', annotations=[Label(1)]),
    ], categories=['label_0', 'label_1'])

    with TestDir() as test_dir:
        ImagenetTxtConverter.convert(src, test_dir,
            save_images=False, save_dataset_meta=True)
        parsed = Dataset.import_from(test_dir, 'imagenet_txt')

        self.assertTrue(osp.isfile(osp.join(test_dir, 'dataset_meta.json')))
        compare_datasets(self, src, parsed)
def _export_recognition(dst_file, instance_data, save_images=False):
    """Export instance data in ICDAR word-recognition format, zipped to
    dst_file. Labels are converted to captions first, as the format
    stores text rather than label ids."""
    extractor = GetCVATDataExtractor(instance_data, include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    dataset.transform(LabelToCaption)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'icdar_word_recognition',
            save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def test_can_import_without_people_file(self):
    """LFW import must work when test/annotations/people.txt is absent.

    Bug fix: the dataset was copied and people.txt removed from the COPY,
    but the import then read the ORIGINAL DUMMY_DATASET_DIR, so the
    'missing people file' scenario was never actually exercised. The
    import now uses the modified copy.
    """
    expected = Dataset.from_iterable([
        DatasetItem(id='name0_0001', subset='test',
            image=np.ones((2, 5, 3)),
            annotations=[
                Label(0, attributes={
                    'negative_pairs': ['name1/name1_0001',
                        'name1/name1_0002']
                }),
                Points([0, 4, 3, 3, 2, 2, 1, 0, 3, 0], label=0),
            ]),
        DatasetItem(id='name1_0001', subset='test',
            image=np.ones((2, 5, 3)),
            annotations=[
                Label(1, attributes={
                    'positive_pairs': ['name1/name1_0002'],
                }),
                Points([1, 6, 4, 6, 3, 3, 2, 1, 4, 1], label=1),
            ]),
        DatasetItem(id='name1_0002', subset='test',
            image=np.ones((2, 5, 3)),
            annotations=[
                Label(1),
                Points([0, 5, 3, 5, 2, 2, 1, 0, 3, 0], label=1),
            ]),
    ], categories=['name0', 'name1'])

    with TestDir() as test_dir:
        dataset_path = osp.join(test_dir, 'dataset')
        shutil.copytree(DUMMY_DATASET_DIR, dataset_path)
        os.remove(
            osp.join(dataset_path, 'test', 'annotations', 'people.txt'))

        # Import from the copy that lacks people.txt, not the original.
        dataset = Dataset.import_from(dataset_path, 'lfw')

        compare_datasets(self, expected, dataset)
def test_can_save_and_load(self):
    """VGG Face2 conversion must round-trip bboxes, landmark points,
    plain labels, unlabeled boxes, nested ids and annotation-less items."""
    src = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=0, group=1),
                Points([3.2, 3.12, 4.11, 3.2, 2.11,
                        2.5, 3.5, 2.11, 3.8, 2.13], label=0, group=1),
            ]),
        DatasetItem(id='2', subset='train', image=np.ones((10, 10, 3)),
            annotations=[
                # Points without an accompanying bbox.
                Points([4.23, 4.32, 5.34, 4.45, 3.54,
                        3.56, 4.52, 3.51, 4.78, 3.34], label=1, group=1),
            ]),
        DatasetItem(id='3', subset='train', image=np.ones((8, 8, 3)),
            annotations=[Label(2, group=1)]),
        DatasetItem(id='4', subset='train', image=np.ones((10, 10, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=3, group=1),
                Points([3.2, 3.12, 4.11, 3.2, 2.11,
                        2.5, 3.5, 2.11, 3.8, 2.13], label=3, group=1),
            ]),
        DatasetItem(id='a/5', subset='train', image=np.ones((8, 8, 3)),
            annotations=[
                # Bbox without a label.
                Bbox(2, 2, 2, 2, group=1),
            ]),
        # An item with no annotations at all.
        DatasetItem(id='label_0', subset='train',
            image=np.ones((8, 8, 3))),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            [('label_%s' % i, 'class_%s' % i) for i in range(5)]),
    })

    with TestDir() as test_dir:
        VggFace2Converter.convert(src, test_dir, save_images=True)
        parsed = Dataset.import_from(test_dir, 'vgg_face2')

        compare_datasets(self, src, parsed)
def test_can_extract_coco(self):
    """The TFDS COCO extractor must convert a mocked example (relative
    bbox, label index, crowd flag) into Datumaro annotations."""
    tfds_example = {
        'image': encode_image(np.ones((20, 10)), '.png'),
        'image/filename': 'test.png',
        'image/id': 123,
        'objects': {
            # Relative [ymin, xmin, ymax, xmax] coordinates.
            'bbox': [[0.1, 0.2, 0.3, 0.4]],
            'label': [5],
            'is_crowd': [True],
        },
    }

    with mock_tfds_data(example=tfds_example):
        tfds_info = tfds.builder('coco/2014').info

        expected = Dataset.from_iterable([
            DatasetItem(id='test', subset='train',
                image=np.ones((20, 10)),
                annotations=[
                    # Absolute (x, y, w, h) for a 10x20 image.
                    Bbox(2, 2, 2, 4, label=5,
                        attributes={'is_crowd': True}),
                ],
                attributes={'id': 123}),
        ], categories=tfds_info.features['objects'].feature['label'].names)

        extractor = make_tfds_extractor('coco/2014')
        actual = Dataset(extractor)

        compare_datasets(self, expected, actual, require_images=True)
def test_export_to_voc_format(self):
    """Full CLI pipeline: import a YOLO dataset into a project and export
    it as VOC, checking annotations and images survive."""
    # VOC needs an explicit background class at index 0.
    label_map = OrderedDict(
        ('label_%s' % i, [None, [], []]) for i in range(10))
    label_map['background'] = [None, [], []]
    label_map.move_to_end('background', last=False)

    voc_attrs = {'difficult': False, 'truncated': False, 'occluded': False}
    expected = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', image=np.ones((10, 15, 3)),
            annotations=[
                Bbox(0.0, 2.0, 4.0, 2.0, attributes=dict(voc_attrs),
                    id=1, label=3, group=1),
                Bbox(3.0, 3.0, 2.0, 3.0, attributes=dict(voc_attrs),
                    id=2, label=5, group=2),
            ]),
    ], categories=VOC.make_voc_categories(label_map))

    with TestDir() as test_dir:
        # Locate tests/assets/yolo_dataset relative to this test file.
        yolo_dir = osp.join(
            __file__[:__file__.rfind(osp.join('tests', ''))],
            'tests', 'assets', 'yolo_dataset')

        run(self, 'create', '-o', test_dir)
        run(self, 'import', '-p', test_dir, '-f', 'yolo', yolo_dir)

        voc_export = osp.join(test_dir, 'voc_export')
        run(self, 'export', '-p', test_dir, '-f', 'voc',
            '-o', voc_export, '--', '--save-images')

        parsed = Dataset.import_from(voc_export, format='voc')
        compare_datasets(self, expected, parsed, require_images=True)
def test_can_crop_covered_segments(self):
    """With crop_covered=True, the part of a mask hidden under a
    higher-z-order polygon must be cut out on export."""
    src = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 5, 3)),
            annotations=[
                # Bottom layer: mask at z_order=0.
                Mask(np.array([
                    [0, 0, 1, 1, 1],
                    [0, 0, 1, 1, 1],
                    [1, 1, 0, 1, 1],
                    [1, 1, 1, 0, 0],
                    [1, 1, 1, 0, 0]],
                ), label=2, id=1, z_order=0),
                # Top layer: polygon at z_order=1 covering the center.
                Polygon([1, 1, 4, 1, 4, 4, 1, 4],
                    label=1, id=2, z_order=1),
            ]),
    ], categories=[str(i) for i in range(10)])

    expected = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 5, 3)),
            annotations=[
                # The mask with the polygon's area cropped away.
                Mask(np.array([
                    [0, 0, 1, 1, 1],
                    [0, 0, 0, 0, 1],
                    [1, 0, 0, 0, 1],
                    [1, 0, 0, 0, 0],
                    [1, 1, 1, 0, 0]],
                ), attributes={'is_crowd': True},
                    label=2, id=1, group=1),
                Polygon([1, 1, 4, 1, 4, 4, 1, 4],
                    label=1, id=2, group=2,
                    attributes={'is_crowd': False}),
            ],
            attributes={'id': 1}),
    ], categories=[str(i) for i in range(10)])

    with TestDir() as test_dir:
        self._test_save_and_load(src,
            partial(CocoInstancesConverter.convert, crop_covered=True),
            test_dir, target_dataset=expected)
def _export(dst_file, instance_data, save_images=False):
    """Export instance data in Pascal VOC format (keeping the source
    label map) and pack the result into a zip at dst_file."""
    extractor = GetCVATDataExtractor(instance_data, include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'voc', save_images=save_images,
            label_map='source')
        make_zip_archive(tmp_dir, dst_file)
def test_can_import(self):
    """The MOT sequence importer must read boxes with their occlusion,
    visibility and ignored attributes."""
    expected = Dataset.from_iterable([
        DatasetItem(id=1, image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0, 4, 4, 8, label=2, attributes={
                    'occluded': False,
                    'visibility': 1.0,
                    'ignored': False,
                }),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(i) for i in range(10)),
    })

    dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'mot_seq')

    compare_datasets(self, expected, dataset)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """Non-default image extensions (.JPEG, .bmp) and nested ids must
    survive an Open Images round trip."""
    ds = Dataset.from_iterable([
        DatasetItem(id='a/1',
            image=Image(path='a/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem(id='b/c/d/2',
            image=Image(path='b/c/d/2.bmp', data=np.zeros((3, 4, 3)))),
    ], categories=[])

    with TestDir() as test_dir:
        OpenImagesConverter.convert(ds, test_dir, save_images=True)
        parsed = Dataset.import_from(test_dir, 'open_images')

        compare_datasets(self, ds, parsed, require_images=True)
def test_can_check_item_existence(self):
    """Membership checks must accept DatasetItem, (id, subset) tuples
    and bare ids (bare id implies the default subset)."""
    dataset = Dataset.from_iterable(
        [DatasetItem(0, subset='a'), DatasetItem(1)])

    # assertIn/assertNotIn exercise the same __contains__ protocol as
    # assertTrue(x in ...) but produce informative failure messages.
    self.assertIn(DatasetItem(0, subset='a'), dataset)
    self.assertNotIn(DatasetItem(0, subset='b'), dataset)

    self.assertIn((0, 'a'), dataset)
    self.assertNotIn((0, 'b'), dataset)

    self.assertIn(1, dataset)
    # Item 0 lives in subset 'a', so a bare id does not match it.
    self.assertNotIn(0, dataset)
def test_binds_on_save(self):
    """Saving an unbound dataset must bind it to the target path with
    the default format."""
    ds = Dataset.from_iterable([DatasetItem(1)])
    self.assertFalse(ds.is_bound)

    with TestDir() as save_dir:
        ds.save(save_dir)

        self.assertTrue(ds.is_bound)
        self.assertEqual(ds.data_path, save_dir)
        self.assertEqual(ds.format, DEFAULT_FORMAT)
def test_can_track_modifications_on_addition(self):
    """Adding an item must flip the 'modified' flag."""
    ds = Dataset.from_iterable([
        DatasetItem(1),
        DatasetItem(2),
    ])
    self.assertFalse(ds.is_modified)

    ds.put(DatasetItem(3, subset='a'))

    self.assertTrue(ds.is_modified)
def test_can_track_modifications_on_removal(self):
    """Removing an item must flip the 'modified' flag."""
    ds = Dataset.from_iterable([
        DatasetItem(1),
        DatasetItem(2),
    ])
    self.assertFalse(ds.is_modified)

    ds.remove(1)

    self.assertTrue(ds.is_modified)
def _export(dst_file, instance_data, save_images=False):
    """Export 3D instance data in Datumaro format, zipped to dst_file.

    When images are not requested, image paths are stripped from the
    items so the export does not reference missing files.
    """
    extractor = GetCVATDataExtractor(
        instance_data=instance_data,
        include_images=save_images,
        dimension=DimensionType.DIM_3D)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    if not save_images:
        dataset.transform(DeleteImagePath)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'datumaro', save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def test_can_save_and_load_without_saving_images(self):
    """MNIST conversion with save_images=False must still round-trip
    the labels.

    NOTE(review): require_images=True in the comparison despite
    save_images=False — presumably MNIST stores pixel data in its packed
    files regardless; confirm against the converter.
    """
    src = Dataset.from_iterable([
        DatasetItem(id=0, subset='train', annotations=[Label(0)]),
        DatasetItem(id=1, subset='train', annotations=[Label(1)]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            str(i) for i in range(10)),
    })

    with TestDir() as test_dir:
        MnistConverter.convert(src, test_dir, save_images=False)
        parsed = Dataset.import_from(test_dir, 'mnist')

        compare_datasets(self, src, parsed, require_images=True)
def test_can_import_with_meta_file(self):
    """VoTT CSV import must pick up labels from the dataset meta file."""
    expected = Dataset.from_iterable([
        DatasetItem(id='img0001', subset='test',
            image=np.ones((5, 5, 3)),
            annotations=[Bbox(10, 5, 10, 2, label=0)]),
        DatasetItem(id='img0002', subset='test',
            image=np.ones((5, 5, 3)),
            annotations=[
                Bbox(11.5, 12, 10.2, 20.5, label=1),
            ]),
    ], categories=['helmet', 'person'])

    dataset = Dataset.import_from(DUMMY_DATASET_DIR_WITH_META_FILE,
        'vott_csv')

    compare_datasets(self, expected, dataset, require_images=True)
def test_relative_paths(self):
    """CVAT export must keep subdirectory ids and assign sequential
    frame numbers."""
    src = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((4, 2, 3))),
        DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
        DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
    ])

    # On load, each item gains a 'frame' attribute in insertion order.
    expected = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((4, 2, 3)),
            attributes={'frame': 0}),
        DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
            attributes={'frame': 1}),
        DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
            attributes={'frame': 2}),
    ], categories=[])

    with TestDir() as test_dir:
        self._test_save_and_load(src,
            partial(CvatConverter.convert, save_images=True), test_dir,
            target_dataset=expected, require_images=True)
def _export(dst_file, task_data, save_images=False):
    """Export task data as COCO instances (images merged into one
    directory) and pack the result into a zip at dst_file."""
    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'coco_instances',
            save_images=save_images, merge_images=True)
        make_zip_archive(tmp_dir, dst_file)
def test_can_transform_dataset_inplace(self):
    """'transform remap_labels --overwrite' must rewrite an exported COCO
    dataset in place, renaming both labels."""
    test_dir = scope_add(TestDir())
    Dataset.from_iterable([
        DatasetItem(1, annotations=[Label(0)]),
        DatasetItem(2, annotations=[Label(1)]),
    ], categories=['a', 'b']).export(test_dir, 'coco')

    run(self, 'transform', '-t', 'remap_labels', '--overwrite',
        test_dir + ':coco', '--', '-l', 'a:cat', '-l', 'b:dog')

    # COCO assigns ids/groups on export; attribute values are ignored
    # in the comparison below.
    expected = Dataset.from_iterable([
        DatasetItem(1, annotations=[Label(0, id=1, group=1)]),
        DatasetItem(2, annotations=[Label(1, id=2, group=2)]),
    ], categories=['cat', 'dog'])
    compare_datasets(self, expected,
        Dataset.import_from(test_dir, 'coco'), ignored_attrs='*')
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """Market-1501 conversion must keep non-default image extensions and
    the id-encoded attributes (camera, person, track, frame, bbox).

    NOTE(review): the image file names differ from the item ids
    ('..._0000_00' vs '..._000000_00') — presumably the converter
    re-derives file names from the attributes; verify against it.
    """
    expected = Dataset.from_iterable([
        DatasetItem(id='c/0001_c1s1_000000_00',
            image=Image(path='c/0001_c1s1_0000_00.JPEG',
                data=np.zeros((4, 3, 3))),
            attributes={'camera_id': 0, 'person_id': '0001',
                'track_id': 1, 'frame_id': 0, 'bbox_id': 0,
                'query': False}),
        DatasetItem(id='a/b/0002_c2s2_000001_00',
            image=Image(path='a/b/0002_c2s2_0001_00.bmp',
                data=np.zeros((3, 4, 3))),
            attributes={'camera_id': 1, 'person_id': '0002',
                'track_id': 2, 'frame_id': 1, 'bbox_id': 0,
                'query': False}),
    ])

    with TestDir() as test_dir:
        Market1501Converter.convert(expected, test_dir, save_images=True)
        parsed = Dataset.import_from(test_dir, 'market1501')

        compare_datasets(self, expected, parsed, require_images=True)