def test_can_import_with_no_labels_file(self):
    """With labels='generate', the importer must synthesize label names
    of the form 'class-N' covering every label id seen in the data."""
    actual = Dataset.import_from(DUMMY_DATASET_WITH_NO_LABELS_DIR,
        'imagenet_txt', labels='generate')

    expected = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', annotations=[Label(4)]),
    ], categories=['class-%s' % i for i in range(5)])

    compare_datasets(self, expected, actual, require_images=True)
def _test_save_and_load(self, source_dataset, converter, test_dir,
        target_dataset=None, importer_args=None):
    """Round-trip helper: write with `converter`, re-read via VocImporter,
    and compare to `target_dataset` (defaults to the source)."""
    converter(source_dataset, test_dir)

    # None -> no extra importer arguments
    args = importer_args if importer_args is not None else {}
    parsed = VocImporter()(test_dir, **args).make_dataset()

    reference = source_dataset if target_dataset is None else target_dataset
    compare_datasets(self, expected=reference, actual=parsed)
def test_inplace_save_writes_only_updated_data_with_direct_changes(self):
    """In-place VOC save must rewrite only items changed via put()/remove()
    since the last export, leaving untouched files on disk as-is."""
    # What the dataset should look like after the edits below are re-imported.
    expected = Dataset.from_iterable([
        DatasetItem(1, subset='a', image=np.ones((1, 2, 3)),
            annotations=[
                # Bbox(0, 0, 0, 0, label=1) # won't find removed anns
            ]),
        DatasetItem(2, subset='b', image=np.ones((3, 2, 3)),
            annotations=[
                Bbox(0, 0, 0, 0, label=4, id=1, group=1, attributes={
                    'truncated': False,
                    'difficult': False,
                    'occluded': False,
                })
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            ['background', 'a', 'b', 'c', 'd']),
        AnnotationType.mask: MaskCategories(
            colormap=VOC.generate_colormap(5)),
    })

    dataset = Dataset.from_iterable([
        DatasetItem(1, subset='a', image=np.ones((1, 2, 3)),
            annotations=[Bbox(0, 0, 0, 0, label=1)]),
        DatasetItem(2, subset='b',
            annotations=[Bbox(0, 0, 0, 0, label=2)]),
        DatasetItem(3, subset='c', image=np.ones((2, 2, 3)),
            annotations=[
                Bbox(0, 0, 0, 0, label=3),
                Mask(np.ones((2, 2)), label=1)
            ]),
    ], categories=['a', 'b', 'c', 'd'])

    with TestDir() as path:
        dataset.export(path, 'voc', save_images=True)
        # Delete the exported annotation files so we can observe exactly
        # which ones the subsequent save() recreates.
        os.unlink(osp.join(path, 'Annotations', '1.xml'))
        os.unlink(osp.join(path, 'Annotations', '2.xml'))
        os.unlink(osp.join(path, 'Annotations', '3.xml'))

        # Direct changes: replace item 2 (moved to subset 'a' on disk view
        # below) and remove item 3 entirely.
        dataset.put(DatasetItem(2, subset='b', image=np.ones((3, 2, 3)),
            annotations=[Bbox(0, 0, 0, 0, label=3)]))
        dataset.remove(3, 'c')
        dataset.save(save_images=True)

        # Only the updated item's annotation file is rewritten.
        self.assertEqual({'2.xml'}, # '1.xml' won't be touched
            set(os.listdir(osp.join(path, 'Annotations'))))
        self.assertEqual({'1.jpg', '2.jpg'},
            set(os.listdir(osp.join(path, 'JPEGImages'))))
        self.assertEqual({'a.txt', 'b.txt'},
            set(os.listdir(osp.join(path, 'ImageSets', 'Main'))))
        compare_datasets(self, expected,
            Dataset.import_from(path, 'voc'), require_images=True)
def test_can_import_with_custom_labels_file(self):
    """A user-supplied labels file must drive the imported label names."""
    actual = Dataset.import_from(DUMMY_DATASET_WITH_CUSTOM_LABELS_DIR,
        'imagenet_txt', labels_file='synsets-alt.txt')

    expected = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', annotations=[Label(0)]),
    ], categories=['alt_label_%s' % i for i in range(10)])

    compare_datasets(self, expected, actual, require_images=True)
def test_id_from_image(self):
    """IdFromImageName must replace an item id with the image file stem,
    and leave items without an image unchanged."""
    items_before = [
        DatasetItem(id=1, image='path.jpg'),
        DatasetItem(id=2),
    ]
    items_after = [
        DatasetItem(id='path', image='path.jpg'),
        DatasetItem(id=2),
    ]

    transformed = transforms.IdFromImageName(
        Dataset.from_iterable(items_before))

    compare_datasets(self, Dataset.from_iterable(items_after), transformed)
def test_can_load(self):
    """CVAT import must reproduce boxes/polylines/polygons/points with
    z-order and per-annotation attributes from a generated dummy file."""
    class TestExtractor(Extractor):
        # Reference extractor describing what the dummy CVAT file contains.
        def __iter__(self):
            return iter([
                DatasetItem(id=0, subset='train',
                    image=np.ones((8, 8, 3)),
                    annotations=[
                        Bbox(0, 2, 4, 2, label=0, z_order=1,
                            attributes={
                                'occluded': True,
                                'a1': True, 'a2': 'v3'
                            }),
                        PolyLine([1, 2, 3, 4, 5, 6, 7, 8], z_order=0,
                            attributes={'occluded': False}),
                    ]),
                DatasetItem(id=1, subset='train',
                    image=np.ones((10, 10, 3)),
                    annotations=[
                        Polygon([1, 2, 3, 4, 6, 5], z_order=1,
                            attributes={'occluded': False}),
                        Points([1, 2, 3, 4, 5, 6], label=1, z_order=2,
                            attributes={'occluded': False}),
                    ]),
            ])

        def categories(self):
            # 'label1' declares custom attributes a1/a2; 'label2' has none.
            label_categories = LabelCategories()
            label_categories.add('label1', attributes={'a1', 'a2'})
            label_categories.add('label2')
            return {
                AnnotationType.label: label_categories,
            }

    with TestDir() as test_dir:
        generate_dummy_cvat(test_dir)
        source_dataset = TestExtractor()

        parsed_dataset = CvatImporter()(test_dir).make_dataset()

        compare_datasets(self, source_dataset, parsed_dataset)
def test_can_import(self):
    """ADE20k-2020 import must produce polygons and part masks with the
    ids/groups/z-orders encoded in the dummy dataset's annotation files."""
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='street/1', subset='training',
            image=np.ones((5, 5, 3)),
            annotations=[
                Polygon([1, 0, 1, 1, 1, 2, 1, 3, 1, 4], group=1,
                    z_order=0, id=1, label=1,
                    attributes={'walkin': True}),
                Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
                    group=401, z_order=0, id=401),
                Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
                    group=1831, z_order=0, id=1831),
                Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
                    id=774, group=774, z_order=1),
                Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
                    group=0, z_order=0, id=0),
                Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
                    group=1, z_order=0, id=1,
                    attributes={'walkin': True}),
                Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
                    group=2, z_order=1, id=2),
            ]),
        DatasetItem(id='2', subset='validation',
            image=np.ones((5, 5, 3)),
            annotations=[
                Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
                    group=401, z_order=0, id=401),
                Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
                    group=1831, z_order=0, id=1831),
                Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), label=3,
                    group=2122, z_order=2, id=2122),
                Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
                    group=774, z_order=1, id=774),
                Mask(image=np.array([[0, 0, 1, 1, 1]] * 5), label=0,
                    group=0, z_order=0, id=0),
                Mask(image=np.array([[0, 1, 0, 0, 0]] * 5), label=1,
                    group=1, z_order=0, id=1,
                    attributes={'walkin': True}),
                Mask(image=np.array([[0, 0, 0, 1, 1]] * 5), label=2,
                    group=2, z_order=1, id=2),
                Mask(image=np.array([[0, 0, 0, 0, 1]] * 5), label=3,
                    group=3, z_order=2, id=3),
            ])
    ], categories={AnnotationType.label: LabelCategories.from_iterable([
            'car', 'person', 'door', 'rim'])
        }
    )

    imported_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'ade20k2020')

    compare_datasets(self, expected_dataset, imported_dataset,
        require_images=True)
def test_inplace_save_writes_only_updated_data(self):
    """In-place Camvid save must only write the subsets touched by
    put()/remove(), and unused label/mask categories must disappear
    on re-import."""
    src_mask_cat = MaskCategories.generate(3, include_background=False)

    # Expected state after the edits below; label 'c' is gone because
    # the only item using it was removed.
    expected = Dataset.from_iterable([
        DatasetItem(1, subset='a', image=np.ones((2, 1, 3)),
            annotations=[Mask(np.ones((2, 1)), label=2)]),
        DatasetItem(2, subset='a', image=np.ones((3, 2, 3))),
        DatasetItem(2, subset='b'),
    ], categories=Camvid.make_camvid_categories(OrderedDict([
        ('background', (0, 0, 0)),
        ('a', src_mask_cat.colormap[0]),
        ('b', src_mask_cat.colormap[1]),
    ])))

    with TestDir() as path:
        dataset = Dataset.from_iterable([
            DatasetItem(1, subset='a', image=np.ones((2, 1, 3)),
                annotations=[Mask(np.ones((2, 1)), label=1)]),
            DatasetItem(2, subset='b'),
            DatasetItem(3, subset='c', image=np.ones((2, 2, 3)),
                annotations=[Mask(np.ones((2, 2)), label=0)]),
        ], categories={
            AnnotationType.label:
                LabelCategories.from_iterable(['a', 'b']),
            AnnotationType.mask: src_mask_cat
        })
        dataset.export(path, 'camvid', save_images=True)

        # Move item 2 into subset 'a' and drop item 3/subset 'c'.
        dataset.put(DatasetItem(2, subset='a',
            image=np.ones((3, 2, 3))))
        dataset.remove(3, 'c')
        dataset.save(save_images=True)

        # Subset 'c' artifacts must not remain on disk.
        self.assertEqual(
            {'a', 'aannot', 'a.txt', 'b.txt', 'label_colors.txt'},
            set(os.listdir(path)))
        self.assertEqual({'1.jpg', '2.jpg'},
            set(os.listdir(osp.join(path, 'a'))))
        compare_datasets(self, expected,
            Dataset.import_from(path, 'camvid'), require_images=True)
def test_can_import(self):
    """VOC import must merge labels, boxes, instance masks, actions and
    body parts from the dummy dataset into the expected items."""
    class DstExtractor(TestExtractorBase):
        # Reference description of the dummy VOC dataset's contents.
        def __iter__(self):
            return iter([
                DatasetItem(id='2007_000001', subset='train',
                    image=np.ones((10, 20, 3)),
                    annotations=[
                        # Image-level labels: every odd-valued VocLabel.
                        Label(self._label(l.name))
                        for l in VOC.VocLabel if l.value % 2 == 1
                    ] + [
                        Bbox(1, 2, 2, 2, label=self._label('cat'),
                            attributes={
                                'pose': VOC.VocPose(1).name,
                                'truncated': True,
                                'difficult': False,
                                'occluded': False,
                            },
                            id=1, group=1,
                        ),
                        # Only main boxes denote instances (have ids)
                        Mask(image=np.ones([10, 20]),
                            label=self._label(VOC.VocLabel(2).name),
                            group=1,
                        ),
                        Bbox(4, 5, 2, 2, label=self._label('person'),
                            attributes={
                                'truncated': False,
                                'difficult': False,
                                'occluded': False,
                                **{
                                    a.name: a.value % 2 == 1
                                    for a in VOC.VocAction
                                }
                            },
                            id=2, group=2,
                        ),
                        # Only main boxes denote instances (have ids)
                        Bbox(5.5, 6, 2, 2,
                            label=self._label(VOC.VocBodyPart(1).name),
                            group=2
                        ),
                    ]
                ),
                DatasetItem(id='2007_000002', subset='test',
                    image=np.ones((10, 20, 3))),
            ])

    dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'voc')

    compare_datasets(self, DstExtractor(), dataset)
def test_can_import(self):
    """Cityscapes import must reconstruct instance and crowd masks.

    Instance masks carry an id; crowd/stuff regions are flagged with
    the 'is_crowd' attribute instead and have no instance id.
    """
    # is_crowd marks labels allowing to specify instance id
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='defaultcity/defaultcity_000001_000031',
            subset='test', image=np.ones((1, 5, 3)),
            annotations=[
                Mask(np.array([[1, 1, 0, 0, 0]]), label=3,
                    attributes={'is_crowd': True}),
                Mask(np.array([[0, 0, 1, 0, 0]]), id=1, label=27,
                    attributes={'is_crowd': False}),
                Mask(np.array([[0, 0, 0, 1, 1]]), id=2, label=27,
                    attributes={'is_crowd': False}),
            ]),
        DatasetItem(id='defaultcity/defaultcity_000001_000032',
            subset='test', image=np.ones((1, 5, 3)),
            annotations=[
                Mask(np.array([[1, 1, 0, 0, 0]]), id=1, label=31,
                    attributes={'is_crowd': False}),
                Mask(np.array([[0, 0, 1, 0, 0]]), label=12,
                    attributes={'is_crowd': True}),
                Mask(np.array([[0, 0, 0, 1, 1]]), label=3,
                    attributes={'is_crowd': True}),
            ]),
        DatasetItem(id='defaultcity/defaultcity_000002_000045',
            subset='train', image=np.ones((1, 5, 3)),
            annotations=[
                Mask(np.array([[1, 1, 0, 1, 1]]), label=3,
                    attributes={'is_crowd': True}),
                Mask(np.array([[0, 0, 1, 0, 0]]), id=1, label=24,
                    attributes={'is_crowd': False}),
            ]),
        # Fixed PEP 8 violation: was "subset = 'val'" (no spaces around
        # '=' in keyword arguments).
        DatasetItem(id='defaultcity/defaultcity_000001_000019',
            subset='val', image=np.ones((1, 5, 3)),
            annotations=[
                Mask(np.array([[1, 0, 0, 1, 1]]), label=3,
                    attributes={'is_crowd': True}),
                Mask(np.array([[0, 1, 1, 0, 0]]), id=24, label=1,
                    attributes={'is_crowd': False}),
            ]),
    ], categories=Cityscapes.make_cityscapes_categories())

    parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'cityscapes')

    compare_datasets(self, source_dataset, parsed_dataset)
def test_can_save_dataset_with_non_widerface_attributes(self):
    """Attributes unknown to the WIDER Face format must be dropped on
    export, while recognized ones ('blur', 'invalid') survive the
    round-trip (as strings)."""
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='a/b/1', image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=0),
                Bbox(0, 1, 2, 3, label=0, attributes={
                    'non-widerface attribute': '0',
                    'blur': 1, 'invalid': '1'
                }),
                Bbox(1, 1, 2, 2, label=0,
                    attributes={'non-widerface attribute': '0'}),
            ]),
    ], categories=['face'])

    # Same items, with non-format attributes removed and 'blur'
    # normalized to its on-disk string form.
    target_dataset = Dataset.from_iterable([
        DatasetItem(id='a/b/1', image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=0),
                Bbox(0, 1, 2, 3, label=0, attributes={
                    'blur': '1', 'invalid': '1'
                }),
                Bbox(1, 1, 2, 2, label=0),
            ]),
    ], categories=['face'])

    with TestDir() as test_dir:
        WiderFaceConverter.convert(source_dataset, test_dir,
            save_images=True)
        parsed_dataset = Dataset.import_from(test_dir, 'wider_face')

        compare_datasets(self, target_dataset, parsed_dataset)
def test_can_remove_items_by_ids(self):
    """RemoveItems must drop exactly the requested (id, subset) pairs."""
    full = Dataset.from_iterable([
        DatasetItem(id='1', subset='train'),
        DatasetItem(id='2', subset='train'),
    ])
    remaining = Dataset.from_iterable([
        DatasetItem(id='1', subset='train'),
    ])

    result = transforms.RemoveItems(full, ids=[('2', 'train')])

    compare_datasets(self, remaining, result)
def test_can_save_and_load_image_with_arbitrary_extension(self):
    """Images with non-default extensions must survive a YOLO round-trip."""
    src = Dataset.from_iterable([
        DatasetItem('q/1', subset='train',
            image=Image(path='q/1.JPEG', data=np.zeros((4, 3, 3)))),
        DatasetItem('a/b/c/2', subset='valid',
            image=Image(path='a/b/c/2.bmp', data=np.zeros((3, 4, 3)))),
    ], categories=[])

    with TestDir() as test_dir:
        YoloConverter.convert(src, test_dir, save_images=True)
        reloaded = Dataset.import_from(test_dir, 'yolo')

        compare_datasets(self, src, reloaded, require_images=True)
def test_can_remove_all_attrs_by_item_id(self):
    """RemoveAttributes with only ids must strip every attribute (item-
    and annotation-level) of the matching items, leaving others intact."""
    expected = Dataset.from_iterable([
        DatasetItem(id='1', subset='val',
            annotations=[Label(0)]),
        DatasetItem(id='2', subset='val', attributes={'qq': 2},
            annotations=[Label(0, attributes={'x': 1, 'y': 2})]),
    ], categories=['a'])

    stripped = transforms.RemoveAttributes(self.source,
        ids=[('1', 'val')])

    compare_datasets(self, expected, stripped)
def test_can_import_v5(self):
    """The Open Images v5 directory layout must be importable."""
    imported = Dataset.import_from(DUMMY_DATASET_DIR_V5, 'open_images')

    expected = Dataset.from_iterable([
        DatasetItem(id='aa', subset='train', image=np.zeros((8, 6, 3))),
        DatasetItem(id='cc', subset='test', image=np.ones((10, 5, 3))),
    ], categories=['/m/0', '/m/1'])

    compare_datasets(self, expected, imported, require_images=True)
def test_can_import_labels(self):
    """COCO labels annotations must import as Label objects carrying
    their original ids and groups."""
    imported = Dataset.import_from(
        osp.join(DUMMY_DATASET_DIR, 'coco_labels'), 'coco')

    expected = Dataset.from_iterable([
        DatasetItem(id=1, subset='train',
            annotations=[
                Label(1, id=1, group=1),
                Label(0, id=2, group=2),
            ],
            attributes={'id': 1}),
    ], categories=['a', 'b'])

    compare_datasets(self, expected, imported)
def test_can_save_in_another_format(self):
    """'merge' CLI must combine a COCO and a VOC dataset and write the
    result in a third format (YOLO), remapping label indices into the
    merged category list ('background' comes from VOC)."""
    dataset1 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 3, label=0),
            ]),
    ], categories=['a', 'b'])

    dataset2 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=1),
                Bbox(5, 6, 2, 3, label=2),
            ]),
    ], categories=['a', 'b', 'c'])

    # Labels shift by one because 'background' is prepended by the
    # VOC source during merging.
    expected = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=2),
                Bbox(5, 6, 2, 3, label=3),
                Bbox(1, 2, 3, 3, label=1),
            ]),
    ], categories=['background', 'a', 'b', 'c'])

    with TestDir() as test_dir:
        dataset1_url = osp.join(test_dir, 'dataset1')
        dataset2_url = osp.join(test_dir, 'dataset2')

        dataset1.export(dataset1_url, 'coco', save_images=True)
        dataset2.export(dataset2_url, 'voc', save_images=True)

        result_dir = osp.join(test_dir, 'result')
        # '--' separates merge options from the output format's options.
        run(self, 'merge', '-o', result_dir, '-f', 'yolo',
            dataset2_url + ':voc', dataset1_url + ':coco',
            '--', '--save-images')

        compare_datasets(self, expected,
            Dataset.import_from(result_dir, 'yolo'), require_images=True)
def test_can_load_voc_det(self):
    """VocDetectionExtractor must read boxes with pose/truncated/
    difficult/occluded attributes and VOC action flags from a generated
    dummy VOC tree."""
    class DstExtractor(TestExtractorBase):
        # Reference contents of the dummy VOC detection subset.
        def __iter__(self):
            return iter([
                DatasetItem(id='2007_000001', subset='train',
                    annotations=[
                        Bbox(1, 2, 2, 2, label=self._label('cat'),
                            attributes={
                                'pose': VOC.VocPose(1).name,
                                'truncated': True,
                                'difficult': False,
                                'occluded': False,
                            },
                            id=1, group=1,
                        ),
                        Bbox(4, 5, 2, 2, label=self._label('person'),
                            attributes={
                                'truncated': False,
                                'difficult': False,
                                'occluded': False,
                                **{
                                    a.name: a.value % 2 == 1
                                    for a in VOC.VocAction
                                }
                            },
                            id=2, group=2,
                            # TODO: Actions and group should be excluded
                            # as soon as correct merge is implemented
                        ),
                    ]),
            ])

    with TestDir() as test_dir:
        generate_dummy_voc(test_dir)
        parsed_train = VocDetectionExtractor(
            osp.join(test_dir, 'ImageSets', 'Main', 'train.txt'))
        compare_datasets(self, DstExtractor(), parsed_train)
def test_can_save_dataset_with_no_save_images(self):
    """Market-1501 export with save_images=False must still round-trip
    all item attributes (they are encoded in the file names).

    Also fixes a PEP 8 violation: the original had spaces around '='
    in the 'attributes' keyword argument.
    """
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='0001_c2s3_000001_00', subset='query',
            image=np.ones((2, 5, 3)),
            attributes={'camera_id': 1, 'person_id': '0001',
                'track_id': 3, 'frame_id': 1, 'bbox_id': 0,
                'query': True}),
    ])

    with TestDir() as test_dir:
        Market1501Converter.convert(source_dataset, test_dir,
            save_images=False)
        parsed_dataset = Dataset.import_from(test_dir, 'market1501')

        compare_datasets(self, source_dataset, parsed_dataset)
def test_can_extract_voc(self):
    """The TFDS-backed extractor must convert a mocked voc/2012 example
    (normalized ymin/xmin/ymax/xmax boxes) into absolute-coordinate
    Bbox annotations with VOC-style attributes."""
    # TFDS is unable to generate fake examples for object detection
    # datasets. See <https://github.com/tensorflow/datasets/issues/3633>.
    tfds_example = {
        'image/filename': 'test.png',
        'image': encode_image(np.ones((20, 10)), '.png'),
        'objects': {
            # Normalized [ymin, xmin, ymax, xmax] on a 20x10 image.
            'bbox': [[0.1, 0.2, 0.3, 0.4]],
            'label': [5],
            'is_difficult': [True],
            'is_truncated': [False],
            'pose': [0],
        }
    }

    with mock_tfds_data(example=tfds_example):
        tfds_info = tfds.builder('voc/2012').info

        pose_names = tfds_info.features['objects'].feature['pose'].names

        expected_dataset = Dataset.from_iterable(
            [
                DatasetItem(
                    id='test', subset='train', image=np.ones((20, 10)),
                    annotations=[
                        # (0.2*10, 0.1*20, w, h) -> x=2, y=2, w=2, h=4
                        Bbox(2, 2, 2, 4, label=5,
                            attributes={
                                'difficult': True,
                                'truncated': False,
                                'pose': pose_names[0].title(),
                            }),
                    ],
                ),
            ],
            categories=tfds_info.features['objects'].feature['label'].names
        )

        extractor = make_tfds_extractor('voc/2012')
        actual_dataset = Dataset(extractor)

        compare_datasets(self, expected_dataset, actual_dataset,
            require_images=True)
def test_can_remove_annotations_in_dataset(self):
    """RemoveAnnotations with no filter must clear every item's
    annotations while keeping the items and categories."""
    label_names = ['a', 'b']
    populated = Dataset.from_iterable([
        DatasetItem(id='1', subset='test', annotations=[Label(0)]),
        DatasetItem(id='2', subset='test', annotations=[Label(1)]),
    ], categories=label_names)
    emptied = Dataset.from_iterable([
        DatasetItem(id='1', subset='test'),
        DatasetItem(id='2', subset='test'),
    ], categories=label_names)

    compare_datasets(self, emptied,
        transforms.RemoveAnnotations(populated))
def test_can_import(self):
    """VoTT JSON import must keep the per-item and per-region 'id'
    attributes, including boxes without a tag (no label) and boxes
    with multiple tags (duplicated per label)."""
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='img0001', subset='train',
            image=np.ones((5, 5, 3)),
            attributes={'id': '0d3de147f'},
            annotations=[
                Bbox(5, 10, 10, 2, label=0,
                    attributes={'id': 'BsO3zj9bn'})
            ]),
        # One VoTT region with two tags becomes two boxes sharing an id.
        DatasetItem(id='img0002', subset='train',
            image=np.ones((5, 5, 3)),
            attributes={'id': 'b482849bc'},
            annotations=[
                Bbox(11.5, 12, 10.2, 20.5, label=0,
                    attributes={'id': 'mosw0b97K'}),
                Bbox(11.5, 12, 10.2, 20.5, label=1,
                    attributes={'id': 'mosw0b97K'})
            ]),
        # A region without tags imports as an unlabeled box.
        DatasetItem(id='img0003', subset='train',
            image=np.ones((5, 5, 3)),
            attributes={'id': '50fef05a8'},
            annotations=[
                Bbox(6.7, 10.3, 3.3, 4.7,
                    attributes={'id': '35t9mf-Zr'}),
                Bbox(13.7, 20.2, 31.9, 43.4, label=2,
                    attributes={'id': 'sO4m1DtTZ'})
            ])
    ], categories=['animal', 'dog', 'person'])

    dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'vott_json')

    compare_datasets(self, expected_dataset, dataset,
        require_images=True)
def test_can_save_and_load_with_no_save_images(self):
    """An ImageNet-txt round-trip must work when images are not saved."""
    src = Dataset.from_iterable([
        DatasetItem(id='1', subset='train', annotations=[Label(0)]),
    ], categories=['label_0'])

    with TestDir() as test_dir:
        ImagenetTxtConverter.convert(src, test_dir, save_images=False)

        compare_datasets(self, src,
            Dataset.import_from(test_dir, 'imagenet_txt'))
def test_can_save_and_load_empty_image(self):
    """Items without image data must survive a CIFAR round-trip."""
    items = [
        DatasetItem(id='a', annotations=[Label(0)]),
        DatasetItem(id='b'),
    ]
    src = Dataset.from_iterable(items, categories=['label_0'])

    with TestDir() as test_dir:
        CifarConverter.convert(src, test_dir, save_images=True)
        reloaded = Dataset.import_from(test_dir, 'cifar')

        compare_datasets(self, src, reloaded, require_images=True)
def test_remap_labels_delete_unspecified(self):
    """With default='delete', labels absent from the mapping must be
    removed together with their annotations and category entries."""
    src = Dataset.from_iterable(
        [DatasetItem(id=1, annotations=[Label(0)])],
        categories=['label0'])

    remapped = transforms.RemapLabels(src, mapping={}, default='delete')

    compare_datasets(self,
        Dataset.from_iterable([DatasetItem(id=1)], categories=[]),
        remapped)
def test_can_save_and_load_image_with_custom_extension(self):
    """The image_dir importer must pick up files matching the 'exts'
    filter even with a non-standard extension."""
    expected = Dataset.from_iterable([
        DatasetItem(id='a/3',
            image=Image(path='a/3.qq', data=np.zeros((3, 4, 3)))),
    ])

    with TestDir() as test_dir:
        jpg_path = osp.join(test_dir, 'a', '3.jpg')
        save_image(jpg_path, expected.get('a/3').image.data,
            create_dir=True)
        # Rename to the non-standard extension under test.
        os.rename(jpg_path, osp.join(test_dir, 'a', '3.qq'))

        loaded = Dataset.import_from(test_dir, 'image_dir', exts='qq')

        compare_datasets(self, expected, loaded, require_images=True)
def test_can_save_and_load(self):
    """LFW round-trip must preserve positive_pairs/negative_pairs
    attributes on Label annotations."""
    source_dataset = Dataset.from_iterable([
        DatasetItem(id='name0_0001', subset='test',
            image=np.ones((2, 5, 3)),
            annotations=[Label(0, attributes={
                'positive_pairs': ['name0/name0_0002']
            })]),
        DatasetItem(id='name0_0002', subset='test',
            image=np.ones((2, 5, 3)),
            annotations=[Label(0, attributes={
                'positive_pairs': ['name0/name0_0001'],
                'negative_pairs': ['name1/name1_0001']
            })]),
        DatasetItem(id='name1_0001', subset='test',
            image=np.ones((2, 5, 3)),
            annotations=[Label(1, attributes={
                'positive_pairs': ['name1/name1_0002']
            })]),
        # NOTE(review): this item's positive pair references itself
        # ('name1/name1_0002') — confirm that is intended and not a
        # typo for 'name1/name1_0001'.
        DatasetItem(id='name1_0002', subset='test',
            image=np.ones((2, 5, 3)),
            annotations=[Label(1, attributes={
                'positive_pairs': ['name1/name1_0002'],
                'negative_pairs': ['name0/name0_0001']
            })]),
    ], categories=['name0', 'name1'])

    with TestDir() as test_dir:
        LfwConverter.convert(source_dataset, test_dir, save_images=True)
        parsed_dataset = Dataset.import_from(test_dir, 'lfw')

        compare_datasets(self, source_dataset, parsed_dataset,
            require_images=True)
def test_can_merge_categories(self):
    """IntersectMerge must union label/points/mask categories from both
    sources: shared names keep the first source's entries, new names
    ('c') are appended and labels re-indexed accordingly."""
    source0 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(0),
        ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(['a', 'b']),
        AnnotationType.points: PointsCategories.from_iterable([
            (0, ['l0', 'l1']),
            (1, ['l2', 'l3']),
        ]),
        AnnotationType.mask: MaskCategories({
            0: (0, 1, 2),
            1: (1, 2, 3),
        }),
    })

    source1 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(0),
        ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(['c', 'b']),
        AnnotationType.points: PointsCategories.from_iterable([
            (0, []),
            (1, ['l2', 'l3']),
        ]),
        AnnotationType.mask: MaskCategories({
            0: (0, 2, 4),
            1: (1, 2, 3),
        }),
    })

    # source0's Label(0) stays 'a' (index 0); source1's Label(0) was 'c'
    # and becomes index 2 in the merged label list.
    expected = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(0),
            Label(2),
        ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            ['a', 'b', 'c']),
        AnnotationType.points: PointsCategories.from_iterable([
            (0, ['l0', 'l1']),
            (1, ['l2', 'l3']),
            (2, []),
        ]),
        AnnotationType.mask: MaskCategories({
            0: (0, 1, 2),
            1: (1, 2, 3),
            2: (0, 2, 4),
        }),
    })

    merger = IntersectMerge()
    merged = merger([source0, source1])

    # Merging may attach 'score' attributes; ignore them in comparison.
    compare_datasets(self, expected, merged, ignored_attrs={'score'})
def test_inplace_save_writes_only_updated_data(self):
    """In-place CIFAR save must only rewrite the batches affected by
    put()/remove() since the last export; the on-disk layout ends up
    with just the remaining subsets plus batches.meta."""
    expected = Dataset.from_iterable([
        DatasetItem(1, subset='a', image=np.ones((2, 1, 3)),
            annotations=[Label(0)]),
        DatasetItem(2, subset='a', image=np.ones((3, 2, 3)),
            annotations=[Label(1)]),
        DatasetItem(2, subset='b', image=np.ones((2, 2, 3)),
            annotations=[Label(1)]),
    ], categories=['a', 'b', 'c', 'd'])

    dataset = Dataset.from_iterable([
        DatasetItem(1, subset='a', image=np.ones((2, 1, 3)),
            annotations=[Label(0)]),
        DatasetItem(2, subset='b', image=np.ones((2, 2, 3)),
            annotations=[Label(1)]),
        DatasetItem(3, subset='c', image=np.ones((2, 3, 3)),
            annotations=[Label(2)]),
    ], categories=['a', 'b', 'c', 'd'])

    with TestDir() as path:
        dataset.export(path, 'cifar', save_images=True)

        # Add an item to subset 'a' and drop subset 'c' entirely.
        dataset.put(DatasetItem(2, subset='a',
            image=np.ones((3, 2, 3)), annotations=[Label(1)]))
        dataset.remove(3, 'c')
        dataset.save(save_images=True)

        # Subset 'c' batch must be gone from disk.
        self.assertEqual({'a', 'b', 'batches.meta'},
            set(os.listdir(path)))
        compare_datasets(self, expected,
            Dataset.import_from(path, 'cifar'), require_images=True)
def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
    """Item ids with non-ASCII characters and spaces must survive an
    ImageNet round-trip."""
    src = Dataset.from_iterable([
        DatasetItem(id="label_0/кириллица с пробелом",
            image=np.ones((8, 8, 3)),
            annotations=[Label(0)]),
    ], categories=['label_0'])

    with TestDir() as test_dir:
        ImagenetConverter.convert(src, test_dir, save_images=True)
        reloaded = Dataset.import_from(test_dir, 'imagenet')

        compare_datasets(self, src, reloaded, require_images=True)