def test_can_import(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0, 4, 4, 8, label=2, attributes={
                    'occluded': False,
                    'visibility': 1.0,
                    'ignored': False,
                }),
            ]
        ),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(10)),
    })

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \
        .make_dataset()

    compare_datasets(self, expected_dataset, dataset)
def test_can_import(self):
    target_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train',
            image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0, 4, 4, 8, label=2),
                Bbox(0, 4, 4, 4, label=3),
                Bbox(2, 4, 4, 4),
            ],
            attributes={'source_id': '1'}),
        DatasetItem(id=2, subset='val',
            image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(1, 2, 4, 2, label=3),
            ],
            attributes={'source_id': '2'}),
        DatasetItem(id=3, subset='test',
            image=np.ones((5, 4, 3)) * 3,
            attributes={'source_id': '3'}),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(10)),
    })

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \
        .make_dataset()

    compare_datasets(self, target_dataset, dataset)
def test_can_import(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
            subset='val', attributes={'id': 1},
            annotations=[
                Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
                    id=1, group=1, attributes={'is_crowd': False}),
                Mask(np.array(
                        [[1, 0, 0, 1, 0]] * 5 +
                        [[1, 1, 1, 1, 0]] * 5
                    ), label=0,
                    id=2, group=2, attributes={'is_crowd': True}),
            ]
        ),
    ], categories=['TEST'])

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'coco') \
        .make_dataset()

    compare_datasets(self, expected_dataset, dataset)
def test_can_save_and_load(self):
    with TestDir() as test_dir:
        source_dataset = self.TestExtractor()

        converter = DatumaroConverter(save_images=True)
        converter(source_dataset, test_dir)

        project = Project.import_from(test_dir, 'datumaro')
        parsed_dataset = project.make_dataset()

        self.assertListEqual(
            sorted(source_dataset.subsets()),
            sorted(parsed_dataset.subsets()),
        )
        self.assertEqual(len(source_dataset), len(parsed_dataset))

        for subset_name in source_dataset.subsets():
            source_subset = source_dataset.get_subset(subset_name)
            parsed_subset = parsed_dataset.get_subset(subset_name)
            self.assertEqual(len(source_subset), len(parsed_subset))
            for idx, (item_a, item_b) in enumerate(
                    zip(source_subset, parsed_subset)):
                self.assertEqual(item_a, item_b, '%s:\n%s\nvs.\n%s\n' % \
                    (idx, item_to_str(item_a), item_to_str(item_b)))

        self.assertEqual(
            source_dataset.categories(),
            parsed_dataset.categories())
def test_can_import(self):
    class DstExtractor(Extractor):
        def __iter__(self):
            return iter([
                DatasetItem(id=1, subset='train',
                    image=np.ones((10, 15, 3)),
                    annotations=[
                        Bbox(0, 2, 4, 2, label=2),
                        Bbox(3, 3, 2, 3, label=4),
                    ]),
            ])

        def categories(self):
            label_categories = LabelCategories()
            for i in range(10):
                label_categories.add('label_' + str(i))
            return {
                AnnotationType.label: label_categories,
            }

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \
        .make_dataset()

    compare_datasets(self, DstExtractor(), dataset)
def test_can_import(self):
    class DstExtractor(Extractor):
        def __iter__(self):
            return iter([
                DatasetItem(id=1, image=np.ones((16, 16, 3)),
                    annotations=[
                        Bbox(0, 4, 4, 8, label=2, attributes={
                            'occluded': False,
                            'visibility': 1.0,
                            'ignored': False,
                        }),
                    ]),
            ])

        def categories(self):
            label_cat = LabelCategories()
            for label in range(10):
                label_cat.add('label_' + str(label))
            return {
                AnnotationType.label: label_cat,
            }

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \
        .make_dataset()

    compare_datasets(self, DstExtractor(), dataset)
def test_can_import(self):
    class DstExtractor(Extractor):
        def __iter__(self):
            return iter([
                DatasetItem(id=1, image=np.ones((10, 5, 3)), subset='val',
                    annotations=[
                        Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0,
                            id=1, group=1, attributes={'is_crowd': False}),
                        Mask(np.array(
                                [[1, 0, 0, 1, 0]] * 5 +
                                [[1, 1, 1, 1, 0]] * 5
                            ), label=0,
                            id=2, group=2, attributes={'is_crowd': True}),
                    ]),
            ])

        def categories(self):
            label_cat = LabelCategories()
            label_cat.add('TEST')
            return {AnnotationType.label: label_cat}

    with TestDir() as test_dir:
        self.COCO_dataset_generate(test_dir)

        dataset = Project.import_from(test_dir, 'coco').make_dataset()

        compare_datasets(self, DstExtractor(), dataset)
def test_can_save_and_load(self):
    with TestDir() as test_dir:
        source_dataset = self.TestExtractor()

        converter = DatumaroConverter(save_images=True, apply_colormap=True)
        converter(source_dataset, test_dir.path)

        project = Project.import_from(test_dir.path, 'datumaro')
        parsed_dataset = project.make_dataset()

        self.assertListEqual(
            sorted(source_dataset.subsets()),
            sorted(parsed_dataset.subsets()),
        )
        self.assertEqual(len(source_dataset), len(parsed_dataset))

        for subset_name in source_dataset.subsets():
            source_subset = source_dataset.get_subset(subset_name)
            parsed_subset = parsed_dataset.get_subset(subset_name)
            for idx, (item_a, item_b) in enumerate(
                    zip_longest(source_subset, parsed_subset)):
                self.assertEqual(item_a, item_b, str(idx))

        self.assertEqual(
            source_dataset.categories(),
            parsed_dataset.categories())
def _test_save_and_load(self, source_dataset, converter_type, test_dir,
        importer_params=None):
    converter = converter_type()
    converter(source_dataset, test_dir.path)

    if not importer_params:
        importer_params = {}
    project = Project.import_from(test_dir.path, 'ms_coco',
        **importer_params)
    parsed_dataset = project.make_dataset()

    source_subsets = [s if s else DEFAULT_SUBSET_NAME
        for s in source_dataset.subsets()]
    self.assertListEqual(
        sorted(source_subsets),
        sorted(parsed_dataset.subsets()),
    )

    self.assertEqual(len(source_dataset), len(parsed_dataset))

    for item_a in source_dataset:
        item_b = find(parsed_dataset, lambda x: x.id == item_a.id)
        self.assertFalse(item_b is None)
        self.assertEqual(len(item_a.annotations), len(item_b.annotations))
        for ann_a in item_a.annotations:
            ann_b = find(item_b.annotations, lambda x: \
                x.id == ann_a.id and \
                x.type == ann_a.type and x.group == ann_a.group)
            self.assertEqual(ann_a, ann_b, 'id: ' + str(ann_a.id))
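# A minimal usage sketch for the round-trip helper above. The converter class
# name is hypothetical (any class whose instances accept (dataset, path) fits
# the converter_type contract); only the helper's own signature is assumed here.
def test_can_save_and_load_bboxes(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='val',
            annotations=[
                Bbox(0, 1, 2, 3, label=0, id=1, group=1),
            ]),
    ], categories=['label_0'])

    with TestDir() as test_dir:
        # CocoBboxConverter is an illustrative placeholder, not a verified name
        self._test_save_and_load(source_dataset, CocoBboxConverter, test_dir)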
def test_can_import(self):
    with TestDir() as temp_dir:
        self.COCO_dataset_generate(temp_dir.path)
        project = Project.import_from(temp_dir.path, 'ms_coco')
        dataset = project.make_dataset()

        self.assertListEqual(['val'], sorted(dataset.subsets()))
        self.assertEqual(1, len(dataset))

        item = next(iter(dataset))
        self.assertTrue(item.has_image)
        self.assertEqual(np.sum(item.image), np.prod(item.image.shape))
        self.assertEqual(4, len(item.annotations))

        ann_1 = find(item.annotations, lambda x: x.id == 1)
        ann_1_poly = find(item.annotations, lambda x: \
            x.group == ann_1.id and x.type == AnnotationType.polygon)
        self.assertFalse(ann_1 is None)
        self.assertFalse(ann_1_poly is None)

        ann_2 = find(item.annotations, lambda x: x.id == 2)
        ann_2_mask = find(item.annotations, lambda x: \
            x.group == ann_2.id and x.type == AnnotationType.mask)
        self.assertFalse(ann_2 is None)
        self.assertFalse(ann_2_mask is None)
def test_can_load(self):
    with TestDir() as test_dir:
        source_dataset = self.TestExtractor()

        ImageDirConverter()(source_dataset, save_dir=test_dir)

        project = Project.import_from(test_dir, 'image_dir')
        parsed_dataset = project.make_dataset()

        compare_datasets(self, source_dataset, parsed_dataset)
def test_can_import(self):
    with TestDir() as test_dir:
        subsets = generate_dummy_voc(test_dir)

        dataset = Project.import_from(test_dir, 'voc').make_dataset()

        self.assertEqual(len(VOC.VocTask), len(dataset.sources))
        self.assertEqual(set(subsets), set(dataset.subsets()))
        self.assertEqual(
            sum([len(s) for _, s in subsets.items()]),
            len(dataset))
def test_can_import_with_custom_importer(self):
    class TestImporter:
        def __call__(self, path, subset=None):
            return Project({
                'project_filename': path,
                'subsets': [subset],
            })

    path = 'path'
    importer_name = 'test_importer'

    env = Environment()
    env.importers.register(importer_name, TestImporter)

    project = Project.import_from(path, importer_name, env,
        subset='train')

    self.assertEqual(path, project.config.project_filename)
    self.assertListEqual(['train'], project.config.subsets)
def test_can_import(self):
    class DstExtractor(TestExtractorBase):
        def __iter__(self):
            return iter([
                DatasetItem(id='2007_000001', subset='train',
                    image=Image(path='2007_000001.jpg', size=(20, 10)),
                    annotations=[
                        Label(self._label(l.name))
                        for l in VOC.VocLabel if l.value % 2 == 1
                    ] + [
                        Bbox(1, 2, 2, 2, label=self._label('cat'),
                            attributes={
                                'pose': VOC.VocPose(1).name,
                                'truncated': True,
                                'difficult': False,
                                'occluded': False,
                            },
                            id=1, group=1,
                        ),
                        Bbox(4, 5, 2, 2, label=self._label('person'),
                            attributes={
                                'truncated': False,
                                'difficult': False,
                                'occluded': False,
                                **{
                                    a.name: a.value % 2 == 1
                                    for a in VOC.VocAction
                                }
                            },
                            id=2, group=2,
                        ),
                        Bbox(5.5, 6, 2, 2,
                            label=self._label(VOC.VocBodyPart(1).name),
                            group=2
                        ),
                        Mask(image=np.ones([5, 10]),
                            label=self._label(VOC.VocLabel(2).name),
                            group=1,
                        ),
                    ]
                ),
                DatasetItem(id='2007_000002', subset='test',
                    image=np.zeros((20, 10, 3))),
            ])

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'voc').make_dataset()

    compare_datasets(self, DstExtractor(), dataset)
def test_can_load(self):
    class TestExtractor(Extractor):
        def __iter__(self):
            return iter([
                DatasetItem(id=1, image=np.ones((10, 6, 3))),
                DatasetItem(id=2, image=np.ones((5, 4, 3))),
            ])

    with TestDir() as test_dir:
        source_dataset = TestExtractor()

        ImageDirConverter.convert(source_dataset, save_dir=test_dir)

        project = Project.import_from(test_dir, 'image_dir')
        parsed_dataset = project.make_dataset()

        compare_datasets(self, source_dataset, parsed_dataset)
def test_can_import(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((8, 8, 3)),
            annotations=[Label(0), Label(1)]),
        DatasetItem(id='2', image=np.ones((10, 10, 3)),
            annotations=[Label(0)]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(label) for label in range(2)),
    })

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'imagenet') \
        .make_dataset()

    compare_datasets(self, expected_dataset, dataset, require_images=True)
def test_relative_paths(self):
    class TestExtractor(Extractor):
        def __iter__(self):
            return iter([
                DatasetItem(id='1', image=np.ones((4, 2, 3))),
                DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
                DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
            ])

    with TestDir() as test_dir:
        source_dataset = TestExtractor()

        ImageDirConverter.convert(source_dataset, save_dir=test_dir)

        project = Project.import_from(test_dir, 'image_dir')
        parsed_dataset = project.make_dataset()

        compare_datasets(self, source_dataset, parsed_dataset)
def _test_save_and_load(self, source_dataset, converter, test_dir,
        target_dataset=None, importer_args=None):
    converter(source_dataset, test_dir)

    if importer_args is None:
        importer_args = {}
    parsed_dataset = Project.import_from(test_dir, 'datumaro',
        **importer_args).make_dataset()

    if target_dataset is None:
        target_dataset = source_dataset

    compare_datasets_strict(self,
        expected=target_dataset, actual=parsed_dataset)
def test_can_detect_and_import(self):
    env = Environment()
    env.importers.items = {DEFAULT_FORMAT: env.importers[DEFAULT_FORMAT]}
    env.extractors.items = {DEFAULT_FORMAT: env.extractors[DEFAULT_FORMAT]}

    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[Label(2)]),
    ], categories=['a', 'b', 'c'])

    with TestDir() as test_dir:
        source_dataset.save(test_dir)

        project = Project.import_from(test_dir, env=env)
        imported_dataset = project.make_dataset()

        self.assertEqual(
            next(iter(project.config.sources.values())).format,
            DEFAULT_FORMAT)
        compare_datasets(self, source_dataset, imported_dataset)
def test_can_import(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id=1, subset='train',
            image=np.ones((10, 15, 3)),
            annotations=[
                Bbox(0, 2, 4, 2, label=2),
                Bbox(3, 3, 2, 3, label=4),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_' + str(i) for i in range(10)),
    })

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \
        .make_dataset()

    compare_datasets(self, expected_dataset, dataset)
def test_save_and_load(test, source_dataset, converter, test_dir, importer,
        target_dataset=None, importer_args=None, compare=None):
    converter(source_dataset, test_dir)

    if importer_args is None:
        importer_args = {}
    parsed_dataset = Project.import_from(test_dir, importer,
        **importer_args).make_dataset()

    if target_dataset is None:
        target_dataset = source_dataset

    if not compare:
        compare = compare_datasets
    compare(test, expected=target_dataset, actual=parsed_dataset)
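# A hedged example of driving the shared helper above from a format test.
# The converter callable and importer name follow patterns used elsewhere in
# this code (ImageDirConverter.convert, the 'datumaro' importer); binding
# converter options with functools.partial is an assumption, not a verified
# project convention.
def test_roundtrip_example(self):
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[Label(0)]),
    ], categories=['label_0'])

    with TestDir() as test_dir:
        test_save_and_load(self, source_dataset,
            partial(DatumaroConverter.convert, save_images=True),
            test_dir, 'datumaro')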
def test_can_import(self):
    target = Dataset.from_iterable([
        DatasetItem(id=1, subset='train', image=np.ones((5, 1)),
            annotations=[
                Mask(np.array([[0, 0, 0, 1, 0]]), label=3,
                    attributes={'track_id': 1}),
                Mask(np.array([[0, 0, 1, 0, 0]]), label=2,
                    attributes={'track_id': 2}),
                Mask(np.array([[1, 1, 0, 0, 0]]), label=3,
                    attributes={'track_id': 3}),
            ]),
        DatasetItem(id=2, subset='train', image=np.ones((5, 1)),
            annotations=[
                Mask(np.array([[1, 0, 0, 0, 0]]), label=3,
                    attributes={'track_id': 2}),
            ]),
        DatasetItem(id=3, subset='val', image=np.ones((5, 1)),
            annotations=[
                Mask(np.array([[0, 1, 0, 0, 0]]), label=0,
                    attributes={'track_id': 1}),
            ]),
    ], categories=['a', 'b', 'c', 'd'])

    parsed = Project.import_from(DUMMY_DATASET_DIR, 'mots').make_dataset()
    compare_datasets(self, expected=target, actual=parsed)
def test_can_import(self):
    class DstExtractor(Extractor):
        def __iter__(self):
            return iter([
                DatasetItem(id=1, subset='train',
                    image=np.ones((16, 16, 3)),
                    annotations=[
                        Bbox(0, 4, 4, 8, label=2),
                        Bbox(0, 4, 4, 4, label=3),
                        Bbox(2, 4, 4, 4),
                    ],
                    attributes={'source_id': '1'}),
                DatasetItem(id=2, subset='val',
                    image=np.ones((8, 8, 3)),
                    annotations=[
                        Bbox(1, 2, 4, 2, label=3),
                    ],
                    attributes={'source_id': '2'}),
                DatasetItem(id=3, subset='test',
                    image=np.ones((5, 4, 3)) * 3,
                    attributes={'source_id': '3'}),
            ])

        def categories(self):
            label_cat = LabelCategories()
            for label in range(10):
                label_cat.add('label_' + str(label))
            return {
                AnnotationType.label: label_cat,
            }

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \
        .make_dataset()

    compare_datasets(self, DstExtractor(), dataset)
def import_command(args):
    project_dir = osp.abspath(args.dst_dir)
    project_path = make_project_path(project_dir)
    if not args.overwrite and osp.isfile(project_path):
        log.error("Project file '%s' already exists" % project_path)
        return 1

    project_name = args.name
    if project_name is None:
        project_name = osp.basename(project_dir)

    log.info("Importing project from '%s' as '%s'" % \
        (args.source_path, args.format))

    source_path = osp.abspath(args.source_path)
    project = Project.import_from(source_path, args.format)
    project.config.project_name = project_name
    project.config.project_dir = project_dir
    dataset = project.make_dataset()
    dataset.save(merge=True, save_images=False)

    log.info("Project has been created at '%s'" % project_dir)

    return 0
def test_can_import(self):
    expected_dataset = Dataset.from_iterable([
        DatasetItem(id='1', subset='train',
            image=np.zeros((8, 6, 3)),
            annotations=[Label(0)]),
        DatasetItem(id='2', subset='train',
            image=np.zeros((2, 8, 3)),
            annotations=[Label(5)]),
        DatasetItem(id='3', subset='train',
            annotations=[Label(3)]),
        DatasetItem(id='4', subset='train',
            annotations=[Label(5)]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label_%s' % label for label in range(10)),
    })

    dataset = Project.import_from(DUMMY_DATASET_DIR, 'imagenet_txt') \
        .make_dataset()

    compare_datasets(self, expected_dataset, dataset, require_images=True)
def convert_to_datumaro(dataset_path, args):
    """datum project import -f {dataset_format} -i {dataset_path} -o {project_dir}"""
    dataset_format = detect_format(dataset_path, args)
    if dataset_format == "datumaro":
        return dataset_path

    tmp_project_dir = project_path(dataset_path, ".datumaro")

    print(f"Converting {dataset_path} to datumaro format")
    project = Project.import_from(dataset_path, dataset_format)
    project.config.project_name = dataset_path.name
    project.config.project_dir = str(tmp_project_dir)

    print("Checking dataset...")
    dataset = project.make_dataset()  # check dataset

    # if dataset_format in ['tf_detection_api',...]:
    print("Cloning data...")
    dataset.save(merge=True, save_images=True)
    # else:
    #     project.save()

    return tmp_project_dir
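# The docstring above corresponds to a CLI call along these lines (paths and
# the format are placeholders; only the flags named in the docstring are used):
#
#   datum project import -f coco -i ./datasets/coco -o ./datasets/coco.datumaro
#
# A sketch of calling the helper directly, assuming a pathlib.Path input as
# the uses of dataset_path.name and project_path() suggest:
#
#   from pathlib import Path
#   tmp_project_dir = convert_to_datumaro(Path('./datasets/coco'), args)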
def test_can_import(self):
    class DstExtractor(Extractor):
        def __iter__(self):
            img1 = np.ones((77, 102, 3)) * 255
            img1[6:32, 7:41] = 0

            mask1 = np.zeros((77, 102), dtype=int)
            mask1[67:69, 58:63] = 1

            mask2 = np.zeros((77, 102), dtype=int)
            mask2[13:25, 54:71] = [
                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
                [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            ]

            return iter([
                DatasetItem(id='img1', image=img1, annotations=[
                    Polygon([43, 34, 45, 34, 45, 37, 43, 37],
                        label=0, id=0,
                        attributes={
                            'occluded': False,
                            'username': '******',
                        }),
                    Mask(mask1, label=1, id=1,
                        attributes={
                            'occluded': False,
                            'username': '******',
                        }),
                    Polygon([30, 12, 42, 21, 24, 26, 15, 22,
                            18, 14, 22, 12, 27, 12],
                        label=2, group=2, id=2,
                        attributes={
                            'a1': True,
                            'occluded': True,
                            'username': '******',
                        }),
                    Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25],
                        label=3, group=2, id=3,
                        attributes={
                            'kj': True,
                            'occluded': False,
                            'username': '******',
                        }),
                    Bbox(13, 19, 10, 11, label=4, group=2, id=4,
                        attributes={
                            'hg': True,
                            'occluded': True,
                            'username': '******',
                        }),
                    Mask(mask2, label=5, group=1, id=5,
                        attributes={
                            'd': True,
                            'occluded': False,
                            'username': '******',
                        }),
                    Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
                        label=6, group=1, id=6,
                        attributes={
                            'gfd lkj lkj hi': True,
                            'occluded': False,
                            'username': '******',
                        }),
                ]),
            ])

        def categories(self):
            label_cat = LabelCategories()
            label_cat.add('window')
            label_cat.add('license plate')
            label_cat.add('o1')
            label_cat.add('q1')
            label_cat.add('b1')
            label_cat.add('m1')
            label_cat.add('hg')
            return {
                AnnotationType.label: label_cat,
            }

    parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \
        .make_dataset()
    compare_datasets(self, expected=DstExtractor(), actual=parsed)
def import_command(args):
    project_dir = osp.abspath(args.dst_dir)

    project_env_dir = osp.join(project_dir, DEFAULT_CONFIG.env_dir)
    if osp.isdir(project_env_dir) and os.listdir(project_env_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % project_env_dir)
        else:
            shutil.rmtree(project_env_dir, ignore_errors=True)

    own_dataset_dir = osp.join(project_dir, DEFAULT_CONFIG.dataset_dir)
    if osp.isdir(own_dataset_dir) and os.listdir(own_dataset_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % own_dataset_dir)
        else:
            # NOTE: remove the dir to avoid using data from the previous project
            shutil.rmtree(own_dataset_dir)

    project_name = args.name
    if project_name is None:
        project_name = osp.basename(project_dir)

    env = Environment()
    log.info("Importing project from '%s'" % args.source)

    extra_args = {}
    fmt = args.format
    if not args.format:
        if args.extra_args:
            raise CliException("Extra args can not be used without a format")

        log.info("Trying to detect dataset format...")

        matches = env.detect_dataset(args.source)
        if len(matches) == 0:
            log.error("Failed to detect dataset format. "
                "Try to specify the format with the '-f/--format' parameter.")
            return 1
        elif len(matches) != 1:
            log.error("Multiple formats match the dataset: %s. "
                "Try to specify the format with the '-f/--format' parameter.",
                ', '.join(matches))
            return 1

        fmt = matches[0]
    elif args.extra_args:
        if fmt in env.importers:
            arg_parser = env.importers[fmt]
        elif fmt in env.extractors:
            arg_parser = env.extractors[fmt]
        else:
            raise CliException("Unknown format '%s'. A format can be added "
                "by providing Extractor and Importer plugins" % fmt)

        if hasattr(arg_parser, 'parse_cmdline'):
            extra_args = arg_parser.parse_cmdline(args.extra_args)
        else:
            raise CliException("Format '%s' does not accept "
                "extra parameters" % fmt)

    log.info("Importing project as '%s'" % fmt)

    project = Project.import_from(osp.abspath(args.source), fmt, **extra_args)
    project.config.project_name = project_name
    project.config.project_dir = project_dir

    if not args.skip_check or args.copy:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()
    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
    else:
        project.save()

    log.info("Project has been created at '%s'" % project_dir)

    return 0
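# Illustrative shell invocations for the command above. The flag names
# (-f/--format, --overwrite, --copy, --skip-check) are referenced in the code,
# but the argparse wiring lives elsewhere, so treat these as a sketch:
#
#   datum project import -i ./VOC2012 -o ./voc_project -f voc
#   datum project import -i ./dataset -o ./project            # auto-detect the format
#   datum project import -i ./dataset -o ./project --copy     # also clone the data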
def test_can_import(self):
    img1 = np.ones((77, 102, 3)) * 255
    img1[6:32, 7:41] = 0

    mask1 = np.zeros((77, 102), dtype=int)
    mask1[67:69, 58:63] = 1

    mask2 = np.zeros((77, 102), dtype=int)
    mask2[13:25, 54:71] = [
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ]

    target_dataset = Dataset.from_iterable([
        DatasetItem(id='example_folder/img1', image=img1,
            annotations=[
                Polygon([43, 34, 45, 34, 45, 37, 43, 37],
                    label=0, id=0,
                    attributes={'occluded': False, 'username': '******'}),
                Mask(mask1, label=1, id=1,
                    attributes={'occluded': False, 'username': '******'}),
                Polygon([30, 12, 42, 21, 24, 26, 15, 22,
                        18, 14, 22, 12, 27, 12],
                    label=2, group=2, id=2,
                    attributes={'a1': True, 'occluded': True,
                        'username': '******'}),
                Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25],
                    label=3, group=2, id=3,
                    attributes={'kj': True, 'occluded': False,
                        'username': '******'}),
                Bbox(13, 19, 10, 11, label=4, group=2, id=4,
                    attributes={'hg': True, 'occluded': True,
                        'username': '******'}),
                Mask(mask2, label=5, group=1, id=5,
                    attributes={'d': True, 'occluded': False,
                        'username': '******'}),
                Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22],
                    label=6, group=1, id=6,
                    attributes={'gfd lkj lkj hi': True, 'occluded': False,
                        'username': '******'}),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable([
            'window', 'license plate', 'o1', 'q1', 'b1', 'm1', 'hg',
        ]),
    })

    parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \
        .make_dataset()
    compare_datasets(self, expected=target_dataset, actual=parsed)