def test_transform_fails_on_inplace_update_of_stage(self):
    """An in-place transform of a committed (read-only) stage must fail."""
    with TestDir() as test_dir:
        dataset_url = osp.join(test_dir, 'dataset')
        Dataset.from_iterable([
            DatasetItem(id=1, annotations=[Bbox(1, 2, 3, 4, label=1)]),
        ], categories=['a', 'b']).export(dataset_url, 'coco',
            save_images=True)

        project_dir = osp.join(test_dir, 'proj')
        # Close the project before invoking the CLI so it holds no locks.
        with Project.init(project_dir) as project:
            project.import_source('source-1', dataset_url, 'coco',
                no_cache=True)
            project.commit('first commit')

        with self.subTest('without overwrite'):
            # Without --overwrite the command must refuse up front.
            run(self, 'transform', '-p', project_dir,
                '-t', 'random_split', 'HEAD:source-1',
                expected_code=1)

        with self.subTest('with overwrite'):
            # Even with --overwrite, a committed revision is read-only.
            with self.assertRaises(ReadonlyDatasetError):
                run(self, 'transform', '-p', project_dir, '--overwrite',
                    '-t', 'random_split', 'HEAD:source-1')
def test_ambiguous_format(self):
    """A directory matching several formats must raise WrongRevpathError
    with the expected set of underlying problems."""
    test_dir = scope_add(TestDir())
    dataset_url = osp.join(test_dir, 'source')

    # Build an ambiguous dataset by mixing annotation files taken
    # from assets of two different dataset formats.
    annotation_dir = osp.join(dataset_url, 'training/street')
    assets_dir = osp.join(osp.dirname(__file__), '../assets')
    os.makedirs(annotation_dir)
    for asset in (
        'ade20k2017_dataset/dataset/training/street/1_atr.txt',
        'ade20k2020_dataset/dataset/training/street/1.json',
    ):
        shutil.copy(osp.join(assets_dir, asset), annotation_dir)

    with self.subTest("no context"):
        with self.assertRaises(WrongRevpathError) as cm:
            parse_full_revpath(dataset_url)
        # No project context: project lookup fails, format is ambiguous.
        self.assertEqual(
            {ProjectNotFoundError, MultipleFormatsMatchError},
            {type(e) for e in cm.exception.problems})

    proj_dir = osp.join(test_dir, 'proj')
    proj = scope_add(Project.init(proj_dir))

    with self.subTest("in context"):
        with self.assertRaises(WrongRevpathError) as cm:
            parse_full_revpath(dataset_url, proj)
        # With a project context: the path is no known target, and the
        # format remains ambiguous.
        self.assertEqual(
            {UnknownTargetError, MultipleFormatsMatchError},
            {type(e) for e in cm.exception.problems})
def test_can_release_resources_on_checkout(self, fxt_sample_video):
    """Checking out an older revision should release resources held by
    sources added in later revisions (here, extracted video frames)."""
    test_dir = scope_add(TestDir())
    project = scope_add(Project.init(test_dir))

    src_url = osp.join(test_dir, 'src')
    src = Dataset.from_iterable([
        DatasetItem(1),
    ], categories=['a'])
    src.save(src_url)
    project.add_source(src_url, 'datumaro')
    project.commit('commit 1')

    # Replace the source with a video-frames source (4 frames).
    project.remove_source('src', keep_data=False)
    project.import_source('src', osp.dirname(fxt_sample_video),
        'video_frames', rpath=osp.basename(fxt_sample_video))
    project.commit('commit 2')
    assert len(project.working_tree.make_dataset()) == 4
    assert osp.isdir(osp.join(test_dir, 'src'))

    # Rolling back to the first commit should restore the 1-item source.
    project.checkout('HEAD~1')
    assert len(project.working_tree.make_dataset()) == 1
def create_command(args):
    """Create a new empty project at args.dst_dir.

    Refuses to replace a non-empty existing project unless
    args.overwrite is set. Returns 0 on success; raises CliException
    when the target already exists and --overwrite was not given.
    """
    project_dir = osp.abspath(args.dst_dir)

    existing_project_dir = Project.find_project_dir(project_dir)
    if existing_project_dir and os.listdir(existing_project_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % existing_project_dir)
        rmtree(existing_project_dir)

    log.info("Creating project at '%s'" % project_dir)
    Project.init(project_dir)
    log.info("Project has been created at '%s'" % project_dir)

    return 0
def test_can_release_resources_on_exit(self, fxt_sample_video):
    """Leaving the Scope must dispose of the project and its temp dir."""
    with Scope() as scope:
        test_dir = scope.add(TestDir())
        project = scope.add(Project.init(test_dir))

        project.import_source('src', osp.dirname(fxt_sample_video),
            'video_frames', rpath=osp.basename(fxt_sample_video))

        # The sample video yields 4 frames.
        assert len(project.working_tree.make_dataset()) == 4

    # After the scope exits, the whole temp directory must be gone.
    assert not osp.exists(test_dir)
def test_can_release_resources_on_remove(self, fxt_sample_video):
    """Removing a source with keep_data=False must delete its data dir."""
    test_dir = scope_add(TestDir())
    project = scope_add(Project.init(test_dir))

    project.import_source('src', osp.dirname(fxt_sample_video),
        'video_frames', rpath=osp.basename(fxt_sample_video))
    project.commit('commit 1')

    # The sample video yields 4 frames and a source data directory.
    assert len(project.working_tree.make_dataset()) == 4
    assert osp.isdir(osp.join(test_dir, 'src'))

    project.remove_source('src', keep_data=False)

    assert not osp.exists(osp.join(test_dir, 'src'))
def test_can_parse(self):
    """parse_full_revpath() should resolve every supported revpath form,
    returning a dataset plus a project only for project-path forms."""
    test_dir = scope_add(TestDir())
    dataset_url = osp.join(test_dir, 'source')
    Dataset.from_iterable([DatasetItem(1)]).save(dataset_url)

    proj_dir = osp.join(test_dir, 'proj')
    proj = scope_add(Project.init(proj_dir))
    proj.import_source('source-1', dataset_url, format=DEFAULT_FORMAT)
    ref = proj.commit("second commit", allow_empty=True)

    def check(label, revpath, context, expects_project):
        # Shared body for every revpath form: parse, clean up any
        # returned project, and verify the result types.
        with self.subTest(label):
            dataset, project = parse_full_revpath(revpath, context)
            if project:
                scope_add(project)
            self.assertTrue(isinstance(dataset, IDataset))
            if expects_project:
                self.assertTrue(isinstance(project, Project))
            else:
                self.assertEqual(None, project)

    # Project-path forms return the opened project alongside the dataset.
    check("project", proj_dir, None, True)
    check("project ref", f"{proj_dir}@{ref}", None, True)
    check("project ref source", f"{proj_dir}@{ref}:source-1", None, True)
    check("project ref source stage",
        f"{proj_dir}@{ref}:source-1.root", None, True)

    # In-context forms resolve against the supplied project and
    # return no new project object.
    check("ref", ref, proj, False)
    check("ref source", f"{ref}:source-1", proj, False)
    check("ref source stage", f"{ref}:source-1.root", proj, False)
    check("source", "source-1", proj, False)
    check("source stage", "source-1.root", proj, False)
    check("dataset (in context)", dataset_url, proj, False)
    check("dataset format (in context)",
        f"{dataset_url}:datumaro", proj, False)

    # Plain dataset paths also work without any project context.
    check("dataset (no context)", dataset_url, None, False)
    check("dataset format (no context)",
        f"{dataset_url}:datumaro", None, False)
def test_can_run_self_merge(self):
    """'merge' of an external COCO dataset into a project with a VOC
    source should produce merged annotations with remapped labels."""
    dataset1 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 3, label=0),
            ]),
    ], categories=['a', 'b'])

    dataset2 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=1),
                Bbox(5, 6, 2, 3, label=2),
            ]),
    ], categories=['a', 'b', 'c'])

    # Labels are shifted by 1 because VOC adds a 'background' class;
    # merged boxes carry voting scores and default VOC attributes.
    expected = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=2, id=1, group=1, attributes={
                    'score': 0.5, 'occluded': False,
                    'difficult': False, 'truncated': False,
                }),
                Bbox(5, 6, 2, 3, label=3, id=2, group=2, attributes={
                    'score': 0.5, 'occluded': False,
                    'difficult': False, 'truncated': False,
                }),
                Bbox(1, 2, 3, 3, label=1, id=1, group=1, attributes={
                    'score': 0.5, 'is_crowd': False,
                }),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            ['background', 'a', 'b', 'c']),
        AnnotationType.mask: MaskCategories(VOC.generate_colormap(4)),
    })

    with TestDir() as test_dir:
        dataset1_url = osp.join(test_dir, 'dataset1')
        dataset2_url = osp.join(test_dir, 'dataset2')

        dataset1.export(dataset1_url, 'coco', save_images=True)
        dataset2.export(dataset2_url, 'voc', save_images=True)

        proj_dir = osp.join(test_dir, 'proj')
        # Close the project before invoking the CLI so it holds no locks.
        with Project.init(proj_dir) as project:
            project.import_source('source', dataset2_url, 'voc')

        result_dir = osp.join(test_dir, 'result')
        run(self, 'merge', '-o', result_dir, '-p', proj_dir,
            dataset1_url + ':coco')

        compare_datasets(self, expected, Dataset.load(result_dir),
            require_images=True)