コード例 #1
0
ファイル: test_project.py プロジェクト: Eric2370/datumaro-1
    def test_add_source(self):
        """A source registered via add_source() is retrievable unchanged."""
        name = 'source'
        source = Source({'url': 'path', 'format': 'ext'})
        project = Project()

        project.add_source(name, source)

        retrieved = project.get_source(name)
        self.assertIsNotNone(retrieved)
        self.assertEqual(retrieved, source)
コード例 #2
0
ファイル: test_project.py プロジェクト: zeeshy30/cvat-1
    def test_can_have_project_source(self):
        """A generated project can be consumed as a source of another project."""
        with TestDir() as test_dir:
            # Materialize a project on disk to serve as the nested source.
            Project.generate(test_dir)

            outer = Project()
            outer.add_source('project1', {
                'url': test_dir,
            })
            dataset = outer.make_dataset()

            self.assertTrue('project1' in dataset.sources)
コード例 #3
0
ファイル: test_project.py プロジェクト: zeeshy30/cvat-1
    def test_added_source_can_be_saved(self):
        """The project config reflects sources added via add_source()."""
        name = 'source'
        source = Source({
            'url': 'path',
        })
        project = Project()
        project.add_source(name, source)

        config = project.config

        self.assertEqual(source, config.sources[name])
コード例 #4
0
ファイル: test_project.py プロジェクト: Eric2370/datumaro-1
    def test_can_do_transform_with_custom_model(self):
        """End-to-end check of applying a registered model to a dataset.

        Builds a 2-item source dataset, runs it through a custom launcher via
        apply_model(), saves the result to disk, then reads it back with a
        custom extractor and verifies the predicted labels survive the trip.
        """
        # Source: two items whose image pixels all equal the item index,
        # each pre-annotated with the matching label.
        class TestExtractorSrc(Extractor):
            def __iter__(self):
                for i in range(2):
                    yield DatasetItem(id=i,
                                      image=np.ones([2, 2, 3]) * i,
                                      annotations=[Label(i)])

            def categories(self):
                label_cat = LabelCategories()
                label_cat.add('0')
                label_cat.add('1')
                return {AnnotationType.label: label_cat}

        # "Model": predicts the label from the first pixel value, which by
        # construction equals the item index.
        class TestLauncher(Launcher):
            def launch(self, inputs):
                for inp in inputs:
                    yield [Label(inp[0, 0, 0])]

        # Destination extractor: reads back the files apply_model() saved.
        # Relies on sorted() for a deterministic item order.
        class TestExtractorDst(Extractor):
            def __init__(self, url):
                super().__init__()
                self.items = [
                    osp.join(url, p) for p in sorted(os.listdir(url))
                ]

            def __iter__(self):
                for path in self.items:
                    with open(path, 'r') as f:
                        # Item id is the file name; the label is the first line.
                        index = osp.splitext(osp.basename(path))[0]
                        label = int(f.readline().strip())
                        yield DatasetItem(id=index, annotations=[Label(label)])

        model_name = 'model'
        launcher_name = 'custom_launcher'
        extractor_name = 'custom_extractor'

        project = Project()
        project.env.launchers.register(launcher_name, TestLauncher)
        project.env.extractors.register(extractor_name, TestExtractorSrc)
        project.add_model(model_name, {'launcher': launcher_name})
        project.add_source('source', {'format': extractor_name})

        with TestDir() as test_dir:
            # Run inference and persist the transformed dataset.
            project.make_dataset().apply_model(model=model_name,
                                               save_dir=test_dir)

            # Re-register the same format name to read the saved output back.
            result = Project.load(test_dir)
            result.env.extractors.register(extractor_name, TestExtractorDst)
            it = iter(result.make_dataset())
            item1 = next(it)
            item2 = next(it)
            self.assertEqual(0, item1.annotations[0].label)
            self.assertEqual(1, item2.annotations[0].label)
コード例 #5
0
ファイル: extractor.py プロジェクト: shivam124081/datumaro
    def __call__(self, path, **extra_params):
        """Create a Project with one source per subset found under `path`.

        Args:
            path: location to search for subsets via find_subsets().
            **extra_params: default options forwarded to every source.

        Raises:
            Exception: if no subsets are found at `path`.
        """
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        subsets = self.find_subsets(path)
        if len(subsets) == 0:
            raise Exception("Failed to find dataset at '%s'" % path)

        for desc in subsets:
            # Bug fix: extra_params used to be accepted but silently dropped.
            # Forward them as defaults; options already present in the subset
            # description keep precedence, so existing behavior is unchanged.
            params = dict(extra_params)
            params.update(desc.get('options', {}))
            desc['options'] = params

            source_name = osp.splitext(osp.basename(desc['url']))[0]
            project.add_source(source_name, desc)

        return project
コード例 #6
0
ファイル: test_project.py プロジェクト: zeeshy30/cvat-1
    def test_project_filter_can_be_applied(self):
        """extract() with an XPath filter keeps only the matching items."""
        class TestExtractor(Extractor):
            def __iter__(self):
                yield from (DatasetItem(id=i, subset='train')
                    for i in range(10))

        e_type = 'type'
        project = Project()
        project.env.extractors.register(e_type, TestExtractor)
        project.add_source('source', {'format': e_type})

        filtered = project.make_dataset().extract('/item[id < 5]')

        # ids 0..4 match the filter.
        self.assertEqual(5, len(filtered))
コード例 #7
0
ファイル: test_project.py プロジェクト: zeeshy30/cvat-1
    def test_added_source_can_be_dumped(self):
        """A saved project can be reloaded and still contain its source."""
        name = 'source'
        source = Source({
            'url': 'path',
        })
        project = Project()
        project.add_source(name, source)

        with TestDir() as test_dir:
            project.save(test_dir)

            reloaded = Project.load(test_dir)
            self.assertEqual(source, reloaded.get_source(name))
コード例 #8
0
    def __call__(self, path):
        """Build a Project with a single 'datumaro'-format source at `path`."""
        from datumaro.components.project import Project  # cyclic import

        if not osp.exists(path):
            raise Exception("Failed to find 'datumaro' dataset at '%s'" % path)

        project = Project()
        # The source is named after the dataset file, extension stripped.
        name = osp.splitext(osp.basename(path))[0]
        project.add_source(name, {
            'url': path,
            'format': self.EXTRACTOR_NAME,
        })
        return project
コード例 #9
0
ファイル: image_dir.py プロジェクト: zz202/cvat
    def __call__(self, path, **extra_params):
        """Build a Project whose single source is the image directory `path`."""
        from datumaro.components.project import Project # cyclic import
        project = Project()

        if not osp.isdir(path):
            raise Exception("Can't find a directory at '%s'" % path)

        dir_name = osp.basename(osp.normpath(path))
        # NOTE(review): the source url is the directory's base name, not the
        # full path — presumably resolved relative to the project; confirm.
        project.add_source(dir_name, {
            'url': dir_name,
            'format': self.EXTRACTOR_NAME,
            'options': dict(extra_params),
        })

        return project
コード例 #10
0
    def __call__(self, path, **extra_params):
        """Create a Project with one source per dataset found under `path`."""
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        sources = self.find_sources(osp.normpath(path))
        if not sources:
            raise Exception("Failed to find dataset at '%s'" % path)

        for desc in sources:
            # Caller-supplied params act as defaults; per-source options win.
            desc['options'] = {**extra_params, **desc.get('options', {})}

            name = osp.splitext(osp.basename(desc['url']))[0]
            project.add_source(name, desc)

        return project
コード例 #11
0
def check_data(json_path):
    """Load `json_path` as a COCO-instances source and print dataset stats."""
    # Build a one-source Datumaro project around the json file.
    project = Project()
    project.add_source('src1', {
        'url': str(json_path),
        'format': 'coco_instances',
    })

    dataset = project.make_dataset()

    # Report basic counts for the merged dataset.
    print(f'{json_path.stem}')
    print(f'num images: {num_img(dataset)}')
    print(f'num images with annotations: {num_img_with_annots(dataset)}')
    print(f'num annotations: {num_annots(dataset)}')
コード例 #12
0
ファイル: voc.py プロジェクト: benhoff/cvat
    def __call__(self, path):
        """Create a Project with one source per available VOC task at `path`."""
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        for task, extractor_type, task_dir in self._TASKS:
            subset_dir = osp.join(path, VocPath.SUBSETS_DIR, task_dir)
            if not osp.isdir(subset_dir):
                continue

            # Note: the source url is the dataset root, not the task subdir.
            project.add_source(task.name, {
                'url': path,
                'format': extractor_type,
            })

        if not len(project.config.sources):
            raise Exception("Failed to find 'voc' dataset at '%s'" % path)

        return project
コード例 #13
0
    def __call__(self, path, **extra_params):
        """Create a Project with one source per VOC task subset under `path`."""
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        subset_paths = self.find_subsets(path)
        if not subset_paths:
            raise Exception("Failed to find 'voc' dataset at '%s'" % path)

        for task, extractor_type, subset_path in subset_paths:
            # Source name combines the task and the subset file name.
            subset = osp.splitext(osp.basename(subset_path))[0]
            project.add_source('%s-%s' % (task.name, subset), {
                'url': subset_path,
                'format': extractor_type,
                'options': dict(extra_params),
            })

        return project
コード例 #14
0
ファイル: mot_format.py プロジェクト: zeeshy30/cvat-1
    def __call__(self, path, **extra_params):
        """Create a Project with one 'mot' source per annotation file at `path`.

        Args:
            path: location searched via find_subsets().
            **extra_params: options attached to every source.

        Raises:
            Exception: if no MOT annotation files are found at `path`.
        """
        from datumaro.components.project import Project # cyclic import
        project = Project()

        subsets = self.find_subsets(path)
        if len(subsets) == 0:
            raise Exception("Failed to find 'mot' dataset at '%s'" % path)

        for ann_file in subsets:
            log.info("Found a dataset at '%s'" % ann_file)

            source_name = osp.splitext(osp.basename(ann_file))[0]
            project.add_source(source_name, {
                'url': ann_file,
                'format': self._EXTRACTOR_NAME,
                # Bug fix: copy per source — storing the shared kwargs dict
                # would alias every source's options to one mutable object.
                'options': dict(extra_params),
            })

        return project
コード例 #15
0
ファイル: importer.py プロジェクト: zeeshy30/cvat-1
    def __call__(self, path, **extra_params):
        """Create a Project with one COCO source per annotation file at `path`.

        Annotation types that all carry labels (instances, person_keypoints,
        labels) cannot be merged yet, so only one of them is imported; the
        others are skipped with a warning.

        Raises:
            Exception: if no COCO subsets are found at `path`.
        """
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        subsets = self.find_subsets(path)

        if len(subsets) == 0:
            raise Exception("Failed to find 'coco' dataset at '%s'" % path)

        # TODO: should be removed when proper label merging is implemented
        conflicting_types = {
            CocoTask.instances, CocoTask.person_keypoints, CocoTask.labels
        }
        # Conflicting annotation types actually present across all subsets.
        ann_types = set(t for s in subsets.values() for t in s) \
            & conflicting_types
        # Deterministically pick one type (alphabetically first by name).
        # Only assigned when at least one conflicting type exists; the loop
        # below reads it only in that same case, so it cannot be unbound.
        if 1 <= len(ann_types):
            selected_ann_type = sorted(ann_types, key=lambda x: x.name)[0]
        if 1 < len(ann_types):
            log.warning("Not implemented: "
                "Found potentially conflicting source types with labels: %s. "
                "Only one type will be used: %s" \
                % (", ".join(t.name for t in ann_types), selected_ann_type.name))

        for ann_files in subsets.values():
            for ann_type, ann_file in ann_files.items():
                # Skip every conflicting type except the selected one.
                if ann_type in conflicting_types:
                    if ann_type is not selected_ann_type:
                        log.warning("Not implemented: "
                                    "conflicting source '%s' is skipped." %
                                    ann_file)
                        continue
                log.info("Found a dataset at '%s'" % ann_file)

                # Source name is the annotation file name without extension.
                source_name = osp.splitext(osp.basename(ann_file))[0]
                project.add_source(
                    source_name, {
                        'url': ann_file,
                        'format': self._COCO_EXTRACTORS[ann_type],
                        'options': dict(extra_params),
                    })

        return project
コード例 #16
0
    def __call__(self, path, **extra_params):
        """Create a Project with one COCO source per annotation file at `path`.

        Args:
            path: location searched via find_subsets().
            **extra_params: options attached to every source.

        Raises:
            Exception: if no COCO subsets are found at `path`.
        """
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        subsets = self.find_subsets(path)

        if len(subsets) == 0:
            raise Exception("Failed to find 'coco' dataset at '%s'" % path)

        for ann_files in subsets.values():
            for ann_type, ann_file in ann_files.items():
                source_name = osp.splitext(osp.basename(ann_file))[0]
                project.add_source(
                    source_name, {
                        'url': ann_file,
                        'format': self._COCO_EXTRACTORS[ann_type],
                        # Bug fix: copy per source — storing the shared kwargs
                        # dict would alias all sources' options together.
                        'options': dict(extra_params),
                    })

        return project
コード例 #17
0
ファイル: test_project.py プロジェクト: benhoff/cvat
    def test_project_can_merge_item_annotations(self):
        """Merging two sources with the same item unions its annotations."""
        class TestExtractor(Extractor):
            def __init__(self, url, v=None):
                super().__init__()
                self.v = v

            def __iter__(self):
                # Both variants share one annotation (LabelObject(3, x=1)),
                # so the merged item should deduplicate it.
                if self.v == 1:
                    annotations = [
                        LabelObject(2, id=3),
                        LabelObject(3, attributes={'x': 1}),
                    ]
                else:
                    annotations = [
                        LabelObject(3, attributes={'x': 1}),
                        LabelObject(4, id=4),
                    ]
                yield DatasetItem(id=1, subset='train',
                                  annotations=annotations)

            def subsets(self):
                return ['train']

        project = Project()
        project.env.extractors.register('t1', lambda p: TestExtractor(p, v=1))
        project.env.extractors.register('t2', lambda p: TestExtractor(p, v=2))
        project.add_source('source1', {'format': 't1'})
        project.add_source('source2', {'format': 't2'})

        merged = project.make_dataset()

        self.assertEqual(1, len(merged))

        merged_item = next(iter(merged))
        # 2 + 2 annotations with one duplicate -> 3 after merging.
        self.assertEqual(3, len(merged_item.annotations))
コード例 #18
0
ファイル: importer.py プロジェクト: shivam124081/datumaro
    def __call__(self, path, **extra_params):
        """Create a Project with one 'yolo' source per config file at `path`."""
        from datumaro.components.project import Project # cyclic import
        project = Project()

        config_paths = self.find_configs(path)
        if not config_paths:
            raise Exception("Failed to find 'yolo' dataset at '%s'" % path)

        for config_path in config_paths:
            log.info("Found a dataset at '%s'" % config_path)

            # Name sources as <parent dir>_<config file name>.
            parent_dir = osp.basename(osp.dirname(config_path))
            config_name = osp.splitext(osp.basename(config_path))[0]
            project.add_source('%s_%s' % (parent_dir, config_name), {
                'url': config_path,
                'format': 'yolo',
                'options': dict(extra_params),
            })

        return project
コード例 #19
0
ファイル: voc.py プロジェクト: benhoff/cvat
    def __call__(self, path):
        """Create a Project with one source per VOC-results task at `path`."""
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        for task_name, extractor_type, task_dir in self._TASKS:
            results_dir = osp.join(path, task_dir)
            if not osp.isdir(results_dir):
                continue
            # The task directory must contain an entry named after the task.
            if not find(os.listdir(results_dir), lambda x: x == task_name):
                continue

            project.add_source(task_name, {
                'url': results_dir,
                'format': extractor_type,
            })

        if not len(project.config.sources):
            raise Exception("Failed to find 'voc_results' dataset at '%s'" % \
                path)

        return project
コード例 #20
0
    def __call__(self, path, **extra_params):
        """Create a Project with one 'label_me' source per subset at `path`."""
        from datumaro.components.project import Project # cyclic import
        project = Project()

        subset_paths = self.find_subsets(path)
        if not subset_paths:
            raise Exception("Failed to find 'label_me' dataset at '%s'" % path)

        for subset_path, subset_name in subset_paths:
            # Detected subset name is a default; caller kwargs override it.
            options = {'subset_name': subset_name} if subset_name else {}
            options.update(extra_params)

            source_name = osp.splitext(osp.basename(subset_path))[0]
            project.add_source(source_name, {
                'url': subset_path,
                'format': self._EXTRACTOR_NAME,
                'options': options,
            })

        return project
コード例 #21
0
ファイル: test_project.py プロジェクト: benhoff/cvat
    def test_project_filter_can_be_applied(self):
        """set_filter() restricts the items produced by make_dataset()."""
        class TestExtractor(Extractor):
            def __init__(self, url, n=10):
                super().__init__(length=n)
                self.n = n

            def __iter__(self):
                yield from (DatasetItem(id=i, subset='train')
                    for i in range(self.n))

            def subsets(self):
                return ['train']

        e_type = 'type'
        project = Project()
        project.env.extractors.register(e_type, TestExtractor)
        project.add_source('source', {'format': e_type})
        project.set_filter('/item[id < 5]')

        filtered = project.make_dataset()

        # ids 0..4 match the filter.
        self.assertEqual(5, len(filtered))
コード例 #22
0
ファイル: importer.py プロジェクト: 654060747/cvat
    def __call__(self, path, **extra_params):
        """Create a Project with one 'cvat' source per subset file at `path`."""
        from datumaro.components.project import Project # cyclic import
        project = Project()

        subset_paths = self.find_subsets(path)

        if not subset_paths:
            raise Exception("Failed to find 'cvat' dataset at '%s'" % path)

        for subset_path in subset_paths:
            # Only regular files are importable subsets.
            if not osp.isfile(subset_path):
                continue

            log.info("Found a dataset at '%s'" % subset_path)

            project.add_source(osp.splitext(osp.basename(subset_path))[0], {
                'url': subset_path,
                'format': self.EXTRACTOR_NAME,
                'options': dict(extra_params),
            })

        return project
コード例 #23
0
# CLI: accept either explicit json paths or a folder containing jsons.
ap = argparse.ArgumentParser()
group = ap.add_mutually_exclusive_group(required=True)
group.add_argument('--json_paths', nargs="+", help='json paths separated by whitespace')
group.add_argument('--annots_folder', help='path of annotation folder containing multiple jsons')
ap.add_argument('--output_json', help='path of output json', required=True)
args = ap.parse_args()

# create Datumaro project
project = Project()

# Collect (index, path) pairs for every json to add as a source.
entries = []
if args.json_paths:
    entries = list(enumerate(args.json_paths))
elif args.annots_folder:
    # Non-recursive: subfolders of the annotations folder are not searched.
    # The index enumerates all directory entries, so names may skip numbers.
    entries = [(i, p) for i, p in enumerate(Path(args.annots_folder).iterdir())
        if p.suffix == '.json']

for i, json_path in entries:
    new_json_path = check_json_path(json_path)
    project.add_source(f'src{i}', {'url': str(new_json_path), 'format': 'coco_instances'})

# Merge all sources into one dataset.
dataset = project.make_dataset()

# Report basic statistics.
print(f'num images: {num_img(dataset)}')
print(f'num images with annotations: {num_img_with_annots(dataset)}')
print(f'num annotations: {num_annots(dataset)}')
コード例 #24
0
from datumaro.components.project import Project
from datum_utils import num_img, num_img_with_annots, num_annots, export_json

ap = argparse.ArgumentParser()
ap.add_argument('--json_path', help='annotations json path', required=True)
args = ap.parse_args()

# WRITE YOUR SPLIT HERE
# Maps each split name to the set-id prefixes whose items belong to it.
splits = {'train': ['set00', 'set01'], 'val': ['set02'], 'test': ['set03']}

# Build a Datumaro project around the single COCO-instances json source.
project = Project()
project.add_source('src1', {'url': args.json_path, 'format': 'coco_instances'})

dataset = project.make_dataset()
print(f'total images: {num_img(dataset)}')

for split_name, split_list in splits.items():
    # DEFINE SPLIT FUNCTION HERE
    # An item belongs to a split when its id starts with one of its prefixes.
    prefixes = tuple(split_list)
    dataset_split = dataset.select(lambda item: item.id.startswith(prefixes))

    # Per-split statistics.
    print(f'split: {split_name}')
    print(f'num images: {num_img(dataset_split)}')
    print(f'num images with annotations: {num_img_with_annots(dataset_split)}')
    print(f'num annotations: {num_annots(dataset_split)}')
コード例 #25
0
ファイル: test_project.py プロジェクト: benhoff/cvat
    def test_can_do_transform_with_custom_model(self):
        """End-to-end check of transforming a dataset with a registered model.

        Builds an n-item source dataset, runs it through a custom launcher via
        transform(), persists it with a custom converter, then reads it back
        with a custom extractor and verifies the labels round-trip.
        """
        # Source: items whose "image" is simply the index, pre-labeled with it.
        class TestExtractorSrc(Extractor):
            def __init__(self, url, n=2):
                super().__init__(length=n)
                self.n = n

            def __iter__(self):
                for i in range(self.n):
                    yield DatasetItem(id=i,
                                      subset='train',
                                      image=i,
                                      annotations=[LabelObject(i)])

            def subsets(self):
                return ['train']

        # "Model": predicts the label directly from the input value.
        class TestLauncher(Launcher):
            def __init__(self, **kwargs):
                pass

            def launch(self, inputs):
                for inp in inputs:
                    yield [LabelObject(inp)]

        # Converter: writes one text file per item — subset on the first
        # line, predicted label on the second.
        class TestConverter(Converter):
            def __call__(self, extractor, save_dir):
                for item in extractor:
                    with open(osp.join(save_dir, '%s.txt' % item.id),
                              'w+') as f:
                        f.write(str(item.subset) + '\n')
                        f.write(str(item.annotations[0].label) + '\n')

        # Destination extractor: parses the converter's files back into
        # items. sorted() gives a deterministic item order.
        class TestExtractorDst(Extractor):
            def __init__(self, url):
                super().__init__()
                self.items = [
                    osp.join(url, p) for p in sorted(os.listdir(url))
                ]

            def __iter__(self):
                for path in self.items:
                    with open(path, 'r') as f:
                        # Item id is the file name; lines hold subset, label.
                        index = osp.splitext(osp.basename(path))[0]
                        subset = f.readline()[:-1]
                        label = int(f.readline()[:-1])
                        assert (subset == 'train')
                        yield DatasetItem(id=index,
                                          subset=subset,
                                          annotations=[LabelObject(label)])

            def __len__(self):
                return len(self.items)

            def subsets(self):
                return ['train']

        model_name = 'model'
        launcher_name = 'custom_launcher'
        extractor_name = 'custom_extractor'

        # Register launcher, source extractor and converter under the same
        # custom format name, then wire up the model and source.
        project = Project()
        project.env.launchers.register(launcher_name, TestLauncher)
        project.env.extractors.register(extractor_name, TestExtractorSrc)
        project.env.converters.register(extractor_name, TestConverter)
        project.add_model(model_name, {'launcher': launcher_name})
        project.add_source('source', {'format': extractor_name})

        with TestDir() as test_dir:
            # Run inference and persist the transformed dataset.
            project.make_dataset().transform(model_name, test_dir.path)

            # Re-register the format to read the saved output back.
            result = Project.load(test_dir.path)
            result.env.extractors.register(extractor_name, TestExtractorDst)
            it = iter(result.make_dataset())
            item1 = next(it)
            item2 = next(it)
            self.assertEqual(0, item1.annotations[0].label)
            self.assertEqual(1, item2.annotations[0].label)