Example No. 1
    def test_project_compound_child_can_be_modified_recursively(self):
        with TestDir() as test_dir:
            child1 = Project({
                'project_dir': osp.join(test_dir, 'child1'),
            })
            child1.save()

            child2 = Project({
                'project_dir': osp.join(test_dir, 'child2'),
            })
            child2.save()

            parent = Project()
            parent.add_source('child1', {'url': child1.config.project_dir})
            parent.add_source('child2', {'url': child2.config.project_dir})
            dataset = parent.make_dataset()

            item1 = DatasetItem(id='ch1', path=['child1'])
            item2 = DatasetItem(id='ch2', path=['child2'])
            dataset.put(item1)
            dataset.put(item2)

            self.assertEqual(2, len(dataset))
            self.assertEqual(1, len(dataset.sources['child1']))
            self.assertEqual(1, len(dataset.sources['child2']))
Example No. 2
    def test_can_batch_launch_custom_model(self):
        dataset = Dataset.from_iterable([
            DatasetItem(id=i, subset='train', image=np.array([i]))
            for i in range(5)
        ], categories=['label'])

        class TestLauncher(Launcher):
            def launch(self, inputs):
                for i, inp in enumerate(inputs):
                    yield [Label(0, attributes={'idx': i, 'data': inp.item()})]

        model_name = 'model'
        launcher_name = 'custom_launcher'

        project = Project()
        project.env.launchers.register(launcher_name, TestLauncher)
        project.add_model(model_name, {'launcher': launcher_name})
        model = project.make_executable_model(model_name)

        batch_size = 3
        executor = ModelTransform(dataset, model, batch_size=batch_size)

        for item in executor:
            self.assertEqual(1, len(item.annotations))
            self.assertEqual(
                int(item.id) % batch_size,
                item.annotations[0].attributes['idx'])
            self.assertEqual(int(item.id),
                             item.annotations[0].attributes['data'])
Example No. 3
    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        if path.endswith('.json') and osp.isfile(path):
            subset_paths = [path]
        else:
            subset_paths = glob(osp.join(path, '*.json'))

            if osp.basename(osp.normpath(path)) != DatumaroPath.ANNOTATIONS_DIR:
                path = osp.join(path, DatumaroPath.ANNOTATIONS_DIR)
                subset_paths += glob(osp.join(path, '*.json'))

        if len(subset_paths) == 0:
            raise Exception("Failed to find 'datumaro' dataset at '%s'" % path)

        for subset_path in subset_paths:
            if not osp.isfile(subset_path):
                continue

            log.info("Found a dataset at '%s'" % subset_path)

            subset_name = osp.splitext(osp.basename(subset_path))[0]

            project.add_source(
                subset_name, {
                    'url': subset_path,
                    'format': self.EXTRACTOR_NAME,
                    'options': dict(extra_params),
                })

        return project
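
A minimal usage sketch for an importer like the one above. The class name DatumaroImporter and the path are placeholders assumed for illustration; only calls shown in these examples (the importer's __call__ and Project.make_dataset) are used.

# Hedged sketch, not confirmed API: 'DatumaroImporter' stands for a class
# defining the __call__ shown above.
importer = DatumaroImporter()
project = importer('/path/to/dataset')   # one source is added per *.json subset found
dataset = project.make_dataset()         # loads and merges all registered sources
for item in dataset:
    print(item.id, item.subset)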
Example No. 4
    def test_cant_merge_different_categories(self):
        class TestExtractor1(Extractor):
            def __iter__(self):
                return iter([])

            def categories(self):
                return {
                    AnnotationType.label:
                    LabelCategories.from_iterable(['a', 'b'])
                }

        class TestExtractor2(Extractor):
            def __iter__(self):
                return iter([])

            def categories(self):
                return {
                    AnnotationType.label:
                    LabelCategories.from_iterable(['b', 'a'])
                }

        e_name1 = 'e1'
        e_name2 = 'e2'

        project = Project()
        project.env.extractors.register(e_name1, TestExtractor1)
        project.env.extractors.register(e_name2, TestExtractor2)
        project.add_source('source1', {'format': e_name1})
        project.add_source('source2', {'format': e_name2})

        with self.assertRaisesRegex(Exception, "different categories"):
            project.make_dataset()
Example No. 5
    def test_custom_extractor_can_be_created(self):
        class CustomExtractor(Extractor):
            def __init__(self, url):
                super().__init__()

            def __iter__(self):
                return iter([
                    DatasetItem(id=0, subset='train'),
                    DatasetItem(id=1, subset='train'),
                    DatasetItem(id=2, subset='train'),
                    DatasetItem(id=3, subset='test'),
                ])

            def subsets(self):
                return ['train', 'test']

        extractor_name = 'ext1'
        project = Project()
        project.env.extractors.register(extractor_name, CustomExtractor)
        project.add_source('src1', {
            'url': 'path',
            'format': extractor_name,
        })
        project.set_subsets(['train'])

        dataset = project.make_dataset()

        self.assertEqual(3, len(dataset))
Example No. 6
    def test_project_can_merge_item_annotations(self):
        class TestExtractor1(Extractor):
            def __iter__(self):
                yield DatasetItem(id=1,
                                  subset='train',
                                  annotations=[
                                      Label(2, id=3),
                                      Label(3, attributes={'x': 1}),
                                  ])

        class TestExtractor2(Extractor):
            def __iter__(self):
                yield DatasetItem(id=1,
                                  subset='train',
                                  annotations=[
                                      Label(3, attributes={'x': 1}),
                                      Label(4, id=4),
                                  ])

        project = Project()
        project.env.extractors.register('t1', TestExtractor1)
        project.env.extractors.register('t2', TestExtractor2)
        project.add_source('source1', {'format': 't1'})
        project.add_source('source2', {'format': 't2'})

        merged = project.make_dataset()

        self.assertEqual(1, len(merged))

        item = next(iter(merged))
        self.assertEqual(3, len(item.annotations))
Example No. 7
    def test_can_batch_launch_custom_model(self):
        class TestExtractor(Extractor):
            def __iter__(self):
                for i in range(5):
                    yield DatasetItem(id=i,
                                      subset='train',
                                      image=np.array([i]))

        class TestLauncher(Launcher):
            def launch(self, inputs):
                for i, inp in enumerate(inputs):
                    yield [Label(attributes={'idx': i, 'data': inp.item()})]

        model_name = 'model'
        launcher_name = 'custom_launcher'

        project = Project()
        project.env.launchers.register(launcher_name, TestLauncher)
        project.add_model(model_name, {'launcher': launcher_name})
        model = project.make_executable_model(model_name)
        extractor = TestExtractor()

        batch_size = 3
        executor = InferenceWrapper(extractor, model, batch_size=batch_size)

        for item in executor:
            self.assertEqual(1, len(item.annotations))
            self.assertEqual(
                int(item.id) % batch_size,
                item.annotations[0].attributes['idx'])
            self.assertEqual(int(item.id),
                             item.annotations[0].attributes['data'])
Example No. 8
    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        if path.endswith('.data') and osp.isfile(path):
            config_paths = [path]
        else:
            config_paths = glob(osp.join(path, '*.data'))

        if not osp.exists(path) or not config_paths:
            raise Exception("Failed to find 'yolo' dataset at '%s'" % path)

        for config_path in config_paths:
            log.info("Found a dataset at '%s'" % config_path)

            source_name = '%s_%s' % (
                osp.basename(osp.dirname(config_path)),
                osp.splitext(osp.basename(config_path))[0])
            project.add_source(
                source_name, {
                    'url': config_path,
                    'format': 'yolo',
                    'options': dict(extra_params),
                })

        return project
Example No. 9
    def test_source_datasets_can_be_merged(self):
        class TestExtractor(Extractor):
            def __init__(self, url, n=0, s=0):
                super().__init__(length=n)
                self.n = n
                self.s = s

            def __iter__(self):
                for i in range(self.n):
                    yield DatasetItem(id=self.s + i, subset='train')

        e_name1 = 'e1'
        e_name2 = 'e2'
        n1 = 2
        n2 = 4

        project = Project()
        project.env.extractors.register(e_name1,
                                        lambda p: TestExtractor(p, n=n1))
        project.env.extractors.register(e_name2,
                                        lambda p: TestExtractor(p, n=n2, s=n1))
        project.add_source('source1', {'format': e_name1})
        project.add_source('source2', {'format': e_name2})

        dataset = project.make_dataset()

        self.assertEqual(n1 + n2, len(dataset))
Example No. 10
    def test_custom_extractor_can_be_created(self):
        class CustomExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=0, subset='train'),
                    DatasetItem(id=1, subset='train'),
                    DatasetItem(id=2, subset='train'),
                    DatasetItem(id=3, subset='test'),
                    DatasetItem(id=4, subset='test'),
                    DatasetItem(id=1),
                    DatasetItem(id=2),
                    DatasetItem(id=3),
                ])

        extractor_name = 'ext1'
        project = Project()
        project.env.extractors.register(extractor_name, CustomExtractor)
        project.add_source('src1', {
            'url': 'path',
            'format': extractor_name,
        })

        dataset = project.make_dataset()

        compare_datasets(self, CustomExtractor(), dataset)
Example No. 11
    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        subset_paths = self.find_subsets(path)

        if len(subset_paths) == 0:
            raise Exception("Failed to find 'cvat' dataset at '%s'" % path)

        for subset_path in subset_paths:
            if not osp.isfile(subset_path):
                continue

            log.info("Found a dataset at '%s'" % subset_path)

            subset_name = osp.splitext(osp.basename(subset_path))[0]

            project.add_source(
                subset_name, {
                    'url': subset_path,
                    'format': self.EXTRACTOR_NAME,
                    'options': dict(extra_params),
                })

        return project
Example No. 12
    def test_project_own_dataset_can_be_modified(self):
        project = Project()
        dataset = project.make_dataset()

        item = DatasetItem(id=1)
        dataset.put(item)

        self.assertEqual(item, next(iter(dataset)))
Example No. 13
    def test_project_false_when_not_project_name(self):
        project_name = 'qwerty'
        project = Project({'project_name': project_name})
        target = ProjectTarget(project=project)

        status = target.test(project_name + '123')

        self.assertFalse(status)
Example No. 14
    def test_project_true_when_project_name(self):
        project_name = 'qwerty'
        project = Project({'project_name': project_name})
        target = ProjectTarget(project=project)

        status = target.test(project_name)

        self.assertTrue(status)
Example No. 15
    def test_source_false_when_source_doesnt_exist(self):
        source_name = 'qwerty'
        project = Project()
        project.add_source(source_name)
        target = SourceTarget(project=project)

        status = target.test(source_name + '123')

        self.assertFalse(status)
Example No. 16
    def test_source_true_when_source_exists(self):
        source_name = 'qwerty'
        project = Project()
        project.add_source(source_name)
        target = SourceTarget(project=project)

        status = target.test(source_name)

        self.assertTrue(status)
Example No. 17
    def test_can_do_transform_with_custom_model(self):
        class TestExtractorSrc(Extractor):
            def __iter__(self):
                for i in range(2):
                    yield DatasetItem(id=i,
                                      image=np.ones([2, 2, 3]) * i,
                                      annotations=[Label(i)])

        class TestLauncher(Launcher):
            def launch(self, inputs):
                for inp in inputs:
                    yield [Label(inp[0, 0, 0])]

        class TestConverter(Converter):
            def __call__(self, extractor, save_dir):
                for item in extractor:
                    with open(osp.join(save_dir, '%s.txt' % item.id),
                              'w') as f:
                        f.write(str(item.annotations[0].label) + '\n')

        class TestExtractorDst(Extractor):
            def __init__(self, url):
                super().__init__()
                self.items = [
                    osp.join(url, p) for p in sorted(os.listdir(url))
                ]

            def __iter__(self):
                for path in self.items:
                    with open(path, 'r') as f:
                        index = osp.splitext(osp.basename(path))[0]
                        label = int(f.readline().strip())
                        yield DatasetItem(id=index, annotations=[Label(label)])

        model_name = 'model'
        launcher_name = 'custom_launcher'
        extractor_name = 'custom_extractor'

        project = Project()
        project.env.launchers.register(launcher_name, TestLauncher)
        project.env.extractors.register(extractor_name, TestExtractorSrc)
        project.env.converters.register(extractor_name, TestConverter)
        project.add_model(model_name, {'launcher': launcher_name})
        project.add_source('source', {'format': extractor_name})

        with TestDir() as test_dir:
            # run inference with the registered model and save the results
            project.make_dataset().apply_model(model=model_name,
                                               save_dir=test_dir)

            result = Project.load(test_dir)
            result.env.extractors.register(extractor_name, TestExtractorDst)
            it = iter(result.make_dataset())
            item1 = next(it)
            item2 = next(it)
            self.assertEqual(0, item1.annotations[0].label)
            self.assertEqual(1, item2.annotations[0].label)
Example No. 18
    def test_add_source(self):
        source_name = 'source'
        origin = Source({'url': 'path', 'format': 'ext'})
        project = Project()

        project.add_source(source_name, origin)

        added = project.get_source(source_name)
        self.assertIsNotNone(added)
        self.assertEqual(added, origin)
Example No. 19
    def test_project_true_when_project_file(self):
        with TestDir() as test_dir:
            path = osp.join(test_dir.path, 'test.jpg')
            Project().save(path)

            target = ProjectTarget()

            status = target.test(path)

            self.assertTrue(status)
Example No. 20
    def test_can_have_project_source(self):
        with TestDir() as test_dir:
            Project.generate(test_dir)

            project2 = Project()
            project2.add_source('project1', {
                'url': test_dir,
            })
            dataset = project2.make_dataset()

            self.assertTrue('project1' in dataset.sources)
Example No. 21
    def test_added_source_can_be_saved(self):
        source_name = 'source'
        origin = Source({
            'url': 'path',
        })
        project = Project()
        project.add_source(source_name, origin)

        saved = project.config

        self.assertEqual(origin, saved.sources[source_name])
Example No. 22
    def test_can_save_and_load_own_dataset(self):
        with TestDir() as test_dir:
            src_project = Project()
            src_dataset = src_project.make_dataset()
            item = DatasetItem(id=1)
            src_dataset.put(item)
            src_dataset.save(test_dir)

            loaded_project = Project.load(test_dir)
            loaded_dataset = loaded_project.make_dataset()

            self.assertEqual(list(src_dataset), list(loaded_dataset))
Example No. 23
    def test_can_dump_added_model(self):
        model_name = 'model'

        project = Project()
        saved = Model({'launcher': 'name'})
        project.add_model(model_name, saved)

        with TestDir() as test_dir:
            project.save(test_dir)

            loaded = Project.load(test_dir)
            loaded = loaded.get_model(model_name)
            self.assertEqual(saved, loaded)
Example No. 24
    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        subsets = self.find_subsets(path)
        if len(subsets) == 0:
            raise Exception("Failed to find dataset at '%s'" % path)

        for desc in subsets:
            source_name = osp.splitext(osp.basename(desc['url']))[0]
            project.add_source(source_name, desc)

        return project
Example No. 25
    def test_added_source_can_be_dumped(self):
        source_name = 'source'
        origin = Source({
            'url': 'path',
        })
        project = Project()
        project.add_source(source_name, origin)

        with TestDir() as test_dir:
            project.save(test_dir)

            loaded = Project.load(test_dir)
            loaded = loaded.get_source(source_name)
            self.assertEqual(origin, loaded)
Example No. 26
    def test_project_filter_can_be_applied(self):
        class TestExtractor(Extractor):
            def __iter__(self):
                for i in range(10):
                    yield DatasetItem(id=i, subset='train')

        e_type = 'type'
        project = Project()
        project.env.extractors.register(e_type, TestExtractor)
        project.add_source('source', {'format': e_type})

        dataset = project.make_dataset().extract('/item[id < 5]')

        self.assertEqual(5, len(dataset))
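
The extract() call above takes an XPath-like expression over items. As a hedged follow-up sketch (assuming the same filter dialect as the test above), a predicate can also select by subset:

# Assumes the same '/item[...]' filter dialect as '/item[id < 5]' above.
train_only = project.make_dataset().extract('/item[subset="train"]')
print(len(train_only))  # every item in this example belongs to 'train'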
Example No. 27
    def __call__(self, path):
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        if not osp.exists(path):
            raise Exception("Failed to find 'datumaro' dataset at '%s'" % path)

        source_name = osp.splitext(osp.basename(path))[0]
        project.add_source(source_name, {
            'url': path,
            'format': self.EXTRACTOR_NAME,
        })

        return project
Example No. 28
    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project # cyclic import
        project = Project()

        if not osp.isdir(path):
            raise Exception("Can't find a directory at '%s'" % path)

        source_name = osp.basename(osp.normpath(path))
        project.add_source(source_name, {
            'url': source_name,
            'format': self.EXTRACTOR_NAME,
            'options': dict(extra_params),
        })

        return project
Example No. 29
def check_data(json_path):
    # create Datumaro project
    project = Project()

    # add source
    project.add_source('src1', {
        'url': str(json_path),
        'format': 'coco_instances'
    })

    # create a dataset
    dataset = project.make_dataset()
    print(f'{json_path.stem}')

    print(f'num images: {num_img(dataset)}')
    print(f'num images with annotations: {num_img_with_annots(dataset)}')
    print(f'num annotations: {num_annots(dataset)}')
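
The counting helpers called by check_data() are not defined in the snippet. A minimal sketch of what they might look like, inferred from their names; these bodies are assumptions, not the original implementations:

def num_img(dataset):
    # total number of items (images) in the dataset
    return len(dataset)

def num_img_with_annots(dataset):
    # items that carry at least one annotation
    return sum(1 for item in dataset if len(item.annotations) > 0)

def num_annots(dataset):
    # total number of annotations across all items
    return sum(len(item.annotations) for item in dataset)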
Example No. 30
    def __call__(self, path, **extra_params):
        from datumaro.components.project import Project  # cyclic import
        project = Project()

        sources = self.find_sources(osp.normpath(path))
        if len(sources) == 0:
            raise Exception("Failed to find dataset at '%s'" % path)

        for desc in sources:
            params = dict(extra_params)
            params.update(desc.get('options', {}))
            desc['options'] = params

            source_name = osp.splitext(osp.basename(desc['url']))[0]
            project.add_source(source_name, desc)

        return project
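
In this importer, keyword arguments passed to the call are folded into each discovered source's 'options', with per-source options taking precedence. A hedged invocation sketch; SomeImporter, the path, and the keyword are placeholders, not confirmed API:

# Hypothetical call of an importer defining the __call__ above; the extra
# keyword argument ends up in each source's 'options' dictionary.
project = SomeImporter()('/data/my_dataset', subsets=['train'])
dataset = project.make_dataset()
print('loaded %d items' % len(dataset))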