Example #1
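Saving flushes pending changes: is_modified becomes true after put() and is cleared by save().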
    def test_flushes_changes_on_save(self):
        dataset = Dataset.from_iterable([])
        dataset.put(DatasetItem(1))

        self.assertTrue(dataset.is_modified)

        with TestDir() as test_dir:
            dataset.save(test_dir)

            self.assertFalse(dataset.is_modified)
Example #2
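A test fixture that builds a two-item 'val' subset with both item-level and annotation-level attributes.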
    def setUp(self):
        self.source = Dataset.from_iterable([
            DatasetItem(id='1', subset='val',
                attributes={'qq': 1, 'x': 2},
                annotations=[ Label(0, attributes={ 'x': 1, 'y': 2 }) ]),

            DatasetItem(id='2', subset='val',
                attributes={'qq': 2},
                annotations=[ Label(0, attributes={ 'x': 1, 'y': 2 }) ]),
        ], categories=['a'])
Example #3
File: imagenet.py Project: quuhua911/cvat
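An export hook for CVAT: depending on save_images, the dataset is exported as 'imagenet' (with images) or 'imagenet_txt' (annotations only), then packed into a zip archive.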
def _export(dst_file, instance_data, save_images=False):
    dataset = Dataset.from_extractors(GetCVATDataExtractor(
        instance_data, include_images=save_images), env=dm_env)
    with TemporaryDirectory() as temp_dir:
        if save_images:
            dataset.export(temp_dir, 'imagenet', save_images=save_images)
        else:
            dataset.export(temp_dir, 'imagenet_txt', save_images=save_images)

        make_zip_archive(temp_dir, dst_file)
Example #4
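RemapLabels with default='delete' drops every annotation whose label is not covered by the mapping; here even the unlabeled Bbox is removed, as the expected dataset shows.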
    def test_remap_labels_delete_unspecified(self):
        source_dataset = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[
                Label(0, id=0), # will be removed
                Label(1, id=1),
                Bbox(1, 2, 3, 4, label=None),
            ])
        ], categories=['label0', 'label1'])

        target_dataset = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[
                Label(0, id=1),
            ]),
        ], categories=['label1'])

        actual = transforms.RemapLabels(source_dataset,
            mapping={ 'label1': 'label1' }, default='delete')

        compare_datasets(self, target_dataset, actual)
Example #5
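ProjectLabels reorders the label categories to match dst_labels: kept labels are re-indexed, extra labels are dropped, and unlabeled shapes survive.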
    def test_project_labels(self):
        source = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[
                Label(1), # Label must be remapped
                Label(3), # Must be removed (extra label)
                Bbox(1, 2, 3, 4, label=None), # Must be kept (no label)
            ])
        ], categories=['a', 'b', 'c', 'd'])

        expected = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[
                Label(2),
                Bbox(1, 2, 3, 4, label=None),
            ]),
        ], categories=['c', 'a', 'b'])

        actual = transforms.ProjectLabels(source, dst_labels=['c', 'a', 'b'])

        compare_datasets(self, expected, actual)
Example #6
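The COCO image-info converter handles item ids containing Cyrillic characters and spaces.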
    def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id='кириллица с пробелом',
                        subset='train',
                        attributes={'id': 1}),
        ])

        with TestDir() as test_dir:
            self._test_save_and_load(expected_dataset,
                                     CocoImageInfoConverter.convert, test_dir)
Example #7
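An item that carries only image metadata (a path and a size, no pixel data) survives COCO image-info export.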
    def test_can_save_dataset_with_image_info(self):
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id=1,
                        image=Image(path='1.jpg', size=(10, 15)),
                        attributes={'id': 1}),
        ])

        with TestDir() as test_dir:
            self._test_save_and_load(expected_dataset,
                                     CocoImageInfoConverter.convert, test_dir)
Example #8
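Importing an ImageNet-style directory and comparing items, labels, and images against the expected dataset.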
    def test_can_import(self):
        expected_dataset = Dataset.from_iterable(
            [
                DatasetItem(id='1',
                            image=np.ones((8, 8, 3)),
                            annotations=[Label(0), Label(1)]),
                DatasetItem(id='2',
                            image=np.ones((10, 10, 3)),
                            annotations=[Label(0)]),
            ],
            categories={
                AnnotationType.label:
                LabelCategories.from_iterable('label_' + str(label)
                                              for label in range(2)),
            })

        dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'imagenet')

        compare_datasets(self, expected_dataset, dataset, require_images=True)
Example #9
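Item ids with subdirectories round-trip through the YOLO converter, both with and without saving images.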
    def test_relative_paths(self):
        source_dataset = Dataset.from_iterable([
            DatasetItem(id='1', subset='train', image=np.ones((4, 2, 3))),
            DatasetItem(
                id='subdir1/1', subset='train', image=np.ones((2, 6, 3))),
            DatasetItem(
                id='subdir2/1', subset='train', image=np.ones((5, 4, 3))),
        ], categories=[])

        for save_images in {True, False}:
            with self.subTest(save_images=save_images):
                with TestDir() as test_dir:
                    YoloConverter.convert(source_dataset,
                                          test_dir,
                                          save_images=save_images)
                    parsed_dataset = Dataset.import_from(test_dir, 'yolo')

                    compare_datasets(self, source_dataset, parsed_dataset)
Example #10
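With save_dataset_meta=True the converter writes a dataset_meta.json file next to the 'imagenet_txt' export.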
    def test_dataset_with_save_dataset_meta_file(self):
        source_dataset = Dataset.from_iterable(
            [
                DatasetItem(id='1', subset='train', annotations=[Label(0)]),
                DatasetItem(id='2', subset='train', annotations=[Label(1)]),
            ],
            categories=['label_0', 'label_1'])

        with TestDir() as test_dir:
            ImagenetTxtConverter.convert(source_dataset,
                                         test_dir,
                                         save_images=False,
                                         save_dataset_meta=True)

            parsed_dataset = Dataset.import_from(test_dir, 'imagenet_txt')

            self.assertTrue(osp.isfile(osp.join(test_dir,
                                                'dataset_meta.json')))
            compare_datasets(self, source_dataset, parsed_dataset)
Example #11
File: icdar.py Project: nerdinand/cvat
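An ICDAR word-recognition export hook: Label annotations are converted to captions with the LabelToCaption transform before export.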
def _export_recognition(dst_file, instance_data, save_images=False):
    dataset = Dataset.from_extractors(GetCVATDataExtractor(
        instance_data, include_images=save_images),
                                      env=dm_env)
    dataset.transform(LabelToCaption)
    with TemporaryDirectory() as temp_dir:
        dataset.export(temp_dir,
                       'icdar_word_recognition',
                       save_images=save_images)
        make_zip_archive(temp_dir, dst_file)
Example #12
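The LFW importer tolerates a missing people.txt, which is an optional annotation file.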
    def test_can_import_without_people_file(self):
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id='name0_0001',
                        subset='test',
                        image=np.ones((2, 5, 3)),
                        annotations=[
                            Label(0,
                                  attributes={
                                      'negative_pairs':
                                      ['name1/name1_0001', 'name1/name1_0002']
                                  }),
                            Points([0, 4, 3, 3, 2, 2, 1, 0, 3, 0], label=0),
                        ]),
            DatasetItem(id='name1_0001',
                        subset='test',
                        image=np.ones((2, 5, 3)),
                        annotations=[
                            Label(1,
                                  attributes={
                                      'positive_pairs': ['name1/name1_0002'],
                                  }),
                            Points([1, 6, 4, 6, 3, 3, 2, 1, 4, 1], label=1),
                        ]),
            DatasetItem(id='name1_0002',
                        subset='test',
                        image=np.ones((2, 5, 3)),
                        annotations=[
                            Label(1),
                            Points([0, 5, 3, 5, 2, 2, 1, 0, 3, 0], label=1),
                        ]),
        ], categories=['name0', 'name1'])

        with TestDir() as test_dir:
            dataset_path = osp.join(test_dir, 'dataset')
            shutil.copytree(DUMMY_DATASET_DIR, dataset_path)
            os.remove(
                osp.join(dataset_path, 'test', 'annotations', 'people.txt'))

            dataset = Dataset.import_from(dataset_path, 'lfw')

            compare_datasets(self, expected_dataset, dataset)
Example #13
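A VGG Face2 round trip covering bounding boxes, facial landmark points, labels, grouped annotations, and an item without annotations.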
    def test_can_save_and_load(self):
        source_dataset = Dataset.from_iterable([
            DatasetItem(id='1', subset='train', image=np.ones((8, 8, 3)),
                annotations=[
                    Bbox(0, 2, 4, 2, label=0, group=1),
                    Points([3.2, 3.12, 4.11, 3.2, 2.11,
                        2.5, 3.5, 2.11, 3.8, 2.13], label=0, group=1),
                ]
            ),
            DatasetItem(id='2', subset='train', image=np.ones((10, 10, 3)),
                annotations=[
                    Points([4.23, 4.32, 5.34, 4.45, 3.54,
                        3.56, 4.52, 3.51, 4.78, 3.34], label=1, group=1),
                ]
            ),
            DatasetItem(id='3', subset='train', image=np.ones((8, 8, 3)),
                annotations=[Label(2, group=1)]
            ),
            DatasetItem(id='4', subset='train', image=np.ones((10, 10, 3)),
                annotations=[
                    Bbox(0, 2, 4, 2, label=3, group=1),
                    Points([3.2, 3.12, 4.11, 3.2, 2.11,
                        2.5, 3.5, 2.11, 3.8, 2.13], label=3, group=1),
                ]
            ),
            DatasetItem(id='a/5', subset='train', image=np.ones((8, 8, 3)),
                annotations=[
                    Bbox(2, 2, 2, 2, group=1),
                ]
            ),
            DatasetItem(id='label_0', subset='train', image=np.ones((8, 8, 3)),
            ),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(
                [('label_%s' % i, 'class_%s' % i) for i in range(5)]),
        })

        with TestDir() as test_dir:
            VggFace2Converter.convert(source_dataset, test_dir, save_images=True)
            parsed_dataset = Dataset.import_from(test_dir, 'vgg_face2')

            compare_datasets(self, source_dataset, parsed_dataset)
Example #14
    def test_can_extract_coco(self):
        tfds_example = {
            'image': encode_image(np.ones((20, 10)), '.png'),
            'image/filename': 'test.png',
            'image/id': 123,
            'objects': {
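                # TFDS COCO stores boxes as normalized [ymin, xmin, ymax, xmax];
                # the extractor is expected to convert them to absolute XYWH
                # (checked against the Bbox below).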
                'bbox': [[0.1, 0.2, 0.3, 0.4]],
                'label': [5],
                'is_crowd': [True],
            }
        }

        with mock_tfds_data(example=tfds_example):
            tfds_info = tfds.builder('coco/2014').info

            expected_dataset = Dataset.from_iterable(
                [
                    DatasetItem(
                        id='test',
                        subset='train',
                        image=np.ones((20, 10)),
                        annotations=[
                            Bbox(2, 2, 2, 4, label=5,
                                 attributes={'is_crowd': True}),
                        ],
                        attributes={'id': 123},
                    ),
                ],
                categories=tfds_info.features['objects'].feature['label'].names
            )

            extractor = make_tfds_extractor('coco/2014')
            actual_dataset = Dataset(extractor)

            compare_datasets(self,
                             expected_dataset,
                             actual_dataset,
                             require_images=True)
Example #15
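An end-to-end CLI scenario: create a project, import a YOLO dataset, export it in VOC format, and verify the VOC-specific attributes (difficult, truncated, occluded) on the result.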
    def test_export_to_voc_format(self):
        label_map = OrderedDict(('label_%s' % i, [None, [], []]) for i in range(10))
        label_map['background'] = [None, [], []]
        label_map.move_to_end('background', last=False)

        expected_dataset = Dataset.from_iterable([
            DatasetItem(id='1', subset='train', image=np.ones((10, 15, 3)),
                annotations=[
                    Bbox(0.0, 2.0, 4.0, 2.0,
                        attributes={
                            'difficult': False,
                            'truncated': False,
                            'occluded': False
                        },
                        id=1, label=3, group=1
                    ),
                    Bbox(3.0, 3.0, 2.0, 3.0,
                        attributes={
                            'difficult': False,
                            'truncated': False,
                            'occluded': False
                        },
                        id=2, label=5, group=2
                    )
                ]
            )
        ], categories=VOC.make_voc_categories(label_map))

        with TestDir() as test_dir:
            yolo_dir = osp.join(__file__[:__file__.rfind(osp.join('tests', ''))],
                'tests', 'assets', 'yolo_dataset')

            run(self, 'create', '-o', test_dir)
            run(self, 'import', '-p', test_dir, '-f', 'yolo', yolo_dir)

            voc_export = osp.join(test_dir, 'voc_export')
            run(self, 'export', '-p', test_dir, '-f', 'voc',
                '-o', voc_export, '--', '--save-images')

            parsed_dataset = Dataset.import_from(voc_export, format='voc')
            compare_datasets(self, expected_dataset, parsed_dataset,
                require_images=True)
Example #16
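With crop_covered=True the COCO instances converter subtracts the area of the higher z_order polygon from the covered mask.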
    def test_can_crop_covered_segments(self):
        source_dataset = Dataset.from_iterable([
            DatasetItem(id=1, image=np.zeros((5, 5, 3)),
                annotations=[
                    Mask(np.array([
                            [0, 0, 1, 1, 1],
                            [0, 0, 1, 1, 1],
                            [1, 1, 0, 1, 1],
                            [1, 1, 1, 0, 0],
                            [1, 1, 1, 0, 0]],
                        ),
                        label=2, id=1, z_order=0),
                    Polygon([1, 1, 4, 1, 4, 4, 1, 4],
                        label=1, id=2, z_order=1),
                ]
            ),
        ], categories=[str(i) for i in range(10)])

        target_dataset = Dataset.from_iterable([
            DatasetItem(id=1, image=np.zeros((5, 5, 3)),
                annotations=[
                    Mask(np.array([
                            [0, 0, 1, 1, 1],
                            [0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0],
                            [1, 1, 1, 0, 0]],
                        ),
                        attributes={ 'is_crowd': True },
                        label=2, id=1, group=1),

                    Polygon([1, 1, 4, 1, 4, 4, 1, 4],
                        label=1, id=2, group=2,
                        attributes={ 'is_crowd': False }),
                ], attributes={'id': 1}
            ),
        ], categories=[str(i) for i in range(10)])

        with TestDir() as test_dir:
            self._test_save_and_load(source_dataset,
                 partial(CocoInstancesConverter.convert, crop_covered=True),
                 test_dir, target_dataset=target_dataset)
Example #17
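A VOC export hook; label_map='source' keeps the source label set instead of a predefined VOC one.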
def _export(dst_file, instance_data, save_images=False):
    dataset = Dataset.from_extractors(GetCVATDataExtractor(
        instance_data, include_images=save_images),
                                      env=dm_env)
    with TemporaryDirectory() as temp_dir:
        dataset.export(temp_dir,
                       'voc',
                       save_images=save_images,
                       label_map='source')

        make_zip_archive(temp_dir, dst_file)
Example #18
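Importing a MOT sequence; per-box attributes (occluded, visibility, ignored) are preserved.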
    def test_can_import(self):
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id=1,
                image=np.ones((16, 16, 3)),
                annotations=[
                    Bbox(0, 4, 4, 8, label=2, attributes={
                        'occluded': False,
                        'visibility': 1.0,
                        'ignored': False,
                    }),
                ]
            ),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(
                'label_' + str(label) for label in range(10)),
        })

        dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'mot_seq')

        compare_datasets(self, expected_dataset, dataset)
Example #19
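Arbitrary image extensions (.JPEG, .bmp) survive an Open Images round trip.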
    def test_can_save_and_load_image_with_arbitrary_extension(self):
        dataset = Dataset.from_iterable([
            DatasetItem(id='a/1',
                        image=Image(path='a/1.JPEG',
                                    data=np.zeros((4, 3, 3)))),
            DatasetItem(id='b/c/d/2',
                        image=Image(path='b/c/d/2.bmp',
                                    data=np.zeros((3, 4, 3)))),
        ], categories=[])

        with TestDir() as test_dir:
            OpenImagesConverter.convert(dataset, test_dir, save_images=True)

            parsed_dataset = Dataset.import_from(test_dir, 'open_images')

            compare_datasets(self,
                             dataset,
                             parsed_dataset,
                             require_images=True)
Example #20
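Membership checks accept DatasetItem objects, (id, subset) tuples, and bare ids; a bare id refers to the default subset, so 0, which only exists in subset 'a', is not found.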
    def test_can_check_item_existence(self):
        dataset = Dataset.from_iterable(
            [DatasetItem(0, subset='a'),
             DatasetItem(1)])

        self.assertTrue(DatasetItem(0, subset='a') in dataset)
        self.assertFalse(DatasetItem(0, subset='b') in dataset)
        self.assertTrue((0, 'a') in dataset)
        self.assertFalse((0, 'b') in dataset)
        self.assertTrue(1 in dataset)
        self.assertFalse(0 in dataset)
Example #21
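save() binds the dataset to a path and format, reflected by is_bound, data_path, and format.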
    def test_binds_on_save(self):
        dataset = Dataset.from_iterable([DatasetItem(1)])

        self.assertFalse(dataset.is_bound)

        with TestDir() as test_dir:
            dataset.save(test_dir)

            self.assertTrue(dataset.is_bound)
            self.assertEqual(dataset.data_path, test_dir)
            self.assertEqual(dataset.format, DEFAULT_FORMAT)
Example #22
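Adding an item with put() flips is_modified.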
    def test_can_track_modifications_on_addition(self):
        dataset = Dataset.from_iterable([
            DatasetItem(1),
            DatasetItem(2),
        ])

        self.assertFalse(dataset.is_modified)

        dataset.put(DatasetItem(3, subset='a'))

        self.assertTrue(dataset.is_modified)
Example #23
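Removing an item flips is_modified.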
    def test_can_track_modifications_on_removal(self):
        dataset = Dataset.from_iterable([
            DatasetItem(1),
            DatasetItem(2),
        ])

        self.assertFalse(dataset.is_modified)

        dataset.remove(1)

        self.assertTrue(dataset.is_modified)
Example #24
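A 3D-data export hook: unless images are saved, image paths are stripped with the DeleteImagePath transform before the Datumaro export is zipped.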
def _export(dst_file, instance_data, save_images=False):
    dataset = Dataset.from_extractors(GetCVATDataExtractor(
        instance_data=instance_data, include_images=save_images,
            dimension=DimensionType.DIM_3D), env=dm_env)

    if not save_images:
        dataset.transform(DeleteImagePath)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'datumaro', save_images=save_images)

        make_zip_archive(tmp_dir, dst_file)
Example #25
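An MNIST round trip with save_images=False: labels are preserved even though no image files are written.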
    def test_can_save_and_load_without_saving_images(self):
        source_dataset = Dataset.from_iterable(
            [
                DatasetItem(id=0, subset='train', annotations=[Label(0)]),
                DatasetItem(id=1, subset='train', annotations=[Label(1)]),
            ],
            categories={
                AnnotationType.label:
                LabelCategories.from_iterable(
                    str(label) for label in range(10)),
            })

        with TestDir() as test_dir:
            MnistConverter.convert(source_dataset, test_dir, save_images=False)
            parsed_dataset = Dataset.import_from(test_dir, 'mnist')

            compare_datasets(self,
                             source_dataset,
                             parsed_dataset,
                             require_images=True)
Example #26
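Importing a VoTT CSV dataset whose meta file supplies the label names ('helmet', 'person').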
    def test_can_import_with_meta_file(self):
        expected_dataset = Dataset.from_iterable(
            [
                DatasetItem(id='img0001',
                            subset='test',
                            image=np.ones((5, 5, 3)),
                            annotations=[Bbox(10, 5, 10, 2, label=0)]),
                DatasetItem(id='img0002',
                            subset='test',
                            image=np.ones((5, 5, 3)),
                            annotations=[
                                Bbox(11.5, 12, 10.2, 20.5, label=1),
                            ])
            ],
            categories=['helmet', 'person'])

        dataset = Dataset.import_from(DUMMY_DATASET_DIR_WITH_META_FILE,
                                      'vott_csv')

        compare_datasets(self, expected_dataset, dataset, require_images=True)
Example #27
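Item ids with subdirectories round-trip through the CVAT converter, which adds sequential 'frame' attributes on load.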
    def test_relative_paths(self):
        source_dataset = Dataset.from_iterable([
            DatasetItem(id='1', image=np.ones((4, 2, 3))),
            DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
            DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
        ])

        target_dataset = Dataset.from_iterable([
            DatasetItem(id='1', image=np.ones((4, 2, 3)),
                attributes={'frame': 0}),
            DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
                attributes={'frame': 1}),
            DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
                attributes={'frame': 2}),
        ], categories=[])

        with TestDir() as test_dir:
            self._test_save_and_load(source_dataset,
                partial(CvatConverter.convert, save_images=True), test_dir,
                target_dataset=target_dataset, require_images=True)
Example #28
File: coco.py Project: gost-sniper/cvat
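A COCO instances export hook: the dataset is exported to a temporary directory and packed into a zip archive; merge_images=True and save_images control how images are written.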
def _export(dst_file, task_data, save_images=False):
    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
        task_data, include_images=save_images),
                                      env=dm_env)
    with TemporaryDirectory() as temp_dir:
        dataset.export(temp_dir,
                       'coco_instances',
                       save_images=save_images,
                       merge_images=True)

        make_zip_archive(temp_dir, dst_file)
Example #29
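The 'transform' CLI command with --overwrite rewrites a COCO dataset in place, remapping label 'a' to 'cat' and 'b' to 'dog'.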
    def test_can_transform_dataset_inplace(self):
        test_dir = scope_add(TestDir())
        Dataset.from_iterable([
            DatasetItem(1, annotations=[Label(0)]),
            DatasetItem(2, annotations=[Label(1)]),
        ], categories=['a', 'b']).export(test_dir, 'coco')

        run(self, 'transform', '-t', 'remap_labels', '--overwrite',
            test_dir + ':coco', '--', '-l', 'a:cat', '-l', 'b:dog')

        expected_dataset = Dataset.from_iterable([
            DatasetItem(1, annotations=[Label(0, id=1, group=1)]),
            DatasetItem(2, annotations=[Label(1, id=2, group=2)]),
        ], categories=['cat', 'dog'])
        compare_datasets(self,
                         expected_dataset,
                         Dataset.import_from(test_dir, 'coco'),
                         ignored_attrs='*')
Example #30
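Arbitrary image extensions survive a Market-1501 round trip; the id fields are parsed into attributes (person_id, camera_id, track_id, frame_id, bbox_id, query).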
    def test_can_save_and_load_image_with_arbitrary_extension(self):
        expected = Dataset.from_iterable([
            DatasetItem(id='c/0001_c1s1_000000_00', image=Image(
                    path='c/0001_c1s1_0000_00.JPEG', data=np.zeros((4, 3, 3))),
                attributes={'camera_id': 0, 'person_id': '0001', 'track_id': 1,
                    'frame_id': 0, 'bbox_id': 0, 'query': False}
            ),
            DatasetItem(id='a/b/0002_c2s2_000001_00', image=Image(
                    path='a/b/0002_c2s2_0001_00.bmp', data=np.zeros((3, 4, 3))),
                attributes={'camera_id': 1, 'person_id': '0002', 'track_id': 2,
                    'frame_id': 1, 'bbox_id': 0, 'query': False}
            ),
        ])

        with TestDir() as test_dir:
            Market1501Converter.convert(expected, test_dir, save_images=True)
            parsed_dataset = Dataset.import_from(test_dir, 'market1501')

            compare_datasets(self, expected, parsed_dataset,
                require_images=True)