Example #1
0
    def test_can_reiterate_sequence(self):
        # Two datasets are built, one after the other, from the same
        # LabelRandomSampler instance (created without a seed); the test
        # passes only if compare_datasets_strict accepts them as equal.
        items = [
            DatasetItem('1', subset='a', annotations=[Label(0), Label(1)]),
            DatasetItem('2', subset='a', annotations=[Label(1)]),
            DatasetItem('3', subset='a', annotations=[Label(2)]),
            DatasetItem('4', subset='a', annotations=[Label(1), Label(2)]),
            DatasetItem('5', subset='b', annotations=[Label(0)]),
            DatasetItem('6', subset='b', annotations=[Label(0), Label(2)]),
            DatasetItem('7', subset='b', annotations=[Label(1), Label(2)]),
            DatasetItem('8', subset='b', annotations=[Label(2)]),
        ]
        source = Dataset.from_iterable(items, categories=['a', 'b', 'c'])

        transformed = LabelRandomSampler(source, count=2)

        # Each pass wraps the shared transform and fills its cache before the
        # next pass starts, mirroring two independent iterations.
        passes = []
        for _ in range(2):
            materialized = Dataset.from_extractors(transformed)
            materialized.init_cache()
            passes.append(materialized)

        first_pass, second_pass = passes
        compare_datasets_strict(self, first_pass, second_pass)
Example #2
0
    def test_inplace_save_writes_only_updated_data_with_direct_changes(self):
        # Saves a dataset, edits it directly (one put, one remove), saves it
        # again without a path, then checks the files on disk and the
        # reloaded contents.
        expected_items = [
            DatasetItem(1, subset='a'),
            DatasetItem(2, subset='a', image=np.ones((3, 2, 3))),
            DatasetItem(2, subset='b'),
        ]
        expected = Dataset.from_iterable(expected_items)

        with TestDir() as path:
            # generate initial dataset
            initial_items = [
                # modified subset
                DatasetItem(1, subset='a'),

                # unmodified subset
                DatasetItem(2, subset='b'),

                # removed subset
                DatasetItem(3, subset='c', image=np.ones((2, 2, 3))),
            ]
            dataset = Dataset.from_iterable(initial_items)
            dataset.save(path, save_images=True)

            new_item = DatasetItem(2, subset='a', image=np.ones((3, 2, 3)))
            dataset.put(new_item)
            dataset.remove(3, 'c')
            dataset.save(save_images=True)

            annotation_files = set(os.listdir(osp.join(path, 'annotations')))
            self.assertEqual({'a.json', 'b.json'}, annotation_files)

            subset_a_images = set(os.listdir(osp.join(path, 'images', 'a')))
            self.assertEqual({'2.jpg'}, subset_a_images)

            reloaded = Dataset.load(path)
            compare_datasets_strict(self, expected, reloaded)
Example #3
0
    def test_can_change_sequence(self):
        # Samplers created with seeds 1 and 2 over the same source; the test
        # passes only if the strict comparison raises AssertionError.
        source = self._make_dataset({'a': 7, 'b': 3})

        sampled_with_seed_1 = RandomSampler(source, 5, seed=1)
        sampled_with_seed_2 = RandomSampler(source, 5, seed=2)

        with self.assertRaises(AssertionError):
            compare_datasets_strict(self, sampled_with_seed_1,
                                    sampled_with_seed_2)
Example #4
0
    def test_can_reproduce_sequence(self):
        # Two samplers created with the same seed over the same source are
        # required to pass the strict comparison.
        source = self._make_dataset({'a': 7, 'b': 3})

        shared_seed = 42
        first_sample = RandomSampler(source, 5, seed=shared_seed)
        second_sample = RandomSampler(source, 5, seed=shared_seed)

        compare_datasets_strict(self, first_sample, second_sample)
Example #5
0
    def test_inplace_save_writes_only_updated_data_with_transforms(self):
        # Saves a dataset, applies a filter and a seeded 'random_split'
        # transform, saves again without a path, then checks the directory
        # layout on disk and the reloaded contents.
        with TestDir() as path:

            def listed(*parts):
                # Names found in the directory path/<parts...>, as a set.
                return set(os.listdir(osp.join(path, *parts)))

            item_5_dir = osp.join(path, 'test', '5')
            expected_items = [
                DatasetItem(2, subset='test'),
                DatasetItem(3, subset='train', image=np.ones((2, 2, 3))),
                DatasetItem(4, subset='train', image=np.ones((2, 3, 3))),
                DatasetItem(
                    5,
                    subset='test',
                    point_cloud=osp.join(path, 'point_clouds', 'test',
                                         '5.pcd'),
                    related_images=[
                        Image(data=np.ones((3, 4, 3)),
                              path=osp.join(item_5_dir, 'image_0.jpg')),
                        osp.join(item_5_dir, 'a', '5.png'),
                    ]),
            ]
            expected = Dataset.from_iterable(expected_items)

            initial_items = [
                DatasetItem(1, subset='a'),
                DatasetItem(2, subset='b'),
                DatasetItem(3, subset='c', image=np.ones((2, 2, 3))),
                DatasetItem(4, subset='d', image=np.ones((2, 3, 3))),
                DatasetItem(
                    5,
                    subset='e',
                    point_cloud='5.pcd',
                    related_images=[
                        np.ones((3, 4, 3)),
                        'a/5.png',
                    ]),
            ]
            dataset = Dataset.from_iterable(initial_items)

            dataset.save(path, save_images=True)

            dataset.filter('/item[id >= 2]')
            dataset.transform('random_split',
                              splits=(('train', 0.5), ('test', 0.5)),
                              seed=42)
            dataset.save(save_images=True)

            # Top-level layout of the saved dataset.
            self.assertEqual(
                {'images', 'annotations', 'point_clouds', 'related_images'},
                set(os.listdir(path)))
            self.assertEqual({'train.json', 'test.json'},
                             listed('annotations'))

            # Image folders: 'c' and 'd' are expected to still exist but to
            # be empty, while 'train' holds the two images.
            self.assertEqual({'3.jpg', '4.jpg'}, listed('images', 'train'))
            self.assertEqual({'train', 'c', 'd'}, listed('images'))
            self.assertEqual(set(), listed('images', 'c'))
            self.assertEqual(set(), listed('images', 'd'))

            self.assertEqual({'image_0.jpg'},
                             listed('related_images', 'test', '5'))

            reloaded = Dataset.load(path)
            compare_datasets_strict(self, expected, reloaded)
Example #6
0
    def test_can_sample_when_subset_selected(self):
        # For several requested counts, sampling restricted to subset 'a'
        # must yield min(count, subset size) items there, and subset 'b'
        # must pass the strict comparison with the source's subset 'b'.
        source = self._make_dataset({'a': 7, 'b': 3})

        selected = 'a'
        for requested in (5, 7, 15):
            with self.subTest(k=requested, s=selected):
                actual = RandomSampler(source, requested, subset=selected)

                expected_size = min(requested,
                                    len(source.get_subset(selected)))
                sampled_size = len(actual.get_subset(selected))
                self.assertEqual(expected_size, sampled_size)

                untouched_expected = source.get_subset('b')
                untouched_actual = actual.get_subset('b')
                compare_datasets_strict(self, untouched_expected,
                                        untouched_actual)
Example #7
0
    def test_can_pickle(self):
        # For each listed format name, import the dummy dataset, round-trip
        # it through pickle, and strictly compare the copy with the original.
        format_names = (
            'voc',
            'voc_classification',
            'voc_detection',
            'voc_action',
            'voc_layout',
            'voc_segmentation',
        )

        for fmt in format_names:
            with self.subTest(fmt=fmt):
                source = Dataset.import_from(DUMMY_DATASET_DIR, format=fmt)

                serialized = pickle.dumps(source)
                restored = pickle.loads(serialized) # nosec

                compare_datasets_strict(self, source, restored)
    def _test_save_and_load(self,
                            source_dataset,
                            converter,
                            test_dir,
                            target_dataset=None,
                            importer_args=None):
        """Run a converter, re-import its output, and compare strictly.

        Calls ``converter(source_dataset, test_dir)``, imports ``test_dir``
        through ``Project.import_from`` with the ``'datumaro'`` format and
        turns the result into a dataset via ``make_dataset()``.

        Args:
            source_dataset: dataset handed to ``converter``.
            converter: callable invoked as ``converter(source_dataset,
                test_dir)``.
            test_dir: directory passed to the converter and the importer.
            target_dataset: dataset the re-imported data is compared with;
                ``source_dataset`` is used when this is ``None``.
            importer_args: extra keyword arguments forwarded to
                ``Project.import_from``; none are passed when ``None``.
        """
        converter(source_dataset, test_dir)

        extra_args = {} if importer_args is None else importer_args
        project = Project.import_from(test_dir, 'datumaro', **extra_args)
        parsed_dataset = project.make_dataset()

        reference = (source_dataset
                     if target_dataset is None else target_dataset)

        compare_datasets_strict(self,
                                expected=reference,
                                actual=parsed_dataset)
Example #9
0
    def test_can_change_sequence(self):
        # Label samplers created with seeds 1 and 2 over the same source;
        # the test passes only if the strict comparison raises
        # AssertionError.
        # (item id, subset, label indices) for every source item.
        item_specs = [
            ('1', 'a', (0, 1)),
            ('2', 'a', (1,)),
            ('3', 'a', (2,)),
            ('4', 'a', (1, 2)),
            ('5', 'b', (0,)),
            ('6', 'b', (0, 2)),
            ('7', 'b', (1, 2)),
            ('8', 'b', (2,)),
        ]
        source = Dataset.from_iterable(
            [
                DatasetItem(item_id, subset=subset_name,
                            annotations=[Label(index) for index in labels])
                for item_id, subset_name, labels in item_specs
            ],
            categories=['a', 'b', 'c'])

        sampled_with_seed_1 = LabelRandomSampler(source, count=2, seed=1)
        sampled_with_seed_2 = LabelRandomSampler(source, count=2, seed=2)

        with self.assertRaises(AssertionError):
            compare_datasets_strict(self, sampled_with_seed_1,
                                    sampled_with_seed_2)
Example #10
0
    def test_can_reproduce_sequence(self):
        # Two label samplers created with the same seed over the same source
        # are required to pass the strict comparison.
        # (item id, subset, label indices) for every source item.
        item_specs = [
            ('1', 'a', (0, 1)),
            ('2', 'a', (1,)),
            ('3', 'a', (2,)),
            ('4', 'a', (1, 2)),
            ('5', 'b', (0,)),
            ('6', 'b', (0, 2)),
            ('7', 'b', (1, 2)),
            ('8', 'b', (2,)),
        ]
        source = Dataset.from_iterable(
            [
                DatasetItem(item_id, subset=subset_name,
                            annotations=[Label(index) for index in labels])
                for item_id, subset_name, labels in item_specs
            ],
            categories=['a', 'b', 'c'])

        shared_seed = 42
        first_sample = LabelRandomSampler(source, count=2, seed=shared_seed)
        second_sample = LabelRandomSampler(source, count=2, seed=shared_seed)

        compare_datasets_strict(self, first_sample, second_sample)
    def test_can_pickle(self):
        # Import the dummy dataset with the 'imagenet' format, round-trip it
        # through pickle, and strictly compare the copy with the original.
        source = Dataset.import_from(DUMMY_DATASET_DIR, format='imagenet')

        serialized = pickle.dumps(source)
        restored = pickle.loads(serialized) # nosec

        compare_datasets_strict(self, source, restored)