def test_can_reiterate_sequence(self):
    """Two datasets built from one sampler instance must be identical.

    A single ``LabelRandomSampler`` (no explicit seed) is wrapped twice with
    ``Dataset.from_extractors``; both results are strictly compared.
    """
    # (item id, subset, label indices) for every source item
    item_specs = [
        ('1', 'a', (0, 1)),
        ('2', 'a', (1,)),
        ('3', 'a', (2,)),
        ('4', 'a', (1, 2)),
        ('5', 'b', (0,)),
        ('6', 'b', (0, 2)),
        ('7', 'b', (1, 2)),
        ('8', 'b', (2,)),
    ]
    items = [
        DatasetItem(item_id, subset=subset,
            annotations=[Label(index) for index in label_indices])
        for item_id, subset, label_indices in item_specs
    ]
    source = Dataset.from_iterable(items, categories=['a', 'b', 'c'])

    sampler = LabelRandomSampler(source, count=2)

    # NOTE(review): init_cache() presumably forces a full pass over the
    # sampler for each dataset - confirm against the Dataset API.
    first_pass = Dataset.from_extractors(sampler)
    first_pass.init_cache()

    second_pass = Dataset.from_extractors(sampler)
    second_pass.init_cache()

    compare_datasets_strict(self, first_pass, second_pass)
def test_inplace_save_writes_only_updated_data_with_direct_changes(self):
    """Check the on-disk layout after put/remove followed by an in-place save.

    The dataset is saved once, then an item is added to subset 'a', the only
    item of subset 'c' is removed, and the dataset is saved again without a
    path. The annotation files, the images of subset 'a' and the reloaded
    dataset are then verified.
    """
    expected_items = [
        DatasetItem(1, subset='a'),
        DatasetItem(2, subset='a', image=np.ones((3, 2, 3))),
        DatasetItem(2, subset='b'),
    ]
    expected = Dataset.from_iterable(expected_items)

    with TestDir() as path:
        def dir_entries(*parts):
            # Names found in the directory path/<parts...>, as a set.
            return set(os.listdir(osp.join(path, *parts)))

        # generate initial dataset
        initial_items = [
            # modified subset
            DatasetItem(1, subset='a'),
            # unmodified subset
            DatasetItem(2, subset='b'),
            # removed subset
            DatasetItem(3, subset='c', image=np.ones((2, 2, 3))),
        ]
        dataset = Dataset.from_iterable(initial_items)
        dataset.save(path, save_images=True)

        # apply direct changes, then save in place (no path given)
        added_item = DatasetItem(2, subset='a', image=np.ones((3, 2, 3)))
        dataset.put(added_item)
        dataset.remove(3, 'c')
        dataset.save(save_images=True)

        self.assertEqual({'a.json', 'b.json'}, dir_entries('annotations'))
        self.assertEqual({'2.jpg'}, dir_entries('images', 'a'))
        compare_datasets_strict(self, expected, Dataset.load(path))
def test_can_change_sequence(self):
    """Samplers created with different seeds must not compare equal.

    The strict comparison is expected to raise ``AssertionError``.
    """
    source = self._make_dataset({'a': 7, 'b': 3})

    sampled_with_seed_1 = RandomSampler(source, 5, seed=1)
    sampled_with_seed_2 = RandomSampler(source, 5, seed=2)

    with self.assertRaises(AssertionError):
        compare_datasets_strict(self,
            sampled_with_seed_1, sampled_with_seed_2)
def test_can_reproduce_sequence(self):
    """Two samplers created with the same seed must compare strictly equal."""
    source = self._make_dataset({'a': 7, 'b': 3})
    seed = 42

    # Build both samplers from identical arguments.
    first_sample, second_sample = [
        RandomSampler(source, 5, seed=seed) for _ in range(2)
    ]

    compare_datasets_strict(self, first_sample, second_sample)
def test_inplace_save_writes_only_updated_data_with_transforms(self):
    """Check the on-disk layout after filter + random_split and in-place save.

    The dataset is saved once with subsets 'a'..'e', then filtered to items
    with id >= 2, re-split into 'train'/'test' with a fixed seed, and saved
    again without a path. The directory contents and the reloaded dataset
    are then verified.
    """
    with TestDir() as path:
        def dir_entries(*parts):
            # Names found in the directory path/<parts...>, as a set.
            return set(os.listdir(osp.join(path, *parts)))

        # Path prefix used by the expected related images of item 5.
        related_prefix = osp.join(path, 'test', '5')
        expected_related = [
            Image(data=np.ones((3, 4, 3)),
                path=osp.join(related_prefix, 'image_0.jpg')),
            osp.join(related_prefix, 'a', '5.png'),
        ]
        expected = Dataset.from_iterable([
            DatasetItem(2, subset='test'),
            DatasetItem(3, subset='train', image=np.ones((2, 2, 3))),
            DatasetItem(4, subset='train', image=np.ones((2, 3, 3))),
            DatasetItem(5, subset='test',
                point_cloud=osp.join(path, 'point_clouds', 'test', '5.pcd'),
                related_images=expected_related),
        ])

        initial_items = [
            DatasetItem(1, subset='a'),
            DatasetItem(2, subset='b'),
            DatasetItem(3, subset='c', image=np.ones((2, 2, 3))),
            DatasetItem(4, subset='d', image=np.ones((2, 3, 3))),
            DatasetItem(5, subset='e', point_cloud='5.pcd',
                related_images=[np.ones((3, 4, 3)), 'a/5.png']),
        ]
        dataset = Dataset.from_iterable(initial_items)
        dataset.save(path, save_images=True)

        # Drop item 1, redistribute the rest, then save in place.
        dataset.filter('/item[id >= 2]')
        dataset.transform('random_split',
            splits=(('train', 0.5), ('test', 0.5)), seed=42)
        dataset.save(save_images=True)

        self.assertEqual(
            {'images', 'annotations', 'point_clouds', 'related_images'},
            dir_entries())
        self.assertEqual({'train.json', 'test.json'},
            dir_entries('annotations'))
        self.assertEqual({'3.jpg', '4.jpg'}, dir_entries('images', 'train'))
        # The directories of the old image subsets remain, but are empty.
        self.assertEqual({'train', 'c', 'd'}, dir_entries('images'))
        self.assertEqual(set(), dir_entries('images', 'c'))
        self.assertEqual(set(), dir_entries('images', 'd'))
        self.assertEqual({'image_0.jpg'},
            dir_entries('related_images', 'test', '5'))
        compare_datasets_strict(self, expected, Dataset.load(path))
def test_can_sample_when_subset_selected(self):
    """Sampling restricted to one subset must leave the other subset as is.

    For each requested count, the sampled subset 'a' must hold
    ``min(count, size of 'a')`` items, and subset 'b' must strictly equal
    the source's subset 'b'.
    """
    source = self._make_dataset({'a': 7, 'b': 3})
    sampled_subset = 'a'

    for count in (5, 7, 15):
        with self.subTest(k=count, s=sampled_subset):
            sampled = RandomSampler(source, count, subset=sampled_subset)

            expected_size = min(count,
                len(source.get_subset(sampled_subset)))
            actual_size = len(sampled.get_subset(sampled_subset))
            self.assertEqual(expected_size, actual_size)

            compare_datasets_strict(self,
                source.get_subset('b'), sampled.get_subset('b'))
def test_can_pickle(self):
    """A dataset imported in each listed VOC format must survive pickling.

    Every format is imported from ``DUMMY_DATASET_DIR``, round-tripped
    through ``pickle`` and strictly compared with the original.
    """
    voc_formats = (
        'voc',
        'voc_classification',
        'voc_detection',
        'voc_action',
        'voc_layout',
        'voc_segmentation',
    )

    for fmt in voc_formats:
        with self.subTest(fmt=fmt):
            source = Dataset.import_from(DUMMY_DATASET_DIR, format=fmt)

            payload = pickle.dumps(source)
            restored = pickle.loads(payload) # nosec

            compare_datasets_strict(self, source, restored)
def _test_save_and_load(self, source_dataset, converter, test_dir,
        target_dataset=None, importer_args=None):
    """Convert a dataset, import it back and strictly compare the result.

    Args:
        source_dataset: dataset passed to ``converter``.
        converter: callable invoked as ``converter(source_dataset, test_dir)``.
        test_dir: directory the converter writes to and the import reads from.
        target_dataset: expected dataset; when ``None``, ``source_dataset``
            is used as the expectation.
        importer_args: extra keyword arguments for ``Project.import_from``;
            ``None`` means no extra arguments.
    """
    expected = source_dataset if target_dataset is None else target_dataset
    import_kwargs = {} if importer_args is None else importer_args

    converter(source_dataset, test_dir)

    project = Project.import_from(test_dir, 'datumaro', **import_kwargs)
    parsed_dataset = project.make_dataset()

    compare_datasets_strict(self, expected=expected, actual=parsed_dataset)
def test_can_change_sequence(self):
    """Label samplers created with different seeds must not compare equal.

    The strict comparison is expected to raise ``AssertionError``.
    """
    # (item id, subset, label indices) for every source item
    item_specs = [
        ('1', 'a', (0, 1)),
        ('2', 'a', (1,)),
        ('3', 'a', (2,)),
        ('4', 'a', (1, 2)),
        ('5', 'b', (0,)),
        ('6', 'b', (0, 2)),
        ('7', 'b', (1, 2)),
        ('8', 'b', (2,)),
    ]
    items = [
        DatasetItem(item_id, subset=subset,
            annotations=[Label(index) for index in label_indices])
        for item_id, subset, label_indices in item_specs
    ]
    source = Dataset.from_iterable(items, categories=['a', 'b', 'c'])

    sampled_with_seed_1 = LabelRandomSampler(source, count=2, seed=1)
    sampled_with_seed_2 = LabelRandomSampler(source, count=2, seed=2)

    with self.assertRaises(AssertionError):
        compare_datasets_strict(self,
            sampled_with_seed_1, sampled_with_seed_2)
def test_can_reproduce_sequence(self):
    """Two label samplers created with the same seed must compare equal."""
    # (item id, subset, label indices) for every source item
    item_specs = [
        ('1', 'a', (0, 1)),
        ('2', 'a', (1,)),
        ('3', 'a', (2,)),
        ('4', 'a', (1, 2)),
        ('5', 'b', (0,)),
        ('6', 'b', (0, 2)),
        ('7', 'b', (1, 2)),
        ('8', 'b', (2,)),
    ]
    items = [
        DatasetItem(item_id, subset=subset,
            annotations=[Label(index) for index in label_indices])
        for item_id, subset, label_indices in item_specs
    ]
    source = Dataset.from_iterable(items, categories=['a', 'b', 'c'])

    seed = 42
    # Build both samplers from identical arguments.
    first_sample, second_sample = [
        LabelRandomSampler(source, count=2, seed=seed) for _ in range(2)
    ]

    compare_datasets_strict(self, first_sample, second_sample)
def test_can_pickle(self):
    """A dataset imported in the 'imagenet' format must survive pickling.

    The dataset is imported from ``DUMMY_DATASET_DIR``, round-tripped
    through ``pickle`` and strictly compared with the original.
    """
    source = Dataset.import_from(DUMMY_DATASET_DIR, format='imagenet')

    payload = pickle.dumps(source)
    restored = pickle.loads(payload) # nosec

    compare_datasets_strict(self, source, restored)