def test_can_switch_eager_and_lazy_with_cm_global(self):
    """Entering the global eager_mode() context must force source
    iteration (cache initialization) on dataset creation."""
    touched = {'iterated': False}

    class TestExtractor(Extractor):
        def __iter__(self):
            touched['iterated'] = True
            return iter([DatasetItem(1), DatasetItem(2)])

    with eager_mode():
        Dataset.from_extractors(TestExtractor())

    self.assertTrue(touched['iterated'])
def test_can_chain_lazy_tranforms(self):
    """Stacked transforms must stay lazy until the dataset is queried,
    and must compose (here: two +1 shifts of the item id)."""
    touched = {'iterated': False}

    class TestExtractor(Extractor):
        def __iter__(self):
            touched['iterated'] = True
            return iter(DatasetItem(i) for i in range(1, 5))

    dataset = Dataset.from_extractors(TestExtractor())

    class TestTransform(Transform):
        def transform_item(self, item):
            return self.wrap_item(item, id=int(item.id) + 1)

    # Apply the same transform twice; neither application may iterate.
    for _ in range(2):
        dataset.transform(TestTransform)
    self.assertFalse(touched['iterated'])

    # Querying the dataset forces evaluation: ids 1..4 become 3..6.
    self.assertEqual(4, len(dataset))
    self.assertEqual(3, min(int(item.id) for item in dataset))
    self.assertTrue(touched['iterated'])
def test_can_do_lazy_put_and_remove(self):
    """put() and remove() must not trigger source iteration until the
    cache is explicitly initialized."""
    touched = {'iterated': False}

    class TestExtractor(Extractor):
        def __iter__(self):
            touched['iterated'] = True
            return iter([DatasetItem(1), DatasetItem(2)])

    dataset = Dataset.from_extractors(TestExtractor())
    self.assertFalse(dataset.is_cache_initialized)

    # Local edits are buffered; the source is left untouched.
    dataset.put(DatasetItem(3))
    dataset.remove(DatasetItem(1))
    self.assertFalse(dataset.is_cache_initialized)
    self.assertFalse(touched['iterated'])

    # Only an explicit cache build iterates the source.
    dataset.init_cache()
    self.assertTrue(dataset.is_cache_initialized)
    self.assertTrue(touched['iterated'])
def test_can_join_annotations(self):
    """Merging sources must union the annotations of items that share
    the same (id, subset), dropping exact duplicates."""
    categories = ['a', 'b', 'c', 'd']

    def make_dataset(annotations):
        # All three datasets share one item and one category list.
        return Dataset.from_iterable([
            DatasetItem(id=1, subset='train', annotations=annotations),
        ], categories=categories)

    a = make_dataset([Label(1, id=3), Label(2, attributes={'x': 1})])
    b = make_dataset([Label(2, attributes={'x': 1}), Label(3, id=4)])
    expected = make_dataset([
        Label(1, id=3),
        Label(2, attributes={'x': 1}),
        Label(3, id=4),
    ])

    merged = Dataset.from_extractors(a, b)

    compare_datasets(self, expected, merged)
def _export_recognition(dst_file, instance_data, save_images=False):
    """Export instance_data as an ICDAR word-recognition zip archive.

    :param dst_file: destination archive file/path
    :param instance_data: CVAT task/job data to export
    :param save_images: include image files in the archive
    """
    extractor = GetCVATDataExtractor(instance_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    # The recognition format stores text labels as captions.
    dataset.transform(LabelToCaption)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'icdar_word_recognition',
            save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def test_can_join_datasets(self):
    """Joining two overlapping sources must deduplicate shared items."""
    first = Dataset.from_iterable([DatasetItem(0), DatasetItem(1)])
    second = Dataset.from_iterable([DatasetItem(1), DatasetItem(2)])

    joined = Dataset.from_extractors(first, second)

    # Items 0, 1, 2 — item 1 appears only once.
    self.assertEqual(3, len(joined))
def _export(dst_file, instance_data, save_images=False):
    """Export instance_data as a VGGFace2 zip archive.

    :param dst_file: destination archive file/path
    :param instance_data: CVAT task/job data to export
    :param save_images: include image files in the archive
    """
    extractor = GetCVATDataExtractor(instance_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'vgg_face2', save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def _save_branch_project(self, extractor, save_dir=None):
    """Save the given extractor as a standalone project in save_dir.

    If save_dir is not given, the current project's directory is used
    and the current config (minus project_dir/sources) is inherited.

    :param extractor: the data source to save
    :param save_dir: target directory; defaults to config.project_dir
    :raises ValueError: when neither save_dir nor config.project_dir
        is available
    """
    if not isinstance(extractor, Dataset):
        # apply lazy transforms to avoid repeating traversals
        extractor = Dataset.from_extractors(extractor)

    # NOTE: probably this function should be in the ViewModel layer
    if save_dir:
        dst_project = Project()
    else:
        if not self.config.project_dir:
            raise ValueError("Either a save directory or a project "
                "directory should be specified")
        save_dir = self.config.project_dir

        dst_project = Project(Config(self.config))
        dst_project.config.remove('project_dir')
        dst_project.config.remove('sources')
    # BUGFIX: resolve the path only after the fallback above; the
    # original called osp.abspath(save_dir) first, which raised
    # TypeError for the default save_dir=None instead of falling back
    # to the project directory.
    save_dir = osp.abspath(save_dir)
    dst_project.config.project_name = osp.basename(save_dir)

    dst_dataset = dst_project.make_dataset()
    dst_dataset._categories = extractor.categories()
    dst_dataset.update(extractor)

    dst_dataset.save(save_dir=save_dir, merge=True)
def test_can_do_partial_caching_in_get_when_redefined(self):
    """A custom Extractor.get() must serve single-item queries without
    triggering a full iteration of the source."""
    counters = {'iter': 0, 'get': 0}

    class TestExtractor(Extractor):
        def __iter__(self):
            counters['iter'] += 1
            return iter(DatasetItem(i) for i in range(1, 5))

        def get(self, id, subset=None): #pylint: disable=redefined-builtin
            counters['get'] += 1
            return DatasetItem(id, subset=subset)

    dataset = Dataset.from_extractors(TestExtractor())
    dataset.get(3)
    dataset.get(4)

    # Both lookups went through get(); __iter__ was never called.
    self.assertEqual(0, counters['iter'])
    self.assertEqual(2, counters['get'])
def _export(dst_file, instance_data, save_images=False):
    """Export instance_data as a Market-1501 zip archive.

    :param dst_file: destination archive file/path
    :param instance_data: CVAT task/job data to export
    :param save_images: include image files in the archive
    """
    extractor = GetCVATDataExtractor(instance_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        # Lift the 'market-1501' label attributes onto the items.
        dataset.transform(LabelAttrToAttr, 'market-1501')
        dataset.export(tmp_dir, 'market1501', save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def _export(dst_file, task_data, save_images=False):
    """Export task_data as a WIDER Face zip archive.

    :param dst_file: destination archive file/path
    :param task_data: CVAT task data to export
    :param save_images: include image files in the archive
    """
    extractor = CvatTaskDataExtractor(task_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'wider_face', save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def _export(dst_file, instance_data, save_images=False):
    """Export instance_data in the native Datumaro format as a zip.

    :param dst_file: destination archive file/path
    :param instance_data: CVAT task/job data to export
    :param save_images: include image files in the archive
    """
    extractor = GetCVATDataExtractor(instance_data=instance_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    if not save_images:
        # Strip image paths when the images themselves are not exported.
        dataset.transform(DeleteImagePath)
    with TemporaryDirectory() as temp_dir:
        dataset.export(temp_dir, 'datumaro', save_images=save_images)
        make_zip_archive(temp_dir, dst_file)
def _export_images(dst_file, task_data, save_images=False):
    """Export task_data as a Supervisely point-cloud (3D) zip archive.

    :param dst_file: destination archive file/path
    :param task_data: CVAT task data to export
    :param save_images: include point-cloud/image files in the archive
    """
    extractor = GetCVATDataExtractor(task_data,
        include_images=save_images,
        format_type='sly_pointcloud', dimension=DimensionType.DIM_3D)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'sly_pointcloud',
            save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def _export_segmentation(dst_file, task_data, save_images=False):
    """Export task_data as an ICDAR text-segmentation zip archive.

    :param dst_file: destination archive file/path
    :param task_data: CVAT task data to export
    :param save_images: include image files in the archive
    """
    extractor = CvatTaskDataExtractor(task_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        # The format is mask-based: rasterize all shapes, then merge
        # per-instance segments.
        for name in ('polygons_to_masks', 'boxes_to_masks',
                'merge_instance_segments'):
            dataset.transform(name)
        dataset.export(tmp_dir, 'icdar_text_segmentation',
            save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def test_can_join_datasets(self):
    """The union of two overlapping sources contains each item once."""
    first = Dataset.from_iterable([DatasetItem(0), DatasetItem(1)])
    second = Dataset.from_iterable([DatasetItem(1), DatasetItem(2)])
    expected = Dataset.from_iterable(
        [DatasetItem(i) for i in range(3)])

    actual = Dataset.from_extractors(first, second)

    compare_datasets(self, expected, actual)
def _export(dst_file, task_data, save_images=False):
    """Export task_data as a COCO instances zip archive.

    :param dst_file: destination archive file/path
    :param task_data: CVAT task data to export
    :param save_images: include image files in the archive
    """
    extractor = CvatTaskDataExtractor(task_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'coco_instances',
            save_images=save_images, merge_images=True)
        make_zip_archive(tmp_dir, dst_file)
def _export(dst_file, instance_data, save_images=False):
    """Export instance_data as a VOC segmentation zip archive.

    :param dst_file: destination archive file/path
    :param instance_data: CVAT task/job data to export
    :param save_images: include image files in the archive
    """
    extractor = GetCVATDataExtractor(instance_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    # VOC segmentation is mask-based: normalize rotated boxes to
    # polygons, rasterize all shapes, then merge per-instance segments.
    dataset.transform(RotatedBoxesToPolygons)
    for name in ('polygons_to_masks', 'boxes_to_masks',
            'merge_instance_segments'):
        dataset.transform(name)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'voc_segmentation',
            save_images=save_images, apply_colormap=True,
            label_map=make_colormap(instance_data))
        make_zip_archive(tmp_dir, dst_file)
def _export(dst_file, instance_data, save_images=False):
    """Export instance_data as a MOTS PNG zip archive.

    :param dst_file: destination archive file/path
    :param instance_data: CVAT task/job data to export
    :param save_images: include image files in the archive
    """
    extractor = GetCVATDataExtractor(instance_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    dataset.transform(KeepTracks)  # can only export tracks
    # Rasterize all shapes and merge per-instance segments.
    for name in ('polygons_to_masks', 'boxes_to_masks',
            'merge_instance_segments'):
        dataset.transform(name)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'mots_png', save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def _export(dst_file, task_data, save_images=False):
    """Export task_data as an Open Images zip archive.

    :param dst_file: destination archive file/path
    :param task_data: CVAT task data to export
    :param save_images: include image files in the archive
    """
    extractor = GetCVATDataExtractor(task_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    # Normalize rotated boxes, rasterize polygons, merge segments.
    dataset.transform(RotatedBoxesToPolygons)
    for name in ('polygons_to_masks', 'merge_instance_segments'):
        dataset.transform(name)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'open_images', save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def _export_images(dst_file, task_data, save_images=False):
    """Export task_data as a KITTI raw (3D) zip archive.

    :param dst_file: destination archive file/path
    :param task_data: CVAT task data to export
    :param save_images: include point-cloud/image files in the archive
    """
    extractor = GetCVATDataExtractor(task_data,
        include_images=save_images,
        format_type="kitti_raw", dimension=DimensionType.DIM_3D)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        dataset.export(tmp_dir, 'kitti_raw',
            save_images=save_images, reindex=True)
        make_zip_archive(tmp_dir, dst_file)
def test_can_do_lazy_get_on_updated_item(self):
    """A membership check on a locally put item must not iterate the
    source."""
    touched = {'iterated': False}

    class TestExtractor(Extractor):
        def __iter__(self):
            touched['iterated'] = True
            return iter([DatasetItem(1), DatasetItem(2)])

    dataset = Dataset.from_extractors(TestExtractor())
    dataset.put(DatasetItem(2))

    # (id, subset) lookup is satisfied by the local patch alone.
    self.assertTrue((2, '') in dataset)
    self.assertFalse(touched['iterated'])
def _export(dst_file, instance_data, save_images=False):
    """Export instance_data as a CamVid zip archive.

    :param dst_file: destination archive file/path
    :param instance_data: CVAT task/job data to export
    :param save_images: include image files in the archive
    """
    extractor = GetCVATDataExtractor(instance_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    # Rasterize all shapes and merge per-instance segments.
    for name in ('polygons_to_masks', 'boxes_to_masks',
            'merge_instance_segments'):
        dataset.transform(name)
    colormap = make_colormap(instance_data)
    with TemporaryDirectory() as tmp_dir:
        # CamVid wants only the primary color per label.
        dataset.export(tmp_dir, 'camvid',
            save_images=save_images, apply_colormap=True,
            label_map={name: colors[0]
                for name, colors in colormap.items()})
        make_zip_archive(tmp_dir, dst_file)
def _export(dst_file, instance_data, save_images=False):
    """Export instance_data as a KITTI zip archive.

    :param dst_file: destination archive file/path
    :param instance_data: CVAT task/job data to export
    :param save_images: include image files in the archive
    """
    extractor = GetCVATDataExtractor(instance_data,
        include_images=save_images)
    dataset = Dataset.from_extractors(extractor, env=dm_env)
    with TemporaryDirectory() as tmp_dir:
        # Normalize rotated boxes, rasterize polygons, merge segments.
        dataset.transform(RotatedBoxesToPolygons)
        dataset.transform('polygons_to_masks')
        dataset.transform('merge_instance_segments')
        colormap = make_colormap(instance_data)
        # KITTI wants only the primary color per label.
        dataset.export(tmp_dir, format='kitti',
            label_map={name: colors[0]
                for name, colors in colormap.items()},
            apply_colormap=True, save_images=save_images)
        make_zip_archive(tmp_dir, dst_file)
def test_create_from_extractors(self):
    """from_extractors must merge items across sources, joining the
    annotations of items that share the same (id, subset)."""

    class SrcExtractor1(Extractor):
        def __iter__(self):
            yield DatasetItem(id=1, subset='train', annotations=[
                Bbox(1, 2, 3, 4),
                Label(4),
            ])
            yield DatasetItem(id=1, subset='val', annotations=[
                Label(4),
            ])

    class SrcExtractor2(Extractor):
        def __iter__(self):
            yield DatasetItem(id=1, subset='val', annotations=[
                Label(5),
            ])

    class DstExtractor(Extractor):
        def __iter__(self):
            # The 'val' item accumulates annotations from both sources.
            yield DatasetItem(id=1, subset='train', annotations=[
                Bbox(1, 2, 3, 4),
                Label(4),
            ])
            yield DatasetItem(id=1, subset='val', annotations=[
                Label(4),
                Label(5),
            ])

    dataset = Dataset.from_extractors(SrcExtractor1(), SrcExtractor2())

    compare_datasets(self, DstExtractor(), dataset)
def test_cant_do_partial_caching_in_get_when_default(self):
    """Without a custom get(), item queries fall back to exactly one
    full iteration of the source."""
    counters = {'iter': 0}

    class TestExtractor(Extractor):
        def __iter__(self):
            counters['iter'] += 1
            return iter(DatasetItem(i) for i in range(1, 5))

    dataset = Dataset.from_extractors(TestExtractor())
    dataset.get(3)
    dataset.get(4)

    # The second get() is served from the cache built by the first.
    self.assertEqual(1, counters['iter'])
def test_can_switch_eager_and_lazy_with_cm_local(self):
    """eager_mode(dataset=...) must apply transforms on that dataset
    immediately instead of deferring them."""
    touched = {'iterated': False}

    class TestExtractor(Extractor):
        def __iter__(self):
            touched['iterated'] = True
            return iter(DatasetItem(i) for i in range(1, 5))

    dataset = Dataset.from_extractors(TestExtractor())

    with eager_mode(dataset=dataset):
        dataset.select(lambda item: int(item.id) < 3)
        dataset.select(lambda item: int(item.id) < 2)

    self.assertTrue(touched['iterated'])
def test_can_do_lazy_select(self):
    """Stacked select() filters stay lazy until the dataset is queried,
    then compose (1..4 -> ids < 3 -> ids < 2 leaves one item)."""
    touched = {'iterated': False}

    class TestExtractor(Extractor):
        def __iter__(self):
            touched['iterated'] = True
            return iter(DatasetItem(i) for i in range(1, 5))

    dataset = Dataset.from_extractors(TestExtractor())
    dataset.select(lambda item: int(item.id) < 3)
    dataset.select(lambda item: int(item.id) < 2)
    self.assertFalse(touched['iterated'])

    self.assertEqual(1, len(dataset))
    self.assertTrue(touched['iterated'])
def export(self, save_dir: str, format, \
        **kwargs): #pylint: disable=redefined-builtin
    """Export this extractor to save_dir in the given format.

    Extra keyword arguments are forwarded to the format's converter.
    """
    # Wrap self in a Dataset to reuse its export machinery.
    wrapper = Dataset.from_extractors(self, env=self.env)
    wrapper.export(save_dir, format, **kwargs)
def test_can_save_and_load(self):
    """Round-trip a dataset through the Open Images converter and
    check the converter's normalizations (default scores, group
    reassignment, autogenerated mask box ids) on reload."""
    source_dataset = Dataset.from_iterable( [ DatasetItem(id='a', subset='train', annotations=[Label(0, attributes={'score': 0.7})]), DatasetItem(id='b', subset='train', image=np.zeros((8, 8, 3)), annotations=[ Label(1), Label(2, attributes={'score': 0}), Bbox(label=0, x=4, y=3, w=2, h=3), Bbox(label=1, x=2, y=3, w=6, h=1, group=1, attributes={ 'score': 0.7, 'occluded': True, 'truncated': False, 'is_group_of': True, 'is_depiction': False, 'is_inside': False, }), Mask(label=0, image=np.eye(8)), Mask(label=1, image=np.ones((8, 8)), group=1, attributes={ 'box_id': '00000000', 'predicted_iou': 0.75, }), ]), ], categories={ AnnotationType.label: LabelCategories.from_iterable([ '/m/0', ('/m/1', '/m/0'), '/m/2', ]), })
    # The expected result is the source with item 'b' replaced by its
    # normalized form.
    expected_dataset = Dataset.from_extractors(source_dataset)
    expected_dataset.put( DatasetItem( id='b', subset='train', image=np.zeros((8, 8, 3)), annotations=[ # the converter assumes that annotations without a score # have a score of 100% Label(1, attributes={'score': 1}), Label(2, attributes={'score': 0}), # Box group numbers are reassigned sequentially. Bbox(label=0, x=4, y=3, w=2, h=3, group=1, attributes={'score': 1}), Bbox(label=1, x=2, y=3, w=6, h=1, group=2, attributes={ 'score': 0.7, 'occluded': True, 'truncated': False, 'is_group_of': True, 'is_depiction': False, 'is_inside': False, }), # Box IDs are autogenerated for masks that don't have them. # Group numbers are assigned to match the corresponding boxes, # if any. Mask(label=0, image=np.eye(8), attributes={ 'box_id': '00000001', }), Mask(label=1, image=np.ones((8, 8)), group=2, attributes={ 'box_id': '00000000', 'predicted_iou': 0.75, }), ]), )
    with TestDir() as test_dir: OpenImagesConverter.convert(source_dataset, test_dir, save_images=True); parsed_dataset = Dataset.import_from(test_dir, 'open_images'); compare_datasets(self, expected_dataset, parsed_dataset, require_images=True)
def test_cant_join_different_categories(self):
    """Joining sources whose category lists disagree must fail."""
    first = Dataset.from_iterable([], categories=['a', 'b'])
    second = Dataset.from_iterable([], categories=['b', 'a'])

    with self.assertRaisesRegex(Exception, "different categories"):
        Dataset.from_extractors(first, second)