def is_project_path(value):
    """Return True if *value* is a path that loads as a Datumaro project.

    Any load failure (or a falsy value) is treated as "not a project".
    """
    if not value:
        return False
    try:
        Project.load(value)
    except Exception:
        # Best-effort probe: any failure simply means "not a project path".
        return False
    return True
def main(args=None):
    """CLI entry point: parse arguments, resolve the target, and dispatch.

    Returns the result of process_command(), or 1 when the target string
    cannot be classified.
    """
    parser = build_parser()
    args = parser.parse_args(args)
    # If the supplied directory actually is a project, load it so the
    # target selectors below can resolve names relative to it.
    project_path = args.project_dir
    if is_project_path(project_path):
        project = Project.load(project_path)
    else:
        project = None
    try:
        # Classify the free-form target string into one of the known kinds
        # (project / source / external dataset / image).
        args.target = target_selector(
            ProjectTarget(is_default=True, project=project),
            SourceTarget(project=project),
            ExternalDatasetTarget(),
            ImageTarget()
        )(args.target)
        # When the target itself is a project path, re-root project_dir on it
        # so downstream commands operate on that project.
        if args.target[0] == TargetKinds.project:
            if is_project_path(args.target[1]):
                args.project_dir = osp.dirname(osp.abspath(args.target[1]))
    except argparse.ArgumentTypeError as e:
        # Bad target: report, show usage, signal failure to the shell.
        print(e)
        parser.print_help()
        return 1
    return process_command(args.target, args.params, args)
def test_can_do_transform_with_custom_model(self):
    """End-to-end check: a custom launcher + converter pair can be applied
    to a dataset, and the produced files load back with the expected labels.
    """
    # Source dataset: two items whose image pixels encode the item index.
    class TestExtractorSrc(Extractor):
        def __iter__(self):
            for i in range(2):
                yield DatasetItem(id=i, image=np.ones([2, 2, 3]) * i,
                    annotations=[Label(i)])

    # "Model" that reads a label back out of the first pixel value.
    class TestLauncher(Launcher):
        def launch(self, inputs):
            for inp in inputs:
                yield [Label(inp[0, 0, 0])]

    # Converter writes one text file per item containing the label index.
    class TestConverter(Converter):
        def __call__(self, extractor, save_dir):
            for item in extractor:
                with open(osp.join(save_dir, '%s.txt' % item.id), 'w') as f:
                    f.write(str(item.annotations[0].label) + '\n')

    # Extractor that reads the converter's output files back in.
    class TestExtractorDst(Extractor):
        def __init__(self, url):
            super().__init__()
            self.items = [osp.join(url, p) for p in sorted(os.listdir(url))]

        def __iter__(self):
            for path in self.items:
                with open(path, 'r') as f:
                    index = osp.splitext(osp.basename(path))[0]
                    label = int(f.readline().strip())
                    yield DatasetItem(id=index, annotations=[Label(label)])

    model_name = 'model'
    launcher_name = 'custom_launcher'
    extractor_name = 'custom_extractor'

    # Wire the custom plugins into a fresh project.
    project = Project()
    project.env.launchers.register(launcher_name, TestLauncher)
    project.env.extractors.register(extractor_name, TestExtractorSrc)
    project.env.converters.register(extractor_name, TestConverter)
    project.add_model(model_name, {'launcher': launcher_name})
    project.add_source('source', {'format': extractor_name})

    with TestDir() as test_dir:
        # Run inference and save results via the registered converter.
        project.make_dataset().apply_model(model=model_name,
            save_dir=test_dir)

        # Load the result back with the destination extractor and verify
        # the labels round-tripped unchanged.
        result = Project.load(test_dir)
        result.env.extractors.register(extractor_name, TestExtractorDst)
        it = iter(result.make_dataset())
        item1 = next(it)
        item2 = next(it)
        self.assertEqual(0, item1.annotations[0].label)
        self.assertEqual(1, item2.annotations[0].label)
def test_can_save_and_load_own_dataset(self):
    """A dataset saved to disk must round-trip through Project.load."""
    with TestDir() as save_path:
        original = Project().make_dataset()
        original.put(DatasetItem(id=1))
        original.save(save_path)

        restored = Project.load(save_path).make_dataset()

        self.assertEqual(list(original), list(restored))
def loadDatasetFromProjFolder(self):
    """Load every project folder in self.projectsPathListFromDataset and
    wrap each resulting dataset in customDataset, storing them on
    self.datasets.

    Returns:
        self, to allow call chaining.
    """
    # Lazily trigger the import step if the folder list was not built yet.
    if not hasattr(self, 'projectsPathListFromDataset'):
        self.importDataset(ImportArg())
    # Fixed: removed the redundant list-copy comprehension and the two
    # intermediate lists; load and wrap each project in a single pass.
    self.datasets = [
        customDataset(Project.load(folder).make_dataset())
        for folder in self.projectsPathListFromDataset
    ]
    return self
def test_can_dump_added_model(self):
    """A model registered in a project survives a save/load round trip."""
    expected = Model({'launcher': 'name'})
    project = Project()
    project.add_model('model', expected)

    with TestDir() as test_dir:
        project.save(test_dir)

        actual = Project.load(test_dir).get_model('model')
        self.assertEqual(expected, actual)
def test_added_source_can_be_dumped(self):
    """A source added to a project is preserved across save/load."""
    expected = Source({
        'url': 'path',
    })
    project = Project()
    project.add_source('source', expected)

    with TestDir() as test_dir:
        project.save(test_dir)

        actual = Project.load(test_dir).get_source('source')
        self.assertEqual(expected, actual)
def _remote_export(self, save_dir, server_url=None):
    """Export the task dataset as a self-contained Datumaro project bundle
    whose images are referenced remotely (not copied into the archive).

    Side effects: writes the dataset, a remote-image source, templates,
    the datumaro component, and the CVAT CLI module under save_dir.
    """
    if self._dataset is None:
        self._init_dataset()
    os.makedirs(save_dir, exist_ok=True)
    # Annotations only; image payloads are replaced by remote references.
    self._dataset.save(save_dir=save_dir, save_images=False, merge=True)

    # Re-open what we just wrote and attach a source that pulls images
    # from the CVAT server at load time.
    exported_project = Project.load(save_dir)
    source_name = 'task_%s_images' % self._db_task.id
    exported_project.add_source(source_name, {
        'format': _TASK_IMAGES_REMOTE_EXTRACTOR,
    })
    self._remote_image_converter(
        osp.join(save_dir, exported_project.local_source_dir(source_name)),
        server_url=server_url)
    exported_project.save()

    # Ship a README alongside the exported project.
    templates_dir = osp.join(osp.dirname(__file__), 'export_templates')
    target_dir = exported_project.config.project_dir
    os.makedirs(target_dir, exist_ok=True)
    shutil.copyfile(
        osp.join(templates_dir, 'README.md'),
        osp.join(target_dir, 'README.md'))

    # Install the remote-image extractor plugin into the project's
    # plugins directory so the export is loadable on the user's side.
    templates_dir = osp.join(templates_dir, 'plugins')
    target_dir = osp.join(target_dir,
        exported_project.config.env_dir,
        exported_project.config.plugins_dir)
    os.makedirs(target_dir, exist_ok=True)
    shutil.copyfile(
        osp.join(templates_dir, _TASK_IMAGES_REMOTE_EXTRACTOR + '.py'),
        osp.join(target_dir, _TASK_IMAGES_REMOTE_EXTRACTOR + '.py'))

    # NOTE: bundle the datumaro component into the archive so that it is
    # available to the user; compiled/bytecode artifacts are skipped.
    shutil.copytree(_DATUMARO_REPO_PATH, osp.join(save_dir, 'datumaro'),
        ignore=lambda src, names: ['__pycache__'] + [
            n for n in names
            if sum([int(n.endswith(ext)) for ext in
                ['.pyx', '.pyo', '.pyd', '.pyc']])
        ])

    # Include the CVAT CLI module as well.
    cvat_utils_dst_dir = osp.join(save_dir, 'cvat', 'utils')
    os.makedirs(cvat_utils_dst_dir)
    shutil.copytree(osp.join(_CVAT_ROOT_DIR, 'utils', 'cli'),
        osp.join(cvat_utils_dst_dir, 'cli'))
def _export(self, instance_data, save_dir, save_images=False):
    """Export a CVAT project or task as a Datumaro project under save_dir.

    When save_images is False, images are not embedded; instead a remote
    source plus its extractor plugin are added so they can be fetched later.
    """
    dataset = GetCVATDataExtractor(instance_data, include_images=save_images)
    # instance_data may wrap either a project or a single task.
    db_instance = instance_data.db_project if isinstance(
        instance_data, ProjectData) else instance_data.db_task
    dm_env.converters.get('datumaro_project').convert(dataset,
        save_dir=save_dir, save_images=save_images, project_config={
            'project_name': db_instance.name,
        })

    project = Project.load(save_dir)
    target_dir = project.config.project_dir
    os.makedirs(target_dir, exist_ok=True)
    shutil.copyfile(osp.join(self._TEMPLATES_DIR, 'README.md'),
        osp.join(target_dir, 'README.md'))

    if not save_images:
        # Add remote links to images instead of embedded payloads.
        source_name = '{}_{}_images'.format(
            'project' if isinstance(instance_data, ProjectData) else 'task',
            db_instance.id,
        )
        project.add_source(source_name, {
            'format': self._REMOTE_IMAGES_EXTRACTOR,
        })
        self._save_image_info(
            osp.join(save_dir, project.local_source_dir(source_name)),
            instance_data)
        project.save()

        # Install the remote-images extractor plugin so the exported
        # project can resolve the source above when loaded by the user.
        templates_dir = osp.join(self._TEMPLATES_DIR, 'plugins')
        target_dir = osp.join(project.config.project_dir,
            project.config.env_dir, project.config.plugins_dir)
        os.makedirs(target_dir, exist_ok=True)
        shutil.copyfile(
            osp.join(templates_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'),
            osp.join(target_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'))

    # Make CVAT CLI module available to the user.
    cvat_utils_dst_dir = osp.join(save_dir, 'cvat', 'utils')
    os.makedirs(cvat_utils_dst_dir)
    shutil.copytree(osp.join(BASE_DIR, 'utils', 'cli'),
        osp.join(cvat_utils_dst_dir, 'cli'))
def reindex(dataset_path, project_dir, start_index):
    """datum project transform -p {project_dir} -t reindex -- -s {start_index}

    Applies the 'reindex' transform to the project's dataset, writing the
    result to a temporary project directory that is queued for cleanup.
    Returns (temporary project dir, dataset length).
    """
    print(f"Reindexing {dataset_path}")
    dataset = Project.load(project_dir).make_dataset()

    out_dir = project_path(dataset_path, ".reindexed")
    cleanup_paths.append(out_dir)

    transform = dataset.env.transforms.get("reindex")
    dataset.transform_project(method=transform, save_dir=out_dir,
        start=start_index)
    return out_dir, len(dataset)
def test_project_generate(self):
    """Project.generate must create a directory whose config matches the
    supplied source config."""
    expected = Config({
        'project_name': 'test_project',
        'format_version': 1,
    })

    with TestDir() as test_dir:
        Project.generate(test_dir, expected)

        self.assertTrue(osp.isdir(test_dir))

        actual = Project.load(test_dir).config
        self.assertEqual(expected.project_name, actual.project_name)
        self.assertEqual(expected.format_version, actual.format_version)
def _export(self, task_data, save_dir, save_images=False):
    """Export a CVAT task as a Datumaro project under save_dir.

    When save_images is False, images are referenced remotely via an
    extra source and its extractor plugin instead of being embedded.
    """
    dataset = CvatTaskDataExtractor(task_data, include_images=save_images)
    converter = dm_env.make_converter('datumaro_project',
        save_images=save_images,
        config={
            'project_name': task_data.db_task.name,
        }
    )
    converter(dataset, save_dir=save_dir)

    project = Project.load(save_dir)
    target_dir = project.config.project_dir
    os.makedirs(target_dir, exist_ok=True)
    shutil.copyfile(
        osp.join(self._TEMPLATES_DIR, 'README.md'),
        osp.join(target_dir, 'README.md'))

    if not save_images:
        # Add remote links to images instead of embedded payloads.
        source_name = 'task_%s_images' % task_data.db_task.id
        project.add_source(source_name, {
            'format': self._REMOTE_IMAGES_EXTRACTOR,
        })
        self._save_image_info(
            osp.join(save_dir, project.local_source_dir(source_name)),
            task_data)
        project.save()

        # Install the remote-images extractor plugin so the exported
        # project can resolve the source above when loaded by the user.
        templates_dir = osp.join(self._TEMPLATES_DIR, 'plugins')
        target_dir = osp.join(project.config.project_dir,
            project.config.env_dir, project.config.plugins_dir)
        os.makedirs(target_dir, exist_ok=True)
        shutil.copyfile(
            osp.join(templates_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'),
            osp.join(target_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'))

    # Make Datumaro and CVAT CLI modules available to the user;
    # compiled/bytecode artifacts are skipped from the copy.
    shutil.copytree(DATUMARO_PATH, osp.join(save_dir, 'datumaro'),
        ignore=lambda src, names: ['__pycache__'] + [
            n for n in names
            if sum([int(n.endswith(ext)) for ext in
                ['.pyx', '.pyo', '.pyd', '.pyc']])
        ])
    cvat_utils_dst_dir = osp.join(save_dir, 'cvat', 'utils')
    os.makedirs(cvat_utils_dst_dir)
    shutil.copytree(osp.join(BASE_DIR, 'utils', 'cli'),
        osp.join(cvat_utils_dst_dir, 'cli'))
def merge(cleaned_datasets, output, save_images=False):
    """datum merge -o {output} {project_dirs}

    Merge the given project directories with IntersectMerge and save the
    combined dataset into *output*.
    """
    print(f"Merging datasets to {output}/")
    sources = [Project.load(path).make_dataset() for path in cleaned_datasets]

    # Perform the intersection merge with fixed matching parameters.
    merger = IntersectMerge(conf=IntersectMerge.Conf(
        pairwise_dist=0.25,
        groups=[],
        output_conf_thresh=0.0,
        quorum=0,
    ))
    merged = merger(sources)

    # Re-host the merged items in a fresh project's dataset so the merged
    # categories are defined on the output.
    destination = Project().make_dataset()
    destination.define_categories(merged.categories())
    result = destination.update(merged)
    result.save(save_dir=Path(output), save_images=save_images)
def filter_empty_frames(dataset_path, project_dir):
    """datum project filter -p {project_dir} -m i+a '//*'

    Drop frames without annotations. Returns the filtered project dir,
    or None when no annotated frames remain.
    """
    print(f"Removing empty frames from {dataset_path}")
    dataset = Project.load(project_dir).make_dataset()

    out_dir = project_path(dataset_path, ".filtered")
    cleanup_paths.append(out_dir)

    dataset.filter_project(
        save_dir=out_dir,
        filter_expr="//*",
        filter_annotations=True,
        remove_empty=True,
    )

    # Detect datasets that became completely empty after filtering.
    annotations_file = out_dir.joinpath("dataset", "annotations",
        "default.json")
    if not annotations_file.exists():
        print(f"No annotated frames found in {dataset_path}, dropping dataset")
        return None
    return out_dir
def _load(self):
    """Load the backing project and register the task-image extractor,
    which serves frames via the CVAT FrameProvider rather than from disk."""
    self._project = Project.load(self._project_dir)

    def make_images_extractor(url):
        # Frames are streamed from the task's data through FrameProvider.
        return CvatImagesExtractor(url, FrameProvider(self._db_task.data))

    self._project.env.extractors.register(_TASK_IMAGES_EXTRACTOR,
        make_images_extractor)
def load_project(project_dir):
    """Open and return the Datumaro project stored under *project_dir*."""
    return Project.load(project_dir)
def load_project(project_dir, project_filename=None):
    """Open a Datumaro project.

    When *project_filename* is given, it is joined onto *project_dir*
    before loading; otherwise the directory itself is loaded.
    """
    path = (osp.join(project_dir, project_filename) if project_filename
        else project_dir)
    return Project.load(path)
def test_can_do_transform_with_custom_model(self):
    """End-to-end check: a custom launcher + converter pair transforms a
    dataset, and the written files load back with subsets and labels intact.
    """
    # Source dataset: n items in the 'train' subset; item image is just
    # the index, which the launcher echoes back as the label.
    class TestExtractorSrc(Extractor):
        def __init__(self, url, n=2):
            super().__init__(length=n)
            self.n = n

        def __iter__(self):
            for i in range(self.n):
                yield DatasetItem(id=i, subset='train', image=i,
                    annotations=[LabelObject(i)])

        def subsets(self):
            return ['train']

    # "Model" that turns each input value directly into a label.
    class TestLauncher(Launcher):
        def __init__(self, **kwargs):
            pass

        def launch(self, inputs):
            for inp in inputs:
                yield [LabelObject(inp)]

    # Converter writes one text file per item: subset line, then label line.
    class TestConverter(Converter):
        def __call__(self, extractor, save_dir):
            for item in extractor:
                with open(osp.join(save_dir, '%s.txt' % item.id), 'w+') as f:
                    f.write(str(item.subset) + '\n')
                    f.write(str(item.annotations[0].label) + '\n')

    # Extractor that reads the converter's files back into dataset items.
    class TestExtractorDst(Extractor):
        def __init__(self, url):
            super().__init__()
            self.items = [osp.join(url, p) for p in sorted(os.listdir(url))]

        def __iter__(self):
            for path in self.items:
                with open(path, 'r') as f:
                    index = osp.splitext(osp.basename(path))[0]
                    subset = f.readline()[:-1]
                    label = int(f.readline()[:-1])
                    assert (subset == 'train')
                    yield DatasetItem(id=index, subset=subset,
                        annotations=[LabelObject(label)])

        def __len__(self):
            return len(self.items)

        def subsets(self):
            return ['train']

    model_name = 'model'
    launcher_name = 'custom_launcher'
    extractor_name = 'custom_extractor'

    # Wire the custom plugins into a fresh project.
    project = Project()
    project.env.launchers.register(launcher_name, TestLauncher)
    project.env.extractors.register(extractor_name, TestExtractorSrc)
    project.env.converters.register(extractor_name, TestConverter)
    project.add_model(model_name, {'launcher': launcher_name})
    project.add_source('source', {'format': extractor_name})

    with TestDir() as test_dir:
        # Run the model-based transform, writing via the converter.
        project.make_dataset().transform(model_name, test_dir.path)

        # Reload with the destination extractor and verify the labels
        # round-tripped unchanged.
        result = Project.load(test_dir.path)
        result.env.extractors.register(extractor_name, TestExtractorDst)
        it = iter(result.make_dataset())
        item1 = next(it)
        item2 = next(it)
        self.assertEqual(0, item1.annotations[0].label)
        self.assertEqual(1, item2.annotations[0].label)
def explain_command(args):
    """Run RISE inference explanation on an image, a source, or a project,
    optionally displaying and/or saving the resulting heatmaps.

    Returns 0 on success; raises NotImplementedError for unsupported
    algorithms or target kinds.
    """
    # Resolve the target the same way the main CLI does: load the project
    # if the given directory is one, then classify the target string.
    project_path = args.project_dir
    if is_project_path(project_path):
        project = Project.load(project_path)
    else:
        project = None
    args.target = target_selector(
        ProjectTarget(is_default=True, project=project),
        SourceTarget(project=project),
        ImageTarget())(args.target)
    if args.target[0] == TargetKinds.project:
        if is_project_path(args.target[1]):
            args.project_dir = osp.dirname(osp.abspath(args.target[1]))

    # Heavy visualization deps are imported lazily, only for this command.
    import cv2
    from matplotlib import cm

    project = load_project(args.project_dir)

    model = project.make_executable_model(args.model)

    # Only the RISE algorithm is supported here.
    if str(args.algorithm).lower() != 'rise':
        raise NotImplementedError()

    from datumaro.components.algorithms.rise import RISE
    rise = RISE(model,
        max_samples=args.max_samples,
        mask_width=args.mask_width,
        mask_height=args.mask_height,
        prob=args.prob,
        iou_thresh=args.iou_thresh,
        nms_thresh=args.nms_iou_thresh,
        det_conf_thresh=args.det_conf_thresh,
        batch_size=args.batch_size)

    if args.target[0] == TargetKinds.image:
        # Single-image mode.
        image_path = args.target[1]
        image = load_image(image_path)

        log.info("Running inference explanation for '%s'" % image_path)
        heatmap_iter = rise.apply(image, progressive=args.display)

        # Normalize to [0, 1] for blending with the colormapped heatmap.
        image = image / 255.0
        file_name = osp.splitext(osp.basename(image_path))[0]
        if args.display:
            # Progressive mode: show intermediate heatmaps as they refine.
            for i, heatmaps in enumerate(heatmap_iter):
                for j, heatmap in enumerate(heatmaps):
                    hm_painted = cm.jet(heatmap)[:, :, 2::-1]
                    disp = (image + hm_painted) / 2
                    cv2.imshow('heatmap-%s' % j, hm_painted)
                    cv2.imshow(file_name + '-heatmap-%s' % j, disp)
                    cv2.waitKey(10)
                print("Iter", i, "of", args.max_samples, end='\r')
        else:
            heatmaps = next(heatmap_iter)

        if args.save_dir is not None:
            log.info("Saving inference heatmaps at '%s'" % args.save_dir)
            os.makedirs(args.save_dir, exist_ok=True)

            for j, heatmap in enumerate(heatmaps):
                save_path = osp.join(args.save_dir,
                    file_name + '-heatmap-%s.png' % j)
                save_image(save_path, heatmap * 255.0)
        else:
            for j, heatmap in enumerate(heatmaps):
                disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
                cv2.imshow(file_name + '-heatmap-%s' % j, disp)
            cv2.waitKey(0)
    elif args.target[0] == TargetKinds.source or \
            args.target[0] == TargetKinds.project:
        # Dataset mode: explain every item of a source or the whole project.
        if args.target[0] == TargetKinds.source:
            source_name = args.target[1]
            dataset = project.make_source_project(source_name).make_dataset()
            log.info("Running inference explanation for '%s'" % source_name)
        else:
            project_name = project.config.project_name
            dataset = project.make_dataset()
            log.info("Running inference explanation for '%s'" % project_name)

        for item in dataset:
            image = item.image.data
            if image is None:
                log.warn(
                    "Dataset item %s does not have image data. Skipping." % \
                    (item.id))
                continue

            heatmap_iter = rise.apply(image)

            image = image / 255.0
            heatmaps = next(heatmap_iter)

            if args.save_dir is not None:
                log.info("Saving inference heatmaps to '%s'" % args.save_dir)
                os.makedirs(args.save_dir, exist_ok=True)

                for j, heatmap in enumerate(heatmaps):
                    save_image(osp.join(args.save_dir,
                            item.id + '-heatmap-%s.png' % j),
                        heatmap * 255.0, create_dir=True)

            # Display when no save dir was given, or when explicitly asked.
            if not args.save_dir or args.display:
                for j, heatmap in enumerate(heatmaps):
                    disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
                    cv2.imshow(item.id + '-heatmap-%s' % j, disp)
                cv2.waitKey(0)
    else:
        raise NotImplementedError()

    return 0
def _load(self):
    """Load the backing project and register the directory-based
    task-image extractor on its environment."""
    self._project = Project.load(self._project_dir)
    extractors = self._project.env.extractors
    extractors.register(_TASK_IMAGES_EXTRACTOR, CvatImagesDirExtractor)