def test_can_release_resources_on_checkout(self, fxt_sample_video):
    # Checks that checking out an older revision releases resources
    # (extracted video frames) held by a source that replaced another one.
    test_dir = scope_add(TestDir())
    project = scope_add(Project.init(test_dir))

    # Commit 1: a plain single-item Datumaro source named 'src'
    src_url = osp.join(test_dir, 'src')
    src = Dataset.from_iterable([
        DatasetItem(1),
    ], categories=['a'])
    src.save(src_url)
    project.add_source(src_url, 'datumaro')
    project.commit('commit 1')

    # Commit 2: replace 'src' with a video-frames source
    project.remove_source('src', keep_data=False)
    project.import_source('src', osp.dirname(fxt_sample_video),
        'video_frames', rpath=osp.basename(fxt_sample_video))
    project.commit('commit 2')

    # the sample video is expected to yield 4 frames -- TODO confirm fixture
    assert len(project.working_tree.make_dataset()) == 4
    assert osp.isdir(osp.join(test_dir, 'src'))

    # Going back one commit must restore the 1-item dataset
    project.checkout('HEAD~1')
    assert len(project.working_tree.make_dataset()) == 1
def test_ambiguous_format(self):
    # A revpath pointing at a dataset that matches several formats must
    # raise WrongRevpathError carrying each underlying problem.
    test_dir = scope_add(TestDir())
    dataset_url = osp.join(test_dir, 'source')

    # create an ambiguous dataset by merging annotations from
    # datasets in different formats
    annotation_dir = osp.join(dataset_url, 'training/street')
    assets_dir = osp.join(osp.dirname(__file__), '../assets')
    os.makedirs(annotation_dir)
    for asset in [
        'ade20k2017_dataset/dataset/training/street/1_atr.txt',
        'ade20k2020_dataset/dataset/training/street/1.json',
    ]:
        shutil.copy(osp.join(assets_dir, asset), annotation_dir)

    # Without a project context, the missing project is reported too
    with self.subTest("no context"):
        with self.assertRaises(WrongRevpathError) as cm:
            parse_full_revpath(dataset_url)
        self.assertEqual(
            {ProjectNotFoundError, MultipleFormatsMatchError},
            set(type(e) for e in cm.exception.problems))

    proj_dir = osp.join(test_dir, 'proj')
    proj = scope_add(Project.init(proj_dir))

    # With a project context, the unknown target is reported instead
    with self.subTest("in context"):
        with self.assertRaises(WrongRevpathError) as cm:
            parse_full_revpath(dataset_url, proj)
        self.assertEqual(
            {UnknownTargetError, MultipleFormatsMatchError},
            set(type(e) for e in cm.exception.problems))
def validate_command(args):
    """CLI handler: run a dataset validator and write a JSON report.

    Splits the positionals into an optional target (before a '--'
    separator) and validator arguments, loads the project if available,
    resolves the validator plugin for args.task, validates the target
    dataset, and dumps the report to a generated
    'validation-report*.json' file.
    """
    # Split positionals: [target] [-- <validator args>]
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if 1 < pos:
            raise argparse.ArgumentError(None,
                message="Expected no more than 1 target argument")
    else:
        pos = 1
    args.target = (args._positionals[:pos] or ['project'])[0]
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Tolerate a missing project when only plugin help is requested
        if not show_plugin_help and args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    try:
        validator_type = env.validators[args.task]
    except KeyError as e:
        # chain the cause for better tracebacks
        raise CliException("Validator type '%s' is not found" % \
            args.task) from e

    extra_args = validator_type.parse_cmdline(args.extra_args)

    dataset, target_project = parse_full_revpath(args.target, project)
    if target_project:
        scope_add(target_project)

    # Fix: was a pointless f-string with no placeholders
    dst_file_name = 'validation-report'
    if args.subset_name is not None:
        dataset = dataset.get_subset(args.subset_name)
        dst_file_name += f'-{args.subset_name}'

    validator = validator_type(**extra_args)
    report = validator.validate(dataset)

    def _make_serializable(d):
        # JSON keys must be strings: stringify tuple keys recursively.
        for key, val in list(d.items()):
            # tuple key to str
            if isinstance(key, tuple):
                d[str(key)] = val
                d.pop(key)
            if isinstance(val, dict):
                _make_serializable(val)

    _make_serializable(report)

    dst_file = generate_next_file_name(dst_file_name, ext='.json')
    log.info("Writing project validation results to '%s'" % dst_file)
    dump_json_file(dst_file, report, indent=True, allow_numpy=True)
def info_command(args):
    # CLI handler: print summary info (length, categories, subsets) for
    # the target dataset, or a diagnostic message when it can't be built.
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Only tolerate a missing project when no explicit dir was given
        if args.project_dir:
            raise

    try:
        # TODO: avoid computing working tree hashes
        dataset, target_project = parse_full_revpath(args.target, project)
        if target_project:
            scope_add(target_project)
    except DatasetMergeError as e:
        dataset = None
        dataset_problem = "Can't merge project sources automatically: %s " \
            "Conflicting sources are: %s" % (e, ', '.join(e.sources))
    except MissingObjectError as e:
        dataset = None
        dataset_problem = str(e)

    def print_dataset_info(dataset, indent=''):
        # Prints length and per-type category details; label lists are
        # truncated to 10 entries unless --all was given.
        print("%slength:" % indent, len(dataset))

        categories = dataset.categories()
        print("%scategories:" % indent,
            ', '.join(c.name for c in categories))

        for cat_type, cat in categories.items():
            print("%s %s:" % (indent, cat_type.name))
            if cat_type == AnnotationType.label:
                print("%s count:" % indent, len(cat.items))

                count_threshold = 10
                if args.all:
                    count_threshold = len(cat.items)
                labels = ', '.join(
                    c.name for c in cat.items[:count_threshold])
                if count_threshold < len(cat.items):
                    labels += " (and %s more)" % (
                        len(cat.items) - count_threshold)
                print("%s labels:" % indent, labels)

    if dataset is not None:
        print_dataset_info(dataset)

        subsets = dataset.subsets()
        print("subsets:", ', '.join(subsets))
        for subset_name in subsets:
            subset = dataset.get_subset(subset_name)
            print(" '%s':" % subset_name)
            print_dataset_info(subset, indent=" ")
    else:
        # dataset_problem is set by the except handlers above
        print("Dataset info is not available: ", dataset_problem)

    return 0
def add_command(args):
    # CLI handler: register an ML model (launcher plugin + options) in the
    # project, optionally copying its data and checking it can be built.
    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Tolerate a missing project only for plugin help requests
        if not show_plugin_help and args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    name = args.name
    if name:
        if name in project.models:
            raise CliException("Model '%s' already exists" % name)
    else:
        # Auto-generate a unique name: 'model-0', 'model-1', ...
        name = generate_next_name(list(project.models),
            'model', sep='-', default=0)

    try:
        launcher = env.launchers[args.launcher]
    except KeyError:
        raise CliException("Launcher '%s' is not found" % args.launcher)

    cli_plugin = getattr(launcher, 'cli_plugin', launcher)
    model_args = cli_plugin.parse_cmdline(args.extra_args)

    if args.copy:
        log.info("Copying model data")

        model_dir = project.model_data_dir(name)
        os.makedirs(model_dir, exist_ok=False)
        # Remove the copied data again if anything below fails
        on_error_do(rmtree, model_dir, ignore_errors=True)

        try:
            cli_plugin.copy_model(model_dir, model_args)
        except (AttributeError, NotImplementedError):
            raise NotImplementedError(
                "Can't copy: copying is not available for '%s' models. " %
                args.launcher)

    project.add_model(name, launcher=args.launcher, options=model_args)
    # Roll back the registration if the model check below fails
    on_error_do(project.remove_model, name, ignore_errors=True)

    if not args.no_check:
        log.info("Checking the model...")
        project.make_model(name)

    project.save()

    log.info("Model '%s' with launcher '%s' has been added to project",
        name, args.launcher)
    return 0
def remove_command(args):
    """Delete the model named by args.name from the project and persist
    the updated project configuration."""
    proj = scope_add(load_project(args.project_dir))

    proj.remove_model(args.name)
    proj.save()

    return 0
def detect_format_command(args):
    # CLI handler: try all known importers against args.url and report
    # which formats match (and, optionally, why the others were rejected).
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    report = {'rejected_formats': {}}

    def rejection_callback(
        format_name: str,
        reason: RejectionReason,
        human_message: str,
    ):
        # Collect per-format rejection reasons for the JSON report
        report['rejected_formats'][format_name] = {
            'reason': reason.name,
            'message': human_message,
        }

    detected_formats = detect_dataset_format(
        # NOTE(review): env.importers.items appears to be the registry's
        # backing dict, hence the .items.items() call -- confirm against
        # the Environment/registry implementation.
        ((format_name, importer.detect)
            for format_name, importer in env.importers.items.items()),
        args.url,
        rejection_callback=rejection_callback,
    )
    report['detected_formats'] = detected_formats

    if len(detected_formats) == 1:
        print(f"Detected format: {detected_formats[0]}")
    elif len(detected_formats) == 0:
        print("Unable to detect the format")
    else:
        print("Ambiguous dataset; detected the following formats:")
        print()
        for format_name in sorted(detected_formats):
            print(f"- {format_name}")

    if args.show_rejections:
        print()
        if report['rejected_formats']:
            print("The following formats were rejected:")
            print()

            for format_name, rejection in sorted(
                    report['rejected_formats'].items()):
                print(f"{format_name}:")
                for line in rejection['message'].split('\n'):
                    print(f" {line}")
        else:
            print("No formats were rejected.")

    if args.json_report:
        dump_json_file(args.json_report, report, indent=True)
def test_can_release_resources_on_remove(self, fxt_sample_video):
    # Removing a source with keep_data=False must delete its extracted
    # data directory from the working tree.
    test_dir = scope_add(TestDir())
    project = scope_add(Project.init(test_dir))
    project.import_source('src', osp.dirname(fxt_sample_video),
        'video_frames', rpath=osp.basename(fxt_sample_video))
    project.commit('commit 1')

    # the sample video is expected to yield 4 frames -- TODO confirm fixture
    assert len(project.working_tree.make_dataset()) == 4
    assert osp.isdir(osp.join(test_dir, 'src'))

    project.remove_source('src', keep_data=False)

    assert not osp.exists(osp.join(test_dir, 'src'))
def info_command(args):
    """Show one model's config (when args.name is given) or list all
    project models, optionally with their configurations."""
    project = scope_add(load_project(args.project_dir))

    if not args.name:
        for model_name, model_conf in project.models.items():
            print(model_name)
            if args.verbose:
                print(dict(model_conf))
    else:
        print(project.models[args.name])
def patch_command(args):
    """CLI handler: apply a patch dataset onto a target dataset.

    Resolves both rev-paths, updates the target with the patch items,
    and re-saves the target using its own format converter.
    """
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    target_dataset, _project = parse_full_revpath(args.target, project)
    if _project is not None:
        scope_add(_project)

    try:
        converter = env.converters[target_dataset.format]
    except KeyError as e:
        # Fix: the message used to report args.format, which is not the
        # value that was looked up (and may not even exist on these args);
        # report the actual missing format.
        raise CliException("Converter for format '%s' is not found" % \
            target_dataset.format) from e
    extra_args = converter.parse_cmdline(args.extra_args)

    # Default to rewriting the target in place
    dst_dir = args.dst_dir or target_dataset.data_path
    if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
        raise CliException("Directory '%s' already exists "
            "(pass --overwrite to overwrite)" % dst_dir)
    dst_dir = osp.abspath(dst_dir)

    patch_dataset, _project = parse_full_revpath(args.patch, project)
    if _project is not None:
        scope_add(_project)

    target_dataset.update(patch_dataset)
    target_dataset.save(save_dir=dst_dir, **extra_args)

    log.info("Patched dataset has been saved to '%s'" % dst_dir)
    return 0
def log_command(args):
    """Print up to args.max_count history entries as '<rev> <message>'."""
    project = scope_add(load_project(args.project_dir))

    history = project.history(args.max_count)
    if not history:
        print("(Project history is empty)")
    else:
        for revision, commit_message in history:
            print('%s %s' % (revision, commit_message))

    return 0
def info_command(args):
    """Show one working-tree source (when args.name is given) or list all
    sources, optionally with their configurations."""
    project = scope_add(load_project(args.project_dir))
    sources = project.working_tree.sources

    if args.name:
        print(sources[args.name])
    else:
        for source_name, source_conf in sources.items():
            print(source_name)
            if args.verbose:
                print(source_conf)
def test_can_chain_transforms_in_working_tree_without_hashing(self):
    # Chained filter/transform stages on a working-tree source should be
    # applied lazily, without hashing intermediate stages.
    test_dir = scope_add(TestDir())
    source_url = osp.join(test_dir, 'test_repo')
    dataset = Dataset.from_iterable([
        DatasetItem(1, annotations=[Label(0)]),
        DatasetItem(2, annotations=[Label(1)]),
    ], categories=['a', 'b'])
    dataset.save(source_url)

    project_dir = osp.join(test_dir, 'proj')
    run(self, 'create', '-o', project_dir)
    run(self, 'import', '-p', project_dir, '-n', 'source1',
        '--format', DEFAULT_FORMAT, source_url)
    run(self, 'filter', '-p', project_dir,
        '-e', '/item/annotation[label="b"]')
    run(self, 'transform', '-p', project_dir,
        '-t', 'rename', '--', '-e', '|2|qq|')
    run(self, 'transform', '-p', project_dir,
        '-t', 'remap_labels', '--', '-l', 'a:cat', '-l', 'b:dog')

    project = scope_add(Project(project_dir))
    built_dataset = project.working_tree.make_dataset()

    # Only item 2 survives the filter; it is renamed and remapped
    expected_dataset = Dataset.from_iterable([
        DatasetItem('qq', annotations=[Label(1)]),
    ], categories=['cat', 'dog'])
    compare_datasets(self, expected_dataset, built_dataset)

    # The source data dir must now differ from the original input
    with self.assertRaises(Exception):
        compare_dirs(self, source_url, project.source_data_dir('source1'))

    # root stage + 3 appended stages; the earlier stages were not hashed
    source1_target = project.working_tree.build_targets['source1']
    self.assertEqual(4, len(source1_target.stages))
    self.assertEqual('', source1_target.stages[0].hash)
    self.assertEqual('', source1_target.stages[1].hash)
    self.assertEqual('', source1_target.stages[2].hash)
def remove_command(args):
    """Remove every source listed in args.names from the working tree."""
    project = scope_add(load_project(args.project_dir))

    if not args.names:
        raise CliException("Expected source name")

    for source_name in args.names:
        project.remove_source(source_name,
            force=args.force, keep_data=args.keep_data)
    project.working_tree.save()

    removed = ', '.join(args.names)
    log.info("Sources '%s' have been removed from the project" % removed)

    return 0
def run_command(args):
    """CLI handler: run a project model on a target dataset and save the
    inference results.

    Output goes to args.dst_dir, or to an auto-generated
    '<model>-inference*' directory when none is given.
    """
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            # Fix: the message was missing the word 'to'
            # ("pass --overwrite overwrite"), unlike sibling commands
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('%s-inference' % args.model_name)
    dst_dir = osp.abspath(dst_dir)

    project = scope_add(load_project(args.project_dir))

    dataset, target_project = parse_full_revpath(args.target, project)
    if target_project:
        scope_add(target_project)

    model = project.make_model(args.model_name)
    inference = dataset.run_model(model)
    inference.save(dst_dir)

    log.info("Inference results have been saved to '%s'" % dst_dir)
    return 0
def test_can_split_video(self):
    # The split_video utility should extract every step-th frame in
    # [start, end) as an image file named by frame index.
    on_exit_do(MediaManager.get_instance().clear)

    test_dir = scope_add(TestDir())
    video_path = osp.join(test_dir, 'video.avi')
    make_sample_video(video_path, frames=10)
    output_dir = osp.join(test_dir, 'result')

    run(TestCase(), 'util', 'split_video',
        '-i', video_path, '-o', output_dir,
        '--image-ext', '.jpg', '--start-frame', '2',
        '--end-frame', '8', '--step', '2')

    # frames 2, 4, 6 (the end frame is exclusive)
    assert set(os.listdir(output_dir)) == \
        {'%06d.jpg' % n for n in range(2, 8, 2)}
def stats_command(args):
    """Compute image and/or annotation statistics for a target dataset
    and write them to an auto-named 'statistics*.json' file."""
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if args.project_dir:
            raise

    dataset, target_project = parse_full_revpath(args.target, project)
    if target_project:
        scope_add(target_project)

    if args.subset:
        dataset = dataset.get_subset(args.subset)

    collected = {}
    if args.image_stats:
        collected.update(compute_image_statistics(dataset))
    if args.ann_stats:
        collected.update(compute_ann_statistics(dataset))

    dst_file = generate_next_file_name('statistics', ext='.json')
    log.info("Writing project statistics to '%s'" % dst_file)
    dump_json_file(dst_file, collected, indent=True)
def status_command(args):
    """Print the current branch/commit and per-target change statuses."""
    project = scope_add(load_project(args.project_dir))

    statuses = project.status()

    if not project.branch:
        print("HEAD is detached at commit %s" % project.head_rev)
    else:
        print("On branch '%s', commit %s" %
            (project.branch, project.head_rev))

    if not statuses:
        print("Working directory clean")
    else:
        for target_name, target_status in statuses.items():
            print('%s\t%s' % (target_status.name, target_name))

    return 0
def test_can_split_and_load(self, fxt_sample_video):
    # Importing a video as frames and exporting to image_dir should
    # produce loadable images matching the original frames.
    test_dir = scope_add(TestDir())
    on_exit_do(MediaManager.get_instance().clear)

    expected = Dataset.from_iterable([
        DatasetItem('frame_%06d' % i, image=np.ones((4, 6, 3)) * i)
        for i in range(4)
    ])

    dataset = Dataset.import_from(fxt_sample_video, 'video_frames',
        start_frame=0, end_frame=4, name_pattern='frame_%06d')
    dataset.export(format='image_dir', save_dir=test_dir,
        image_ext='.jpg')

    actual = Dataset.import_from(test_dir, 'image_dir')
    compare_datasets(TestCase(), expected, actual)
def commit_command(args):
    """Create a commit from the working tree and print a change summary."""
    project = scope_add(load_project(args.project_dir))

    old_tree = project.head

    new_commit = project.commit(args.message,
        allow_empty=args.allow_empty,
        allow_foreign=args.allow_foreign,
        no_cache=args.no_cache)

    # Diff the previous head against the committed working tree
    diff = project.diff(old_tree, project.working_tree)

    print("Moved to commit '%s' %s" % (new_commit, args.message))
    print(" %s targets changed" % len(diff))
    for target_name, target_status in diff.items():
        print(" %s %s" % (target_status.name, target_name))

    return 0
def info_command(args):
    # CLI handler: print a project overview at a given revision:
    # available plugins, registered models, and every source with its
    # build stages.
    project = scope_add(load_project(args.project_dir))
    rev = project.get_rev(args.revision)
    env = rev.env

    print("Project:")
    print(" location:", project._root_dir)
    print("Plugins:")
    print(" extractors:", ', '.join(
        sorted(set(env.extractors) | set(env.importers))))
    print(" converters:", ', '.join(env.converters))
    print(" launchers:", ', '.join(env.launchers))

    print("Models:")
    for model_name, model in project.models.items():
        print(" model '%s':" % model_name)
        print(" type:", model.launcher)

    print("Sources:")
    for source_name, source in rev.sources.items():
        print(" '%s':" % source_name)
        print(" format:", source.format)
        print(" url:", osp.abspath(source.url) if source.url else '')
        print(" location:", osp.abspath(
            osp.join(project.source_data_dir(source_name), source.path)))
        print(" options:", source.options)

        print(" stages:")
        for stage in rev.build_targets[source_name].stages:
            print(" '%s':" % stage.name)
            print(" type:", stage.type)
            print(" hash:", stage.hash)
            # a stage without a hash cannot be cached
            print(" cached:",
                project.is_obj_cached(stage.hash) if stage.hash else 'n/a')
            if stage.kind:
                print(" kind:", stage.kind)
            if stage.params:
                print(" parameters:", stage.params)

    return 0
def test_can_transform_dataset_inplace(self):
    # 'transform --overwrite <path>:<format>' should rewrite the dataset
    # at the given path in place, without a project.
    test_dir = scope_add(TestDir())
    Dataset.from_iterable([
        DatasetItem(1, annotations=[Label(0)]),
        DatasetItem(2, annotations=[Label(1)]),
    ], categories=['a', 'b']).export(test_dir, 'coco')

    run(self, 'transform', '-t', 'remap_labels', '--overwrite',
        test_dir + ':coco', '--', '-l', 'a:cat', '-l', 'b:dog')

    # COCO round-trip assigns ids/groups; other attrs are ignored below
    expected_dataset = Dataset.from_iterable([
        DatasetItem(1, annotations=[Label(0, id=1, group=1)]),
        DatasetItem(2, annotations=[Label(1, id=2, group=2)]),
    ], categories=['cat', 'dog'])
    compare_datasets(self, expected_dataset,
        Dataset.import_from(test_dir, 'coco'), ignored_attrs='*')
def checkout_command(args):
    """CLI handler: check out a revision and/or specific sources.

    Positionals are split as: [rev] [-- <source names>]. When '--' is
    present, at least one source name must follow it.
    """
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if 1 < pos:
            raise argparse.ArgumentError(None,
                message="Expected no more than 1 revision argument")
    else:
        pos = 1
    args.rev = (args._positionals[:pos] or [None])[0]
    args.sources = args._positionals[pos + has_sep:]
    if has_sep and not args.sources:
        # Fix: argparse.ArgumentError expects an argparse action object
        # (or None) as its first argument; the previous string 'sources'
        # made the raise itself fail with AttributeError inside argparse.
        raise argparse.ArgumentError(None,
            message="When '--' is used, "
                "at least 1 source name must be specified")

    project = scope_add(load_project(args.project_dir))

    project.checkout(rev=args.rev, sources=args.sources, force=args.force)
    return 0
def explain_command(args):
    # CLI handler: visualize model decisions with the RISE algorithm,
    # either for a single image file or for every item of a dataset.
    from matplotlib import cm
    import cv2

    project = scope_add(load_project(args.project_dir))

    model = project.working_tree.models.make_executable_model(args.model)

    # Only the RISE explanation algorithm is supported
    if str(args.algorithm).lower() != 'rise':
        raise NotImplementedError()

    from datumaro.components.algorithms.rise import RISE
    rise = RISE(model,
        max_samples=args.max_samples,
        mask_width=args.mask_width,
        mask_height=args.mask_height,
        prob=args.prob,
        iou_thresh=args.iou_thresh,
        nms_thresh=args.nms_iou_thresh,
        det_conf_thresh=args.det_conf_thresh,
        batch_size=args.batch_size)

    if args.target and is_image(args.target):
        # Single-image mode
        image_path = args.target
        image = load_image(image_path)

        log.info("Running inference explanation for '%s'" % image_path)
        heatmap_iter = rise.apply(image, progressive=args.display)

        # normalize for display/compositing; assumes 8-bit input -- TODO confirm
        image = image / 255.0
        file_name = osp.splitext(osp.basename(image_path))[0]
        if args.display:
            # Progressive display: refresh windows as samples accumulate
            for i, heatmaps in enumerate(heatmap_iter):
                for j, heatmap in enumerate(heatmaps):
                    hm_painted = cm.jet(heatmap)[:, :, 2::-1]
                    disp = (image + hm_painted) / 2
                    cv2.imshow('heatmap-%s' % j, hm_painted)
                    cv2.imshow(file_name + '-heatmap-%s' % j, disp)
                cv2.waitKey(10)
                print("Iter", i, "of", args.max_samples, end='\r')
        else:
            heatmaps = next(heatmap_iter)

            if args.save_dir is not None:
                log.info("Saving inference heatmaps at '%s'" % args.save_dir)
                os.makedirs(args.save_dir, exist_ok=True)

                for j, heatmap in enumerate(heatmaps):
                    save_path = osp.join(args.save_dir,
                        file_name + '-heatmap-%s.png' % j)
                    save_image(save_path, heatmap * 255.0)
            else:
                for j, heatmap in enumerate(heatmaps):
                    disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
                    cv2.imshow(file_name + '-heatmap-%s' % j, disp)
                cv2.waitKey(0)
    else:
        # Dataset mode: explain every item of the resolved target
        dataset, target_project = \
            parse_full_revpath(args.target or 'project', project)
        if target_project:
            scope_add(target_project)

        log.info("Running inference explanation for '%s'" % args.target)

        for item in dataset:
            image = item.image.data
            if image is None:
                log.warning("Item %s does not have image data. "
                    "Skipping.", item.id)
                continue

            heatmap_iter = rise.apply(image)

            image = image / 255.0
            heatmaps = next(heatmap_iter)

            if args.save_dir is not None:
                log.info("Saving inference heatmaps to '%s'" % args.save_dir)
                os.makedirs(args.save_dir, exist_ok=True)

                for j, heatmap in enumerate(heatmaps):
                    save_image(osp.join(args.save_dir,
                            item.id + '-heatmap-%s.png' % j),
                        heatmap * 255.0, create_dir=True)

            if not args.save_dir or args.display:
                for j, heatmap in enumerate(heatmaps):
                    disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
                    cv2.imshow(item.id + '-heatmap-%s' % j, disp)
                cv2.waitKey(0)

    return 0
def add_command(args):
    # CLI handler: add a data source (path + format) to the project's
    # working tree and verify it can be loaded.
    # Workaround. Required positionals consume positionals from the end
    args._positionals += join_cli_args(args, 'path', 'extra_args')

    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
    else:
        pos = 1
    args.path = (args._positionals[:pos] or [''])[0]
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # NOTE(review): unlike sibling commands, this does not also check
        # args.project_dir before re-raising -- confirm if intentional.
        if not show_plugin_help:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    fmt = args.format
    if fmt in env.importers:
        arg_parser = env.importers[fmt]
    elif fmt in env.extractors:
        arg_parser = env.extractors[fmt]
    else:
        raise CliException("Unknown format '%s'. A format can be added"
            " by providing an Extractor and Importer plugins" % fmt)

    extra_args = arg_parser.parse_cmdline(args.extra_args)

    if fmt == 'video_frames':
        show_video_import_warning()

    name, _ = project.add_source(args.path, format=args.format,
        options=extra_args, rpath=args.rpath)
    # Roll back the registration if the source check below fails
    on_error_do(project.remove_source, name, ignore_errors=True,
        kwargs={'force': True, 'keep_data': True})

    if not args.no_check:
        log.info("Checking the source...")
        project.working_tree.make_dataset(name)

    project.working_tree.save()

    log.info("Source '%s' with format '%s' has been added to the project",
        name, args.format)
    return 0
def test_can_parse(self):
    # Exercises every supported revpath form:
    #   proj_dir[@rev][:source[.stage]]  and  dataset_path[:format]
    # parse_full_revpath returns (dataset, project); project is non-None
    # only when a new Project had to be loaded for the call, and must
    # then be registered in the scope by the caller.
    test_dir = scope_add(TestDir())
    dataset_url = osp.join(test_dir, 'source')
    Dataset.from_iterable([DatasetItem(1)]).save(dataset_url)

    proj_dir = osp.join(test_dir, 'proj')
    proj = scope_add(Project.init(proj_dir))
    proj.import_source('source-1', dataset_url, format=DEFAULT_FORMAT)
    ref = proj.commit("second commit", allow_empty=True)

    with self.subTest("project"):
        dataset, project = parse_full_revpath(proj_dir)
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertTrue(isinstance(project, Project))

    with self.subTest("project ref"):
        dataset, project = parse_full_revpath(f"{proj_dir}@{ref}")
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertTrue(isinstance(project, Project))

    with self.subTest("project ref source"):
        dataset, project = parse_full_revpath(f"{proj_dir}@{ref}:source-1")
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertTrue(isinstance(project, Project))

    with self.subTest("project ref source stage"):
        dataset, project = parse_full_revpath(
            f"{proj_dir}@{ref}:source-1.root")
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertTrue(isinstance(project, Project))

    # Forms below reuse the existing project context, so no new
    # Project instance is expected to be returned
    with self.subTest("ref"):
        dataset, project = parse_full_revpath(ref, proj)
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertEqual(None, project)

    with self.subTest("ref source"):
        dataset, project = parse_full_revpath(f"{ref}:source-1", proj)
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertEqual(None, project)

    with self.subTest("ref source stage"):
        dataset, project = parse_full_revpath(f"{ref}:source-1.root", proj)
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertEqual(None, project)

    with self.subTest("source"):
        dataset, project = parse_full_revpath("source-1", proj)
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertEqual(None, project)

    with self.subTest("source stage"):
        dataset, project = parse_full_revpath("source-1.root", proj)
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertEqual(None, project)

    with self.subTest("dataset (in context)"):
        dataset, project = parse_full_revpath(dataset_url, proj)
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertEqual(None, project)

    with self.subTest("dataset format (in context)"):
        dataset, project = parse_full_revpath(
            f"{dataset_url}:datumaro", proj)
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertEqual(None, project)

    with self.subTest("dataset (no context)"):
        dataset, project = parse_full_revpath(dataset_url)
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertEqual(None, project)

    with self.subTest("dataset format (no context)"):
        dataset, project = parse_full_revpath(f"{dataset_url}:datumaro")
        if project:
            scope_add(project)
        self.assertTrue(isinstance(dataset, IDataset))
        self.assertEqual(None, project)
def diff_command(args):
    # CLI handler: compare two datasets (or the working tree vs a target)
    # either by exact equality or by distance, and save a report.
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('diff')
    dst_dir = osp.abspath(dst_dir)

    if not osp.exists(dst_dir):
        # remove the directory we created if the command fails midway
        on_error_do(rmtree, dst_dir, ignore_errors=True)
        os.makedirs(dst_dir)

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if args.project_dir:
            raise

    try:
        if not args.second_target:
            # Single target: diff the working tree against it
            first_dataset = project.working_tree.make_dataset()
            second_dataset, target_project = \
                parse_full_revpath(args.first_target, project)
            if target_project:
                scope_add(target_project)
        else:
            first_dataset, target_project = \
                parse_full_revpath(args.first_target, project)
            if target_project:
                scope_add(target_project)

            second_dataset, target_project = \
                parse_full_revpath(args.second_target, project)
            if target_project:
                scope_add(target_project)
    except Exception as e:
        raise CliException(str(e))

    if args.method is ComparisonMethod.equality:
        if args.ignore_field:
            # NOTE(review): eq_default_if is defined elsewhere in this
            # file; presumably the default ignored-field list -- confirm.
            args.ignore_field = eq_default_if
        comparator = ExactComparator(
            match_images=args.match_images,
            ignored_fields=args.ignore_field,
            ignored_attrs=args.ignore_attr,
            ignored_item_attrs=args.ignore_item_attr)
        matches, mismatches, a_extra, b_extra, errors = \
            comparator.compare_datasets(first_dataset, second_dataset)

        output = {
            "mismatches": mismatches,
            "a_extra_items": sorted(a_extra),
            "b_extra_items": sorted(b_extra),
            "errors": errors,
        }
        if args.all:
            output["matches"] = matches

        output_file = osp.join(dst_dir,
            generate_next_file_name('diff', ext='.json', basedir=dst_dir))
        log.info("Saving diff to '%s'" % output_file)
        dump_json_file(output_file, output, indent=True)

        print("Found:")
        print("The first project has %s unmatched items" % len(a_extra))
        print("The second project has %s unmatched items" % len(b_extra))
        print("%s item conflicts" % len(errors))
        print("%s matching annotations" % len(matches))
        print("%s mismatching annotations" % len(mismatches))
    elif args.method is ComparisonMethod.distance:
        comparator = DistanceComparator(iou_threshold=args.iou_thresh)

        with DiffVisualizer(save_dir=dst_dir, comparator=comparator,
                output_format=args.format) as visualizer:
            log.info("Saving diff to '%s'" % dst_dir)
            visualizer.save(first_dataset, second_dataset)

    return 0
def export_command(args):
    # CLI handler: export a project build target in a chosen format.
    # Positionals are split as: [target] [-- <converter args>]
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if 1 < pos:
            raise argparse.ArgumentError(None,
                message="Expected no more than 1 target argument")
    else:
        pos = 1
    args.target = (args._positionals[:pos] or \
        [ProjectBuildTargets.MAIN_TARGET])[0]
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Tolerate a missing project only for plugin help requests
        if not show_plugin_help:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    try:
        converter = env.converters[args.format]
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.format)
    extra_args = converter.parse_cmdline(args.extra_args)

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('export-%s' % \
            make_file_name(args.format))
    dst_dir = osp.abspath(dst_dir)

    if args.filter:
        filter_args = FilterModes.make_filter_args(args.filter_mode)
        filter_expr = args.filter

    log.info("Loading the project...")

    dataset = project.working_tree.make_dataset(args.target)
    if args.filter:
        # filter_args/filter_expr are set above when args.filter is truthy
        dataset.filter(filter_expr, **filter_args)

    log.info("Exporting...")

    dataset.export(save_dir=dst_dir, format=converter, **extra_args)

    log.info("Results have been saved to '%s'" % dst_dir)
    return 0
def filter_command(args):
    """CLI handler: filter a dataset or project target by an XPath
    expression.

    In --dry-run mode, prints matching items as XML without saving.
    For project build targets, records a filter build stage (and
    optionally applies it in place); for plain datasets, filters and
    saves to the destination directory.
    """
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if args.project_dir:
            raise

    filter_args = FilterModes.make_filter_args(args.mode)
    filter_expr = args.filter

    if args.dry_run:
        dataset, _project = parse_full_revpath(args.target, project)
        if _project:
            scope_add(_project)

        dataset = dataset.filter(expr=filter_expr, **filter_args)

        for item in dataset:
            encoded_item = DatasetItemEncoder.encode(item,
                dataset.categories())
            xml_item = DatasetItemEncoder.to_string(encoded_item)
            print(xml_item)
        return 0

    if not args.filter:
        raise CliException("Expected a filter expression ('-e' argument)")

    is_target = project is not None and \
        args.target in project.working_tree.build_targets
    if is_target:
        if not args.dst_dir and args.stage and (args.target != \
                ProjectBuildTargets.strip_target_name(args.target)):
            raise CliException("Adding a stage is only allowed for "
                "project targets, not their stages.")

        if args.target == ProjectBuildTargets.MAIN_TARGET:
            # filtering the whole project filters each source
            targets = list(project.working_tree.sources)
        else:
            targets = [args.target]

        build_tree = project.working_tree.clone()
        for target in targets:
            build_tree.build_targets.add_filter_stage(target,
                expr=filter_expr, params=filter_args)

    if args.apply:
        log.info("Filtering...")

        if is_target and not args.dst_dir:
            for target in targets:
                dataset = project.working_tree.make_dataset(
                    build_tree.make_pipeline(target))

                # Source might be missing in the working dir, so we specify
                # the output directory.
                # We specify save_images here as a heuristic. It can probably
                # be improved by checking if there are images in the dataset
                # directory.
                dataset.save(project.source_data_dir(target),
                    save_images=True)

            log.info("Finished")
        else:
            dataset, _project = parse_full_revpath(args.target, project)
            if _project:
                scope_add(_project)

            dst_dir = args.dst_dir or dataset.data_path
            if not args.overwrite and osp.isdir(dst_dir) and os.listdir(
                    dst_dir):
                raise CliException("Directory '%s' already exists "
                    "(pass --overwrite to overwrite)" % dst_dir)
            dst_dir = osp.abspath(dst_dir)

            # Fix: filter_args is a dict of keyword options; it was being
            # star-unpacked positionally (*filter_args), which passed the
            # option names instead of their values. Use ** like the
            # dry-run branch above.
            dataset.filter(filter_expr, **filter_args)
            dataset.save(dst_dir, save_images=True)

            log.info("Results have been saved to '%s'" % dst_dir)

    if is_target and args.stage:
        project.working_tree.config.update(build_tree.config)
        project.working_tree.save()

    return 0
def transform_command(args):
    # CLI handler: apply a transform plugin to a project target (recording
    # a build stage) or to a plain dataset, optionally saving results.
    # Positionals are split as: [target] [-- <transform args>]
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if 1 < pos:
            raise argparse.ArgumentError(None,
                message="Expected no more than 1 target argument")
    else:
        pos = 1
    args.target = (args._positionals[:pos] or \
        [ProjectBuildTargets.MAIN_TARGET])[0]
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # Tolerate a missing project only for plugin help requests
        if not show_plugin_help and args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    try:
        transform = env.transforms[args.transform]
    except KeyError:
        raise CliException("Transform '%s' is not found" % args.transform)
    extra_args = transform.parse_cmdline(args.extra_args)

    is_target = project is not None and \
        args.target in project.working_tree.build_targets
    if is_target:
        if not args.dst_dir and args.stage and (args.target != \
                ProjectBuildTargets.strip_target_name(args.target)):
            raise CliException("Adding a stage is only allowed for "
                "project targets, not their stages.")

        if args.target == ProjectBuildTargets.MAIN_TARGET:
            # transforming the whole project transforms each source
            targets = list(project.working_tree.sources)
        else:
            targets = [args.target]

        build_tree = project.working_tree.clone()
        for target in targets:
            build_tree.build_targets.add_transform_stage(target,
                args.transform, params=extra_args)

    if args.apply:
        log.info("Transforming...")

        if is_target and not args.dst_dir:
            for target in targets:
                dataset = project.working_tree.make_dataset(
                    build_tree.make_pipeline(target))

                # Source might be missing in the working dir, so we specify
                # the output directory
                # We specify save_images here as a heuristic. It can probably
                # be improved by checking if there are images in the dataset
                # directory.
                dataset.save(project.source_data_dir(target),
                    save_images=True)

            log.info("Finished")
        else:
            dataset, _project = parse_full_revpath(args.target, project)
            if _project:
                scope_add(_project)

            dst_dir = args.dst_dir or dataset.data_path
            if not args.overwrite and osp.isdir(dst_dir) and os.listdir(
                    dst_dir):
                raise CliException("Directory '%s' already exists "
                    "(pass --overwrite to overwrite)" % dst_dir)
            dst_dir = osp.abspath(dst_dir)

            dataset.transform(args.transform, **extra_args)
            dataset.save(dst_dir, save_images=True)

            log.info("Results have been saved to '%s'" % dst_dir)

    if is_target and args.stage:
        project.working_tree.config.update(build_tree.config)
        project.working_tree.save()

    return 0