def detect_format(dataset_dir, args):
    """Try to figure out what type of dataset we may have.

    Args:
        dataset_dir: Dataset root. Must provide ``joinpath`` and be accepted
            by ``os.fspath`` (e.g. a ``pathlib.Path``).
        args: Parsed options; only ``args.verbose`` is read, and only when
            the ``.datumaro/config.yaml`` shortcut does not apply.

    Returns:
        ``"datumaro"`` if ``.datumaro/config.yaml`` exists under
        ``dataset_dir``, otherwise the name of the single importer whose
        ``detect`` accepted the directory (``image_dir`` is ignored).

    Raises:
        AssertionError: If no format, or more than one format, matches.
    """
    # datumaro gets confused with 2 other formats, but the others
    # don't have the `.datumaro/config.yaml` file.
    if dataset_dir.joinpath(".datumaro", "config.yaml").exists():
        return "datumaro"

    env = Environment()
    dataset_path = os.fspath(dataset_dir)
    matches = []
    for format_name in env.importers.items:
        importer = env.make_importer(format_name)
        try:
            if importer.detect(dataset_path):
                matches.append(format_name)
        except NotImplementedError:
            # Importers without detection support are simply skipped.
            pass

    if args.verbose:
        print(f"Found {matches} for {dataset_dir}")

    # everything seems to match 'image_dir'....
    matches = [m for m in matches if m != "image_dir"]
    if len(matches) == 1:
        return matches[0]

    # Raised explicitly instead of `assert False`: an assert statement is
    # removed under `python -O`, which would let the function fall through
    # and return None for an unrecognized dataset.
    raise AssertionError(f"Unrecognized dataset format, {matches}")
def import_command(args):
    """Create a project in ``args.dst_dir`` from the dataset at ``args.source``.

    Reads ``dst_dir``, ``overwrite``, ``name``, ``format``, ``extra_args``,
    ``source``, ``skip_check`` and ``copy`` from ``args``.

    Returns:
        0 once the project has been saved.

    Raises:
        CliException: If the project env or dataset directory is non-empty
            and ``args.overwrite`` is not set, or if looking up the importer
            for ``args.format`` raises ``KeyError``.
    """
    dst_root = osp.abspath(args.dst_dir)

    env_path = osp.join(dst_root, DEFAULT_CONFIG.env_dir)
    if osp.isdir(env_path) and os.listdir(env_path):
        if args.overwrite:
            shutil.rmtree(env_path, ignore_errors=True)
        else:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % env_path)

    data_path = osp.join(dst_root, DEFAULT_CONFIG.dataset_dir)
    if osp.isdir(data_path) and os.listdir(data_path):
        if args.overwrite:
            # NOTE: remove the dir to avoid using data from previous project
            shutil.rmtree(data_path)
        else:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to force creation)" % data_path)

    # Fall back to the destination directory name when no name was given.
    name = osp.basename(dst_root) if args.name is None else args.name

    try:
        importer = Environment().make_importer(args.format)
    except KeyError:
        raise CliException(
            "Importer for format '%s' is not found" % args.format)

    if hasattr(importer, 'from_cmdline'):
        importer_kwargs = importer.from_cmdline(args.extra_args)
    else:
        importer_kwargs = {}

    log.info("Importing project from '%s' as '%s'"
        % (args.source, args.format))

    project = importer(osp.abspath(args.source), **importer_kwargs)
    project.config.project_name = name
    project.config.project_dir = dst_root

    # The dataset is built when checking is enabled, and always when copying
    # (the copy step below saves through the dataset object).
    if args.copy or not args.skip_check:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()

    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
    else:
        project.save()

    log.info("Project has been created at '%s'" % dst_root)
    return 0
def import_command(args):
    """Create a project in ``args.dst_dir`` from the dataset at ``args.source``.

    When ``args.format`` is empty, every importer known to the environment
    is asked to ``detect`` the source and the single match is used (and
    written back to ``args.format``).

    Returns:
        0 on success, 1 if no format was detected, 2 if several formats
        matched.

    Raises:
        CliException: If the project env or dataset directory is non-empty
            and ``args.overwrite`` is not set, if extra args are given
            without a format, or if a ``KeyError`` is raised while preparing
            the importer for an explicitly given format.
    """
    dst_root = osp.abspath(args.dst_dir)

    env_path = osp.join(dst_root, DEFAULT_CONFIG.env_dir)
    if osp.isdir(env_path) and os.listdir(env_path):
        if args.overwrite:
            shutil.rmtree(env_path, ignore_errors=True)
        else:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % env_path)

    data_path = osp.join(dst_root, DEFAULT_CONFIG.dataset_dir)
    if osp.isdir(data_path) and os.listdir(data_path):
        if args.overwrite:
            # NOTE: remove the dir to avoid using data from previous project
            shutil.rmtree(data_path)
        else:
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % data_path)

    # Fall back to the destination directory name when no name was given.
    name = osp.basename(dst_root) if args.name is None else args.name

    env = Environment()
    log.info("Importing project from '%s'" % args.source)

    importer_kwargs = {}
    if args.format:
        # Explicit format: look the importer up and parse its extra args.
        try:
            importer = env.make_importer(args.format)
            if hasattr(importer, 'from_cmdline'):
                importer_kwargs = importer.from_cmdline(args.extra_args)
        except KeyError:
            raise CliException(
                "Importer for format '%s' is not found" % args.format)
    else:
        if args.extra_args:
            raise CliException("Extra args can not be used without format")

        log.info("Trying to detect dataset format...")

        candidates = []
        for candidate_name in env.importers.items:
            log.debug("Checking '%s' format...", candidate_name)
            candidate = env.make_importer(candidate_name)
            try:
                if candidate.detect(args.source):
                    log.debug("format matched")
                    candidates.append((candidate_name, candidate))
            except NotImplementedError:
                log.debug("Format '%s' does not support auto detection.",
                    candidate_name)

        if not candidates:
            log.error("Failed to detect dataset format automatically. "
                "Try to specify format with '-f/--format' parameter.")
            return 1
        if len(candidates) > 1:
            log.error("Multiple formats match the dataset: %s. "
                "Try to specify format with '-f/--format' parameter.",
                ', '.join(found_name for found_name, _ in candidates))
            return 2

        args.format, importer = candidates[0]

    log.info("Importing project as '%s'" % args.format)

    project = importer(osp.abspath(args.source), **importer_kwargs)
    project.config.project_name = name
    project.config.project_dir = dst_root

    # The dataset is built when checking is enabled, and always when copying
    # (the copy step below saves through the dataset object).
    if args.copy or not args.skip_check:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()

    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
    else:
        project.save()

    log.info("Project has been created at '%s'" % dst_root)
    return 0
def convert_command(args):
    """Convert the dataset at ``args.source`` to ``args.output_format``.

    Reads ``output_format``, ``extra_args``, ``filter_mode``, ``filter``,
    ``input_format``, ``source``, ``dst_dir`` and ``overwrite`` from
    ``args``. When ``args.input_format`` is empty, every importer known to
    the environment is asked to ``detect`` the source and the single match
    is used (and written back to ``args.input_format``).

    Returns:
        0 on success, 1 if no input format was detected, 2 if several
        formats matched.

    Raises:
        CliException: If the converter lookup raises ``KeyError``, if a
            ``KeyError`` is raised while preparing the importer for an
            explicitly given input format, or if ``args.dst_dir`` is a
            non-empty directory and ``args.overwrite`` is not set.
    """
    env = Environment()

    try:
        converter = env.converters.get(args.output_format)
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.output_format)

    # Converter and importer arguments live in separate variables. The proxy
    # below reads its variable when it is *called*, so reusing one name for
    # both (as before) made the converter receive the importer's arguments
    # whenever an input format was given explicitly.
    converter_args = converter.from_cmdline(args.extra_args)

    def converter_proxy(extractor, save_dir):
        return converter.convert(extractor, save_dir, **converter_args)

    filter_args = FilterModes.make_filter_args(args.filter_mode)

    importer_args = {}
    if not args.input_format:
        matches = []
        for format_name in env.importers.items:
            log.debug("Checking '%s' format...", format_name)
            candidate = env.make_importer(format_name)
            try:
                if candidate.detect(args.source):
                    log.debug("format matched")
                    matches.append((format_name, candidate))
            except NotImplementedError:
                log.debug("Format '%s' does not support auto detection.",
                    format_name)

        if not matches:
            log.error(
                "Failed to detect dataset format. "
                "Try to specify format with '-if/--input-format' parameter.")
            return 1
        if len(matches) > 1:
            log.error(
                "Multiple formats match the dataset: %s. "
                "Try to specify format with '-if/--input-format' parameter.",
                ', '.join(m[0] for m in matches))
            return 2

        args.input_format, importer = matches[0]
        log.info("Source dataset format detected as '%s'", args.input_format)
    else:
        try:
            importer = env.make_importer(args.input_format)
            if hasattr(importer, 'from_cmdline'):
                importer_args = importer.from_cmdline()
        except KeyError:
            raise CliException("Importer for format '%s' is not found" % \
                args.input_format)

    source = osp.abspath(args.source)

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        # No destination given: derive one from the source name and format.
        dst_dir = generate_next_file_name('%s-%s' % \
            (osp.basename(source), make_file_name(args.output_format)))
    dst_dir = osp.abspath(dst_dir)

    # NOTE(review): importer arguments are forwarded the same way
    # import_command does it; previously they were computed but never
    # reached the importer — confirm this is the intended behavior.
    project = importer(source, **importer_args)
    dataset = project.make_dataset()

    log.info("Exporting the dataset")
    dataset.export_project(
        save_dir=dst_dir,
        converter=converter_proxy,
        filter_expr=args.filter,
        **filter_args)

    log.info("Dataset exported to '%s' as '%s'" % \
        (dst_dir, args.output_format))

    return 0