Ejemplo n.º 1
0
def import_command(args):
    """
    Create a project from an existing dataset, driven by parsed CLI args.

    Reads ``dst_dir``, ``overwrite``, ``name``, ``source``, ``format``,
    ``extra_args``, ``skip_check`` and ``copy`` from ``args``. When no
    format is given, every registered importer is asked to detect the
    source and ``args.format`` is set to the single matching format.

    Returns:
        0 on success, 1 when no format matched, 2 when several matched.

    Raises:
        CliException: a target directory is non-empty and ``overwrite`` is
            not set, extra args are passed without a format, or the
            importer lookup raises KeyError.
    """
    project_dir = osp.abspath(args.dst_dir)

    env_dir = osp.join(project_dir, DEFAULT_CONFIG.env_dir)
    if osp.isdir(env_dir) and os.listdir(env_dir):
        if args.overwrite:
            # Removal errors are ignored for the environment directory
            shutil.rmtree(env_dir, ignore_errors=True)
        else:
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" % env_dir)

    dataset_dir = osp.join(project_dir, DEFAULT_CONFIG.dataset_dir)
    if osp.isdir(dataset_dir) and os.listdir(dataset_dir):
        if args.overwrite:
            # NOTE: remove the dir to avoid using data from previous project
            shutil.rmtree(dataset_dir)
        else:
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" % dataset_dir)

    if args.name is None:
        project_name = osp.basename(project_dir)
    else:
        project_name = args.name

    env = Environment()
    log.info("Importing project from '%s'" % args.source)

    extra_args = {}
    if args.format:
        # Explicit format: look the importer up and parse its own options
        try:
            importer = env.make_importer(args.format)
            if hasattr(importer, 'from_cmdline'):
                extra_args = importer.from_cmdline(args.extra_args)
        except KeyError:
            raise CliException(
                "Importer for format '%s' is not found" % args.format)
    else:
        if args.extra_args:
            raise CliException("Extra args can not be used without format")

        log.info("Trying to detect dataset format...")

        candidates = []
        for candidate_name in env.importers.items:
            log.debug("Checking '%s' format...", candidate_name)
            candidate = env.make_importer(candidate_name)
            try:
                detected = candidate.detect(args.source)
            except NotImplementedError:
                log.debug("Format '%s' does not support auto detection.",
                          candidate_name)
                continue
            if detected:
                log.debug("format matched")
                candidates.append((candidate_name, candidate))

        if not candidates:
            log.error("Failed to detect dataset format automatically. "
                      "Try to specify format with '-f/--format' parameter.")
            return 1
        if len(candidates) > 1:
            log.error(
                "Multiple formats match the dataset: %s. "
                "Try to specify format with '-f/--format' parameter.",
                ', '.join(name for name, _ in candidates))
            return 2

        args.format, importer = candidates[0]

    log.info("Importing project as '%s'" % args.format)

    project = importer(osp.abspath(args.source), **extra_args)
    project.config.project_name = project_name
    project.config.project_dir = project_dir

    # The dataset is built for the check and is also required for copying
    if args.copy or not args.skip_check:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()

    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
    else:
        project.save()

    log.info("Project has been created at '%s'" % project_dir)

    return 0
Ejemplo n.º 2
0
def build_export_parser(parser_ctor=argparse.ArgumentParser):
    """
    Build the argument parser for the 'export' command.

    Args:
        parser_ctor: callable used to construct the parser; it receives
            ``help``, ``description`` and ``formatter_class`` keywords.

    Returns:
        The configured parser, whose ``command`` default is
        ``export_command``.
    """
    format_names = sorted(Environment().converters.items)

    description = """
            Exports the project dataset in some format. Optionally, a filter
            can be passed, check 'filter' command description for more info.
            Each dataset format has its own options, which
            are passed after '--' separator (see examples), pass '-- -h'
            for more info. If not stated otherwise, by default
            only annotations are exported, to include images pass
            '--save-images' parameter.|n
            |n
            Formats:|n
            In Datumaro dataset formats are supported by Converter-s.
            A Converter produces a dataset of a specific format
            from dataset items. It is possible to add a custom Converter.
            To do this, you need to put a Converter
            definition script to <project_dir>/.datumaro/converters.|n
            |n
            List of builtin dataset formats: %s|n
            |n
            Examples:|n
            - Export project as a VOC-like dataset, include images:|n
            |s|sexport -f voc -- --save-images|n
            |n
            - Export project as a COCO-like dataset in other directory:|n
            |s|sexport -f coco -o path/I/like/
        """ % ', '.join(format_names)

    parser = parser_ctor(help="Export project", description=description,
                         formatter_class=MultilineFormatter)

    # '%(default)s' is left for argparse to expand when rendering help
    filter_mode_help = "Filter mode (options: %s; default: %s)" % (
        ', '.join(FilterModes.list_options()), '%(default)s')

    parser.add_argument('-e', '--filter', default=None,
                        help="Filter expression for dataset items")
    parser.add_argument('--filter-mode', default=FilterModes.i.name,
                        type=FilterModes.parse, help=filter_mode_help)
    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
        help="Directory to save output (default: a subdir in the current one)")
    parser.add_argument('--overwrite', action='store_true',
                        help="Overwrite existing files in the save directory")
    parser.add_argument('-p', '--project', dest='project_dir', default='.',
        help="Directory of the project to operate on (default: current dir)")
    parser.add_argument('-f', '--format', required=True, help="Output format")
    # Everything after the known options is collected for the converter
    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
        help="Additional arguments for converter (pass '-- -h' for help)")
    parser.set_defaults(command=export_command)

    return parser
Ejemplo n.º 3
0
def import_command(args):
    """
    Create a project from an existing dataset, driven by parsed CLI args.

    Reads ``dst_dir``, ``overwrite``, ``name``, ``source``, ``format``,
    ``extra_args``, ``skip_check`` and ``copy`` from ``args``. When no
    format is given, ``env.detect_dataset`` is used and exactly one match
    is required.

    Returns:
        0 on success, 1 when the format could not be detected unambiguously.

    Raises:
        CliException: a target directory is non-empty and ``overwrite`` is
            not set, extra args are passed without a format, the format is
            unknown, or the format does not accept extra parameters.
    """
    project_dir = osp.abspath(args.dst_dir)

    project_env_dir = osp.join(project_dir, DEFAULT_CONFIG.env_dir)
    if osp.isdir(project_env_dir) and os.listdir(project_env_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" %
                               project_env_dir)
        else:
            shutil.rmtree(project_env_dir, ignore_errors=True)

    own_dataset_dir = osp.join(project_dir, DEFAULT_CONFIG.dataset_dir)
    if osp.isdir(own_dataset_dir) and os.listdir(own_dataset_dir):
        if not args.overwrite:
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" %
                               own_dataset_dir)
        else:
            # NOTE: remove the dir to avoid using data from previous project
            shutil.rmtree(own_dataset_dir)

    project_name = args.name
    if project_name is None:
        project_name = osp.basename(project_dir)

    env = Environment()
    log.info("Importing project from '%s'" % args.source)

    extra_args = {}
    fmt = args.format
    if not args.format:
        if args.extra_args:
            raise CliException("Extra args can not be used without format")

        log.info("Trying to detect dataset format...")

        matches = env.detect_dataset(args.source)
        if len(matches) == 0:
            log.error("Failed to detect dataset format. "
                      "Try to specify format with '-f/--format' parameter.")
            return 1
        elif len(matches) != 1:
            log.error(
                "Multiple formats match the dataset: %s. "
                "Try to specify format with '-f/--format' parameter.",
                ', '.join(matches))
            return 1

        fmt = matches[0]
    elif args.extra_args:
        # Importers take precedence over extractors as the option parser
        if fmt in env.importers:
            arg_parser = env.importers[fmt]
        elif fmt in env.extractors:
            arg_parser = env.extractors[fmt]
        else:
            # The trailing space keeps the two literals from running together
            raise CliException(
                "Unknown format '%s'. A format can be added "
                "by providing an Extractor and Importer plugins" % fmt)

        if hasattr(arg_parser, 'parse_cmdline'):
            extra_args = arg_parser.parse_cmdline(args.extra_args)
        else:
            raise CliException("Format '%s' does not accept "
                               "extra parameters" % fmt)

    log.info("Importing project as '%s'" % fmt)

    project = Project.import_from(osp.abspath(args.source), fmt, **extra_args)
    project.config.project_name = project_name
    project.config.project_dir = project_dir

    # The dataset is built for the check and is also required for copying
    if not args.skip_check or args.copy:
        log.info("Checking the dataset...")
        dataset = project.make_dataset()
    if args.copy:
        log.info("Cloning data...")
        dataset.save(merge=True, save_images=True)
    else:
        project.save()

    log.info("Project has been created at '%s'" % project_dir)

    return 0
Ejemplo n.º 4
0
def build_import_parser(parser_ctor=argparse.ArgumentParser):
    """
    Build the argument parser for the 'import' command.

    Args:
        parser_ctor: callable used to construct the parser; it receives
            ``help``, ``description`` and ``formatter_class`` keywords.

    Returns:
        The configured parser, whose ``command`` default is
        ``import_command``.
    """
    format_names = sorted(Environment().importers.items)

    description = """
            Creates a project from an existing dataset. The source can be:|n
            - a dataset in a supported format (check 'formats' section below)|n
            - a Datumaro project|n
            |n
            Formats:|n
            Datasets come in a wide variety of formats. Each dataset
            format defines its own data structure and rules on how to
            interpret the data. For example, the following data structure
            is used in COCO format:|n
            /dataset/|n
            - /images/<id>.jpg|n
            - /annotations/|n
            |n
            In Datumaro dataset formats are supported by
            Extractor-s and Importer-s.
            An Extractor produces a list of dataset items corresponding
            to the dataset. An Importer creates a project from the
            data source location.
            It is possible to add a custom Extractor and Importer.
            To do this, you need to put an Extractor and
            Importer implementation scripts to
            <project_dir>/.datumaro/extractors
            and <project_dir>/.datumaro/importers.|n
            |n
            List of builtin dataset formats: %s|n
            |n
            Examples:|n
            - Create a project from VOC dataset in the current directory:|n
            |s|simport -f voc -i path/to/voc|n
            |n
            - Create a project from COCO dataset in other directory:|n
            |s|simport -f coco -i path/to/coco -o path/I/like/
        """ % ', '.join(format_names)

    parser = parser_ctor(help="Create project from an existing dataset",
                         description=description,
                         formatter_class=MultilineFormatter)

    parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir',
        help="Directory to save the new project to (default: current dir)")
    parser.add_argument('-n', '--name', default=None,
        help="Name of the new project (default: same as project dir)")
    parser.add_argument('--copy', action='store_true',
        help="Copy the dataset instead of saving source links")
    parser.add_argument('--skip-check', action='store_true',
        help="Skip source checking")
    parser.add_argument('--overwrite', action='store_true',
        help="Overwrite existing files in the save directory")
    parser.add_argument('-i', '--input-path', required=True, dest='source',
        help="Path to import project from")
    parser.add_argument('-f', '--format',
        help="Source project format. Will try to detect, if not specified.")
    # Everything after the known options is collected for the importer
    parser.add_argument('extra_args', nargs=argparse.REMAINDER,
        help="Additional arguments for importer (pass '-- -h' for help)")
    parser.set_defaults(command=import_command)

    return parser
Ejemplo n.º 5
0
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

from datumaro.components.project import Environment
from cvat.apps.engine.models import DimensionType


# Module-level Datumaro Environment, instantiated once when this module
# is imported.
dm_env = Environment()

class _Format:
    """Base class holding the descriptive attributes of a format.

    The values here are defaults; Exporter and Importer inherit them.
    """
    # Format name (empty by default)
    NAME = ''
    # Associated extension (empty by default)
    EXT = ''
    # Format version (empty by default)
    VERSION = ''
    # Contains '{NAME}' and '{VERSION}' placeholders; presumably filled in
    # by the code that registers the format -- TODO confirm
    DISPLAY_NAME = '{NAME} {VERSION}'
    # Formats are enabled by default
    ENABLED = True

class Exporter(_Format):
    """Callable base for exporters; subclasses must override ``__call__``."""

    def __call__(self, dst_file, task_data, **options):
        # Abstract: the base implementation always raises
        raise NotImplementedError()

class Importer(_Format):
    """Callable base for importers; subclasses must override ``__call__``."""

    def __call__(self, src_file, task_data, **options):
        # Abstract: the base implementation always raises
        raise NotImplementedError()

def _wrap_format(f_or_cls, klass, name, version, ext, display_name, enabled, dimension=DimensionType.DIM_2D):
    import inspect
    assert inspect.isclass(f_or_cls) or inspect.isfunction(f_or_cls)
    if inspect.isclass(f_or_cls):
        assert hasattr(f_or_cls, '__call__')
Ejemplo n.º 6
0
def convert_command(args):
    """
    Convert a dataset from one format to another, driven by parsed CLI args.

    Reads ``output_format``, ``extra_args``, ``filter_mode``, ``filter``,
    ``input_format``, ``source``, ``dst_dir`` and ``overwrite`` from
    ``args``. When no input format is given, every registered importer is
    asked to detect the source and ``args.input_format`` is set to the
    single matching format.

    Returns:
        0 on success, 1 when no input format matched, 2 when several matched.

    Raises:
        CliException: the converter or importer lookup raises KeyError, or
            the output directory is non-empty and ``overwrite`` is not set.
    """
    env = Environment()

    try:
        converter = env.converters.get(args.output_format)
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.output_format)
    # Kept under a dedicated name: the proxy below reads this variable when
    # it is called, so it must never be rebound to anything else.
    converter_args = converter.from_cmdline(args.extra_args)

    def converter_proxy(extractor, save_dir):
        return converter.convert(extractor, save_dir, **converter_args)

    filter_args = FilterModes.make_filter_args(args.filter_mode)

    if not args.input_format:
        matches = []
        for format_name in env.importers.items:
            log.debug("Checking '%s' format...", format_name)
            importer = env.make_importer(format_name)
            try:
                match = importer.detect(args.source)
                if match:
                    log.debug("format matched")
                    matches.append((format_name, importer))
            except NotImplementedError:
                log.debug("Format '%s' does not support auto detection.",
                          format_name)

        if len(matches) == 0:
            log.error(
                "Failed to detect dataset format. "
                "Try to specify format with '-if/--input-format' parameter.")
            return 1
        elif len(matches) != 1:
            log.error(
                "Multiple formats match the dataset: %s. "
                "Try to specify format with '-if/--input-format' parameter.",
                ', '.join(m[0] for m in matches))
            return 2

        format_name, importer = matches[0]
        args.input_format = format_name
        log.info("Source dataset format detected as '%s'", args.input_format)
    else:
        try:
            importer = env.make_importer(args.input_format)
            if hasattr(importer, 'from_cmdline'):
                # Previously this result overwrote the converter's options,
                # which the export step then used instead of the user's.
                # NOTE(review): the result is still not forwarded to
                # importer(source), as before -- confirm whether it should be.
                importer.from_cmdline()
        except KeyError:
            raise CliException("Importer for format '%s' is not found" % \
                args.input_format)

    source = osp.abspath(args.source)

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" % dst_dir)
    else:
        # No output dir given: derive one from the source and format names
        dst_dir = generate_next_file_name('%s-%s' % \
            (osp.basename(source), make_file_name(args.output_format)))
    dst_dir = osp.abspath(dst_dir)

    project = importer(source)
    dataset = project.make_dataset()

    log.info("Exporting the dataset")
    dataset.export_project(save_dir=dst_dir,
                           converter=converter_proxy,
                           filter_expr=args.filter,
                           **filter_args)

    log.info("Dataset exported to '%s' as '%s'" % \
        (dst_dir, args.output_format))

    return 0
Ejemplo n.º 7
0
    def mergeDataset(self, import_args: Arg, filter_arg: Arg):
        """
        Merge the datasets in ``self.datasetPathList`` and save the result
        to ``self.projectsPath / self.mergeFolderName``.

        Each path is imported with the format named by
        ``import_args['format']``. When a source shares at least one item id
        with another source, all of its items are re-keyed under the
        source's directory name and their image files are moved to match.
        When ``filter_arg['no_anno_filter']`` equals 'y' (case-insensitive),
        items without annotations are dropped. Annotation ids are renumbered
        from 1, and when ``config.imageIdName`` is not None that item
        attribute is set to a 1-based running index. An existing merge
        folder is removed before saving.

        Returns:
            self
        """
        config = setConfig(import_args['format'])
        source_datasets = {
            path: Environment().make_importer(
                import_args['format'])(str(path)).make_dataset()
            for path in self.datasetPathList
        }

        # Record which sources each item id occurs in. Ids are collected
        # before any renaming so that renames cannot affect the detection.
        pathsByItemId = {}
        for path, dataset in source_datasets.items():
            for item in dataset:
                pathsByItemId.setdefault(item.id, set()).add(path)

        # Sources owning at least one id that also occurs in another source
        conflictingPaths = set()
        for paths in pathsByItemId.values():
            if len(paths) > 1:
                conflictingPaths |= paths

        for path, dataset in source_datasets.items():
            if path not in conflictingPaths:
                continue
            for subsetName, subset in dataset.subsets().items():
                imgDir: Path = path / config.getImgDir(subsetName)
                # Iterate over a copy: subset.items is re-keyed in the loop
                for item in deepcopy(subset.items).values():
                    imgFile = Path(item.image.path)
                    relPath = imgFile.relative_to(imgDir)
                    newPath = imgDir / path.name / relPath
                    oldItemId = item.id
                    # Prefix the id with the source dir name, '/'-separated
                    newItemId = item.id = str(
                        path.name / relPath.parent / relPath.stem
                    ).replace('\\', '/')
                    item.image._path = str(newPath)
                    del subset.items[oldItemId]
                    subset.items[newItemId] = item
                    newPath.parent.mkdir(parents=True, exist_ok=True)

                    if item.image.has_data:
                        move(str(imgFile), str(newPath))

        mergePath = (self.projectsPath / self.mergeFolderName)
        if mergePath.is_dir():
            rmtree(mergePath, onerror=remove_readonly)
        mergePath.mkdir(exist_ok=True, parents=True)
        dst_dir = str(mergePath)

        merger = IntersectMerge(conf=IntersectMerge.Conf())
        merged_dataset = merger(list(source_datasets.values()))

        merged_project = Project()
        output_dataset = merged_project.make_dataset()
        output_dataset.define_categories(merged_dataset.categories())
        merged_dataset = output_dataset.update(merged_dataset)
        if filter_arg['no_anno_filter'].lower() == 'y':
            # Keep only the items that carry at least one annotation
            filtered_dataset = Project().make_dataset()
            filtered_dataset.define_categories(merged_dataset.categories())
            merged_dataset = filtered_dataset.update(
                merged_dataset.select(lambda item: len(item.annotations) != 0))

        # Renumber annotations across the whole merged dataset
        annoId = 1
        imageIdName = config.imageIdName
        for idx, item in tqdm(enumerate(merged_dataset), desc='datasets'):
            if imageIdName is not None:
                item.attributes[imageIdName] = idx + 1
            for anno in item.annotations:
                anno.id = annoId
                annoId += 1
        merged_dataset.save(save_dir=dst_dir, save_images=True)

        return self