예제 #1
0
def download_command(args):
    """Download a dataset by ID and export it in the requested format.

    Currently only TFDS datasets are supported (IDs of the form
    'tfds:<name>'). The dataset is exported into args.dst_dir (or an
    auto-generated directory) using args.output_format, falling back to
    the dataset's default converter when no format is given.

    Raises:
        CliException: for an unknown dataset ID, a missing TFDS
            installation, an unknown output format, or an existing,
            non-empty destination directory without --overwrite.
    """
    env = Environment()

    if args.dataset_id.startswith('tfds:'):
        # Guard clauses instead of nested if/else for readability.
        if not TFDS_EXTRACTOR_AVAILABLE:
            raise CliException(
                "TFDS datasets are not available, because TFDS and/or "
                "TensorFlow are not installed.\n"
                "You can install them with: pip install datumaro[tf,tfds]")

        tfds_ds_name = args.dataset_id[5:]  # strip the 'tfds:' prefix
        tfds_ds_metadata = AVAILABLE_TFDS_DATASETS.get(tfds_ds_name)
        if not tfds_ds_metadata:
            raise CliException(f"Unsupported TFDS dataset '{tfds_ds_name}'")

        default_converter_name = tfds_ds_metadata.default_converter_name

        # PEP 8 (E731): prefer a named function over assigning a lambda.
        def extractor_factory():
            return make_tfds_extractor(tfds_ds_name)
    else:
        raise CliException(f"Unknown dataset ID '{args.dataset_id}'")

    output_format = args.output_format or default_converter_name

    try:
        converter = env.converters[output_format]
    except KeyError as e:
        # Chain the cause so the original KeyError is preserved.
        raise CliException("Converter for format '%s' is not found" %
            output_format) from e
    extra_args = converter.parse_cmdline(args.extra_args)

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        # No destination given: derive a fresh, unused directory name.
        dst_dir = generate_next_file_name('%s-%s' % (
            make_file_name(args.dataset_id),
            make_file_name(output_format),
        ))
    dst_dir = osp.abspath(dst_dir)

    log.info("Downloading the dataset")
    extractor = extractor_factory()

    log.info("Exporting the dataset")
    converter.convert(extractor, dst_dir,
        default_image_ext='.png', **extra_args)

    log.info("Dataset exported to '%s' as '%s'" % (dst_dir, output_format))
예제 #2
0
def convert_command(args):
    """Convert a dataset on disk from one format to another (CLI entry).

    The input format is taken from args.input_format or auto-detected
    from args.source. The result is written to args.dst_dir (or an
    auto-generated directory) in args.output_format, optionally after
    applying an XPath filter.

    Returns:
        0 on success, 1 if the input format could not be detected,
        2 if several formats matched the source.

    Raises:
        CliException: for an unknown output format or an existing,
            non-empty destination directory without --overwrite.
    """
    env = Environment()

    try:
        converter = env.converters[args.output_format]
    except KeyError as e:
        # Chain the cause so the original KeyError is preserved.
        raise CliException("Converter for format '%s' is not found" % \
            args.output_format) from e
    extra_args = converter.parse_cmdline(args.extra_args)

    filter_args = FilterModes.make_filter_args(args.filter_mode)

    fmt = args.input_format
    if not args.input_format:
        matches = env.detect_dataset(args.source)
        if len(matches) == 0:
            log.error(
                "Failed to detect dataset format. "
                "Try to specify format with '-if/--input-format' parameter.")
            return 1
        elif len(matches) != 1:
            log.error(
                "Multiple formats match the dataset: %s. "
                "Try to specify format with '-if/--input-format' parameter.",
                ', '.join(matches))
            return 2

        fmt = matches[0]
        # Bug fix: log the detected format ('fmt'); args.input_format is
        # empty in this branch, so the old message always showed nothing.
        log.info("Source dataset format detected as '%s'", fmt)

    source = osp.abspath(args.source)

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" % dst_dir)
    else:
        # No destination given: derive a fresh, unused directory name.
        dst_dir = generate_next_file_name('%s-%s' % \
            (osp.basename(source), make_file_name(args.output_format)))
    dst_dir = osp.abspath(dst_dir)

    dataset = Dataset.import_from(source, fmt)

    log.info("Exporting the dataset")
    if args.filter:
        dataset = dataset.filter(args.filter, **filter_args)
    dataset.export(format=args.output_format, save_dir=dst_dir, **extra_args)

    log.info("Dataset exported to '%s' as '%s'" % \
        (dst_dir, args.output_format))

    return 0
예제 #3
0
def normalize_label(label):
    """Return *label* in a normalized, identifier-friendly form.

    make_file_name() converts the text to ASCII lowercase; dashes are
    then unified to underscores.
    """
    return make_file_name(label).replace('-', '_')
예제 #4
0
    def _save_item_annotations(
        self,
        item,
        label_description_writer,
        bbox_description_writer,
        mask_description_writer,
        label_categories,
        image_meta,
    ):
        """Write one item's annotations to the Open Images CSV writers.

        Label, bbox and mask annotations are routed to their respective
        CSV writers; mask images are additionally saved as PNG files
        under the masks directory. Box and mask coordinates are written
        normalized by the image size, which is also recorded in
        *image_meta*.

        Args:
            item: dataset item whose annotations are exported.
            label_description_writer: CSV dict-writer for label rows.
            bbox_description_writer: CSV dict-writer for bounding-box rows.
            mask_description_writer: CSV dict-writer for mask rows.
            label_categories: maps a label index to an object with the
                label's `name`.
            image_meta: dict updated with {item.id: (height, width)}
                whenever the image size is known.

        Raises:
            UnsupportedBoxIdError: if a mask's 'box_id' attribute is not
                usable as a file-name component.
        """
        # Counter used to auto-generate box IDs (zero-padded hex).
        next_box_id = 0

        # Box IDs already claimed by masks of this item; auto-generated
        # IDs must not collide with them.
        existing_box_ids = {
            annotation.attributes['box_id']
            for annotation in item.annotations
            if annotation.type is AnnotationType.mask
            if 'box_id' in annotation.attributes
        }

        for instance in find_instances(item.annotations):
            # First bbox of the instance (if any); used to attach box
            # coordinates to the instance's masks below.
            instance_box = next(
                (a for a in instance if a.type is AnnotationType.bbox),
                None)

            for annotation in instance:
                if annotation.type is AnnotationType.label:
                    label_description_writer.writerow({
                        'ImageID': item.id,
                        'LabelName': label_categories[annotation.label].name,
                        'Confidence': str(annotation.attributes.get('score', 1)),
                    })
                elif annotation.type is AnnotationType.bbox:
                    # Box coordinates are stored normalized, so the image
                    # size is required; skip the box when it's unknown.
                    if item.has_image and item.image.size is not None:
                        image_meta[item.id] = item.image.size
                        height, width = item.image.size
                    else:
                        log.warning(
                            "Can't encode box for item '%s' due to missing image file",
                            item.id)
                        continue

                    bbox_description_writer.writerow({
                        'ImageID': item.id,
                        'LabelName': label_categories[annotation.label].name,
                        'Confidence': str(annotation.attributes.get('score', 1)),
                        'XMin': annotation.x / width,
                        'YMin': annotation.y / height,
                        'XMax': (annotation.x + annotation.w) / width,
                        'YMax': (annotation.y + annotation.h) / height,
                        # Boolean attributes are written as ints; -1
                        # marks an absent/unknown value.
                        **{
                            bool_attr.oid_name:
                                int(annotation.attributes.get(bool_attr.datumaro_name, -1))
                            for bool_attr in OpenImagesPath.BBOX_BOOLEAN_ATTRIBUTES
                        },
                    })
                elif annotation.type is AnnotationType.mask:
                    mask_dir = osp.join(self._save_dir, OpenImagesPath.MASKS_DIR, item.subset)

                    box_id_str = annotation.attributes.get('box_id')

                    if box_id_str:
                        # The box ID becomes part of the mask file name,
                        # so reject values unsafe as a path component.
                        if _RE_INVALID_PATH_COMPONENT.fullmatch(box_id_str):
                            raise UnsupportedBoxIdError(item_id=item.id, box_id=box_id_str)
                    else:
                        # find a box ID that isn't used in any other annotations
                        while True:
                            box_id_str = format(next_box_id, "08x")
                            next_box_id += 1
                            if box_id_str not in existing_box_ids:
                                break

                    label_name = label_categories[annotation.label].name
                    mask_file_name = '%s_%s_%s.png' % (
                        make_file_name(item.id), make_file_name(label_name), box_id_str,
                    )

                    # Normalized coordinates of the enclosing box; left
                    # empty when there is no box or no image size.
                    box_coords = {}

                    if instance_box is not None:
                        if item.has_image and item.image.size is not None:
                            image_meta[item.id] = item.image.size
                            height, width = item.image.size

                            box_coords = {
                                'BoxXMin': instance_box.x / width,
                                'BoxXMax': (instance_box.x + instance_box.w) / width,
                                'BoxYMin': instance_box.y / height,
                                'BoxYMax': (instance_box.y + instance_box.h) / height,
                            }
                        else:
                            log.warning(
                                "Can't encode box coordinates for a mask"
                                    " for item '%s' due to missing image file",
                                item.id)

                    mask_description_writer.writerow({
                        'MaskPath': mask_file_name,
                        'ImageID': item.id,
                        'LabelName': label_name,
                        'BoxID': box_id_str,
                        **box_coords,
                        'PredictedIoU':
                            annotation.attributes.get('predicted_iou', ''),
                    })

                    save_image(osp.join(mask_dir, mask_file_name),
                        annotation.image, create_dir=True)
예제 #5
0
def export_command(args):
    """Export a project build target in the given format (CLI entry).

    Positional arguments are split into an optional target (before a
    '--' separator) and converter-specific extra arguments. The dataset
    built from the target is exported into args.dst_dir (or an
    auto-generated directory), optionally filtered first.

    Returns:
        0 on success.

    Raises:
        argparse.ArgumentError: if more than one target is given.
        CliException: for an unknown format or an existing, non-empty
            destination directory without --overwrite.
    """
    # Split positionals into [target] and extra args around '--'.
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if pos > 1:
            raise argparse.ArgumentError(
                None, message="Expected no more than 1 target argument")
    else:
        pos = 1
    targets = args._positionals[:pos]
    args.target = targets[0] if targets else ProjectBuildTargets.MAIN_TARGET
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    # A missing project is tolerated only when the user just asked for
    # plugin help.
    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if not show_plugin_help:
            raise

    env = project.env if project is not None else Environment()

    try:
        converter = env.converters[args.format]
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.format)

    extra_args = converter.parse_cmdline(args.extra_args)

    dst_dir = args.dst_dir
    if not dst_dir:
        # No destination given: derive a fresh, unused directory name.
        dst_dir = generate_next_file_name(
            'export-%s' % make_file_name(args.format))
    elif not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
        raise CliException("Directory '%s' already exists "
                           "(pass --overwrite to overwrite)" % dst_dir)
    dst_dir = osp.abspath(dst_dir)

    if args.filter:
        filter_args = FilterModes.make_filter_args(args.filter_mode)
        filter_expr = args.filter

    log.info("Loading the project...")

    dataset = project.working_tree.make_dataset(args.target)
    if args.filter:
        dataset.filter(filter_expr, **filter_args)

    log.info("Exporting...")

    dataset.export(save_dir=dst_dir, format=converter, **extra_args)

    log.info("Results have been saved to '%s'" % dst_dir)

    return 0
예제 #6
0
def export(dst_format,
           task_id=None,
           project_id=None,
           server_url=None,
           save_images=False):
    """Export a task or a project dataset in dst_format, using a cache.

    Exactly one of task_id/project_id is expected to be set. The result
    is written into the instance's export cache directory and reused
    while it is newer than the instance's last update; a cleanup job is
    scheduled to delete it after the cache TTL.

    Args:
        dst_format: export format name (a key of EXPORT_FORMATS).
        task_id: ID of the task to export, if exporting a task.
        project_id: ID of the project to export, used when task_id is None.
        server_url: server URL forwarded to the export function.
        save_images: export 'dataset' (with images) vs 'annotations'.

    Returns:
        Path of the exported archive inside the cache directory.
    """
    # Bug fix: 'logger' must be bound before the try block - the except
    # clause uses it, and an exception (e.g. a failed Task/Project
    # lookup) can fire before it is assigned, which previously raised
    # NameError and masked the real error.
    # NOTE(review): assumes log_exception() accepts logger=None and
    # falls back to a global logger - confirm against its definition.
    logger = None
    try:
        if task_id is not None:
            db_instance = Task.objects.get(pk=task_id)
            logger = slogger.task[task_id]
            cache_ttl = TASK_CACHE_TTL
            export_fn = task.export_task
        else:
            db_instance = Project.objects.get(pk=project_id)
            logger = slogger.project[project_id]
            cache_ttl = PROJECT_CACHE_TTL
            export_fn = project.export_project

        cache_dir = get_export_cache_dir(db_instance)

        exporter = EXPORT_FORMATS[dst_format]
        output_base = '%s_%s' % ('dataset' if save_images else 'annotations',
                                 make_file_name(to_snake_case(dst_format)))
        output_path = '%s.%s' % (output_base, exporter.EXT)
        output_path = osp.join(cache_dir, output_path)

        # A cached file is valid only if it is newer than the last
        # update of the instance (for projects, of any of their tasks
        # as well).
        instance_time = timezone.localtime(
            db_instance.updated_date).timestamp()
        if isinstance(db_instance, Project):
            tasks_update = [
                timezone.localtime(db_task.updated_date).timestamp()
                for db_task in db_instance.tasks.all()
            ]
            instance_time = max(tasks_update + [instance_time])
        if not (osp.exists(output_path) and \
                instance_time <= osp.getmtime(output_path)):
            os.makedirs(cache_dir, exist_ok=True)
            # Export into a temp file first, then move it into place
            # atomically, so readers never observe a partial archive.
            with tempfile.TemporaryDirectory(dir=cache_dir) as temp_dir:
                temp_file = osp.join(temp_dir, 'result')
                export_fn(db_instance.id,
                          temp_file,
                          dst_format,
                          server_url=server_url,
                          save_images=save_images)
                os.replace(temp_file, output_path)

            archive_ctime = osp.getctime(output_path)
            scheduler = django_rq.get_scheduler()
            # NOTE(review): task_id is forwarded even for project
            # exports (it is None then) - confirm clear_export_cache
            # handles that.
            cleaning_job = scheduler.enqueue_in(time_delta=cache_ttl,
                                                func=clear_export_cache,
                                                task_id=task_id,
                                                file_path=output_path,
                                                file_ctime=archive_ctime)
            logger.info(
                "The {} '{}' is exported as '{}' at '{}' "
                "and available for downloading for the next {}. "
                "Export cache cleaning job is enqueued, id '{}'".format(
                    "project" if isinstance(db_instance, Project) else 'task',
                    db_instance.name, dst_format, output_path, cache_ttl,
                    cleaning_job.id))

        return output_path
    except Exception:
        log_exception(logger)
        raise