Example #1
import os

import tensorflow as tf  # uses the TF 1.x tf.logging API

# `get_output_subfolder`, `get_reader`, `parse_override`,
# `ObjectDetectionWriter` and `InvalidDataDirectory` are helpers defined in
# the surrounding dataset tool.


def transform(dataset_reader, data_dir, output_dir, splits, only_classes,
              only_images, limit_examples, limit_classes, seed, overrides,
              debug):
    """
    Prepares dataset for ingestion.

    Converts the dataset into different (one per split) TFRecords files.
    """
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Always save modified datasets into a subfolder, to avoid overwriting
    # the originals and cluttering the output directory.
    output_subfolder = get_output_subfolder(
        only_classes, only_images, limit_examples, limit_classes
    )
    if output_subfolder:
        output_dir = os.path.join(output_dir, output_subfolder)

    try:
        reader = get_reader(dataset_reader)
    except ValueError as e:
        tf.logging.error('Error getting reader: {}'.format(e))
        return

    # All splits must have a consistent set of classes.
    classes = None

    reader_kwargs = parse_override(overrides)

    try:
        for split in splits:
            # Create instance of reader.
            split_reader = reader(
                data_dir, split,
                only_classes=only_classes, only_images=only_images,
                limit_examples=limit_examples, limit_classes=limit_classes,
                seed=seed, **reader_kwargs
            )

            if classes is None:
                # "Save" classes from the first split reader
                classes = split_reader.classes
            else:
                # Overwrite classes after first split for consistency.
                split_reader.set_classes(classes)

            # We assume we are saving object detection examples; this should
            # be easy to generalize once we support other types of objects.
            writer = ObjectDetectionWriter(split_reader, output_dir, split)
            writer.save()
    except InvalidDataDirectory as e:
        tf.logging.error('Error reading dataset: {}'.format(e))
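
A minimal invocation sketch (all values below are hypothetical; in the original tool this function is typically exposed through a CLI, with each argument mapped to a command-line flag):

transform(
    dataset_reader='pascal',          # hypothetical reader key for get_reader()
    data_dir='datasets/pascal/raw',   # hypothetical input path
    output_dir='datasets/pascal/tf',  # hypothetical output path
    splits=['train', 'val'],
    only_classes=None,
    only_images=None,
    limit_examples=None,
    limit_classes=None,
    seed=0,
    overrides=[],                     # turned into reader kwargs by parse_override()
    debug=False,
)

This writes one TFRecords file per split under output_dir, or under a generated subfolder of it when any of the limiting options are set.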
Example #2
import tensorflow as tf  # uses the TF 1.x tf.logging API

# As in Example #1, `get_reader`, `parse_override`, `ObjectDetectionWriter`
# and `InvalidDataDirectory` are helpers defined in the surrounding dataset
# tool.


def transform(dataset_reader, data_dir, output_dir, splits, only_classes,
              only_images, limit_examples, class_examples, overrides, debug):
    """
    Prepares dataset for ingestion.

    Converts the dataset into different (one per split) TFRecords files.
    """
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    try:
        reader = get_reader(dataset_reader)
    except ValueError as e:
        tf.logging.error('Error getting reader: {}'.format(e))
        return

    # All splits must have a consistent set of classes.
    classes = None

    reader_kwargs = parse_override(overrides)

    try:
        for split in splits:
            # Create instance of reader.
            split_reader = reader(data_dir,
                                  split,
                                  only_classes=only_classes,
                                  only_images=only_images,
                                  limit_examples=limit_examples,
                                  class_examples=class_examples,
                                  **reader_kwargs)

            if classes is None:
                # "Save" classes from the first split reader
                classes = split_reader.classes
            else:
                # Overwrite classes after first split for consistency.
                split_reader.classes = classes

            # We assume we are saving object detection examples; this should
            # be easy to generalize once we support other types of objects.
            writer = ObjectDetectionWriter(split_reader, output_dir, split)
            writer.save()

            tf.logging.info('Composition per class ({}):'.format(split))
            for label, count in split_reader._per_class_counter.most_common():
                tf.logging.info('\t%s: %d', split_reader.pretty_name(label),
                                count)

    except InvalidDataDirectory as e:
        tf.logging.error('Error reading dataset: {}'.format(e))
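
Compared to Example #1, this variant swaps the limit_classes/seed options for class_examples and additionally logs the per-class composition of each split after writing it. That log relies on the reader exposing a per-class collections.Counter and a pretty_name() helper; a minimal stand-in sketch of that contract (the class and counts below are made up for illustration):

from collections import Counter


class SketchReader:
    # Hypothetical stand-in mirroring the attributes Example #2 relies on;
    # not a confirmed public API of the actual readers.
    def __init__(self):
        self._per_class_counter = Counter({'dog': 120, 'cat': 80})

    def pretty_name(self, label):
        return label.capitalize()


reader = SketchReader()
for label, count in reader._per_class_counter.most_common():
    print('\t{}: {}'.format(reader.pretty_name(label), count))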