Ejemplo n.º 1
0
def test_mapillary_dataset_population_without_labels(
        mapillary_image_size, mapillary_input_config,
        mapillary_sample_without_labels_dir, mapillary_nb_images,
        mapillary_temp_dir):
    """Populating a Mapillary dataset must fail when the sample directory
    does not contain labelled images.
    """
    dataset = MapillaryDataset(mapillary_image_size, mapillary_input_config)
    with pytest.raises(FileNotFoundError) as err:
        dataset.populate(str(mapillary_temp_dir),
                         mapillary_sample_without_labels_dir,
                         nb_images=mapillary_nb_images)
    # Only the errno prefix is checked; the missing path varies per run.
    error_prefix = str(err.value).split(':')[0]
    assert error_prefix == "[Errno 2] No such file or directory"
Ejemplo n.º 2
0
def test_mapillary_dataset_population(mapillary_image_size,
                                      mapillary_raw_sample,
                                      mapillary_nb_images, mapillary_nb_labels,
                                      mapillary_input_config, mapillary_config,
                                      mapillary_temp_dir):
    """Populate a Mapillary dataset and check its in-memory counters as
    well as the resulting on-disk layout.
    """
    dataset = MapillaryDataset(mapillary_image_size, mapillary_input_config)
    dataset.populate(str(mapillary_temp_dir),
                     mapillary_raw_sample,
                     nb_images=mapillary_nb_images)
    dataset.save(str(mapillary_config))
    assert dataset.get_nb_labels(see_all=True) == mapillary_nb_labels
    assert dataset.get_nb_images() == mapillary_nb_images
    assert os.path.isfile(str(mapillary_config))
    # Both preprocessed sub-folders must hold one file per requested image.
    for subdir in ["images", "labels"]:
        produced = os.listdir(os.path.join(str(mapillary_temp_dir), subdir))
        assert len(produced) == mapillary_nb_images
Ejemplo n.º 3
0
def main(args):
    """Build and populate train/validation/test datasets from raw images.

    Depending on ``args.dataset``, instantiate the matching dataset class,
    then for each split either load it from an existing configuration file
    or populate it from the raw input folder and save a new configuration.
    Finally log the label glossary built from the training dataset.

    Exits the process with status 1 on an unsupported dataset type, and
    with status 0 on success.
    """
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath,
                                                      args.dataset,
                                                      args.image_size)

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        # Shapes are generated, so the testing label folder must be created.
        os.makedirs(os.path.join(prepro_folder["testing"], "labels"),
                    exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        logger.error(
            "Unsupported dataset type. Please choose amongst %s",
            AVAILABLE_DATASETS,
        )
        sys.exit(1)

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            # Fix: the message and its argument were wrapped in one tuple,
            # so logging printed the tuple repr instead of interpolating %s.
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(prepro_folder["validation_config"],
                                    args.nb_validation_image)
        else:
            # Fix: same tuple-argument logging bug as for the training split.
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(prepro_folder["testing_config"],
                              args.nb_testing_image)
        else:
            # Fix: same tuple-argument logging bug as for the training split.
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            # Testing images are not labelled, hence labelling=False.
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
    sys.exit(0)
Ejemplo n.º 4
0
def main(args):
    """Build and populate train/validation/test datasets from raw images.

    Depending on ``args.dataset``, instantiate the matching dataset class,
    then for each split either load it from an existing configuration file
    or populate it from the raw input folder and save a new configuration.
    Finally log the label glossary built from the training dataset.

    Raises a ``ValueError`` on an unsupported dataset type, or when a
    geographic dataset is requested without ``nb_tiles_per_image``.
    """
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath,
                                                      args.dataset,
                                                      args.image_size)
    # Geographic datasets are tiled: populating train/validation splits
    # requires knowing how many tiles to extract per raw image.
    if (args.dataset in GEOGRAPHIC_DATASETS
            and (args.nb_training_image > 0 or args.nb_validation_image > 0)
            and args.nb_tiles_per_image is None):
        raise ValueError(
            "The amount of tiles per image must be specified for "
            f"the {args.dataset} dataset, if training and/or validation images "
            "are required. See 'deepo datagen -h' for more details.")

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        # Shapes are generated, so the testing label folder must be created.
        os.makedirs(os.path.join(prepro_folder["testing"], "labels"),
                    exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        raise ValueError(
            f"Unsupported dataset type. Please choose amongst {AVAILABLE_DATASETS}"
        )

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(prepro_folder["validation_config"],
                                    args.nb_validation_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(prepro_folder["testing_config"],
                              args.nb_testing_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            # Testing images are not labelled, hence labelling=False; note
            # that nb_tiles_per_image is not required for this split.
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
Ejemplo n.º 5
0
    else:
        logger.error("Unsupported dataset type. Please choose amongst %s",
                     AVAILABLE_DATASETS)
        sys.exit(1)

    # Dataset populating/loading (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            logger.info(("No existing configuration file for this dataset. "
                         "Create %s.", prepro_folder['training_config']))
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(prepro_folder["training"],
                                   input_image_dir,
                                   nb_images=args.nb_training_image,
                                   aggregate=args.aggregate_label)
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(prepro_folder["validation_config"],
                                    args.nb_validation_image)
        else:
            logger.info(("No existing configuration file for this dataset. "
                         "Create %s.", prepro_folder['validation_config']))
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(prepro_folder["validation"],
                                        input_image_dir,
                                        nb_images=args.nb_validation_image,
                                        aggregate=args.aggregate_label)
Ejemplo n.º 6
0
        logger.error("Unsupported dataset type. Please choose amongst %s",
                     AVAILABLE_DATASETS)
        sys.exit(1)

    # Dataset populating/loading (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            logger.info(("No existing configuration file for this dataset. "
                         "Create %s.", prepro_folder['training_config']))
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(prepro_folder["training"],
                                   input_image_dir,
                                   nb_images=args.nb_training_image,
                                   aggregate=args.aggregate_label,
                                   nb_processes=int(
                                       config.get("running", "processes")))
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(prepro_folder["validation_config"],
                                    args.nb_validation_image)
        else:
            logger.info(("No existing configuration file for this dataset. "
                         "Create %s.", prepro_folder['validation_config']))
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(prepro_folder["validation"],
                                        input_image_dir,
                                        nb_images=args.nb_validation_image,