예제 #1
0
def test_tanzania_dataset_creation(tanzania_image_size, tanzania_nb_labels):
    """Create a Tanzania dataset
    """
    d = TanzaniaDataset(tanzania_image_size)
    assert d.image_size == tanzania_image_size
    assert d.get_nb_labels() == tanzania_nb_labels
    assert d.get_nb_images() == 0
예제 #2
0
def test_tanzania_testing_dataset_population(
    tanzania_image_size,
    tanzania_testing_temp_dir,
    tanzania_raw_sample,
    tanzania_nb_images,
    tanzania_testing_config,
    tanzania_nb_labels,
    tanzania_nb_output_testing_images,
):
    """Populate a Tanzania dataset
    """
    d = TanzaniaDataset(tanzania_image_size)
    d.populate(
        str(tanzania_testing_temp_dir),
        tanzania_raw_sample,
        nb_images=tanzania_nb_images,
        labelling=False,
    )
    d.save(str(tanzania_testing_config))
    assert d.get_nb_labels() == tanzania_nb_labels
    assert d.get_nb_images() == tanzania_nb_output_testing_images
    assert os.path.isfile(str(tanzania_testing_config))
    assert (len(
        os.listdir(os.path.join(str(tanzania_testing_temp_dir),
                                "images"))) == d.get_nb_images())
예제 #3
0
def test_tanzania_training_dataset_population(
    tanzania_image_size,
    tanzania_training_temp_dir,
    tanzania_raw_sample,
    tanzania_nb_images,
    nb_tiles_per_image,
    tanzania_training_config,
    tanzania_nb_labels,
    tanzania_nb_output_training_images,
):
    """Populate a Tanzania dataset
    """
    d = TanzaniaDataset(tanzania_image_size)
    d.populate(
        str(tanzania_training_temp_dir),
        tanzania_raw_sample,
        nb_images=tanzania_nb_images,
        nb_tiles_per_image=nb_tiles_per_image,
    )
    d.save(str(tanzania_training_config))
    assert d.get_nb_labels() == tanzania_nb_labels
    assert d.get_nb_images() >= 0.1 * tanzania_nb_output_training_images
    assert d.get_nb_images() <= tanzania_nb_output_training_images + 3
    assert os.path.isfile(str(tanzania_training_config))
    assert all(
        len(os.listdir(os.path.join(str(tanzania_training_temp_dir), tmp_dir)))
        == d.get_nb_images() for tmp_dir in ["images", "labels"])
예제 #4
0
def test_tanzania_testing_dataset_loading(tanzania_image_size,
                                          tanzania_testing_config,
                                          tanzania_nb_labels,
                                          tanzania_nb_output_testing_images):
    """Load images into a Tanzania dataset
    """
    d = TanzaniaDataset(tanzania_image_size)
    d.load(tanzania_testing_config)
    assert d.get_nb_labels() == tanzania_nb_labels
    assert d.get_nb_images() == tanzania_nb_output_testing_images
예제 #5
0
def main(args):
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath,
                                                      args.dataset,
                                                      args.image_size)

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(os.path.join(prepro_folder["testing"], "labels"),
                    exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        logger.error(
            "Unsupported dataset type. Please choose amongst %s",
            AVAILABLE_DATASETS,
        )
        sys.exit(1)

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            logger.info((
                "No existing configuration file for this dataset. "
                "Create %s.",
                prepro_folder["training_config"],
            ))
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(prepro_folder["validation_config"],
                                    args.nb_validation_image)
        else:
            logger.info((
                "No existing configuration file for this dataset. "
                "Create %s.",
                prepro_folder["validation_config"],
            ))
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(prepro_folder["testing_config"],
                              args.nb_testing_image)
        else:
            logger.info((
                "No existing configuration file for this dataset. "
                "Create %s.",
                prepro_folder["testing_config"],
            ))
            input_image_dir = os.path.join(input_folder, "testing")
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
    sys.exit(0)
예제 #6
0
def main(args):
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath,
                                                      args.dataset,
                                                      args.image_size)
    if (args.dataset in GEOGRAPHIC_DATASETS
            and (args.nb_training_image > 0 or args.nb_validation_image > 0)
            and args.nb_tiles_per_image is None):
        raise ValueError(
            "The amount of tiles per image must be specified for "
            f"the {args.dataset} dataset, if training and/or validation images "
            "are required. See 'deepo datagen -h' for more details.")

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(os.path.join(prepro_folder["testing"], "labels"),
                    exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        raise ValueError(
            f"Unsupported dataset type. Please choose amongst {AVAILABLE_DATASETS}"
        )

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(prepro_folder["validation_config"],
                                    args.nb_validation_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(prepro_folder["testing_config"],
                              args.nb_testing_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
예제 #7
0
        config_path = os.path.join(input_folder, config_name)
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(os.path.join(prepro_folder["testing"], "labels"),
                    exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        logger.error("Unsupported dataset type. Please choose amongst %s",
                     AVAILABLE_DATASETS)
        sys.exit(1)

    # Dataset populating/loading (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            logger.info(("No existing configuration file for this dataset. "
                         "Create %s.", prepro_folder['training_config']))