예제 #1
0
def main(args):
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath,
                                                      args.dataset,
                                                      args.image_size)
    if (args.dataset in GEOGRAPHIC_DATASETS
            and (args.nb_training_image > 0 or args.nb_validation_image > 0)
            and args.nb_tiles_per_image is None):
        raise ValueError(
            "The amount of tiles per image must be specified for "
            f"the {args.dataset} dataset, if training and/or validation images "
            "are required. See 'deepo datagen -h' for more details.")

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(os.path.join(prepro_folder["testing"], "labels"),
                    exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        raise ValueError(
            f"Unsupported dataset type. Please choose amongst {AVAILABLE_DATASETS}"
        )

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(prepro_folder["validation_config"],
                                    args.nb_validation_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(prepro_folder["testing_config"],
                              args.nb_testing_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
예제 #2
0
def main(args):
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath,
                                                      args.dataset,
                                                      args.image_size)

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(os.path.join(prepro_folder["testing"], "labels"),
                    exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        logger.error(
            "Unsupported dataset type. Please choose amongst %s",
            AVAILABLE_DATASETS,
        )
        sys.exit(1)

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            logger.info((
                "No existing configuration file for this dataset. "
                "Create %s.",
                prepro_folder["training_config"],
            ))
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(prepro_folder["validation_config"],
                                    args.nb_validation_image)
        else:
            logger.info((
                "No existing configuration file for this dataset. "
                "Create %s.",
                prepro_folder["validation_config"],
            ))
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(prepro_folder["testing_config"],
                              args.nb_testing_image)
        else:
            logger.info((
                "No existing configuration file for this dataset. "
                "Create %s.",
                prepro_folder["testing_config"],
            ))
            input_image_dir = os.path.join(input_folder, "testing")
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
    sys.exit(0)
예제 #3
0
                                    args.nb_validation_image)
        else:
            logger.info(("No existing configuration file for this dataset. "
                         "Create %s.", prepro_folder['validation_config']))
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(prepro_folder["validation"],
                                        input_image_dir,
                                        nb_images=args.nb_validation_image,
                                        aggregate=args.aggregate_label)
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(prepro_folder["testing_config"],
                              args.nb_testing_image)
        else:
            logger.info(("No existing configuration file for this dataset. "
                         "Create %s.", prepro_folder['testing_config']))
            input_image_dir = os.path.join(input_folder, "testing")
            test_dataset.populate(prepro_folder["testing"],
                                  input_image_dir,
                                  nb_images=args.nb_testing_image,
                                  aggregate=args.aggregate_label,
                                  labelling=False)
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
    sys.exit(0)