def test_tanzania_dataset_creation(tanzania_image_size, tanzania_nb_labels):
    """Create a Tanzania dataset
    """
    d = TanzaniaDataset(tanzania_image_size)
    assert d.image_size == tanzania_image_size
    assert d.get_nb_labels() == tanzania_nb_labels
    assert d.get_nb_images() == 0
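# The fixtures used throughout these tests (tanzania_image_size,
# tanzania_nb_labels, and friends) come from the test suite's conftest.py.
# A minimal sketch of what such fixtures could look like -- the values
# below are illustrative assumptions, not the repository's actual settings:
import pytest

@pytest.fixture
def tanzania_image_size():
    # Assumed tile size, in pixels.
    return 384

@pytest.fixture
def tanzania_nb_labels():
    # Assumed number of classes in the Tanzania glossary.
    return 4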
def test_tanzania_testing_dataset_population(
    tanzania_image_size,
    tanzania_testing_temp_dir,
    tanzania_raw_sample,
    tanzania_nb_images,
    tanzania_testing_config,
    tanzania_nb_labels,
    tanzania_nb_output_testing_images,
):
    """Populate a Tanzania dataset
    """
    d = TanzaniaDataset(tanzania_image_size)
    d.populate(
        str(tanzania_testing_temp_dir),
        tanzania_raw_sample,
        nb_images=tanzania_nb_images,
        labelling=False,
    )
    d.save(str(tanzania_testing_config))
    assert d.get_nb_labels() == tanzania_nb_labels
    assert d.get_nb_images() == tanzania_nb_output_testing_images
    assert os.path.isfile(str(tanzania_testing_config))
    assert len(
        os.listdir(os.path.join(str(tanzania_testing_temp_dir), "images"))
    ) == d.get_nb_images()
def test_tanzania_training_dataset_population(
    tanzania_image_size,
    tanzania_training_temp_dir,
    tanzania_raw_sample,
    tanzania_nb_images,
    nb_tiles_per_image,
    tanzania_training_config,
    tanzania_nb_labels,
    tanzania_nb_output_training_images,
):
    """Populate a Tanzania dataset
    """
    d = TanzaniaDataset(tanzania_image_size)
    d.populate(
        str(tanzania_training_temp_dir),
        tanzania_raw_sample,
        nb_images=tanzania_nb_images,
        nb_tiles_per_image=nb_tiles_per_image,
    )
    d.save(str(tanzania_training_config))
    assert d.get_nb_labels() == tanzania_nb_labels
    # Tile extraction is not fully deterministic, so the output count is
    # checked against loose bounds rather than an exact value.
    assert d.get_nb_images() >= 0.1 * tanzania_nb_output_training_images
    assert d.get_nb_images() <= tanzania_nb_output_training_images + 3
    assert os.path.isfile(str(tanzania_training_config))
    assert all(
        len(os.listdir(os.path.join(str(tanzania_training_temp_dir), tmp_dir)))
        == d.get_nb_images()
        for tmp_dir in ["images", "labels"]
    )
def test_tanzania_testing_dataset_loading(
    tanzania_image_size,
    tanzania_testing_config,
    tanzania_nb_labels,
    tanzania_nb_output_testing_images,
):
    """Load images into a Tanzania dataset
    """
    d = TanzaniaDataset(tanzania_image_size)
    d.load(tanzania_testing_config)
    assert d.get_nb_labels() == tanzania_nb_labels
    assert d.get_nb_images() == tanzania_nb_output_testing_images
def main(args):
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(
        args.datapath, args.dataset, args.image_size
    )

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(
            os.path.join(prepro_folder["testing"], "labels"), exist_ok=True
        )
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        logger.error(
            "Unsupported dataset type. Please choose amongst %s",
            AVAILABLE_DATASETS,
        )
        sys.exit(1)

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(
                prepro_folder["training_config"], args.nb_training_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
            )
            train_dataset.save(prepro_folder["training_config"])
    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(
                prepro_folder["validation_config"], args.nb_validation_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
            )
            validation_dataset.save(prepro_folder["validation_config"])
    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(
                prepro_folder["testing_config"], args.nb_testing_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
    sys.exit(0)
def main(args):
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(
        args.datapath, args.dataset, args.image_size
    )

    if (
        args.dataset in GEOGRAPHIC_DATASETS
        and (args.nb_training_image > 0 or args.nb_validation_image > 0)
        and args.nb_tiles_per_image is None
    ):
        raise ValueError(
            "The amount of tiles per image must be specified for "
            f"the {args.dataset} dataset, if training and/or validation images "
            "are required. See 'deepo datagen -h' for more details."
        )

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(
            os.path.join(prepro_folder["testing"], "labels"), exist_ok=True
        )
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        raise ValueError(
            f"Unsupported dataset type. Please choose amongst {AVAILABLE_DATASETS}"
        )

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(
                prepro_folder["training_config"], args.nb_training_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            train_dataset.save(prepro_folder["training_config"])
    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(
                prepro_folder["validation_config"], args.nb_validation_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            validation_dataset.save(prepro_folder["validation_config"])
    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(
                prepro_folder["testing_config"], args.nb_testing_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)