def test_mapillary_dataset_population_without_labels(
    mapillary_image_size,
    mapillary_input_config,
    mapillary_sample_without_labels_dir,
    mapillary_nb_images,
    mapillary_temp_dir,
):
    """Check that populating a Mapillary dataset without labelled images fails.

    The ``populate`` call is expected to raise ``FileNotFoundError``, and the
    text of the error before its first colon must be the "[Errno 2]" message.
    """
    dataset = MapillaryDataset(mapillary_image_size, mapillary_input_config)
    with pytest.raises(FileNotFoundError) as excinfo:
        dataset.populate(
            str(mapillary_temp_dir),
            mapillary_sample_without_labels_dir,
            nb_images=mapillary_nb_images,
        )
    # Only the text preceding the first colon is compared, so whatever follows
    # it in the error message is ignored.
    error_prefix, _, _ = str(excinfo.value).partition(":")
    assert error_prefix == "[Errno 2] No such file or directory"
def test_mapillary_dataset_population(
    mapillary_image_size,
    mapillary_raw_sample,
    mapillary_nb_images,
    mapillary_nb_labels,
    mapillary_input_config,
    mapillary_config,
    mapillary_temp_dir,
):
    """Populate and save a Mapillary dataset, then check counts and output files.

    Verifies the number of labels and images reported by the dataset, the
    existence of the saved configuration file, and the number of entries in
    the "images" and "labels" output sub-folders.
    """
    dataset = MapillaryDataset(mapillary_image_size, mapillary_input_config)
    dataset.populate(
        str(mapillary_temp_dir),
        mapillary_raw_sample,
        nb_images=mapillary_nb_images,
    )
    dataset.save(str(mapillary_config))

    assert dataset.get_nb_labels(see_all=True) == mapillary_nb_labels
    assert dataset.get_nb_images() == mapillary_nb_images
    assert os.path.isfile(str(mapillary_config))
    # Each output sub-folder must contain as many entries as requested images.
    for subfolder in ("images", "labels"):
        produced = os.listdir(os.path.join(str(mapillary_temp_dir), subfolder))
        assert len(produced) == mapillary_nb_images
def main(args):
    """Create the training, validation and testing datasets, then exit.

    For each of the three splits whose requested image count is positive, the
    dataset is loaded from its configuration file when that file exists;
    otherwise it is populated from the matching input sub-folder and its
    configuration is saved. A glossary of the training labels and their
    popularity is logged at the end.

    Parameters
    ----------
    args : object
        Parsed arguments; the attributes read here are ``datapath``,
        ``dataset``, ``image_size``, ``nb_training_image``,
        ``nb_validation_image`` and ``nb_testing_image``.

    Raises
    ------
    SystemExit
        With code 1 when ``args.dataset`` is not a supported dataset type,
        and with code 0 once the datasets have been handled.
    """
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(
        args.datapath, args.dataset, args.image_size
    )

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        # The testing "labels" folder is created explicitly for this dataset.
        os.makedirs(
            os.path.join(prepro_folder["testing"], "labels"), exist_ok=True
        )
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        logger.error(
            "Unsupported dataset type. Please choose amongst %s",
            AVAILABLE_DATASETS,
        )
        sys.exit(1)

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(
                prepro_folder["training_config"], args.nb_training_image
            )
        else:
            # The format string and its argument are passed separately so that
            # the logger interpolates the path instead of printing a tuple.
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(
                prepro_folder["validation_config"], args.nb_validation_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(
                prepro_folder["testing_config"], args.nb_testing_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            # Unlike the other two splits, the testing split is populated
            # with labelling disabled.
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    # Summarise the training labels together with their popularity.
    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
    sys.exit(0)
def main(args):
    """Create the training, validation and testing datasets for one dataset type.

    Each split with a positive requested image count is loaded from its
    configuration file when that file exists; otherwise it is populated from
    the matching input sub-folder and its configuration is saved. The training
    labels and their popularity are logged as a glossary at the end.

    Parameters
    ----------
    args : object
        Parsed arguments; the attributes read here are ``datapath``,
        ``dataset``, ``image_size``, ``nb_training_image``,
        ``nb_validation_image``, ``nb_testing_image`` and
        ``nb_tiles_per_image``.

    Raises
    ------
    ValueError
        If the dataset belongs to ``GEOGRAPHIC_DATASETS``, training and/or
        validation images are requested, and ``args.nb_tiles_per_image`` is
        ``None``; or if ``args.dataset`` is not a supported dataset type.
    """
    # Resolve the raw input folder and the preprocessed folder layout.
    raw_dir = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro = utils.prepare_preprocessed_folder(
        args.datapath, args.dataset, args.image_size
    )

    tiles_missing = (
        args.dataset in GEOGRAPHIC_DATASETS
        and (args.nb_training_image > 0 or args.nb_validation_image > 0)
        and args.nb_tiles_per_image is None
    )
    if tiles_missing:
        raise ValueError(
            "The amount of tiles per image must be specified for "
            f"the {args.dataset} dataset, if training and/or validation images "
            "are required. See 'deepo datagen -h' for more details."
        )

    # One dataset object per split, built in training/validation/testing order.
    if args.dataset == "mapillary":
        aggregate_config = os.path.join(raw_dir, "config_aggregate.json")
        train_set, validation_set, test_set = (
            MapillaryDataset(args.image_size, aggregate_config)
            for _ in range(3)
        )
    elif args.dataset == "shapes":
        train_set, validation_set, test_set = (
            ShapeDataset(args.image_size) for _ in range(3)
        )
        os.makedirs(os.path.join(prepro["testing"], "labels"), exist_ok=True)
    elif args.dataset == "aerial":
        train_set, validation_set, test_set = (
            AerialDataset(args.image_size) for _ in range(3)
        )
    elif args.dataset == "tanzania":
        train_set, validation_set, test_set = (
            TanzaniaDataset(args.image_size) for _ in range(3)
        )
    else:
        raise ValueError(
            f"Unsupported dataset type. Please choose amongst {AVAILABLE_DATASETS}"
        )

    # Training split: populate and save when no configuration file exists yet.
    if args.nb_training_image > 0:
        if not os.path.isfile(prepro["training_config"]):
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro["training_config"],
            )
            source_dir = os.path.join(raw_dir, "training")
            train_set.populate(
                prepro["training"],
                source_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            train_set.save(prepro["training_config"])
        else:
            train_set.load(prepro["training_config"], args.nb_training_image)

    # Validation split: same scheme as the training split.
    if args.nb_validation_image > 0:
        if not os.path.isfile(prepro["validation_config"]):
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro["validation_config"],
            )
            source_dir = os.path.join(raw_dir, "validation")
            validation_set.populate(
                prepro["validation"],
                source_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            validation_set.save(prepro["validation_config"])
        else:
            validation_set.load(
                prepro["validation_config"], args.nb_validation_image
            )

    # Testing split: populated with labelling disabled and without a
    # tiles-per-image argument.
    if args.nb_testing_image > 0:
        if not os.path.isfile(prepro["testing_config"]):
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro["testing_config"],
            )
            source_dir = os.path.join(raw_dir, "testing")
            test_set.populate(
                prepro["testing"],
                source_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_set.save(prepro["testing_config"])
        else:
            test_set.load(prepro["testing_config"], args.nb_testing_image)

    # Log the training labels alongside their popularity.
    label_table = pd.DataFrame(train_set.labels)
    label_table["popularity"] = train_set.get_label_popularity()
    logger.info("Data glossary:\n%s", label_table)
else: logger.error("Unsupported dataset type. Please choose amongst %s", AVAILABLE_DATASETS) sys.exit(1) # Dataset populating/loading (depends on the existence of a specification file) if args.nb_training_image > 0: if os.path.isfile(prepro_folder["training_config"]): train_dataset.load(prepro_folder["training_config"], args.nb_training_image) else: logger.info(("No existing configuration file for this dataset. " "Create %s.", prepro_folder['training_config'])) input_image_dir = os.path.join(input_folder, "training") train_dataset.populate(prepro_folder["training"], input_image_dir, nb_images=args.nb_training_image, aggregate=args.aggregate_label) train_dataset.save(prepro_folder["training_config"]) if args.nb_validation_image > 0: if os.path.isfile(prepro_folder["validation_config"]): validation_dataset.load(prepro_folder["validation_config"], args.nb_validation_image) else: logger.info(("No existing configuration file for this dataset. " "Create %s.", prepro_folder['validation_config'])) input_image_dir = os.path.join(input_folder, "validation") validation_dataset.populate(prepro_folder["validation"], input_image_dir, nb_images=args.nb_validation_image, aggregate=args.aggregate_label)
logger.error("Unsupported dataset type. Please choose amongst %s", AVAILABLE_DATASETS) sys.exit(1) # Dataset populating/loading (depends on the existence of a specification file) if args.nb_training_image > 0: if os.path.isfile(prepro_folder["training_config"]): train_dataset.load(prepro_folder["training_config"], args.nb_training_image) else: logger.info(("No existing configuration file for this dataset. " "Create %s.", prepro_folder['training_config'])) input_image_dir = os.path.join(input_folder, "training") train_dataset.populate(prepro_folder["training"], input_image_dir, nb_images=args.nb_training_image, aggregate=args.aggregate_label, nb_processes=int( config.get("running", "processes"))) train_dataset.save(prepro_folder["training_config"]) if args.nb_validation_image > 0: if os.path.isfile(prepro_folder["validation_config"]): validation_dataset.load(prepro_folder["validation_config"], args.nb_validation_image) else: logger.info(("No existing configuration file for this dataset. " "Create %s.", prepro_folder['validation_config'])) input_image_dir = os.path.join(input_folder, "validation") validation_dataset.populate(prepro_folder["validation"], input_image_dir, nb_images=args.nb_validation_image,