def test_input_folder(datapath_repo):
    """Test the existence of the raw dataset directory when using
    `utils.prepare_input_folder`
    """
    datapath = str(datapath_repo)
    dataset = "shapes"
    prepare_input_folder(datapath, dataset)
    assert os.path.isdir(os.path.join(datapath, dataset, "input"))
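# The `datapath_repo` fixture used above is defined elsewhere in the test
# suite (presumably in a conftest.py). The sketch below is only an assumption
# of what such a fixture might look like, built on pytest's built-in `tmpdir`
# fixture; it is not the project's actual implementation.
import pytest


@pytest.fixture
def datapath_repo(tmpdir):
    # Yield a fresh temporary directory so each test writes into its own
    # isolated data path
    return tmpdir.mkdir("data")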
def get_image_features(datapath, dataset, filename):
    """Retrieve geotiff image features with GDAL

    Use the `GetGeoTransform` method, which provides the following values:
    + East/West location of Upper Left corner
    + East/West pixel resolution
    + 0.0
    + North/South location of Upper Left corner
    + 0.0
    + North/South pixel resolution

    A GDAL dataset is opened during the function execution. The corresponding
    variable is set to None at the end of the function so as to free memory.

    See GDAL documentation (https://www.gdal.org/gdal_tutorial.html)

    Parameters
    ----------
    datapath : str
        Path towards the data repository
    dataset : str
        Name of the dataset
    filename : str
        Name of the image file from which coordinates are extracted

    Returns
    -------
    dict
        Bounding box of the image (west, south, east, north coordinates),
        srid, and size (in pixels)
    """
    input_folder = utils.prepare_input_folder(datapath, dataset)
    filepath = os.path.join(
        input_folder, "testing", "images", filename + ".tif"
    )
    ds = gdal.Open(filepath)
    width = ds.RasterXSize
    height = ds.RasterYSize
    gt = ds.GetGeoTransform()
    minx = gt[0]
    miny = gt[3] + height * gt[5]
    maxx = gt[0] + width * gt[1]
    maxy = gt[3]
    srid = int(ds.GetProjection().split('"')[-2])
    ds = None  # Free memory used by the GDAL Dataset
    return {
        "west": minx,
        "south": miny,
        "east": maxx,
        "north": maxy,
        "srid": srid,
        "width": width,
        "height": height,
    }
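# Worked example of the geotransform arithmetic used in get_image_features,
# with an invented 6-tuple (values are illustrative only). GDAL orders the
# tuple as (upper-left X, pixel width, 0.0, upper-left Y, 0.0, pixel height),
# where the pixel height is negative for north-up images.
gt = (530000.0, 0.5, 0.0, 9374000.0, 0.0, -0.5)
width, height = 1000, 1000
west = gt[0]                    # 530000.0, upper-left corner X
north = gt[3]                   # 9374000.0, upper-left corner Y
east = gt[0] + width * gt[1]    # 530000.0 + 1000 * 0.5 = 530500.0
south = gt[3] + height * gt[5]  # 9374000.0 + 1000 * -0.5 = 9373500.0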
def main(args):
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(
        args.datapath, args.dataset, args.image_size
    )

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(
            os.path.join(prepro_folder["testing"], "labels"), exist_ok=True
        )
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        logger.error(
            "Unsupported dataset type. Please choose amongst %s",
            AVAILABLE_DATASETS,
        )
        sys.exit(1)

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(
                prepro_folder["training_config"], args.nb_training_image
            )
        else:
            # Pass the format string and its argument separately so the
            # logger performs the "%s" substitution (the original wrapped
            # both in a single tuple, which would be logged verbatim)
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(
                prepro_folder["validation_config"], args.nb_validation_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(
                prepro_folder["testing_config"], args.nb_testing_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
    sys.exit(0)
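# The `config` object queried above via config.get("running", "processes")
# is assumed to be a configparser.ConfigParser loaded at module level; a
# minimal sketch under that assumption, with a hypothetical config.ini name:
import configparser

config = configparser.ConfigParser()
config.read("config.ini")  # hypothetical file, e.g. containing:
#   [running]
#   processes = 4
nb_processes = int(config.get("running", "processes"))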
help=("Number of validation images")) return parser if __name__ == '__main__': # Parse command-line arguments parser = argparse.ArgumentParser( description=("Convolutional Neural Netw" "ork on street-scene images")) parser = add_instance_arguments(parser) args = parser.parse_args() # Data path and repository management aggregate_value = "full" if not args.aggregate_label else "aggregated" input_folder = utils.prepare_input_folder(args.datapath, args.dataset) prepro_folder = utils.prepare_preprocessed_folder(args.datapath, args.dataset, args.image_size, aggregate_value) # Dataset creation if args.dataset == "mapillary": config_name = "config.json" if not args.aggregate_label else "config_aggregate.json" config_path = os.path.join(input_folder, config_name) train_dataset = MapillaryDataset(args.image_size, config_path) validation_dataset = MapillaryDataset(args.image_size, config_path) test_dataset = MapillaryDataset(args.image_size, config_path) elif args.dataset == "shapes": train_dataset = ShapeDataset(args.image_size) validation_dataset = ShapeDataset(args.image_size)
def main(args):
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(
        args.datapath, args.dataset, args.image_size
    )

    if (
        args.dataset in GEOGRAPHIC_DATASETS
        and (args.nb_training_image > 0 or args.nb_validation_image > 0)
        and args.nb_tiles_per_image is None
    ):
        raise ValueError(
            "The amount of tiles per image must be specified for "
            f"the {args.dataset} dataset, if training and/or validation "
            "images are required. See 'deepo datagen -h' for more details."
        )

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(
            os.path.join(prepro_folder["testing"], "labels"), exist_ok=True
        )
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        raise ValueError(
            f"Unsupported dataset type. Please choose amongst {AVAILABLE_DATASETS}"
        )

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(
                prepro_folder["training_config"], args.nb_training_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(
                prepro_folder["validation_config"], args.nb_validation_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
                nb_tiles_per_image=args.nb_tiles_per_image,
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(
                prepro_folder["testing_config"], args.nb_testing_image
            )
        else:
            logger.info(
                "No existing configuration file for this dataset. Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)