def loadRaster(raster_file, md_dest=None):
    '''
    Load the first band of a raster dataset, and optionally reproject it.

    Args:
        raster_file: Path to a GeoTiff or .vrt file.
        md_dest: A metadata file from sen2mosaic.Metadata(). If None
            (the default), the raster is returned on its own grid without
            reprojection.

    Returns:
        A numpy array

    Raises:
        IOError: If GDAL returns no dataset for raster_file.
    '''

    # Open read-only (access flag 0)
    ds_source = gdal.Open(raster_file, 0)
    if ds_source is None:
        raise IOError("Unable to open raster file: %s" % str(raster_file))

    band = ds_source.GetRasterBand(1)

    # If no reprojection required, return array. The pixels are read here,
    # while ds_source is still alive, rather than handing back a Band object
    # that would outlive its owning dataset.
    if md_dest is None:
        return band.ReadAsArray()

    # Else reproject
    geo_t = ds_source.GetGeoTransform()

    # Get extent and resolution of input raster.
    # RasterXSize is the width (number of columns) and RasterYSize is the
    # height (number of rows).
    ncols = ds_source.RasterXSize
    nrows = ds_source.RasterYSize
    ulx = float(geo_t[0])
    uly = float(geo_t[3])
    xres = float(geo_t[1])
    yres = float(geo_t[5])

    # Lower-right corner: step across all columns in x and all rows in y
    lrx = ulx + (xres * ncols)
    lry = uly + (yres * nrows)
    extent = [ulx, lry, lrx, uly]

    # Get EPSG
    proj = ds_source.GetProjection()
    srs = osr.SpatialReference(wkt=proj)
    srs.AutoIdentifyEPSG()
    EPSG = int(srs.GetAttrValue("AUTHORITY", 1))

    # Add source metadata to a dictionary
    md_source = sen2mosaic.Metadata(extent, xres, EPSG)

    # Build an empty destination dataset.
    # NOTE(review): dtype=1 presumably maps to gdal.GDT_Byte -- confirm
    # against createGdalDataset.
    ds_dest = createGdalDataset(
        md_dest, nodata=band.GetNoDataValue(), dtype=1)

    # And reproject the source dataset to match the destination grid
    im_rep = np.squeeze(
        _reprojectImage(ds_source, ds_dest, md_source, md_dest))

    return im_rep
def main(source_files, target_extent, resolution, EPSG_code, training_data,
         forest_values, nonforest_values, level = '2A', field_name = '',
         n_processes = 1, max_images = 0, max_pixels = 5000,
         output_dir = os.getcwd(), output_name = 'S2', verbose = True):
    '''
    Extract training pixel values from Sentinel-2 scenes.

    Builds the output grid metadata, loads the scene list sorted by date,
    optionally thins it to a random subset, and passes everything to
    deforest.extraction.extractData() with output = True. This is the
    function that is initiated from the command line.

    Args:
        source_files: A list of directories for Sentinel-2 input tiles.
        target_extent: Extent of search area, in format [xmin, ymin, xmax, ymax]
        resolution: Resolution to re-sample search area, in meters. Best to be 10 m, 20 m or 60 m to match Sentinel-2 resolution.
        EPSG_code: EPSG code of search area.
        training_data: A GeoTiff, .vrt or .shp file containing training pixels/polygons.
        forest_values: A list of raster classes (integers) or shapefile attribute values (str) indicating forest in training_data.
        nonforest_values: A list of raster classes (integers) or shapefile attribute values (str) indicating nonforest in training_data.
        level: Passed to sen2mosaic.IO.loadSceneList() as 'level'. Defaults to '2A'.
        field_name: Shapefile attribute under which forest_values and nonforest_values can be found
        n_processes: Number of processes, defaults to 1.
        max_images: Maximum number of input tiles to extract data from. Defaults to 0, meaning all valid tiles.
        max_pixels: Maximum number of pixels to extract for each class from each image. Defaults to 5000.
        output_dir: Directory to output classifier predictors. Defaults to the working directory at the time this function was defined.
        output_name: Name to precede output file. Defaults to 'S2'.
        verbose: Forwarded to extractData(). Defaults to True.

    Returns:
        None. The extracted pixel values are not returned.
    '''

    assert type(n_processes) is int and n_processes > 0, "n_processes must be an integer > 0."

    # Describe the output grid
    md_dest = sen2mosaic.Metadata(target_extent, resolution, EPSG_code)

    # Find input scenes, ordered by date
    scenes = sen2mosaic.IO.loadSceneList(
        source_files, md_dest = md_dest, level = level, sort_by = 'date')

    # Thin to a random subset of max_images scenes; sorting the sampled
    # indices keeps the survivors in their original (date) order.
    if max_images > 0 and len(scenes) > max_images:
        scene_count = len(scenes)
        keep = sorted(random.sample(range(scene_count), max_images))
        scenes = [scenes[idx] for idx in keep]

    assert len(scenes) > 0, "No valid input files found at specified location."

    # Extract pixel values. The two results are unpacked but not used further.
    _forest_px, _nonforest_px = deforest.extraction.extractData(
        scenes,
        training_data,
        md_dest,
        forest_values,
        nonforest_values,
        field = field_name,
        subset = max_pixels,
        n_processes = n_processes,
        output = True,
        output_dir = output_dir,
        output_name = output_name,
        verbose = verbose)
def _extractData(input_list): ''' Multiprocessing requires some gymnastics. This is a wrapper function to initiate extractData() for multiprocessing. Args: input_list: A list of inputs for a single source_file, in the format: [source_file. trainging_data, target_extent, resolution, EPSG_code, subset]. Returns: A tuple with (a list of forest pixel values, a list of nonforest pixel values) ''' source_file = input_list[0] s2_res = input_list[1] training_data = input_list[2] target_extent = input_list[3] resolution = input_list[4] EPSG_code = input_list[5] forest_values = input_list[6] nonforest_values = input_list[7] field_name = input_list[8] subset = input_list[9] verbose = input_list[10] # Load input scene md_dest = sen2mosaic.Metadata(target_extent, resolution, EPSG_code) # Load scene with sen2mosaic scene = sen2mosaic.core.LoadScene(source_file, resolution=s2_res) return extractData([scene], training_data, md_dest, forest_values, nonforest_values, field=field_name, subset=subset)