def test_data_window_masked_file():
    """The data window of the RGB test file is identical for one band and for all bands."""
    expected = windows.Window.from_slices((3, 714), (13, 770))
    with rasterio.open('tests/data/RGB.byte.tif') as src:
        # Single band, read as a masked array.
        first_band = src.read(1, masked=True)
        assert windows.get_data_window(first_band) == expected

        # Every band at once, read as a masked array.
        all_bands = src.read(masked=True)
        assert windows.get_data_window(all_bands) == expected
def test_data_window_nodata(data):
    """A nodata value of 0 crops the fixture to DATA_WINDOW; an all-ones array is kept whole."""
    cropped = windows.get_data_window(data, nodata=0)
    assert cropped == windows.Window.from_ranges(*DATA_WINDOW)

    # An array without any nodata pixel must yield the full extent.
    everything_valid = np.ones_like(data)
    uncropped = windows.get_data_window(everything_valid, nodata=0)
    full_extent = windows.Window.from_ranges((0, data.shape[0]), (0, data.shape[1]))
    assert uncropped == full_extent
def test_data_window_masked_file():
    """The data window of the RGB test file is identical for one band and for all bands."""
    expected = windows.Window.from_ranges((3, 714), (13, 770))
    with rasterio.open('tests/data/RGB.byte.tif') as src:
        # Single band, read as a masked array.
        first_band = src.read(1, masked=True)
        assert windows.get_data_window(first_band) == expected

        # Every band at once, read as a masked array.
        all_bands = src.read(masked=True)
        assert windows.get_data_window(all_bands) == expected
def test_data_window_maskedarray():
    """A masked first row is excluded from the data window."""
    values = np.ones((3, 3))
    values[0, :] = 0
    masked = np.ma.masked_array(values, values == 0)

    expected = Window.from_slices((1, 3), (0, 3))
    assert get_data_window(masked) == expected
def test_data_window_nodata_disjunct():
    """Valid patches that differ per band produce a window covering all of them."""
    bands = np.zeros((3, 10, 10), dtype='uint8')
    # (band index, row slice, column slice) of the valid patch in each band.
    patches = [
        (0, slice(None, 4), slice(1, 4)),
        (1, slice(2, 5), slice(2, 8)),
        (2, slice(1, 6), slice(1, 6)),
    ]
    for band, rows, cols in patches:
        bands[band, rows, cols] = 1

    expected = windows.Window.from_slices((0, 6), (1, 8))
    assert windows.get_data_window(bands, nodata=0) == expected
def test_data_window_nodata_disjunct():
    """Valid patches that differ per band produce a window covering all of them."""
    bands = np.zeros((3, 10, 10), dtype='uint8')
    # (band index, row slice, column slice) of the valid patch in each band.
    patches = [
        (0, slice(None, 4), slice(1, 4)),
        (1, slice(2, 5), slice(2, 8)),
        (2, slice(1, 6), slice(1, 6)),
    ]
    for band, rows, cols in patches:
        bands[band, rows, cols] = 1

    expected = windows.Window.from_ranges((0, 6), (1, 8))
    assert windows.get_data_window(bands, nodata=0) == expected
def test_data_window_unmasked(data, recwarn):
    """The deprecated alias warns exactly once and agrees with ``windows.get_data_window``."""
    warnings.simplefilter('always')

    # The legacy entry point must emit a single DeprecationWarning.
    legacy_window = get_data_window(data)
    assert len(recwarn) == 1
    assert recwarn.pop(DeprecationWarning)

    # The replacement must stay silent and return the same window.
    current_window = windows.get_data_window(data)
    assert len(recwarn) == 0
    assert legacy_window == current_window
def load_from_geotif(src, band, roi=None):
    """Read one band of an open dataset, restricted to a window.

    Args:
        src: Open dataset exposing ``read``, ``width``, ``height`` and ``transform``.
        band: Index of the band to read.
        roi: Window to read. When ``None``, the data window of the masked band is used.

    Returns:
        tuple: ``(img, (width, height), transform)``. The size and transform are
        those of the whole dataset, not of the window that was read.
    """
    window = roi if roi is not None else get_data_window(src.read(band, masked=True))
    pixels = src.read(band, window=window)
    return pixels, (src.width, src.height), src.transform
def main(cadastre_shp_filepath, agglom_extent_filepath, dst_filepath, dst_res,
         dst_nodata, dst_dtype):
    """Rasterize the cadastre, clip it to the agglomeration extent and write a GeoTIFF.

    The cadastre is read within the module-level WEST/SOUTH/EAST/NORTH bounding
    box and rasterized. Pixels outside the first geometry of the agglomeration
    extent file are set to ``dst_nodata``, and the array is cropped to the data
    window of that geometry before being written as a single-band GTiff.

    Args:
        cadastre_shp_filepath: Path of the cadastre vector file.
        agglom_extent_filepath: Path of the vector file holding the extent geometry.
        dst_filepath: Path of the GeoTIFF to write.
        dst_res: Resolution handed to ``rasterize_cadastre``.
        dst_nodata: Nodata value of the output raster.
        dst_dtype: Data type of the output raster.
    """
    logger = logging.getLogger(__name__)

    bounding_box = (WEST, SOUTH, EAST, NORTH)
    cadastre_gdf = gpd.read_file(cadastre_shp_filepath, bbox=bounding_box)

    # rasterize the cadastre
    cadastre_arr, cadastre_transform = rasterize_cadastre(
        cadastre_gdf, dst_res, dst_nodata, dst_dtype)
    logger.info("rasterized cadastre vector LULC dataset to shape %s",
                str(cadastre_arr.shape))

    # TODO: crop it to the extent
    # Burn the first extent geometry onto the cadastre grid; 0 marks "outside".
    outside_value = 0
    extent_geoms = gpd.read_file(agglom_extent_filepath)['geometry'].iloc[:1]
    extent_mask = features.rasterize(extent_geoms,
                                     out_shape=cadastre_arr.shape,
                                     fill=outside_value,
                                     transform=cadastre_transform)

    # get window and transform of valid data points, i.e., the computed extent
    extent_window = windows.get_data_window(extent_mask, nodata=outside_value)
    extent_transform = windows.transform(extent_window, cadastre_transform)
    clipped_arr = np.where(extent_mask, cadastre_arr, dst_nodata)
    dst_arr = clipped_arr[windows.window_index(extent_window)]

    # dump it
    profile = dict(
        driver='GTiff',
        width=extent_window.width,
        height=extent_window.height,
        count=1,
        crs=CRS,  # cadastre_gdf.crs
        transform=extent_transform,
        dtype=dst_dtype,
        nodata=dst_nodata,
    )
    with rio.open(dst_filepath, 'w', **profile) as dst:
        dst.write(dst_arr, 1)
    logger.info("dumped rasterized dataset to %s", dst_filepath)
def get_data_window(arr, nodata=None):
    """Deprecated alias that forwards to ``windows.get_data_window``.

    Args:
        arr: Array passed through unchanged.
        nodata: Optional nodata value passed through unchanged.

    Returns:
        Whatever ``windows.get_data_window(arr, nodata)`` returns.

    Warns:
        DeprecationWarning: On every call.
    """
    # stacklevel=2 attributes the warning to the caller's line instead of this
    # wrapper, so the report points at the code that needs migrating.
    warnings.warn("Deprecated; Use rasterio.windows instead",
                  DeprecationWarning, stacklevel=2)
    return windows.get_data_window(arr, nodata)
def test_data_window_masked(data):
    """Masking the zero pixels of the fixture yields DATA_WINDOW."""
    zero_pixels = data == 0
    masked = np.ma.masked_array(data, zero_pixels)
    result = windows.get_data_window(masked)
    assert result == DATA_WINDOW
def test_data_window_masked(data):
    """Masking the zero pixels of the fixture yields the DATA_WINDOW ranges."""
    zero_pixels = data == 0
    masked = np.ma.masked_array(data, zero_pixels)
    result = windows.get_data_window(masked)
    assert result == windows.Window.from_ranges(*DATA_WINDOW)
def test_data_window_nodata():
    """A first row made of nodata values is dropped from the window."""
    grid = np.ones((3, 3))
    grid[0, :] = 0

    result = get_data_window(grid, nodata=0)
    expected_rows, expected_cols = (1, 3), (0, 3)
    assert result == (expected_rows, expected_cols)
def test_data_window_unmasked(data):
    """Without a mask or nodata value the window spans the whole array."""
    n_rows, n_cols = data.shape[0], data.shape[1]
    result = windows.get_data_window(data)
    assert result == ((0, n_rows), (0, n_cols))
# if masks are all false for this window, we can skip this step if not np.asarray(masks).sum(): continue # stack mask bits and layer_bits along inner dimension so that we have a shape of # (height, width, total_bits) data_bits = np.dstack(masks + layer_bits) # packbits must be in little order to read the whole array properly in JS packed = np.squeeze(np.packbits(data_bits, axis=-1, bitorder="little")) window_shape = (window.height, window.width) # fill remaining bytes up to dtype bytes fill = np.zeros(shape=window_shape, dtype="uint8") encoded = np.dstack([packed] + ([fill] * (num_bytes - packed.shape[-1]))) out[window.toslices()] = encoded.view(dtype).reshape(window_shape) # determine the window where data are available, and write out a smaller output print("Calculating data window...") data_window = get_data_window(out, nodata=0) out = out[data_window.toslices()] transform = blueprint.window_transform(data_window) print("Writing GeoTIFF...") outfilename = out_dir / f"indicators_{i}.tif" write_raster(outfilename, out, transform=transform, crs=blueprint.crs, nodata=0) add_overviews(outfilename)
def test_data_window_nodata_3d():
    """A nodata first row shared by every band is dropped from the window."""
    cube = np.ones((3, 3, 3))
    cube[:, 0, :] = 0

    expected = Window.from_slices((1, 3), (0, 3))
    assert get_data_window(cube, nodata=0) == expected
def test_data_window_masked(data):
    """Masking the zero pixels of the fixture yields the DATA_WINDOW slices."""
    zero_pixels = data == 0
    masked = np.ma.masked_array(data, zero_pixels)
    result = windows.get_data_window(masked)
    assert result == windows.Window.from_slices(*DATA_WINDOW)
def test_data_window_novalid():
    """An array containing only nodata values yields an empty window."""
    grid = np.ones((3, 3))
    grid[:, :] = 0

    empty = Window.from_slices((0, 0), (0, 0))
    assert get_data_window(grid, nodata=0) == empty
def test_data_window_full():
    """An entirely valid array yields a window covering all of it."""
    grid = np.ones((3, 3))
    whole = Window.from_slices((0, 3), (0, 3))
    assert get_data_window(grid) == whole
def test_data_window_invalid_arr_dims():
    """Arrays with more than three dimensions raise WindowError."""
    four_dimensional = np.ones((3, 3, 3, 3))
    with pytest.raises(WindowError):
        get_data_window(four_dimensional)
def stack_and_clip_rasters(raster_files, use_common=True, output_tif=None):
    """Combine multiple single band files into a single multi band raster where one band
    represents one file. The filename will be saved in the bands tag. Optionally a minimum
    common area mask can be applied.

    All input rasters MUST be of the same coordinate system and pixel size.

    Args:
        raster_files (List[str]): list of raster files to process
        use_common (bool): if true input rasters will be masked to the minimum common data area
        output_tif (str): output tif name. if omitted will be created in TEMPDIR

    Returns:
        str(): The output tif file
        list(): A list of the band tags. (ie order of allocation to bands)

    Raises:
        TypeError: raster_files is not a list, is empty, mixes pixel sizes, or contains
            rasters without a coordinate system.
        IOError: one or more raster files do not exist.
        rasterio.errors.WindowError: the rasters do not overlap.
    """
    # Validate the arguments first so an invalid call fails before a temporary
    # output name is reserved on disk.
    if not isinstance(raster_files, list):
        raise TypeError('Invalid Type: raster_files should be a list')

    if not raster_files:
        raise TypeError('Invalid Type: Empty list of raster files')

    not_exists = [my_file for my_file in raster_files if not os.path.exists(my_file)]
    if not_exists:
        raise IOError('raster_files: {} raster file(s) do '
                      'not exist\n\t({})'.format(len(not_exists), '\n\t'.join(not_exists)))

    # if output_tif doesn't include a path then add tempdir as well as overwriting it
    if output_tif is not None and not os.path.isabs(output_tif):
        output_tif = os.path.join(TEMPDIR, output_tif)

    if output_tif is None or config.get_debug_mode():
        # get a unique name; the file is created and then deleted again when the
        # context exits, leaving only its name behind
        with NamedTemporaryFile(prefix='{}_'.format(
                inspect.getframeinfo(inspect.currentframe())[2]),
                suffix='.tif', dir=TEMPDIR) as new_file:
            output_tif = new_file.name

    check_pixelsize = []
    check_crs = []

    for ea_raster in raster_files:
        with rasterio.open(ea_raster) as src:
            if src.crs is None:
                check_crs.append(ea_raster)
            if src.res not in check_pixelsize:
                check_pixelsize.append(src.res)

    if len(check_pixelsize) == 1:
        resolution = check_pixelsize[0]
    else:
        raise TypeError(
            "raster_files are of different pixel sizes - {}".format(
                list(set(check_pixelsize))))

    if check_crs:
        raise TypeError("{} raster(s) don't have coordinates "
                        "systems assigned \n\t{}".format(len(check_crs), '\n\t'.join(check_crs)))

    start_time = time.time()
    step_time = time.time()

    # Make sure ALL masks are saved inside the TIFF file not as a sidecar.
    gdal.SetConfigOption('GDAL_TIFF_INTERNAL_MASK', 'YES')
    try:
        try:
            # Find minimum overlapping extent of all rasters ----------------------------
            for i, ea_raster in enumerate(raster_files, start=1):
                with rasterio.open(ea_raster) as src:
                    band1 = src.read(1, masked=True)

                    # get minimum data extent
                    data_window = get_data_window(band1)

                    # find the intersection of all the windows
                    if i == 1:
                        min_window = data_window
                    else:
                        # create a new window using the last coordinates based on this
                        # image in case extents/pixel origins are different
                        min_img_window = from_bounds(*min_bbox, transform=src.transform)

                        # find the intersection of the windows.
                        min_window = intersection(
                            min_img_window, data_window).round_lengths('ceil')

                    # convert the window to coordinates
                    min_bbox = src.window_bounds(min_window)

            if config.get_debug_mode():
                LOGGER.info('{:<30} {:<15} {dur} {}'.format(
                    'Found Common Extent', '', min_bbox,
                    dur=timedelta(seconds=time.time() - step_time)))

        except rasterio.errors.WindowError as e:
            # reword 'windows do not intersect' error message
            e.args = ("Rasters (Images) do not overlap", )
            raise  # re-raise current exception

        # Create the metadata for the overlapping extent image. -------------------------
        transform, width, height, bbox = create_raster_transform(
            min_bbox, resolution[0], buffer_by_pixels=5)

        # update the new image metadata -------------------------------------------------
        # Open the first raster in a context manager so its handle is released.
        with rasterio.open(raster_files[0]) as first_src:
            kwargs = first_src.meta.copy()

        kwargs.update({
            'count': len(raster_files),
            'nodata': -9999,
            'height': height,
            'width': width,
            'transform': transform
        })

        # combine individual rasters into one multi-band tiff using reproject, which will:
        #   - fit each raster to the same pixel origin
        #   - crop to the min bbox
        #   - standardise the nodata values
        #   - apply nodata values to entire image including internal blocks
        with rasterio.open(output_tif, 'w', **kwargs) as dst:
            band_list = []
            for i, ea_raster in enumerate(raster_files, start=1):
                image = os.path.basename(os.path.splitext(ea_raster)[0])
                band_list.append(image)

                with rasterio.open(ea_raster) as src:
                    reproject(source=rasterio.band(src, 1),
                              destination=rasterio.band(dst, i),
                              src_transform=src.transform,
                              dst_transform=transform,
                              resampling=Resampling.nearest)

                dst.update_tags(i, **{'name': image})

            dst.descriptions = band_list

        if config.get_debug_mode():
            LOGGER.info('{:<30} {:<15} {dur}'.format(
                'Rasters Combined', '',
                dur=timedelta(seconds=time.time() - step_time)))

        # find the common data area -----------------------------------------------------
        if use_common:
            with rasterio.open(output_tif, 'r+') as src:
                # find common area across all bands as there maybe internal nodata
                # values in some bands.
                mask = None

                # loop through all the masks
                for ea_mask in src.read_masks():
                    mask = ea_mask if mask is None else mask & ea_mask

                # change mask values to 0 = nodata, 1 = valid data
                mask[mask == 255] = 1

                # apply mask to all bands of file
                src.write_mask(mask)

            if config.get_debug_mode():
                # write mask to new file
                tmp_file = os.path.join(
                    TEMPDIR, output_tif.replace('.tif', '_commonarea.tif'))

                kwargs_tmp = kwargs.copy()
                kwargs_tmp.update({
                    'count': 1,
                    'nodata': 0,
                    'dtype': rasterio.dtypes.get_minimum_dtype(mask)
                })

                with rasterio.open(tmp_file, 'w', **kwargs_tmp) as tmp_dst:
                    tmp_dst.write(mask, 1)

                LOGGER.info('{:<30} {:<15} {dur}'.format(
                    'Rasters Combined and clipped', '',
                    dur=timedelta(seconds=time.time() - start_time)))
    finally:
        # Always restore the global GDAL option, including when an error is raised.
        gdal.SetConfigOption('GDAL_TIFF_INTERNAL_MASK', None)

    return output_tif, band_list
def handle(self, zoneset, variables, clear, *args, **kwargs):
    """Recompute transfer limits for the selected zone sets and variables.

    Existing ``TransferLimit`` rows are deleted after an interactive
    confirmation. Then, for every period in ``PERIODS`` and every zone of every
    selected source, elevation and climate data inside the zone are split by
    elevation band and handed to ``self._write_limit``. Finally the average
    transfer per source, variable and period is stored on the matching rows.

    Args:
        zoneset: Comma-separated zone source names; ``None`` or blank selects all sources.
        variables: Comma-separated variable names; ``None`` selects all of ``VARIABLES``.
            Names not in ``VARIABLES`` are dropped.
        clear: When truthy, ALL transfer limits are deleted (after confirmation)
            rather than only those of the selected sources.

    Raises:
        CommandError: No zone source or variable matches, or a zone has no
            suitable region.
    """
    if zoneset is None or zoneset.strip() == "":
        sources = ZoneSource.objects.all().order_by("name")
        if len(sources) == 0:
            raise CommandError("No zonesets available to analyze")
    else:
        sources = ZoneSource.objects.filter(name__in=zoneset.split(",")).order_by("name")
        if len(sources) == 0:
            raise CommandError("No zonesets available to analyze that match --zones values")

    if variables is None:
        variables = VARIABLES
    else:
        variables = [v for v in variables.split(",") if v in set(VARIABLES)]
        if len(variables) == 0:
            raise CommandError("No variables available to analyze that match --variables values")

    existing_limits = TransferLimit.objects.filter(zone__zone_source__in=[s.id for s in sources])

    # Deleting is destructive, so ask before removing anything; any answer other
    # than "y"/"yes" aborts the whole command.
    if clear:
        message = "WARNING: This will replace ALL transfer limits. Do you want to continue? [y/n]"
        if input(message).lower() not in {"y", "yes"}:
            return
        TransferLimit.objects.all().delete()
    elif existing_limits.exists():
        message = 'WARNING: This will replace "{}" transfer limits. Do you want to continue? [y/n]'.format(
            [s.name for s in sources]
        )
        if input(message).lower() not in {"y", "yes"}:
            return
        existing_limits.delete()

    for time_period in PERIODS:
        self.transfers_by_source = {}

        for source in sources:
            zones = source.seedzone_set.all().order_by("zone_id")

            with ZoneConfig(source.name) as config, ElevationDataset() as elevation_ds, ClimateDatasets(
                period=time_period, variables=variables
            ) as climate:
                for zone in Bar(
                    "Processing {} zones for {}".format(source.name, time_period),
                    max=source.seedzone_set.count(),
                ).iter(zones):
                    regions = get_regions_for_zone(zone)

                    # Prefer the region the elevation dataset currently has loaded;
                    # otherwise start with the first candidate region.
                    if elevation_ds.region in regions:
                        region = regions.pop(regions.index(elevation_ds.region))
                    else:
                        try:
                            region = regions.pop(0)
                        except IndexError:
                            raise CommandError(
                                "The following seedzone has no suitable region: {}".format(zone.zone_uid)
                            )

                    # Try candidate regions until one yields elevation pixels inside
                    # the zone, or until the candidates run out.
                    while True:
                        elevation_ds.load_region(region.name)
                        climate.load_region(region.name)

                        window, coords = elevation_ds.get_read_window(zone.polygon.extent)
                        transform = coords.affine
                        elevation = elevation_ds.data[window]

                        zone_mask = rasterize(
                            (json.loads(zone.polygon.geojson),),
                            out_shape=elevation.shape,
                            transform=transform,
                            fill=1,  # mask is True OUTSIDE the zone
                            default_value=0,
                            dtype=numpy.dtype("uint8"),
                        ).astype("bool")

                        # if zone_mask is empty (all True), try again with all_touched=True
                        if zone_mask.all():
                            zone_mask = rasterize(
                                (json.loads(zone.polygon.geojson),),
                                out_shape=elevation.shape,
                                transform=transform,
                                fill=1,  # mask is True OUTSIDE the zone
                                default_value=0,
                                dtype=numpy.dtype("uint8"),
                                all_touched=True,
                            ).astype("bool")

                        # Still no pixel inside the zone: stop trying regions.
                        if zone_mask.all():
                            break

                        # extract all data not masked out as nodata or outside zone
                        nodata_mask = elevation == elevation_ds.nodata_value
                        mask = nodata_mask | zone_mask

                        # Create a 2D array for extracting to new dataset, in integer feet
                        elevation2d = (
                            numpy.where(~mask, elevation / 0.3048, elevation_ds.nodata_value).round().astype("int")
                        )

                        # Create a 1D array for quantitative analysis, in integer feet
                        elevation = (elevation[~mask] / 0.3048).round().astype("int")

                        # No usable pixels in this region: fall back to the next one.
                        if elevation.size == 0 and regions:
                            region = regions.pop(0)
                            continue

                        break

                    # if still empty, this is not really a valid zone
                    if zone_mask.all():
                        continue

                    # If there are no pixels in the mask, skip this zone
                    if elevation.size == 0:
                        continue

                    min_elevation = max(math.floor(numpy.nanmin(elevation)), 0)
                    max_elevation = math.ceil(numpy.nanmax(elevation))

                    bands = list(config.get_elevation_bands(zone, min_elevation, max_elevation))

                    if not bands:
                        # min / max elevation outside defined bands
                        continue

                    for band in bands:
                        # The first two items of a band are its inclusive low/high bounds.
                        low, high = band[:2]
                        band_data_mask = (elevation >= low) & (elevation <= high)

                        if not numpy.any(band_data_mask):
                            continue

                        # extract 2D version of elevation within the band
                        band_elevation2d = numpy.where(
                            (elevation2d != elevation_ds.nodata_value) & (elevation2d >= low) & (elevation2d <= high),
                            elevation2d,
                            elevation_ds.nodata_value,
                        )

                        # extract data window for a smaller output dataset
                        band_window = (
                            windows.get_data_window(band_elevation2d, elevation_ds.nodata_value)
                            .round_offsets(op="floor")
                            .round_lengths(op="ceil")
                        )

                        if band_window.height > 1 and band_window.width > 1:
                            band_elevation2d = band_elevation2d[band_window.toslices()]
                            band_coords = coords.slice_by_window(Window(*band_window.toslices()))
                        else:
                            # if band is too small, just use the original mask
                            band_coords = coords

                        elevation_service = self._create_elevation_service(
                            zone,
                            band,
                            band_elevation2d,
                            elevation_ds.nodata_value,
                            band_coords,
                        )

                        for variable, ds in climate.items():
                            # extract data with same shape as elevation above
                            data = ds.data[window][~mask]

                            # extract data within elevation range
                            band_data = data[band_data_mask]

                            # then apply variable's nodata mask
                            band_data = band_data[band_data != ds.nodata_value]

                            # no data in band, skip this band
                            if not band_data.size:
                                continue

                            self._write_limit(
                                variable,
                                time_period,
                                zone,
                                band_data,
                                band,
                                elevation_service,
                            )

            # Calculate average transfer limit across zones in source
            for variable in variables:
                transfers = TransferLimit.objects.filter(
                    zone__zone_source=source,
                    variable=variable,
                    time_period=time_period,
                ).all()

                avg_transfer = transfers.aggregate(avg_transfer=Avg("transfer"))["avg_transfer"]
                transfers.update(avg_transfer=avg_transfer)
def test_data_window_unmasked(data):
    """Without a mask or nodata value the window spans the whole array."""
    n_rows, n_cols = data.shape[0], data.shape[1]
    whole = windows.Window.from_slices((0, n_rows), (0, n_cols))
    assert windows.get_data_window(data) == whole
def test_data_window_nodata(data):
    """A nodata value of 0 crops the fixture to DATA_WINDOW; an all-ones array is kept whole."""
    cropped = windows.get_data_window(data, nodata=0)
    assert cropped == windows.Window.from_slices(*DATA_WINDOW)

    # An array without any nodata pixel must yield the full extent.
    everything_valid = np.ones_like(data)
    uncropped = windows.get_data_window(everything_valid, nodata=0)
    full_extent = windows.Window.from_slices((0, data.shape[0]), (0, data.shape[1]))
    assert uncropped == full_extent
def test_data_window_empty_result():
    """An all-nodata multi-band array yields an empty window."""
    all_nodata = np.zeros((3, 10, 10), dtype='uint8')
    empty = windows.Window.from_slices((0, 0), (0, 0))
    assert windows.get_data_window(all_nodata, nodata=0) == empty
def test_data_window_nodata(data):
    """A nodata value of 0 crops the fixture to DATA_WINDOW; an all-ones array is kept whole."""
    cropped = windows.get_data_window(data, nodata=0)
    assert cropped == DATA_WINDOW

    # An array without any nodata pixel must yield the full extent.
    everything_valid = np.ones_like(data)
    uncropped = windows.get_data_window(everything_valid, nodata=0)
    n_rows, n_cols = data.shape[0], data.shape[1]
    assert uncropped == ((0, n_rows), (0, n_cols))
def test_data_window_unmasked(data):
    """Without a mask or nodata value the window spans the whole array."""
    n_rows, n_cols = data.shape[0], data.shape[1]
    whole = windows.Window.from_ranges((0, n_rows), (0, n_cols))
    assert windows.get_data_window(data) == whole
def to_netcdf(
        files,
        output,
        variable,
        dtype,
        src_crs,
        x_name,
        y_name,
        z_name,
        datetime_pattern,
        netcdf3,
        compress,
        packed,
        xy_dtype,
        # z_dtype,
        calendar,
        autocrop):
    """
    Convert rasters to NetCDF and stack them according to a dimension.

    X and Y dimension names will be named according to the source projection (lon, lat if geographic projection, x, y
    otherwise) unless specified.

    Will overwrite an existing NetCDF file.

    Only the first band of the input will be turned into a NetCDF file.

    The first matching file serves as the template for CRS, bounds, size, dtype
    and nodata value. When the template has no nodata value and the output dtype
    equals the source dtype, no explicit fill value is passed on.

    Raises:
        click.BadParameter: no file matches ``files``, more than one file is
            given without ``z_name``, or no CRS is available.
    """

    # TODO: add format string template to this to parse out components

    filenames = list(glob.glob(files))
    if not filenames:
        raise click.BadParameter('No files found matching that pattern', param='files', param_hint='FILES')

    z_values = []

    if datetime_pattern is not None:
        datetimes = (datetime.strptime(x, datetime_pattern) for x in filenames)

        # Sort both datetimes and filenames by datetimes
        z_values, filenames = [list(x) for x in zip(*sorted(zip(datetimes, filenames), key=itemgetter(0)))]

    items = tuple(enumerate(filenames))

    has_z = len(filenames) > 1
    if has_z and not z_name:
        raise click.BadParameter('Required when > 1 input file', param='--z', param_hint='--z')

    if src_crs:
        src_crs = CRS.from_string(src_crs)

    # Open the template in a context manager so its handle is released even when
    # an error is raised further down.
    with rasterio.open(filenames[0]) as template_ds:
        src_crs = template_ds.crs or src_crs

        if not src_crs:
            raise click.BadParameter('Required when no CRS information available in source files', param='--src-crs',
                                     param_hint='--src-crs')

        prj = Proj(**src_crs.to_dict())
        bounds = template_ds.bounds
        width = template_ds.width
        height = template_ds.height
        window = None

        src_dtype = numpy.dtype(template_ds.dtypes[0])
        dtype = numpy.dtype(dtype) if dtype else src_dtype

        if dtype == src_dtype:
            fill_value = template_ds.nodata
            # nodata always comes from rasterio as floating point; it may also be
            # None when the source defines no nodata value, which int() rejects.
            if fill_value is not None and src_dtype.kind in ('u', 'i'):
                fill_value = int(fill_value)
        else:
            fill_value = get_fill_value(dtype)

        x_name = x_name or ('lon' if src_crs.is_geographic else 'x')
        y_name = y_name or ('lat' if src_crs.is_geographic else 'y')

        var_kwargs = {
            'fill_value': fill_value
        }

        nc_format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'

        with Dataset(output, 'w', format=nc_format) as out:
            if packed or autocrop:
                mins = []
                maxs = []
                data_windows = []

                click.echo('Inspecting input datasets...')
                with click.progressbar(items) as progress:
                    for index, filename in progress:
                        with rasterio.open(filename) as src:
                            data = src.read(1, masked=True)
                            if packed:
                                mins.append(data.min())
                                maxs.append(data.max())
                            if autocrop:
                                data_window = get_data_window(data)
                                # Only windows smaller than the full raster matter
                                # for cropping.
                                if data_window != ((0, height), (0, width)):
                                    data_windows.append(data_window)

                if packed:
                    min_value = min(mins)
                    max_value = max(maxs)
                    scale, offset = get_pack_atts(dtype, min_value, max_value)
                if autocrop and data_windows:
                    window = union(data_windows)
                    bounds = template_ds.window_bounds(window)
                    height = window[0][1] - window[0][0]
                    width = window[1][1] - window[1][0]

            coords = SpatialCoordinateVariables.from_bbox(BBox(bounds, prj), width, height, xy_dtype)
            coords.add_to_dataset(out, x_name, y_name, zlib=compress)

            var_dimensions = [y_name, x_name]
            shape = list(coords.shape)
            if has_z:
                shape.insert(0, len(filenames))
                out.createDimension(z_name, shape[0])
                var_dimensions.insert(0, z_name)
                if z_values:
                    dates = DateVariable(numpy.array(z_values),
                                         units_start_date=z_values[0], calendar=calendar)
                    dates.add_to_dataset(out, z_name)

            click.echo('Creating {0}:{1} with shape {2}'.format(output, variable, shape))

            out_var = out.createVariable(variable, dtype, dimensions=var_dimensions,
                                         zlib=compress, **var_kwargs)
            set_crs(out, variable, prj, set_proj4_att=True)

            if packed:
                out_var.setncattr('scale_factor', scale)
                out_var.setncattr('add_offset', offset)

            click.echo('Copying data from input files...')
            with click.progressbar(items) as progress:
                for index, filename in progress:
                    with rasterio.open(filename) as src:
                        data = src.read(1, masked=True, window=window)

                        if has_z:
                            out_var[index, :] = data
                        else:
                            out_var[:] = data

                    out.sync()
def main(mosaic, data, dest, ntl, bbox, country):
    """Prepare pre-event rasters: optional mosaic, bounding-box crop and nighttime-light filter.

    Args:
        mosaic: When truthy, ``create_raster_mosaic(data, dest)`` is run first.
        data: Input location handed to ``create_raster_mosaic``.
        dest: Output directory; ``pre-event`` and ``post-event`` sub-directories are created.
        ntl: When truthy, pre-event rasters are filtered with the nighttime-light mask.
        bbox: Four comma-separated floats passed to ``box`` in the order given, or ``''``
            to skip the bounding-box crop.
        country: Country name used to restrict the nighttime-light mask, or ``''``.

    Processing is best-effort: a raster that cannot be cropped or loaded is
    reported on stdout and skipped. Only ``Exception`` subclasses are treated
    this way, so Ctrl-C and interpreter exit still stop the run.
    """
    os.makedirs(dest, exist_ok=True)
    os.makedirs(dest + '/pre-event', exist_ok=True)
    os.makedirs(dest + '/post-event', exist_ok=True)

    # create raster mosaic for rasters with same name (~ same area)
    print('creating mosaic of overlapping rasters')
    if mosaic:
        create_raster_mosaic(data, dest)

    # filter pre-event rasters
    print('filtering pre-event rasters')

    # filter by bounding box (if provided)
    if bbox != '':
        bbox_tuple = [float(x) for x in bbox.split(',')]
        bbox = box(bbox_tuple[0], bbox_tuple[1], bbox_tuple[2], bbox_tuple[3])
        geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=from_epsg(4326))
        coords = getFeatures(geo)
        print('filtering on bbox:')
        print(coords)

        # loop over images and filter
        for raster in tqdm(glob.glob(dest + '/pre-event/*.tif')):
            raster = raster.replace('\\', '/')
            raster_or = raster
            out_name = raster.split('.')[0] + '-bbox.tif'
            with rasterio.open(raster) as src:
                print('cropping on bbox')

                try:
                    out_img, out_transform = mask(dataset=src, shapes=coords, crop=True)
                    out_meta = src.meta.copy()
                    out_meta.update({
                        'height': out_img.shape[1],
                        'width': out_img.shape[2],
                        'transform': out_transform
                    })

                    print('saving', out_name)
                    with rasterio.open(out_name, 'w', **out_meta) as dst:
                        dst.write(out_img)
                except Exception as e:
                    # Best-effort: report the reason and move on.
                    print('empty raster, discard', e)

            # The original is removed whether or not a cropped copy was written.
            os.remove(raster_or)

    # filter by nighttime lights

    # load nighttime light mask
    ntl_shapefile = 'input/ntl_mask_extended.shp'
    if ntl:
        # filter mask by country (if provided)
        if country != '':
            country_ntl_shapefile = ntl_shapefile.split(
                '.')[0] + '_' + country.lower() + '.shp'
            if not os.path.exists(country_ntl_shapefile):
                ntl_world = gpd.read_file(ntl_shapefile)
                ntl_world.crs = {'init': 'epsg:4326'}
                ntl_world = ntl_world.to_crs("EPSG:4326")
                world = gpd.read_file(
                    gpd.datasets.get_path('naturalearth_lowres'))
                country_shape = world[world.name == country]
                if country_shape.empty:
                    print('WARNING: country', country, 'not found!!!')
                    print('available countries:')
                    print(world.name.unique())
                    print('proceeding with global mask')
                    country_ntl_shapefile = ntl_shapefile
                else:
                    # Clip the global mask to the country's bounding box, enlarged by 10%.
                    country_shape = country_shape.reset_index()
                    country_shape.at[0, 'geometry'] = box(
                        *country_shape.at[0, 'geometry'].bounds)
                    country_shape.geometry = country_shape.geometry.scale(
                        xfact=1.1, yfact=1.1)
                    ntl_country = gpd.clip(ntl_world, country_shape)
                    ntl_country.to_file(country_ntl_shapefile)
            with fiona.open(country_ntl_shapefile, "r") as shapefile:
                shapes = [feature["geometry"] for feature in shapefile]
        else:
            with fiona.open(ntl_shapefile, "r") as shapefile:
                shapes = [feature["geometry"] for feature in shapefile]

        # loop over images and filter
        for raster in tqdm(glob.glob(dest + '/pre-event/*.tif')):
            raster = raster.replace('\\', '/')
            raster_or = raster
            out_name = raster.split('.')[0] + '-ntl.tif'
            # Skip files whose path already contains 'ntl'.
            if 'ntl' in raster:
                continue
            crop_next = True

            print('processing', raster)
            out_name_ntl = raster.split('.')[0] + '-ntl-mask.tif'
            try:
                with rasterio.open(raster) as src:
                    shapes_r = [
                        x for x in shapes
                        if not rasterio.coords.disjoint_bounds(
                            src.bounds, rasterio.features.bounds(x))
                    ]
                    if len(shapes_r) == 0:
                        print('no ntl present, discard')
                        crop_next = False
                    else:
                        print('ntl present, creating mask')
                        out_image, out_transform = rasterio.mask.mask(
                            src, shapes_r, crop=True)
                        out_meta = src.meta

                        out_meta.update({
                            "driver": "GTiff",
                            "height": out_image.shape[1],
                            "width": out_image.shape[2],
                            "transform": out_transform
                        })
                        # save temporary ntl file
                        print('saving mask', out_name_ntl)
                        with rasterio.open(out_name_ntl, "w", **out_meta) as dst:
                            dst.write(out_image)
                        crop_next = True
                    raster = out_name_ntl
                if crop_next:
                    with rasterio.open(raster) as src:
                        print('cropping nan on', raster)
                        window = get_data_window(src.read(1, masked=True))

                        kwargs = src.meta.copy()
                        kwargs.update({
                            'height': window.height,
                            'width': window.width,
                            'transform': rasterio.windows.transform(window, src.transform)
                        })
                        print('saving', out_name)
                        try:
                            with rasterio.open(out_name, 'w', **kwargs) as dst:
                                dst.write(src.read(window=window))
                        except Exception as e:
                            # Best-effort: report the reason and move on.
                            print('empty raster, discard', e)
                    # remove temporary ntl file
                    os.remove(raster)
                # remove original raster
                os.remove(raster_or)
            except Exception as e:
                # Best-effort: report the reason and move on.
                print('error loading raster, skipping', e)
def test_data_window_empty_result():
    """An all-nodata multi-band array yields an empty window."""
    all_nodata = np.zeros((3, 10, 10), dtype='uint8')
    empty = windows.Window.from_ranges((0, 0), (0, 0))
    assert windows.get_data_window(all_nodata, nodata=0) == empty
def filter_by_ntl(country, ntl_shapefile, dest):
    """Crop the pre-event rasters under ``dest`` to the shapes of an "ntl" mask shapefile.

    NOTE(review): "ntl" presumably stands for nighttime lights; confirm with the caller.

    Every ``*.tif`` in ``dest + '/pre-event'`` whose path does not already
    contain ``'ntl'`` is masked with the shapes whose bounds overlap it, cropped
    to its data window and written as ``<name>-ntl.tif``. The temporary
    ``<name>-ntl-mask.tif`` and the original raster are removed afterwards.
    Errors are logged and the raster is skipped.

    Args:
        country: Country name used to restrict the mask, or ``''`` for the full mask.
        ntl_shapefile: Path of the mask shapefile.
        dest: Directory containing the ``pre-event`` sub-directory.
    """
    # filter mask by country (if provided)
    if country != '':
        # A per-country shapefile is stored next to the global one, so it is
        # built only when it does not exist yet.
        country_ntl_shapefile = ntl_shapefile.split(
            '.')[0] + '_' + country.lower() + '.shp'
        if not os.path.exists(country_ntl_shapefile):
            ntl_world = gpd.read_file(ntl_shapefile)
            ntl_world.crs = {'init': 'epsg:4326'}
            ntl_world = ntl_world.to_crs("EPSG:4326")
            world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
            country_shape = world[world.name == country]
            if country_shape.empty:
                logger.info(f"WARNING: country {country} not found!!!")
                logger.info('available countries:')
                logger.info(world.name.unique())
                logger.info('proceeding with global mask')
                country_ntl_shapefile = ntl_shapefile
            else:
                # Clip the global mask to the country's bounding box, enlarged by 10%.
                country_shape = country_shape.reset_index()
                country_shape.at[0, 'geometry'] = box(
                    *country_shape.at[0, 'geometry'].bounds)
                country_shape.geometry = country_shape.geometry.scale(
                    xfact=1.1, yfact=1.1)
                ntl_country = gpd.clip(ntl_world, country_shape)
                ntl_country.to_file(country_ntl_shapefile)
        with fiona.open(country_ntl_shapefile, "r") as shapefile:
            shapes = [feature["geometry"] for feature in shapefile]
    else:
        with fiona.open(ntl_shapefile, "r") as shapefile:
            shapes = [feature["geometry"] for feature in shapefile]

    # loop over images and filter
    for raster in tqdm(glob.glob(dest + '/pre-event/*.tif')):
        raster = raster.replace('\\', '/')
        raster_or = raster
        out_name = raster.split('.')[0] + '-ntl.tif'
        # Skip files whose path already contains 'ntl'.
        if 'ntl' in raster:
            continue
        crop_next = True

        logger.info(f'processing {raster}')
        out_name_ntl = raster.split('.')[0] + '-ntl-mask.tif'
        try:
            with rasterio.open(raster) as src:
                # Keep only the mask shapes whose bounds overlap this raster.
                shapes_r = [
                    x for x in shapes if not rasterio.coords.disjoint_bounds(
                        src.bounds, rasterio.features.bounds(x))
                ]
                if len(shapes_r) == 0:
                    logger.info('no ntl present, discard')
                    crop_next = False
                else:
                    logger.info('ntl present, creating mask')
                    out_image, out_transform = rasterio.mask.mask(src,
                                                                  shapes_r,
                                                                  crop=True)
                    out_meta = src.meta

                    out_meta.update({
                        "driver": "GTiff",
                        "height": out_image.shape[1],
                        "width": out_image.shape[2],
                        "transform": out_transform
                    })
                    # save temporary ntl file
                    logger.info(f'saving mask {out_name_ntl}')
                    with rasterio.open(out_name_ntl, "w", **out_meta) as dst:
                        dst.write(out_image)
                    crop_next = True
                raster = out_name_ntl
            if crop_next:
                with rasterio.open(raster) as src:
                    logger.info(f'cropping nan on {raster}')
                    # Shrink the masked raster to the window that holds valid data.
                    window = get_data_window(src.read(1, masked=True))

                    kwargs = src.meta.copy()
                    kwargs.update({
                        'height': window.height,
                        'width': window.width,
                        'transform': rasterio.windows.transform(window, src.transform)
                    })
                    logger.info(f'saving {out_name}')
                    try:
                        with rasterio.open(out_name, 'w', **kwargs) as dst:
                            dst.write(src.read(window=window))
                    except Exception as e:
                        logger.info(f"Exception occurred: {e}.\nDiscarding.")
                # remove temporary ntl file
                os.remove(raster)
            # remove original raster
            os.remove(raster_or)
        except Exception as e:
            logger.info(f"Exception occurred: {e}.\nDiscarding.")
def to_netcdf(
        files,
        output,
        variable,
        dtype,
        src_crs,
        x_name,
        y_name,
        z_name,
        datetime_pattern,
        netcdf3,
        compress,
        packed,
        xy_dtype,
        # z_dtype,
        calendar,
        autocrop):
    """
    Convert rasters to NetCDF and stack them according to a dimension.

    X and Y dimension names will be named according to the source projection (lon, lat if geographic projection, x, y
    otherwise) unless specified.

    Will overwrite an existing NetCDF file.

    Only the first band of the input will be turned into a NetCDF file.

    The first matching file serves as the template for CRS, bounds, size, dtype
    and nodata value. When the template has no nodata value and the output dtype
    equals the source dtype, no explicit fill value is passed on.

    Raises:
        click.BadParameter: no file matches ``files``, more than one file is
            given without ``z_name``, or no CRS is available.
    """

    # TODO: add format string template to this to parse out components

    filenames = list(glob.glob(files))
    if not filenames:
        raise click.BadParameter('No files found matching that pattern',
                                 param='files',
                                 param_hint='FILES')

    z_values = []

    if datetime_pattern is not None:
        datetimes = (datetime.strptime(x, datetime_pattern) for x in filenames)

        # Sort both datetimes and filenames by datetimes
        z_values, filenames = [
            list(x) for x in zip(*sorted(zip(datetimes, filenames),
                                         key=itemgetter(0)))
        ]

    items = tuple(enumerate(filenames))

    has_z = len(filenames) > 1
    if has_z and not z_name:
        raise click.BadParameter('Required when > 1 input file',
                                 param='--z',
                                 param_hint='--z')

    if src_crs:
        src_crs = CRS.from_string(src_crs)

    # Open the template in a context manager so its handle is released even when
    # an error is raised further down.
    with rasterio.open(filenames[0]) as template_ds:
        src_crs = template_ds.crs or src_crs

        if not src_crs:
            raise click.BadParameter(
                'Required when no CRS information available in source files',
                param='--src-crs',
                param_hint='--src-crs')

        prj = Proj(**src_crs.to_dict())
        bounds = template_ds.bounds
        width = template_ds.width
        height = template_ds.height
        window = None

        src_dtype = numpy.dtype(template_ds.dtypes[0])
        dtype = numpy.dtype(dtype) if dtype else src_dtype

        if dtype == src_dtype:
            fill_value = template_ds.nodata
            # nodata always comes from rasterio as floating point; it may also be
            # None when the source defines no nodata value, which int() rejects.
            if fill_value is not None and src_dtype.kind in ('u', 'i'):
                fill_value = int(fill_value)
        else:
            fill_value = get_fill_value(dtype)

        x_name = x_name or ('lon' if src_crs.is_geographic else 'x')
        y_name = y_name or ('lat' if src_crs.is_geographic else 'y')

        var_kwargs = {'fill_value': fill_value}

        nc_format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'

        with Dataset(output, 'w', format=nc_format) as out:
            if packed or autocrop:
                mins = []
                maxs = []
                data_windows = []

                click.echo('Inspecting input datasets...')
                with click.progressbar(items) as progress:
                    for index, filename in progress:
                        with rasterio.open(filename) as src:
                            data = src.read(1, masked=True)
                            if packed:
                                mins.append(data.min())
                                maxs.append(data.max())
                            if autocrop:
                                data_window = get_data_window(data)
                                # Only windows smaller than the full raster matter
                                # for cropping.
                                if data_window != ((0, height), (0, width)):
                                    data_windows.append(data_window)

                if packed:
                    min_value = min(mins)
                    max_value = max(maxs)
                    scale, offset = get_pack_atts(dtype, min_value, max_value)
                if autocrop and data_windows:
                    window = union(data_windows)
                    bounds = template_ds.window_bounds(window)
                    height = window[0][1] - window[0][0]
                    width = window[1][1] - window[1][0]

            coords = SpatialCoordinateVariables.from_bbox(BBox(bounds, prj),
                                                          width, height,
                                                          xy_dtype)
            coords.add_to_dataset(out, x_name, y_name, zlib=compress)

            var_dimensions = [y_name, x_name]
            shape = list(coords.shape)
            if has_z:
                shape.insert(0, len(filenames))
                out.createDimension(z_name, shape[0])
                var_dimensions.insert(0, z_name)
                if z_values:
                    dates = DateVariable(numpy.array(z_values),
                                         units_start_date=z_values[0],
                                         calendar=calendar)
                    dates.add_to_dataset(out, z_name)

            click.echo('Creating {0}:{1} with shape {2}'.format(
                output, variable, shape))

            out_var = out.createVariable(variable,
                                         dtype,
                                         dimensions=var_dimensions,
                                         zlib=compress,
                                         **var_kwargs)
            set_crs(out, variable, prj, set_proj4_att=True)

            if packed:
                out_var.setncattr('scale_factor', scale)
                out_var.setncattr('add_offset', offset)

            click.echo('Copying data from input files...')
            with click.progressbar(items) as progress:
                for index, filename in progress:
                    with rasterio.open(filename) as src:
                        data = src.read(1, masked=True, window=window)

                        if has_z:
                            out_var[index, :] = data
                        else:
                            out_var[:] = data

                    out.sync()
def main(cadastre_filepath, dst_tif_filepath, dst_shp_filepath, dst_res,
         num_patches, kernel_radius, urban_threshold, buffer_dist,
         dst_nodata):
    """Rasterize a cadastre, keep its urban extent and dump it to disk.

    The cadastre at `cadastre_filepath` is rasterized at `dst_res`, an urban
    footprint mask is computed from it (`kernel_radius`, `urban_threshold`,
    `num_patches` and `buffer_dist` are forwarded to the footprinter), the
    largest water cluster is removed from that mask, and the masked raster,
    cropped to its data window, is written as a GeoTIFF to
    `dst_tif_filepath`. Pixels outside the mask are set to `dst_nodata`.

    When `dst_shp_filepath` is truthy, the urban extent geometry and the lake
    geometry are additionally written to that path.
    """
    log = logging.getLogger(__name__)

    def _largest_polygon(mask_arr, affine):
        # Among the shapes whose value is 1, pick the one whose geometry has
        # the most entries in 'coordinates' and turn it into a geometry.
        candidates = []
        for geom, val in features.shapes(mask_arr, transform=affine):
            if val == 1:
                candidates.append((geom, val))
        best = max(candidates, key=lambda pair: len(pair[0]['coordinates']))
        return geometry.shape(best[0])

    log.info("preparing raster agglomeration LULC from %s",
             cadastre_filepath)

    cadastre_arr, cadastre_transform = utils.rasterize_cadastre(
        cadastre_filepath, dst_res, dst_nodata)
    log.info("rasterized cadastre vector LULC dataset to shape %s",
             str(cadastre_arr.shape))

    # get the urban extent mask according to the criteria used in the "Atlas
    # of Urban Expansion, The 2016 Edition" by Angel, S. et al.
    footprinter = ufp.UrbanFootprinter(cadastre_arr,
                                       urban_classes=utils.URBAN_CLASSES,
                                       res=dst_res)
    urban_mask = footprinter.compute_footprint_mask(
        kernel_radius, urban_threshold, num_patches=num_patches,
        buffer_dist=buffer_dist)
    log.info(
        "obtained extent of the %d largest urban cluster(s) (%d pixels)",
        num_patches, np.sum(urban_mask))

    # exclude lake
    # TODO: arguments to customize `LULC_WATER_VAL` and `SIEVE_SIZE`
    is_water = cadastre_arr == utils.LULC_WATER_VAL
    label_arr, _ = ndi.label(is_water, ndi.generate_binary_structure(2, 2))
    _, label_counts = np.unique(label_arr, return_counts=True)
    # skip the first unique value before picking the most frequent label
    cluster_label = np.argmax(label_counts[1:]) + 1
    largest_cluster = (label_arr == cluster_label).astype(np.uint8)

    urban_without_lake = (urban_mask.astype(bool)
                          & ~largest_cluster.astype(bool))
    urban_mask = features.sieve(
        urban_without_lake.astype(urban_mask.dtype), SIEVE_SIZE)

    # get window and transform of valid data points, i.e., the computed extent
    extent_window = windows.get_data_window(urban_mask, nodata=0)
    extent_transform = windows.transform(extent_window, cadastre_transform)
    masked_arr = np.where(urban_mask, cadastre_arr, dst_nodata)
    dst_arr = masked_arr[windows.window_index(extent_window)]

    # dump it
    # NOTE: use hardcoded CRS string (for the same CRS) to avoid issues
    tif_profile = dict(
        driver='GTiff',
        width=extent_window.width,
        height=extent_window.height,
        count=1,
        crs=utils.CRS,  # cadastre_gdf.crs
        transform=extent_transform,
        dtype=np.uint8,
        nodata=dst_nodata,
    )
    with rio.open(dst_tif_filepath, 'w', **tif_profile) as dst:
        dst.write(dst_arr, 1)
    log.info("dumped rasterized dataset to %s", dst_tif_filepath)

    if not dst_shp_filepath:
        return

    # save the geometry extent
    # get the urban mask geometry
    urban_mask_geom = _largest_polygon(
        np.array(dst_arr != dst_nodata, dtype=np.uint8), extent_transform)

    # get the window and transform of the lake extent
    lake_mask = features.sieve(largest_cluster, SIEVE_SIZE)
    lake_window = windows.get_data_window(lake_mask, nodata=0)
    lake_transform = windows.transform(lake_window, cadastre_transform)
    lake_mask = lake_mask[windows.window_index(lake_window)]

    # get the lake mask geometry
    lake_mask_geom = _largest_polygon(lake_mask, lake_transform)

    # NOTE: use hardcoded CRS string (for the same CRS) to avoid issues
    extent_series = gpd.GeoSeries([urban_mask_geom, lake_mask_geom],
                                  crs=utils.CRS)
    extent_series.to_file(dst_shp_filepath)
    log.info("dumped extent geometry to %s", dst_shp_filepath)
def handle(self, output_directory, region_name, zoneset, *args, **kwargs):
    """Write one zone raster (NetCDF) and lookup table (CSV) per species.

    For every selected zone source and every species within it, each zone
    polygon is rasterized onto the region's elevation grid and split into
    elevation bands. Every zone / band combination that covers at least one
    pixel gets its own integer value in a uint16 raster, which is cropped to
    its data window and written to ``<source>_<species>_zones.nc``. A CSV
    with the same base name maps each value to its zone ids.

    Parameters
    ----------
    output_directory : sequence
        Only the first element is used: the directory to write into. It is
        created if missing.
    region_name : sequence
        Only the first element is used: the name of the region to process.
    zoneset : str or None
        Comma-separated zone source names; ``None`` or blank selects all.

    Raises
    ------
    CommandError
        If no zone source matches or the region does not exist.
    ValueError
        If the zone / band combinations do not fit in uint16.
    """
    output_directory = output_directory[0]
    region_name = region_name[0]

    if zoneset is None or zoneset.strip() == "":
        sources = ZoneSource.objects.all().order_by("name")
        if len(sources) == 0:
            raise CommandError("No zonesets available")
    else:
        sources = ZoneSource.objects.filter(
            name__in=zoneset.split(",")).order_by("name")
        if len(sources) == 0:
            raise CommandError(
                "No zonesets available to analyze that match --zones values"
            )

    region = Region.objects.filter(name=region_name)
    if not region.exists():
        raise CommandError(
            "Region {} is not available".format(region_name))
    region = region.first()

    # Create output directories (no-op when it already exists)
    os.makedirs(output_directory, exist_ok=True)

    with ElevationDataset() as elevation_ds:
        elevation_ds.load_region(region.name)

        for source in sources:
            all_species = [
                e["species"]
                for e in source.seedzone_set.values("species").distinct()
            ]

            for species in all_species:
                zones = source.seedzone_set.filter(
                    species=species).order_by("zone_id")

                out_index = 0
                zone_ids = []
                zone_input_ids = []
                out = numpy.empty(shape=elevation_ds.data.shape,
                                  dtype="uint16")
                out.fill(NODATA)

                with ZoneConfig(source.name) as config:
                    # The bar iterates the zones of this species only, so its
                    # maximum is the size of that queryset.
                    progress = Bar(
                        "Processing {} - {} zones".format(
                            source.name, species),
                        max=zones.count(),
                    )
                    for zone in progress.iter(zones):
                        source_name = zone.source

                        window, coords = elevation_ds.get_read_window(
                            zone.polygon.extent)
                        transform = coords.affine
                        elevation = elevation_ds.data[window]

                        zone_mask = rasterize(
                            (json.loads(zone.polygon.geojson), ),
                            out_shape=elevation.shape,
                            transform=transform,
                            fill=1,  # mask is True OUTSIDE the zone
                            default_value=0,
                            dtype=numpy.dtype("uint8"),
                        ).astype("bool")

                        nodata_mask = elevation == elevation_ds.nodata_value
                        mask = nodata_mask | zone_mask

                        # Create a 2D array for extracting to new dataset,
                        # in integer feet
                        elevation = (numpy.where(
                            ~mask, elevation / 0.3048,
                            elevation_ds.nodata_value).round().astype("int"))

                        # if there are no pixels in the mask, skip this zone.
                        # The check is on the valid pixels (not the window
                        # size) so nanmin / nanmax never see an empty array.
                        elevation_data = elevation[
                            elevation != elevation_ds.nodata_value]
                        if elevation_data.size == 0:
                            continue

                        min_elevation = math.floor(
                            numpy.nanmin(elevation_data))
                        max_elevation = math.ceil(
                            numpy.nanmax(elevation_data))

                        bands = list(
                            config.get_elevation_bands(
                                zone, min_elevation, max_elevation))
                        bands = generate_missing_bands(
                            bands, min_elevation, max_elevation)

                        if not bands:
                            # min / max elevation outside defined bands
                            warnings.warn(
                                "\nElevation range {} - {} ft outside defined bands\n"
                                .format(min_elevation, max_elevation))
                            continue

                        for band in bands:
                            low, high = band[:2]
                            band_mask = ((elevation >= low)
                                         & (elevation <= high))

                            if not numpy.any(band_mask):
                                continue

                            # extract actual elevation range within the mask
                            # as integer feet
                            band_elevation = elevation[band_mask]
                            band_range = [
                                math.floor(numpy.nanmin(band_elevation)),
                                math.ceil(numpy.nanmax(band_elevation)),
                            ]

                            # extract 2D version of elevation within the band
                            value = numpy.where(
                                (~mask) & band_mask,
                                out_index,
                                out[window],
                            )

                            if not numpy.any(value == out_index):
                                continue

                            out[window] = value

                            # zone ids are based on actual elevation range
                            zone_ids.append("{}_{}_{}".format(
                                zone.zone_uid, *band_range))

                            # zone_input is based on input elevation range
                            zone_input_ids.append("{}_{}_{}".format(
                                zone.zone_uid, low, high))

                            out_index += 1

                            # Fail before an index that collides with or
                            # exceeds NODATA could be written to the raster.
                            if out_index > NODATA - 1:
                                raise ValueError(
                                    "Too many zone / band combinations for uint16")

                if not zone_ids:
                    # Nothing was written for this species: there is no data
                    # window to crop to and no ids to store, so skip it.
                    warnings.warn(
                        "\nNo zones with elevation data for {} - {}; skipping\n"
                        .format(source.name, species))
                    continue

                # Find the data window of the zones
                data_window = (windows.get_data_window(
                    out, NODATA).round_offsets(op="floor").round_lengths(
                        op="ceil"))
                out = out[data_window.toslices()]

                data_coords = elevation_ds.coords.slice_by_window(
                    Window(*data_window.toslices()))

                filename = os.path.join(
                    output_directory,
                    "{}_{}_zones.nc".format(source_name, species))

                with Dataset(filename, "w", format="NETCDF4") as ds:
                    # create ID variable
                    ds.createDimension("zone", len(zone_ids))
                    id_var = ds.createVariable("zone", str,
                                               dimensions=("zone", ))
                    id_var[:] = numpy.array(zone_ids)

                    data_coords.add_to_dataset(ds, "longitude", "latitude")
                    data_var = ds.createVariable(
                        "zones",
                        "uint16",
                        dimensions=("latitude", "longitude"),
                        fill_value=NODATA,
                    )
                    data_var[:] = out
                    set_crs(ds, "zones", Proj({"init": "EPSG:4326"}))

                # Swap only the extension (not every ".nc" in the path) and
                # open with newline="" as the csv module requires.
                csv_filename = os.path.splitext(filename)[0] + ".csv"
                with open(csv_filename, "w", newline="") as fp:
                    writer = csv.writer(fp)
                    writer.writerow(["value", "zone", "zone_input"])
                    writer.writerows(
                        [value, zone_id, zone_input_id]
                        for value, (zone_id, zone_input_id) in enumerate(
                            zip(zone_ids, zone_input_ids)))
def test_data_window_empty_result():
    """An array holding only the nodata value yields the empty window."""
    all_nodata = numpy.zeros(shape=(3, 10, 10), dtype='uint8')
    expected = ((0, 0), (0, 0))

    result = windows.get_data_window(all_nodata, nodata=0)

    assert result == expected