def check_spatial_overlap(spatial_filepaths_list, different_projections_ok=False):
    """Check that the given spatial files spatially overlap.

    Args:
        spatial_filepaths_list (list): A list of files that can be opened
            with GDAL.  Must be on the local filesystem.
        different_projections_ok=False (bool): Whether it's OK for the input
            spatial files to have different projections.  If ``True``, all
            projections will be converted to WGS84 before overlap is
            checked.

    Returns:
        A string error message if an error is found.  ``None`` otherwise.

    """
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)
    wgs84_wkt = wgs84_srs.ExportToWkt()

    bounding_boxes = []
    checked_file_list = []
    for filepath in spatial_filepaths_list:
        try:
            info = pygeoprocessing.get_raster_info(filepath)
        except ValueError:
            info = pygeoprocessing.get_vector_info(filepath)

        if info['projection_wkt'] is None:
            return f'Spatial file {filepath} has no projection'

        if different_projections_ok:
            bounding_box = pygeoprocessing.transform_bounding_box(
                info['bounding_box'], info['projection_wkt'], wgs84_wkt)
        else:
            bounding_box = info['bounding_box']

        if all([numpy.isinf(coord) for coord in bounding_box]):
            LOGGER.warning(
                'Skipping infinite bounding box for file %s', filepath)
            continue

        bounding_boxes.append(bounding_box)
        checked_file_list.append(filepath)

    try:
        pygeoprocessing.merge_bounding_box_list(
            bounding_boxes, 'intersection')
    except ValueError as error:
        LOGGER.debug(error)
        formatted_lists = ' | '.join([
            a + ': ' + str(b)
            for a, b in zip(checked_file_list, bounding_boxes)])
        message = f"Bounding boxes do not intersect: {formatted_lists}"
        return message
    return None
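# A minimal usage sketch for ``check_spatial_overlap``; the two GeoTIFF
# paths below are hypothetical stand-ins for real local files.
def _example_check_spatial_overlap():
    error_msg = check_spatial_overlap(
        ['dem.tif', 'landcover.tif'],  # hypothetical local rasters
        different_projections_ok=True)
    if error_msg is not None:
        raise ValueError(error_msg)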
def test_clip_vector_by_vector_polygons(self):
    """WaveEnergy: testing clipping polygons from polygons."""
    from natcap.invest import wave_energy

    aoi_path = os.path.join(REGRESSION_DATA, 'aoi_proj_to_extract.shp')
    extract_path = os.path.join(
        SAMPLE_DATA, 'WaveData', 'Global_extract.shp')
    result_path = os.path.join(self.workspace_dir, 'aoi_proj_clipped.shp')
    target_projection = pygeoprocessing.get_vector_info(
        extract_path)['projection_wkt']
    wave_energy._clip_vector_by_vector(
        aoi_path, extract_path, result_path, target_projection,
        self.workspace_dir)

    expected_path = os.path.join(REGRESSION_DATA, 'aoi_proj_clipped.shp')
    WaveEnergyRegressionTests._assert_point_vectors_equal(
        result_path, expected_path)
def _clip_and_mask_dem(dem_path, aoi_path, target_path, working_dir):
    """Clip and mask the DEM to the AOI.

    Args:
        dem_path (string): The path to the DEM to use.  Must have the same
            projection as the AOI.
        aoi_path (string): The path to the AOI to use.  Must have the same
            projection as the DEM.
        target_path (string): The path on disk to where the clipped and
            masked raster will be saved.  If a file exists at this location
            it will be overwritten.  The raster will have a bounding box
            matching the intersection of the AOI and the DEM's bounding box
            and a spatial reference matching the AOI and the DEM.
        working_dir (string): A path to a directory on disk.  A new
            temporary directory will be created within this directory for
            the storage of several working files.  This temporary directory
            will be removed at the end of this function.

    Returns:
        ``None``

    """
    temp_dir = tempfile.mkdtemp(dir=working_dir, prefix='clip_dem')

    LOGGER.info('Clipping the DEM to its intersection with the AOI.')
    aoi_vector_info = pygeoprocessing.get_vector_info(aoi_path)
    dem_raster_info = pygeoprocessing.get_raster_info(dem_path)
    mean_pixel_size = (
        abs(dem_raster_info['pixel_size'][0]) +
        abs(dem_raster_info['pixel_size'][1])) / 2.0
    pixel_size = (mean_pixel_size, -mean_pixel_size)

    intersection_bbox = [
        op(aoi_dim, dem_dim) for (aoi_dim, dem_dim, op) in zip(
            aoi_vector_info['bounding_box'],
            dem_raster_info['bounding_box'],
            [max, max, min, min])]

    clipped_dem_path = os.path.join(temp_dir, 'clipped_dem.tif')
    pygeoprocessing.warp_raster(
        dem_path, pixel_size, clipped_dem_path, 'near',
        target_bb=intersection_bbox)

    LOGGER.info('Masking DEM pixels outside the AOI to nodata')
    aoi_mask_raster_path = os.path.join(temp_dir, 'aoi_mask.tif')
    pygeoprocessing.new_raster_from_base(
        clipped_dem_path, aoi_mask_raster_path, gdal.GDT_Byte,
        [_BYTE_NODATA], [0],
        raster_driver_creation_tuple=BYTE_GTIFF_CREATION_OPTIONS)
    pygeoprocessing.rasterize(aoi_path, aoi_mask_raster_path, [1], None)

    dem_nodata = dem_raster_info['nodata'][0]

    def _mask_op(dem, aoi_mask):
        valid_pixels = (~utils.array_equals_nodata(dem, dem_nodata) &
                        (aoi_mask == 1))
        masked_dem = numpy.empty(dem.shape)
        masked_dem[:] = dem_nodata
        masked_dem[valid_pixels] = dem[valid_pixels]
        return masked_dem

    pygeoprocessing.raster_calculator(
        [(clipped_dem_path, 1), (aoi_mask_raster_path, 1)],
        _mask_op, target_path, gdal.GDT_Float32, dem_nodata,
        raster_driver_creation_tuple=FLOAT_GTIFF_CREATION_OPTIONS)

    shutil.rmtree(temp_dir, ignore_errors=True)
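# A hedged usage sketch for ``_clip_and_mask_dem``; all paths are
# hypothetical, and the DEM and AOI are assumed to share a projection, as
# the docstring requires.
def _example_clip_and_mask_dem():
    _clip_and_mask_dem(
        dem_path='dem.tif',             # hypothetical input DEM
        aoi_path='aoi.gpkg',            # hypothetical AOI vector
        target_path='dem_clipped.tif',  # overwritten if it exists
        working_dir='scratch')          # must already exist on disk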
def _calculate_args_bounding_box(args, args_spec):
    """Calculate the bounding boxes of any GIS types found in ``args``.

    Args:
        args (dict): a string key and any value pair dictionary.
        args_spec (dict): the model ARGS_SPEC describing args

    Returns:
        bb_intersection, bb_union tuple that's either the lat/lng bounding
            intersection and union bounding boxes of the GIS types referred
            to in ``args``.  If no GIS types are present, this is a
            (None, None) tuple.

    """
    def _merge_bounding_boxes(bb1, bb2, mode):
        """Merge two bounding boxes through union or intersection.

        Args:
            bb1 (list of float): bounding box of the form
                [minx, maxy, maxx, miny] or None
            bb2 (list of float): bounding box of the form
                [minx, maxy, maxx, miny] or None
            mode (string): either "union" or "intersection" indicating how
                to combine the two bounding boxes.

        Returns:
            either the intersection or union of bb1 and bb2 depending on
            mode.  If either bb1 or bb2 is None, the other is returned.
            If both are None, None is returned.

        """
        if bb1 is None:
            return bb2
        if bb2 is None:
            return bb1

        if mode == "union":
            comparison_ops = [min, max, max, min]
        if mode == "intersection":
            comparison_ops = [max, min, min, max]

        bb_out = [op(x, y) for op, x, y in zip(comparison_ops, bb1, bb2)]
        return bb_out

    bb_intersection = None
    bb_union = None
    for key, value in args.items():
        # Using gdal.OpenEx to check if an input is spatial caused the
        # model to hang sometimes (possible race condition), so only
        # get the bounding box of inputs that are known to be spatial.
        # Also eliminate any string paths that are empty to prevent an
        # exception.  By the time we've made it to this function, all paths
        # should already have been validated so the path is either valid or
        # blank.
        spatial_info = None
        if args_spec['args'][key]['type'] == 'raster' and value.strip() != '':
            spatial_info = pygeoprocessing.get_raster_info(value)
        elif (args_spec['args'][key]['type'] == 'vector'
                and value.strip() != ''):
            spatial_info = pygeoprocessing.get_vector_info(value)

        if spatial_info:
            local_bb = spatial_info['bounding_box']
            projection_wkt = spatial_info['projection_wkt']
            spatial_ref = osr.SpatialReference()
            spatial_ref.ImportFromWkt(projection_wkt)

            try:
                # means there's a GIS type with a well defined bounding box
                # create transform, and reproject local bounding box to
                # lat/lng
                lat_lng_ref = osr.SpatialReference()
                lat_lng_ref.ImportFromEPSG(4326)  # EPSG 4326 is lat/lng
                to_lat_trans = utils.create_coordinate_transformer(
                    spatial_ref, lat_lng_ref)
                for point_index in [0, 2]:
                    local_bb[point_index], local_bb[point_index + 1], _ = (
                        to_lat_trans.TransformPoint(
                            local_bb[point_index],
                            local_bb[point_index + 1]))

                bb_intersection = _merge_bounding_boxes(
                    local_bb, bb_intersection, 'intersection')
                bb_union = _merge_bounding_boxes(local_bb, bb_union, 'union')
            except Exception as transform_error:
                # All kinds of exceptions from bad transforms or CSV files
                # or dbf files could get us to this point, just don't
                # bother with the local_bb at all
                LOGGER.exception('Error when transforming coordinates: %s',
                                 transform_error)
        else:
            LOGGER.debug(
                f'Arg {key} of type {args_spec["args"][key]["type"]} '
                'excluded from bounding box calculation')

    return bb_intersection, bb_union
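# The merge semantics above can be sanity-checked with plain lists.  This
# standalone sketch mirrors ``_merge_bounding_boxes`` (note the
# [minx, maxy, maxx, miny] ordering from its docstring): a union grows
# each edge outward, an intersection shrinks each edge inward.
def _example_merge_bounding_boxes():
    def merge(bb1, bb2, mode):
        ops = {'union': [min, max, max, min],
               'intersection': [max, min, min, max]}[mode]
        return [op(x, y) for op, x, y in zip(ops, bb1, bb2)]

    bb1 = [0.0, 10.0, 10.0, 0.0]   # minx, maxy, maxx, miny
    bb2 = [5.0, 15.0, 15.0, 5.0]
    assert merge(bb1, bb2, 'union') == [0.0, 15.0, 15.0, 0.0]
    assert merge(bb1, bb2, 'intersection') == [5.0, 10.0, 10.0, 5.0]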
def _add_zonal_stats(
        runoff_retention_pickle_path,
        runoff_retention_ret_vol_pickle_path,
        flood_vol_pickle_path,
        base_watershed_result_vector_path,
        target_watershed_result_vector_path):
    """Add watershed scale values of the given base_raster.

    Parameters:
        runoff_retention_pickle_path (str): path to runoff retention zonal
            stats pickle file.
        runoff_retention_ret_vol_pickle_path (str): path to runoff
            retention volume zonal stats pickle file.
        flood_vol_pickle_path (str): path to flood volume zonal stats
            pickle file.
        base_watershed_result_vector_path (str): path to existing vector
            to copy for the target vector.
        target_watershed_result_vector_path (str): path to target vector
            that will contain the additional fields:

            * rnf_rt_idx
            * rnf_rt_m3
            * serv_bld

    Return:
        None.

    """
    LOGGER.info(
        "Processing zonal stats for %s", target_watershed_result_vector_path)

    with open(runoff_retention_pickle_path, 'rb') as runoff_retention_file:
        runoff_retention_stats = pickle.load(runoff_retention_file)
    with open(runoff_retention_ret_vol_pickle_path, 'rb') as (
            runoff_retention_ret_vol_file):
        runoff_retention_vol_stats = pickle.load(
            runoff_retention_ret_vol_file)
    with open(flood_vol_pickle_path, 'rb') as flood_vol_pickle_file:
        flood_vol_stats = pickle.load(flood_vol_pickle_file)

    base_sr_wkt = pygeoprocessing.get_vector_info(
        base_watershed_result_vector_path)['projection_wkt']
    base_watershed_vector = gdal.OpenEx(
        base_watershed_result_vector_path, gdal.OF_VECTOR)
    base_watershed_layer = base_watershed_vector.GetLayer()
    base_geom_type = base_watershed_layer.GetGeomType()
    base_sr = osr.SpatialReference()
    base_sr.ImportFromWkt(base_sr_wkt)

    if os.path.exists(target_watershed_result_vector_path):
        LOGGER.warning(
            "deleting existing target result at %s",
            target_watershed_result_vector_path)
        os.remove(target_watershed_result_vector_path)

    esri_driver = gdal.GetDriverByName('ESRI Shapefile')
    target_watershed_vector = esri_driver.Create(
        target_watershed_result_vector_path, 0, 0, 0, gdal.GDT_Unknown)
    layer_name = str(os.path.splitext(os.path.basename(
        target_watershed_result_vector_path))[0])
    LOGGER.debug("creating layer %s", layer_name)
    target_watershed_layer = target_watershed_vector.CreateLayer(
        str(layer_name), base_sr, base_geom_type)

    for field_name in ['aff_bld', 'rnf_rt_idx', 'rnf_rt_m3', 'serv_bld']:
        field_def = ogr.FieldDefn(field_name, ogr.OFTReal)
        field_def.SetWidth(24)
        field_def.SetPrecision(11)
        target_watershed_layer.CreateField(field_def)

    target_layer_defn = target_watershed_layer.GetLayerDefn()
    for base_feature in base_watershed_layer:
        feature_id = base_feature.GetFID()
        target_feature = ogr.Feature(target_layer_defn)
        base_geom_ref = base_feature.GetGeometryRef()
        target_feature.SetGeometry(base_geom_ref.Clone())
        base_geom_ref = None

        if feature_id in runoff_retention_stats:
            pixel_count = runoff_retention_stats[feature_id]['count']
            if pixel_count > 0:
                mean_value = (
                    runoff_retention_stats[feature_id]['sum'] /
                    float(pixel_count))
                target_feature.SetField('rnf_rt_idx', float(mean_value))

        if feature_id in runoff_retention_vol_stats:
            target_feature.SetField(
                'rnf_rt_m3',
                float(runoff_retention_vol_stats[feature_id]['sum']))

        if feature_id in flood_vol_stats:
            pixel_count = flood_vol_stats[feature_id]['count']
            if pixel_count > 0:
                affected_build = base_feature.GetField('aff_bld')
                target_feature.SetField('aff_bld', affected_build)
                target_feature.SetField(
                    'serv_bld',
                    affected_build *
                    float(runoff_retention_vol_stats[feature_id]['sum']))

        target_watershed_layer.CreateFeature(target_feature)

    target_watershed_layer.SyncToDisk()
    target_watershed_layer = None
    target_watershed_vector = None
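# A hedged sketch of how the pickled inputs that ``_add_zonal_stats``
# consumes could be produced: ``pygeoprocessing.zonal_statistics`` returns
# the FID -> {'count', 'sum', ...} mapping the function expects, and
# ``pickle.dump`` writes it to disk.  Paths here are hypothetical.
def _example_pickle_zonal_stats():
    stats = pygeoprocessing.zonal_statistics(
        ('runoff_retention.tif', 1),   # hypothetical raster
        'watersheds.shp')              # hypothetical watershed vector
    with open('runoff_retention.pickle', 'wb') as pickle_file:
        pickle.dump(stats, pickle_file)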
def main():
    """Entry point."""
    # for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
    #     try:
    #         os.makedirs(dir_path)
    #     except OSError:
    #         pass
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_wb_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')

    raster_path = os.path.join(WORKSPACE_DIR, os.path.basename(RASTER_URL))
    download_raster_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(RASTER_URL, raster_path),
        target_path_list=[raster_path],
        task_name='download raster')

    # world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    # world_borders_layer = world_borders_vector.GetLayer()
    # wgs84_srs = osr.SpatialReference()
    # wgs84_srs.ImportFromEPSG(4326)

    # mask out everything that's not a country
    # (note: os.path.splitext keeps the leading '.' on the extension, so the
    # format string must not add another one)
    masked_raster_path = os.path.join(
        WORKSPACE_DIR, '%s_masked%s' % os.path.splitext(
            os.path.basename(raster_path)))
    # we need to define this because otherwise no nodata value is defined
    mask_nodata = -1
    mask_task = task_graph.add_task(
        func=pygeoprocessing.mask_raster,
        args=((raster_path, 1), world_borders_path, masked_raster_path),
        kwargs={
            'raster_driver_creation_tuple': GTIFF_CREATION_TUPLE_OPTIONS,
            'target_mask_value': mask_nodata,
        },
        target_path_list=[masked_raster_path],
        dependent_task_list=[download_wb_task, download_raster_task],
        task_name='mask raster')

    download_raster_task.join()
    raster_info = pygeoprocessing.get_raster_info(raster_path)

    country_name = "Global"
    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    country_threshold_table_file = open(country_threshold_table_path, 'w')
    country_threshold_table_file.write(
        'country,percentile at 90% max,pixel count\n')

    target_percentile_pickle_path = os.path.join(
        WORKSPACE_DIR, '%s.pkl' % (
            os.path.basename(os.path.splitext(raster_path)[0])))
    calculate_percentiles_task = task_graph.add_task(
        func=calculate_percentiles,
        args=(raster_path, PERCENTILE_LIST, target_percentile_pickle_path),
        target_path_list=[target_percentile_pickle_path],
        dependent_task_list=[mask_task],
        task_name='calculate percentiles')
    calculate_percentiles_task.join()

    with open(target_percentile_pickle_path, 'rb') as pickle_file:
        percentile_values = pickle.load(pickle_file)
    LOGGER.debug(
        "len percentile_values: %d len PERCENTILE_LIST: %d",
        len(percentile_values), len(PERCENTILE_LIST))

    cdf_array = [0.0] * len(percentile_values)

    raster_info = pygeoprocessing.get_raster_info(raster_path)
    nodata = raster_info['nodata'][0]
    valid_pixel_count = 0
    total_pixel_count = 0
    total_pixels = (
        raster_info['raster_size'][0] * raster_info['raster_size'][1])
    for _, data_block in pygeoprocessing.iterblocks(
            (raster_path, 1), largest_block=2**28):
        nodata_mask = ~numpy.isclose(data_block, nodata)
        nonzero_count = numpy.count_nonzero(nodata_mask)
        if nonzero_count == 0:
            continue
        valid_pixel_count += nonzero_count
        for index, percentile_value in enumerate(percentile_values):
            cdf_array[index] += numpy.sum((data_block[
                nodata_mask & (data_block >= percentile_value)]).astype(
                numpy.float32))
        total_pixel_count += data_block.size
        LOGGER.debug(
            '%.2f%% complete', (100.0 * total_pixel_count) / total_pixels)
        LOGGER.debug('current cdf array: %s', cdf_array)

    # threshold is at 90% says Becky
    threshold_limit = 0.9 * cdf_array[2]
    LOGGER.debug(cdf_array)
    fig, ax = matplotlib.pyplot.subplots()
    ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
    f = scipy.interpolate.interp1d(
        cdf_array, list(reversed(PERCENTILE_LIST)))
    try:
        cdf_threshold = f(threshold_limit)
    except ValueError:
        LOGGER.exception(
            "error when passing threshold_limit: %s\ncdf_array: %s" % (
                threshold_limit, cdf_array))
        cdf_threshold = cdf_array[2]

    ax.plot([0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
    ax.plot([cdf_threshold, cdf_threshold],
            [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)

    ax.grid(True, linestyle='-.')
    ax.set_title(
        '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
            country_name, threshold_limit, cdf_threshold,
            valid_pixel_count))
    ax.set_ylabel(
        'Sum of %s up to 100-percentile' % os.path.basename(raster_path))
    ax.set_xlabel('100-percentile')
    ax.tick_params(labelcolor='r', labelsize='medium', width=3)
    matplotlib.pyplot.autoscale(enable=True, tight=True)
    matplotlib.pyplot.savefig(
        os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
    country_threshold_table_file.write(
        '%s, %f, %d\n' % (country_name, cdf_threshold, valid_pixel_count))
    country_threshold_table_file.flush()
    country_threshold_table_file.close()
    return

    # NOTE: everything below is unreachable (dead code after ``return``);
    # it relies on the world_borders_layer/wgs84_srs setup that is
    # commented out above.
    for world_border_feature in world_borders_layer:
        country_name = world_border_feature.GetField('nev_name')
        country_name = country_name.replace('.', '')
        LOGGER.debug(country_name)
        country_workspace = os.path.join(COUNTRY_WORKSPACES, country_name)
        try:
            os.makedirs(country_workspace)
        except OSError:
            pass

        country_vector = os.path.join(
            country_workspace, '%s.gpkg' % country_name)
        country_vector_complete_token = os.path.join(
            country_workspace, '%s.COMPLETE' % country_name)
        extract_feature(
            world_borders_path, world_border_feature.GetFID(),
            wgs84_srs.ExportToWkt(), country_vector,
            country_vector_complete_token)

        country_raster_path = os.path.join(country_workspace, '%s_%s' % (
            country_name, os.path.basename(RASTER_PATH)))
        country_vector_info = pygeoprocessing.get_vector_info(country_vector)
        pygeoprocessing.warp_raster(
            RASTER_PATH, raster_info['pixel_size'], country_raster_path,
            'near', target_bb=country_vector_info['bounding_box'],
            vector_mask_options={'mask_vector_path': country_vector},
            working_dir=country_workspace)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
        if len(percentile_values) != len(PERCENTILE_LIST):
            continue
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)
        nodata = pygeoprocessing.get_raster_info(
            country_raster_path)['nodata'][0]
        valid_pixel_count = 0
        for _, data_block in pygeoprocessing.iterblocks(
                (country_raster_path, 1)):
            nodata_mask = ~numpy.isclose(data_block, nodata)
            valid_pixel_count += numpy.count_nonzero(nodata_mask)
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum(data_block[
                    nodata_mask & (data_block >= percentile_value)])

        # threshold is at 90% says Becky
        threshold_limit = 0.9 * cdf_array[2]
        LOGGER.debug(cdf_array)
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
        f = scipy.interpolate.interp1d(
            cdf_array, list(reversed(PERCENTILE_LIST)))
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s" % (
                    threshold_limit, cdf_array))
            cdf_threshold = cdf_array[2]

        ax.plot([0, 100], [threshold_limit, threshold_limit],
                'k:', linewidth=2)
        ax.plot([cdf_threshold, cdf_threshold],
                [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)
        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
                country_name, threshold_limit, cdf_threshold,
                valid_pixel_count))
        ax.set_ylabel(
            'Sum of %s up to 100-percentile' % os.path.basename(RASTER_PATH))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        country_threshold_table_file.write(
            '%s, %f, %d\n' % (
                country_name, cdf_threshold, valid_pixel_count))
        country_threshold_table_file.flush()
    country_threshold_table_file.close()
def _merge_local_bounding_boxes(arg, bb_intersection=None, bb_union=None):
    """Traverse nested dictionary to merge bounding boxes of GIS types.

    Args:
        arg (dict): contains string keys and pairs that might be files to
            GIS types.  They can be any other type, including dictionaries.
        bb_intersection (list or None): if list, has the form
            [xmin, ymin, xmax, ymax], where coordinates are in lng, lat
        bb_union (list or None): if list, has the form
            [xmin, ymin, xmax, ymax], where coordinates are in lng, lat

    Returns:
        (intersection, union) bounding box tuples of all filepaths to GIS
        data types found in the dictionary and bb_intersection and bb_union
        inputs.  None, None if no arguments were GIS data types and input
        bounding boxes are None.

    """
    def _is_spatial(arg):
        if isinstance(arg, str) and os.path.exists(arg):
            with utils.capture_gdal_logging():
                dataset = gdal.OpenEx(arg)
                if dataset is not None:
                    # OGR opens CSV files.  For now, we should not
                    # consider these to be vectors.
                    driver_name = dataset.GetDriver().ShortName
                    if driver_name == 'CSV':
                        return False
                    return True
        return False

    if isinstance(arg, dict):
        # if dict, grab the bb's for all the members in it
        for value in arg.values():
            bb_intersection, bb_union = _merge_local_bounding_boxes(
                value, bb_intersection, bb_union)
    elif isinstance(arg, list):
        # if list, grab the bb's for all the members in it
        for value in arg:
            bb_intersection, bb_union = _merge_local_bounding_boxes(
                value, bb_intersection, bb_union)
    else:
        # singular value, test if GIS type, if not, don't update bb's.
        # An undefined bounding box gets returned when ogr opens a table
        # only.
        if _is_spatial(arg):
            with utils.capture_gdal_logging():
                if gdal.OpenEx(arg, gdal.OF_RASTER) is not None:
                    spatial_info = pygeoprocessing.get_raster_info(arg)
                else:
                    # If it isn't a raster, it should be a vector!
                    spatial_info = pygeoprocessing.get_vector_info(arg)

            local_bb = spatial_info['bounding_box']
            projection_wkt = spatial_info['projection_wkt']
            spatial_ref = osr.SpatialReference()
            spatial_ref.ImportFromWkt(projection_wkt)

            try:
                # means there's a GIS type with a well defined bounding box
                # create transform, and reproject local bounding box to
                # lat/lng
                lat_lng_ref = osr.SpatialReference()
                lat_lng_ref.ImportFromEPSG(4326)  # EPSG 4326 is lat/lng
                to_lat_trans = osr.CoordinateTransformation(
                    spatial_ref, lat_lng_ref)
                for point_index in [0, 2]:
                    local_bb[point_index], local_bb[point_index + 1], _ = (
                        to_lat_trans.TransformPoint(
                            local_bb[point_index],
                            local_bb[point_index + 1]))

                bb_intersection = _merge_bounding_boxes(
                    local_bb, bb_intersection, 'intersection')
                bb_union = _merge_bounding_boxes(local_bb, bb_union, 'union')
            except Exception as transform_error:
                # All kinds of exceptions from bad transforms or CSV files
                # or dbf files could get us to this point, just don't
                # bother with the local_bb at all
                LOGGER.exception('Error when transforming coordinates: %s',
                                 transform_error)

    return bb_intersection, bb_union
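# A minimal sketch of calling ``_merge_local_bounding_boxes`` on a nested
# args dictionary; non-spatial values (numbers, non-file strings) are
# simply skipped, and the paths here are hypothetical.
def _example_merge_local_bounding_boxes():
    args = {
        'workspace_dir': 'workspace',  # not spatial; ignored
        'threshold': 0.5,              # not spatial; ignored
        'inputs': {
            'dem': 'dem.tif',          # hypothetical raster
            'aoi': 'aoi.gpkg',         # hypothetical vector
        },
    }
    bb_intersection, bb_union = _merge_local_bounding_boxes(args)
    LOGGER.info('intersection: %s union: %s', bb_intersection, bb_union)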
def execute(args):
    """Forest Carbon Edge Effect.

    The InVEST Carbon Edge Model calculates the carbon due to edge effects
    in tropical forest pixels.

    Args:
        args['workspace_dir'] (string): a path to the directory that will
            write output and other temporary files during calculation.
            (required)
        args['results_suffix'] (string): a string to append to any output
            file name (optional)
        args['n_nearest_model_points'] (int): number of nearest neighbor
            model points to search for
        args['aoi_vector_path'] (string): (optional) if present, a path to
            a shapefile that will be used to aggregate carbon stock results
            at the end of the run.
        args['biophysical_table_path'] (string): a path to a CSV table that
            has at least the fields 'lucode' and 'c_above'.  If
            ``args['compute_forest_edge_effects'] == True``, table must
            also contain an 'is_tropical_forest' field.  If
            ``args['pools_to_calculate'] == 'all'``, this table must
            contain the fields 'c_below', 'c_dead', and 'c_soil'.

            * ``lucode``: an integer that corresponds to landcover codes in
              the raster ``args['lulc_raster_path']``
            * ``is_tropical_forest``: either 0 or 1 indicating whether the
              landcover type is forest (1) or not (0).  If 1, the value in
              ``c_above`` is ignored and instead calculated from the edge
              regression model.
            * ``c_above``: floating point number indicating tons of above
              ground carbon per hectare for that landcover type
            * ``{'c_below', 'c_dead', 'c_soil'}``: three other optional
              carbon pools that will statically map landcover types to the
              carbon densities in the table.

            Example::

                lucode,is_tropical_forest,c_above,c_soil,c_dead,c_below
                0,0,32.8,5,5.2,2.1
                1,1,n/a,2.5,0.0,0.0
                2,1,n/a,1.8,1.0,0.0
                16,0,28.1,4.3,0.0,2.0

            Note the "n/a" in ``c_above`` are optional since that field is
            ignored when ``is_tropical_forest==1``.
        args['lulc_raster_path'] (string): path to an integer landcover
            code raster
        args['pools_to_calculate'] (string): if "all" then all carbon pools
            will be calculated.  If any other value, only above ground
            carbon pools will be calculated and the model expects only a
            'c_above' header in the biophysical table.  If "all", the model
            expects 'c_above', 'c_below', 'c_dead', 'c_soil' in the header
            of the biophysical table and will make a translated carbon map
            for each based off the landcover map.
        args['compute_forest_edge_effects'] (boolean): if True, requires
            the biophysical table to have an 'is_tropical_forest' field,
            and any landcover codes that have a 1 in this column calculate
            carbon stocks using the Chaplin-Kramer et al. method and ignore
            'c_above'.
        args['tropical_forest_edge_carbon_model_vector_path'] (string):
            path to a shapefile that defines the regions for the local
            carbon edge models.  Has at least the fields 'method',
            'theta1', 'theta2', 'theta3'.  Where 'method' is an int between
            1..3 describing the biomass regression model, and the thetas
            are floating point numbers that have different meanings
            depending on the 'method' parameter.  Specifically,

            * method 1 (asymptotic model)::

                biomass = theta1 - theta2 * exp(-theta3 * edge_dist_km)

            * method 2 (logarithmic model)::

                # NOTE: theta3 is ignored for this method
                biomass = theta1 + theta2 * numpy.log(edge_dist_km)

            * method 3 (linear regression)::

                biomass = theta1 + theta2 * edge_dist_km

        args['biomass_to_carbon_conversion_factor'] (string/float): Number
            by which to multiply forest biomass to convert to carbon in the
            edge effect calculation.
        args['n_workers'] (int): (optional) The number of worker processes
            to use for processing this model.  If omitted, computation will
            take place in the current process.

    Returns:
        None

    """
    # just check that the AOI exists since it wouldn't crash until the end
    # of the whole model run if it didn't.
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        aoi_vector = gdal.OpenEx(args['aoi_vector_path'], gdal.OF_VECTOR)
        if not aoi_vector:
            raise ValueError(
                "Unable to open aoi at: %s" % args['aoi_vector_path'])
        else:
            aoi_vector = None
            lulc_raster_bb = pygeoprocessing.get_raster_info(
                args['lulc_raster_path'])['bounding_box']
            aoi_vector_bb = pygeoprocessing.get_vector_info(
                args['aoi_vector_path'])['bounding_box']
            try:
                merged_bb = pygeoprocessing.merge_bounding_box_list(
                    [lulc_raster_bb, aoi_vector_bb], 'intersection')
                LOGGER.debug("merged bounding boxes: %s", merged_bb)
            except ValueError:
                raise ValueError(
                    "The landcover raster %s and AOI %s do not touch each "
                    "other." % (args['lulc_raster_path'],
                                args['aoi_vector_path']))

    output_dir = args['workspace_dir']
    intermediate_dir = os.path.join(
        args['workspace_dir'], 'intermediate_outputs')
    utils.make_directories([output_dir, intermediate_dir])
    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    # Initialize a TaskGraph
    taskgraph_working_dir = os.path.join(
        intermediate_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # single process mode.
    task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)

    # used to keep track of files generated by this module
    output_file_registry = {
        'c_above_map': os.path.join(
            intermediate_dir, 'c_above_carbon_stocks%s.tif' % file_suffix),
        'carbon_map': os.path.join(
            output_dir, 'carbon_map%s.tif' % file_suffix),
        'aggregated_result_vector': os.path.join(
            output_dir, 'aggregated_carbon_stocks%s.shp' % file_suffix)
    }
    if args['pools_to_calculate'] == 'all':
        output_file_registry['c_below_map'] = os.path.join(
            intermediate_dir, 'c_below_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_soil_map'] = os.path.join(
            intermediate_dir, 'c_soil_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_dead_map'] = os.path.join(
            intermediate_dir, 'c_dead_carbon_stocks%s.tif' % file_suffix)
    if args['compute_forest_edge_effects']:
        output_file_registry['spatial_index_pickle'] = os.path.join(
            intermediate_dir, 'spatial_index%s.pickle' % file_suffix)
        output_file_registry['edge_distance'] = os.path.join(
            intermediate_dir, 'edge_distance%s.tif' % file_suffix)
        output_file_registry['tropical_forest_edge_carbon_map'] = (
            os.path.join(
                intermediate_dir,
                'tropical_forest_edge_carbon_stocks%s.tif' % file_suffix))
        output_file_registry['non_forest_mask'] = os.path.join(
            intermediate_dir, 'non_forest_mask%s.tif' % file_suffix)

    # Map non-forest landcover codes to carbon biomasses
    LOGGER.info('Calculating direct mapped carbon stocks')
    carbon_maps = []
    biophysical_table = utils.build_lookup_from_csv(
        args['biophysical_table_path'], 'lucode', to_lower=False)
    biophysical_keys = [
        x.lower() for x in list(biophysical_table.values())[0].keys()]
    pool_list = [('c_above', True)]
    if args['pools_to_calculate'] == 'all':
        pool_list.extend([
            ('c_below', False), ('c_soil', False), ('c_dead', False)])
    for carbon_pool_type, ignore_tropical_type in pool_list:
        if carbon_pool_type in biophysical_keys:
            carbon_maps.append(
                output_file_registry[carbon_pool_type + '_map'])
            task_graph.add_task(
                func=_calculate_lulc_carbon_map,
                args=(args['lulc_raster_path'],
                      args['biophysical_table_path'],
                      carbon_pool_type, ignore_tropical_type,
                      args['compute_forest_edge_effects'], carbon_maps[-1]),
                target_path_list=[carbon_maps[-1]],
                task_name='calculate_lulc_%s_map' % carbon_pool_type)

    if args['compute_forest_edge_effects']:
        # generate a map of pixel distance to forest edge from the
        # landcover map
        LOGGER.info('Calculating distance from forest edge')
        map_distance_task = task_graph.add_task(
            func=_map_distance_from_tropical_forest_edge,
            args=(args['lulc_raster_path'], args['biophysical_table_path'],
                  output_file_registry['edge_distance'],
                  output_file_registry['non_forest_mask']),
            target_path_list=[
                output_file_registry['edge_distance'],
                output_file_registry['non_forest_mask']],
            task_name='map_distance_from_forest_edge')

        # Build spatial index for gridded global model for closest 3 points
        LOGGER.info('Building spatial index for forest edge models.')
        build_spatial_index_task = task_graph.add_task(
            func=_build_spatial_index,
            args=(args['lulc_raster_path'], intermediate_dir,
                  args['tropical_forest_edge_carbon_model_vector_path'],
                  output_file_registry['spatial_index_pickle']),
            target_path_list=[output_file_registry['spatial_index_pickle']],
            task_name='build_spatial_index')

        # calculate the carbon edge effect on forests
        LOGGER.info('Calculating forest edge carbon')
        task_graph.add_task(
            func=_calculate_tropical_forest_edge_carbon_map,
            args=(output_file_registry['edge_distance'],
                  output_file_registry['spatial_index_pickle'],
                  int(args['n_nearest_model_points']),
                  float(args['biomass_to_carbon_conversion_factor']),
                  output_file_registry['tropical_forest_edge_carbon_map']),
            target_path_list=[
                output_file_registry['tropical_forest_edge_carbon_map']],
            task_name='calculate_forest_edge_carbon_map',
            dependent_task_list=[
                map_distance_task, build_spatial_index_task])

        # This is also a carbon stock
        carbon_maps.append(
            output_file_registry['tropical_forest_edge_carbon_map'])

    # combine maps into a single output
    LOGGER.info('combining carbon maps into single raster')
    carbon_maps_band_list = [(path, 1) for path in carbon_maps]

    # Join here since the raster calculation depends on the target datasets
    # from all the tasks above
    task_graph.join()

    combine_carbon_maps_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(carbon_maps_band_list, combine_carbon_maps,
              output_file_registry['carbon_map'], gdal.GDT_Float32,
              NODATA_VALUE),
        target_path_list=[output_file_registry['carbon_map']],
        task_name='combine_carbon_maps')

    # generate report (optional) by aoi if they exist
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        LOGGER.info('aggregating carbon map by aoi')
        task_graph.add_task(
            func=_aggregate_carbon_map,
            args=(args['aoi_vector_path'],
                  output_file_registry['carbon_map'],
                  output_file_registry['aggregated_result_vector']),
            target_path_list=[
                output_file_registry['aggregated_result_vector']],
            task_name='aggregate_carbon_map',
            dependent_task_list=[combine_carbon_maps_task])

    # close taskgraph
    task_graph.close()
    task_graph.join()
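# A hedged sketch of invoking ``execute`` directly; every path and value
# below is a hypothetical stand-in, and the keys mirror the Args section of
# the docstring above.
def _example_execute_forest_carbon():
    execute({
        'workspace_dir': 'fc_workspace',
        'results_suffix': 'demo',
        'n_nearest_model_points': 10,
        'aoi_vector_path': 'aoi.shp',
        'biophysical_table_path': 'biophysical.csv',
        'lulc_raster_path': 'lulc.tif',
        'pools_to_calculate': 'all',
        'compute_forest_edge_effects': True,
        'tropical_forest_edge_carbon_model_vector_path': 'regression.shp',
        'biomass_to_carbon_conversion_factor': 0.47,  # hypothetical factor
        'n_workers': -1,
    })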
def alternative_index_workflow(
        workspace_dir, raster_input_dict, aoi_path, index_path,
        polygon_input_list=None):
    """Compute the alternative index from raw inputs.

    All inputs, including the AOI, must share a coordinate reference system
    and must have roughly equivalent extents.  It is recommended that
    inputs are clipped and projected in Arc prior to running this script.

    Args:
        workspace_dir (string): path to workspace where intermediate
            results should be created/stored
        raster_input_dict (dict): a nested python dictionary containing
            info about raster-based inputs that should be combined.  The
            keys in the dictionary should be the labels for each input;
            values in the dictionary should be dictionaries containing the
            keys 'path' (path to the raster input) and 'weight' (weighting
            value that is applied to the normalized values in this input
            relative to others).  EACH INDEX IS INTERPRETED AS
            HIGH VALUE = GOOD.
        aoi_path (string): path to boundary of the study area
        index_path (string): path to location where the index should be
            saved
        polygon_input_list (list): list of paths to polygon inputs that
            should be included.  Each of these is assigned a weight of 1.

    Side effects:
        creates or modifies a raster at the location ``index_path``

    Returns:
        None

    """
    # ensure that each new input shares spatial reference
    vector_info = pygeoprocessing.get_vector_info(aoi_path)
    destination_proj = osr.SpatialReference()
    destination_proj.ImportFromWkt(vector_info['projection_wkt'])
    problem_list = []
    for new_input in raster_input_dict:
        new_proj = osr.SpatialReference()
        new_proj.ImportFromWkt(
            pygeoprocessing.get_raster_info(
                raster_input_dict[new_input]['path'])['projection_wkt'])
        if (new_proj.IsSame(destination_proj) == 0):
            problem_list.append(new_input)
    if problem_list:
        raise ValueError(
            "Project these to match the AOI: {}".format(problem_list))

    intermediate_dir = os.path.join(workspace_dir, 'intermediate')
    if not os.path.exists(intermediate_dir):
        os.makedirs(intermediate_dir)
    normalized_dir = os.path.join(intermediate_dir, 'normalized')
    if not os.path.exists(normalized_dir):
        os.makedirs(normalized_dir)
    aligned_dir = os.path.join(intermediate_dir, 'aligned')
    if not os.path.exists(aligned_dir):
        os.makedirs(aligned_dir)

    # normalize all raster-based inputs within AOI
    base_raster_path_list = []
    aligned_raster_path_list = []
    for new_input in raster_input_dict:
        value_raster_path = raster_input_dict[new_input]['path']
        try:
            weight = raster_input_dict[new_input]['weight']
        except KeyError:
            weight = 1
        bn = os.path.basename(value_raster_path)
        normalized_path = os.path.join(normalized_dir, bn)
        aligned_path = os.path.join(aligned_dir, bn)
        base_raster_path_list.append(normalized_path)
        aligned_raster_path_list.append(aligned_path)
        if not os.path.exists(normalized_path):
            with tempfile.NamedTemporaryFile(
                    prefix='mask_raster', delete=False, suffix='.tif',
                    dir=normalized_dir) as clipped_raster_file:
                clipped_raster_path = clipped_raster_file.name
            pygeoprocessing.mask_raster(
                (value_raster_path, 1), aoi_path, clipped_raster_path)
            normalize(clipped_raster_path, normalized_path, aoi_path, weight)
            os.remove(clipped_raster_path)

    # align and resample normalized rasters, using minimum pixel size of
    # the inputs
    pixel_size_list = []
    for new_input in raster_input_dict:
        value_raster_path = raster_input_dict[new_input]['path']
        raster_info = pygeoprocessing.get_raster_info(value_raster_path)
        pixel_size_list.append(raster_info['pixel_size'])
    target_pixel_size = min(pixel_size_list)
    min_pixel_index = pixel_size_list.index(min(pixel_size_list))
    if not all([os.path.exists(f) for f in
                aligned_raster_path_list]):
        pygeoprocessing.align_and_resize_raster_stack(
            base_raster_path_list, aligned_raster_path_list,
            ['near'] * len(base_raster_path_list), target_pixel_size,
            'intersection', raster_align_index=min_pixel_index)

    # rasterize polygon inputs
    template_raster_path = aligned_raster_path_list[0]
    if polygon_input_list:
        for vec_path in polygon_input_list:
            target_raster_path = os.path.join(
                aligned_dir,
                '{}.tif'.format(os.path.basename(vec_path)[:-4]))
            aligned_raster_path_list.append(target_raster_path)
            if not os.path.exists(target_raster_path):
                pygeoprocessing.new_raster_from_base(
                    template_raster_path, target_raster_path,
                    gdal.GDT_Int16, [_TARGET_NODATA],
                    fill_value_list=[_TARGET_NODATA])
                pygeoprocessing.rasterize(
                    vec_path, target_raster_path, burn_values=[100])

    # add together
    raster_list_sum(
        aligned_raster_path_list, _TARGET_NODATA, index_path,
        _TARGET_NODATA, nodata_remove=True)
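# A minimal usage sketch for ``alternative_index_workflow``; the input
# labels, paths, and weights are hypothetical and follow the structure
# described in the docstring.
def _example_alternative_index_workflow():
    raster_inputs = {
        'habitat_quality': {'path': 'habitat_quality.tif', 'weight': 2},
        'carbon': {'path': 'carbon.tif'},  # weight defaults to 1
    }
    alternative_index_workflow(
        'workspace', raster_inputs, 'aoi.shp', 'alternative_index.tif',
        polygon_input_list=['protected_areas.shp'])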
def _write_summary_vector(
        source_aoi_vector_path, target_vector_path, runoff_ret_stats,
        runoff_ret_vol_stats, flood_volume_stats,
        damage_per_aoi_stats=None):
    """Write a vector with summary statistics.

    This vector will always contain three fields::

        * ``'flood_vol'``: The volume of flood (runoff), in m3, per
          watershed.
        * ``'rnf_rt_idx'``: Average of runoff retention values per
          watershed
        * ``'rnf_rt_m3'``: Sum of runoff retention volumes, in m3, per
          watershed.

    If ``damage_per_aoi_stats`` is provided, then these additional columns
    will be written to the vector::

        * ``'aff_bld'``: Potential damage to built infrastructure in
          currency units, per watershed.
        * ``'serv_blt'``: Spatial indicator of the importance of the runoff
          retention service

    Args:
        source_aoi_vector_path (str): The path to a GDAL vector that exists
            on disk.
        target_vector_path (str): The path to a vector that will be
            created.  If a file already exists at this path, it will be
            deleted before the new file is created.  This filepath must end
            with the extension ``.shp``, as the file created will be an
            ESRI Shapefile.
        runoff_ret_stats (dict): A dict representing summary statistics of
            the runoff raster.  If provided, it must be a dictionary
            mapping feature IDs from ``source_aoi_vector_path`` to dicts
            with ``'count'`` and ``'sum'`` keys.
        runoff_ret_vol_stats (dict): A dict representing summary statistics
            of the runoff volume raster.  If provided, it must be a
            dictionary mapping feature IDs from ``source_aoi_vector_path``
            to dicts with ``'count'`` and ``'sum'`` keys.
        flood_volume_stats (dict): A dict mapping feature IDs from
            ``source_aoi_vector_path`` to dicts with a ``'sum'`` key
            representing the flood volume over the AOI.
        damage_per_aoi_stats (dict): A dict mapping feature IDs from
            ``source_aoi_vector_path`` to float values representing the
            total damage to built infrastructure in that watershed.

    Returns:
        ``None``

    """
    source_aoi_vector = gdal.OpenEx(source_aoi_vector_path, gdal.OF_VECTOR)
    source_aoi_layer = source_aoi_vector.GetLayer()
    source_geom_type = source_aoi_layer.GetGeomType()
    source_srs_wkt = pygeoprocessing.get_vector_info(
        source_aoi_vector_path)['projection_wkt']
    source_srs = osr.SpatialReference()
    source_srs.ImportFromWkt(source_srs_wkt)

    esri_driver = gdal.GetDriverByName('ESRI Shapefile')
    target_watershed_vector = esri_driver.Create(
        target_vector_path, 0, 0, 0, gdal.GDT_Unknown)
    layer_name = os.path.splitext(os.path.basename(target_vector_path))[0]
    LOGGER.debug("creating layer %s", layer_name)
    target_watershed_layer = target_watershed_vector.CreateLayer(
        layer_name, source_srs, source_geom_type)

    target_fields = ['rnf_rt_idx', 'rnf_rt_m3', 'flood_vol']
    if not damage_per_aoi_stats:
        damage_per_aoi_stats = {}
    else:
        target_fields += ['aff_bld', 'serv_blt']

    for field_name in target_fields:
        field_def = ogr.FieldDefn(field_name, ogr.OFTReal)
        field_def.SetWidth(36)
        field_def.SetPrecision(11)
        target_watershed_layer.CreateField(field_def)

    target_layer_defn = target_watershed_layer.GetLayerDefn()
    for base_feature in source_aoi_layer:
        feature_id = base_feature.GetFID()
        target_feature = ogr.Feature(target_layer_defn)
        base_geom_ref = base_feature.GetGeometryRef()
        target_feature.SetGeometry(base_geom_ref.Clone())
        base_geom_ref = None

        if feature_id in runoff_ret_stats:
            pixel_count = runoff_ret_stats[feature_id]['count']
            if pixel_count > 0:
                mean_value = (
                    runoff_ret_stats[feature_id]['sum'] /
                    float(pixel_count))
                target_feature.SetField('rnf_rt_idx', float(mean_value))

        if feature_id in runoff_ret_vol_stats:
            target_feature.SetField(
                'rnf_rt_m3',
                float(runoff_ret_vol_stats[feature_id]['sum']))

        if feature_id in damage_per_aoi_stats:
            pixel_count = runoff_ret_vol_stats[feature_id]['count']
            if pixel_count > 0:
                damage_sum = damage_per_aoi_stats[feature_id]
                target_feature.SetField('aff_bld', damage_sum)

                # This is the service_built equation.
                target_feature.SetField(
                    'serv_blt',
                    (damage_sum *
                     runoff_ret_vol_stats[feature_id]['sum']))

        if feature_id in flood_volume_stats:
            target_feature.SetField(
                'flood_vol', float(flood_volume_stats[feature_id]['sum']))

        target_watershed_layer.CreateFeature(target_feature)

    target_watershed_layer.SyncToDisk()
    target_watershed_layer = None
    target_watershed_vector = None
def main():
    """Entry point."""
    for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
        try:
            os.makedirs(dir_path)
        except OSError:
            pass

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')
    download_task.join()

    world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    world_borders_layer = world_borders_vector.GetLayer()
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)

    raster_info = pygeoprocessing.get_raster_info(RASTER_PATH)

    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    country_threshold_table_file = open(country_threshold_table_path, 'w')
    country_threshold_table_file.write(
        'country,percentile at 90% max,pixel count\n')
    for world_border_feature in world_borders_layer:
        country_name = world_border_feature.GetField('NAME')
        if country_name != 'Canada':
            continue
        LOGGER.debug(country_name)
        country_workspace = os.path.join(COUNTRY_WORKSPACES, country_name)
        try:
            os.makedirs(country_workspace)
        except OSError:
            pass

        country_vector = os.path.join(
            country_workspace, '%s.gpkg' % country_name)
        country_vector_complete_token = os.path.join(
            country_workspace, '%s.COMPLETE' % country_name)
        extract_feature(
            world_borders_path, world_border_feature.GetFID(),
            wgs84_srs.ExportToWkt(), country_vector,
            country_vector_complete_token)

        country_raster_path = os.path.join(country_workspace, '%s_%s' % (
            country_name, os.path.basename(RASTER_PATH)))
        country_vector_info = pygeoprocessing.get_vector_info(country_vector)
        pygeoprocessing.warp_raster(
            RASTER_PATH, raster_info['pixel_size'], country_raster_path,
            'near', target_bb=country_vector_info['bounding_box'],
            vector_mask_options={'mask_vector_path': country_vector},
            working_dir=country_workspace)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
        if len(percentile_values) != len(PERCENTILE_LIST):
            continue
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)
        nodata = pygeoprocessing.get_raster_info(
            country_raster_path)['nodata'][0]
        pixel_count = 0
        for _, data_block in pygeoprocessing.iterblocks(
                (country_raster_path, 1)):
            nodata_mask = ~numpy.isclose(data_block, nodata)
            pixel_count += numpy.count_nonzero(nodata_mask)
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum(data_block[
                    nodata_mask & (data_block >= percentile_value)])

        # threshold is at 90% says Becky
        threshold_limit = 0.9 * cdf_array[2]
        LOGGER.debug(cdf_array)
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
        f = scipy.interpolate.interp1d(
            cdf_array, list(reversed(PERCENTILE_LIST)))
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s" % (
                    threshold_limit, cdf_array))
            cdf_threshold = cdf_array[2]

        ax.plot([0, 100], [threshold_limit, threshold_limit],
                'k:', linewidth=2)
        ax.plot([cdf_threshold, cdf_threshold],
                [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)
        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
                country_name, threshold_limit, cdf_threshold, pixel_count))
        ax.set_ylabel(
            'Sum of %s up to 100-percentile' % os.path.basename(RASTER_PATH))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        country_threshold_table_file.write(
            '%s, %f, %d\n' % (country_name, cdf_threshold, pixel_count))
        country_threshold_table_file.flush()
    country_threshold_table_file.close()
"""Demo some clipping.""" import logging import pygeoprocessing logging.basicConfig( level=logging.DEBUG, format=( '%(asctime)s (%(relativeCreated)d) %(processName)s %(levelname)s ' '%(name)s [%(funcName)s:%(lineno)d] %(message)s')) LOGGER = logging.getLogger(__name__) if __name__ == '__main__': raster_path = '../session2/DEM_md5_53d4998eec75d803a318fafd28c40a3e.tif' aoi_vector_path = './session2/aoi.gpkg' raster_info = pygeoprocessing.get_raster_info(raster_path) vector_info = pygeoprocessing.get_vector_info(aoi_vector_path) raster_projected_bounding_box = pygeoprocessing.transform_bounding_box( vector_info['bounding_box'], vector_info['projection_wkt'], raster_info['projection_wkt']) target_clipped_raster_path = 'DEM_clip.tif' pygeoprocessing.warp_raster( raster_path, raster_info['pixel_size'], target_clipped_raster_path, 'near', target_bb=raster_projected_bounding_box)
def _mask_raster_by_vector(
        base_raster_path_band, vector_path, working_dir, target_raster_path):
    """Mask pixels outside of the vector to nodata.

    Parameters:
        base_raster_path_band (tuple): path/band tuple to raster to process
        vector_path (string): path to single layer vector that is used to
            indicate areas to preserve from the base raster.  Areas outside
            of this vector are set to nodata.
        working_dir (str): path to temporary directory.
        target_raster_path (string): path to a single band raster that will
            be created of the same dimensions and data type as
            ``base_raster_path_band`` where any pixels that lie outside of
            ``vector_path`` coverage will be set to nodata.

    Returns:
        None.

    """
    # Warp input raster to be same bounding box as AOI if smaller.
    base_raster_info = pygeoprocessing.get_raster_info(
        base_raster_path_band[0])
    nodata = base_raster_info['nodata'][base_raster_path_band[1] - 1]
    target_pixel_size = base_raster_info['pixel_size']
    vector_info = pygeoprocessing.get_vector_info(vector_path)
    target_bounding_box = pygeoprocessing.merge_bounding_box_list(
        [base_raster_info['bounding_box'],
         vector_info['bounding_box']], 'intersection')
    pygeoprocessing.warp_raster(
        base_raster_path_band[0], target_pixel_size, target_raster_path,
        'near', target_bb=target_bounding_box)

    # Create mask raster same size as the warped raster.
    tmp_dir = tempfile.mkdtemp(dir=working_dir)
    mask_raster_path = os.path.join(tmp_dir, 'mask.tif')
    pygeoprocessing.new_raster_from_base(
        target_raster_path, mask_raster_path, gdal.GDT_Byte, [0],
        fill_value_list=[0])

    # Rasterize the vector onto the mask raster
    pygeoprocessing.rasterize(vector_path, mask_raster_path, [1], None)

    # Parallel iterate over warped raster and mask raster to mask out
    # original.
    target_raster = gdal.OpenEx(
        target_raster_path, gdal.GA_Update | gdal.OF_RASTER)
    target_band = target_raster.GetRasterBand(1)
    mask_raster = gdal.OpenEx(mask_raster_path, gdal.OF_RASTER)
    mask_band = mask_raster.GetRasterBand(1)

    for offset_dict in pygeoprocessing.iterblocks(
            (mask_raster_path, 1), offset_only=True):
        data_array = target_band.ReadAsArray(**offset_dict)
        mask_array = mask_band.ReadAsArray(**offset_dict)
        data_array[mask_array != 1] = nodata
        target_band.WriteArray(
            data_array, xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])

    target_band.FlushCache()
    target_band = None
    target_raster = None
    mask_band = None
    mask_raster = None
    try:
        shutil.rmtree(tmp_dir)
    except OSError:
        LOGGER.warning("Unable to delete temporary directory %s", tmp_dir)
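# A hedged usage sketch for ``_mask_raster_by_vector``; paths are
# hypothetical, and ``working_dir`` must already exist because the function
# creates its temporary directory inside it.
def _example_mask_raster_by_vector():
    _mask_raster_by_vector(
        ('population.tif', 1),   # hypothetical raster path/band tuple
        'aoi.gpkg',              # hypothetical mask vector
        'scratch',               # existing temporary directory
        'population_masked.tif')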
from natcap.invest import coastal_vulnerability as cv
import pygeoprocessing
import logging
import time
import shutil
from osgeo import gdal, ogr

LOGGER = logging.getLogger()
logging.basicConfig(level='INFO')

fetch_ray_vector_path = 'C:/Users/dmf/projects/invest_dev/coastal_vulnerability/bahamas/workspace_37/workspace_37/intermediate/wind_wave/fetch_rays.gpkg'
# fetch_ray_vector_path = 'C:/Users/dmf/projects/invest_dev/coastal_vulnerability/MAR/intermediate/wind_wave/fetch_rays.gpkg'

vector_info = pygeoprocessing.get_vector_info(fetch_ray_vector_path)
model_resolution = 500
file_suffix = ''
base_bathy_path = 'C:/Users/dmf/projects/invest/data/invest-sample-data/Base_Data/Marine/DEMs/global_dem'
target_bathy_path = 'bathy_utm.tif'
working_dir = 'temp_zonal_stats'
target_fetch_depth_path = 'fetch_depth_bahamas.gpkg'

start = time.time()
cv.clip_and_project_raster(
    base_bathy_path, vector_info['bounding_box'],
    vector_info['projection_wkt'], model_resolution, working_dir,
    file_suffix, target_bathy_path)

result = pygeoprocessing.zonal_statistics(
    (target_bathy_path, 1), fetch_ray_vector_path,
    polygons_might_overlap=False, working_dir=working_dir)
churn_dir = os.path.join(args.workspace_dir, 'churn')
try:
    os.makedirs(churn_dir)
except OSError:
    pass

# ensure AOI and CV points are in the same projection
aoi_raster_info = pygeoprocessing.get_raster_info(
    args.aoi_mask_raster_path)
aoi_srs = osr.SpatialReference()
aoi_srs.ImportFromWkt(aoi_raster_info['projection_wkt'])
aoi_epsg = aoi_srs.GetAttrValue("PROJCS|GEOGCS|AUTHORITY", 1)

shoreline_point_info = pygeoprocessing.get_vector_info(
    args.shoreline_point_vector_path)
shoreline_srs = osr.SpatialReference()
shoreline_srs.ImportFromWkt(shoreline_point_info['projection_wkt'])
shoreline_epsg = shoreline_srs.GetAttrValue("PROJCS|GEOGCS|AUTHORITY", 1)

habitat_vector_info = pygeoprocessing.get_vector_info(
    args.habitat_vector_path)
habitat_vector_srs = osr.SpatialReference()
habitat_vector_srs.ImportFromWkt(habitat_vector_info['projection_wkt'])
habitat_vector_epsg = habitat_vector_srs.GetAttrValue(
    "PROJCS|GEOGCS|AUTHORITY", 1)

if len(set([habitat_vector_epsg, shoreline_epsg, aoi_epsg])) > 1:
    raise ValueError(
        "AOI raster, shoreline point vector, and habitat vector do not "
        "all share the same projection")
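# The three near-identical lookups above could be collapsed into a helper
# like this sketch (a suggestion, not part of the original script); it
# returns the EPSG authority code for a raster or vector path.
def _get_epsg(spatial_path, is_raster):
    if is_raster:
        info = pygeoprocessing.get_raster_info(spatial_path)
    else:
        info = pygeoprocessing.get_vector_info(spatial_path)
    srs = osr.SpatialReference()
    srs.ImportFromWkt(info['projection_wkt'])
    return srs.GetAttrValue("PROJCS|GEOGCS|AUTHORITY", 1)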
def process_watershed(
        job_id, watershed_vector_path, watershed_fid, dem_path, hab_path,
        pop_raster_path_list, target_beneficiaries_path_list,
        target_normalized_beneficiaries_path_list,
        target_hab_normalized_beneficiaries_path_list,
        target_stitch_work_queue_list):
    """Calculate downstream beneficiaries for this watershed.

    Args:
        job_id (str): unique ID identifying this job, can be used to
            create unique workspaces.
        watershed_vector_path (str): path to watershed vector
        watershed_fid (str): watershed FID to process
        dem_path (str): path to DEM raster
        hab_path (str): path to habitat mask raster
        pop_raster_path_list (list): list of population rasters to route
        target_beneficiaries_path_list (str): list of target downstream
            beneficiary rasters to create, parallel with
            `pop_raster_path_list`.
        target_normalized_beneficiaries_path_list (list): list of target
            normalized downstream beneficiary rasters, parallel with other
            lists.
        target_hab_normalized_beneficiaries_path_list (list): list of
            target hab normalized downstream beneficiary rasters, parallel
            with other lists.
        target_stitch_work_queue_list (list): list of work queue tuples to
            put done signals in when each beneficiary raster is done.  The
            first element is for the standard target, the second for the
            normalized raster.

    Return:
        None.

    """
    working_dir = os.path.join(
        os.path.dirname(target_beneficiaries_path_list[0]))
    os.makedirs(working_dir, exist_ok=True)
    LOGGER.debug(f'create working directory for {job_id} at {working_dir}')

    task_graph = taskgraph.TaskGraph(working_dir, -1)

    watershed_info = pygeoprocessing.get_vector_info(watershed_vector_path)
    watershed_vector = gdal.OpenEx(watershed_vector_path, gdal.OF_VECTOR)
    watershed_layer = watershed_vector.GetLayer()
    watershed_feature = watershed_layer.GetFeature(watershed_fid)
    watershed_geom = watershed_feature.GetGeometryRef()
    watershed_centroid = watershed_geom.Centroid()
    utm_code = (math.floor((watershed_centroid.GetX() + 180) / 6) % 60) + 1
    lat_code = 6 if watershed_centroid.GetY() > 0 else 7
    epsg_code = int('32%d%02d' % (lat_code, utm_code))
    epsg_sr = osr.SpatialReference()
    epsg_sr.ImportFromEPSG(epsg_code)

    watershed_envelope = watershed_geom.GetEnvelope()
    # swizzle the envelope order that by default is xmin/xmax/ymin/ymax
    lat_lng_watershed_bb = [watershed_envelope[i] for i in [0, 2, 1, 3]]
    target_watershed_bb = pygeoprocessing.transform_bounding_box(
        lat_lng_watershed_bb, watershed_info['projection_wkt'],
        epsg_sr.ExportToWkt())

    watershed_vector = None
    watershed_layer = None
    watershed_feature = None
    watershed_geom = None
    watershed_centroid = None
    watershed_envelope = None

    target_pixel_size = (300, -300)
    warped_dem_raster_path = os.path.join(working_dir, f'{job_id}_dem.tif')
    warped_habitat_raster_path = os.path.join(
        working_dir, f'{job_id}_hab.tif')
    align_task = task_graph.add_task(
        func=pygeoprocessing.align_and_resize_raster_stack,
        args=(
            [dem_path, hab_path],
            [warped_dem_raster_path, warped_habitat_raster_path],
            ['near', 'mode'], target_pixel_size, target_watershed_bb),
        kwargs={
            'target_projection_wkt': epsg_sr.ExportToWkt(),
            'vector_mask_options': {
                'mask_vector_path': watershed_vector_path,
                'mask_vector_where_filter': f'"FID"={watershed_fid}'},
            },
        target_path_list=[
            warped_dem_raster_path, warped_habitat_raster_path],
        task_name=(
            f'align and clip and warp dem/hab to {warped_dem_raster_path} '
            f'{warped_habitat_raster_path}'))

    filled_dem_raster_path = os.path.join(
        working_dir, f'{job_id}_filled_dem.tif')
    fill_pits_task = task_graph.add_task(
        func=pygeoprocessing.routing.fill_pits,
        args=((warped_dem_raster_path, 1), filled_dem_raster_path),
        kwargs={
            'working_dir': working_dir,
            'max_pixel_fill_count': 1000000},
        dependent_task_list=[align_task],
        target_path_list=[filled_dem_raster_path],
        task_name=f'fill dem pits to {filled_dem_raster_path}')

    flow_dir_mfd_raster_path = os.path.join(
        working_dir, f'{job_id}_flow_dir_mfd.tif')
    flow_dir_mfd_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_dir_mfd,
        args=((filled_dem_raster_path, 1), flow_dir_mfd_raster_path),
        kwargs={'working_dir': working_dir},
        dependent_task_list=[fill_pits_task],
        target_path_list=[flow_dir_mfd_raster_path],
        task_name=f'calc flow dir for {flow_dir_mfd_raster_path}')

    outlet_vector_path = os.path.join(
        working_dir, f'{job_id}_outlet_vector.gpkg')
    detect_outlets_task = task_graph.add_task(
        func=pygeoprocessing.routing.detect_outlets,
        args=((flow_dir_mfd_raster_path, 1), 'mfd', outlet_vector_path),
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[outlet_vector_path],
        task_name=f'detect outlets {outlet_vector_path}')

    outlet_raster_path = os.path.join(
        working_dir, f'{job_id}_outlet_raster.tif')
    create_outlet_raster_task = task_graph.add_task(
        func=_create_outlet_raster,
        args=(
            outlet_vector_path, flow_dir_mfd_raster_path,
            outlet_raster_path),
        dependent_task_list=[detect_outlets_task],
        target_path_list=[outlet_raster_path],
        task_name=f'create outlet raster {outlet_raster_path}')

    flow_accum_mfd_raster_path = os.path.join(
        working_dir, f'{job_id}_flow_accum.tif')
    flow_accum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=((flow_dir_mfd_raster_path, 1), flow_accum_mfd_raster_path),
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[flow_accum_mfd_raster_path],
        task_name=(
            f'calc upstream flow area for {flow_accum_mfd_raster_path}'))

    hab_upstream_area_raster_path = os.path.join(
        working_dir, f'{job_id}_hab_upstream.tif')
    hab_upstream_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=((flow_dir_mfd_raster_path, 1), hab_upstream_area_raster_path),
        kwargs={'weight_raster_path_band': (warped_habitat_raster_path, 1)},
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[hab_upstream_area_raster_path],
        task_name=(
            f'calc upstream hab area for {hab_upstream_area_raster_path}'))

    for (pop_raster_path, target_beneficiaries_path,
            target_normalized_beneficiaries_path,
            target_hab_normalized_beneficiaries_path,
            stitch_queue_tuple) in zip(
                pop_raster_path_list, target_beneficiaries_path_list,
                target_normalized_beneficiaries_path_list,
                target_hab_normalized_beneficiaries_path_list,
                target_stitch_work_queue_list):
        LOGGER.debug(
            f'processing {target_beneficiaries_path} and normalized')
        aligned_pop_raster_path = os.path.join(
            working_dir,
            f'''{job_id}_{os.path.basename(
                os.path.splitext(pop_raster_path)[0])}.tif''')

        pop_warp_task = task_graph.add_task(
            func=_warp_and_wgs84_area_scale,
            args=(
                pop_raster_path, warped_dem_raster_path,
                aligned_pop_raster_path, 'near', lat_lng_watershed_bb,
                watershed_vector_path, watershed_fid, working_dir),
            dependent_task_list=[align_task],
            target_path_list=[aligned_pop_raster_path],
            task_name=f'align {aligned_pop_raster_path}')

        downstream_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=(
                (flow_dir_mfd_raster_path, 1), (outlet_raster_path, 1),
                target_beneficiaries_path),
            kwargs={
                'weight_raster_path_band': (aligned_pop_raster_path, 1)},
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task,
                flow_dir_mfd_task],
            target_path_list=[target_beneficiaries_path],
            task_name=(
                'calc downstream beneficiaries for '
                f'{target_beneficiaries_path}'))

        # divide aligned_pop_raster_path by flow accum to get normalized
        # then route it downstream
        pop_normal_by_upstream_raster_path = '%s_norm%s' % os.path.splitext(
            aligned_pop_raster_path)
        normalize_by_dist_task = task_graph.add_task(
            func=normalize,
            args=(
                aligned_pop_raster_path, flow_accum_mfd_raster_path,
                pop_normal_by_upstream_raster_path),
            dependent_task_list=[flow_accum_task, align_task],
            target_path_list=[pop_normal_by_upstream_raster_path],
            task_name=(
                f'normalized beneficiaries for '
                f'{pop_normal_by_upstream_raster_path}'))

        prescaled_normalized_beneficiaries_path = (
            '%s_prescaled%s' % os.path.splitext(
                target_normalized_beneficiaries_path))
        downstream_norm_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=(
                (flow_dir_mfd_raster_path, 1), (outlet_raster_path, 1),
                prescaled_normalized_beneficiaries_path),
            kwargs={
                'weight_raster_path_band': (
                    pop_normal_by_upstream_raster_path, 1)},
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task,
                flow_dir_mfd_task, normalize_by_dist_task],
            target_path_list=[prescaled_normalized_beneficiaries_path],
            task_name=(
                'calc downstream normalized beneficiaries for '
                f'{prescaled_normalized_beneficiaries_path}'))

        task_graph.add_task(
            func=rescale_by_base,
            args=(
                aligned_pop_raster_path,
                prescaled_normalized_beneficiaries_path,
                target_normalized_beneficiaries_path),
            target_path_list=[target_normalized_beneficiaries_path],
            dependent_task_list=[downstream_norm_bene_task],
            task_name=f'rescale {target_normalized_beneficiaries_path}')

        # divide aligned_pop_raster_path by hab accum to get normalized by
        # hab then route it downstream (renamed from a second
        # ``normalize_by_dist_task`` to avoid shadowing the variable above)
        pop_hab_normal_by_upstream_raster_path = (
            '%s_hab_norm%s' % os.path.splitext(aligned_pop_raster_path))
        normalize_by_hab_task = task_graph.add_task(
            func=normalize,
            args=(
                aligned_pop_raster_path, hab_upstream_area_raster_path,
                pop_hab_normal_by_upstream_raster_path),
            dependent_task_list=[hab_upstream_task, align_task],
            target_path_list=[pop_hab_normal_by_upstream_raster_path],
            task_name=(
                f'normalized beneficiaries for '
                f'{pop_hab_normal_by_upstream_raster_path}'))

        hab_pre_mask_normalized_beneficiaries_path = (
            '%s_pre_mask%s' % os.path.splitext(
                target_hab_normalized_beneficiaries_path))
        downstream_norm_hab_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=(
                (flow_dir_mfd_raster_path, 1), (outlet_raster_path, 1),
                hab_pre_mask_normalized_beneficiaries_path),
            kwargs={
                'weight_raster_path_band': (
                    pop_hab_normal_by_upstream_raster_path, 1)},
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task,
                flow_dir_mfd_task, normalize_by_hab_task],
            target_path_list=[hab_pre_mask_normalized_beneficiaries_path],
            task_name=(
                'calc downstream normalized beneficiaries for '
                f'{hab_pre_mask_normalized_beneficiaries_path}'))

        # mask this result to the target
        prescaled_hab_normalized_beneficiaries_path = (
            '%s_prescaled%s' % os.path.splitext(
                target_hab_normalized_beneficiaries_path))
        mask_downstream_norm_bene_task = task_graph.add_task(
            func=_mask_raster,
            args=(
                hab_pre_mask_normalized_beneficiaries_path,
                warped_habitat_raster_path,
                prescaled_hab_normalized_beneficiaries_path),
            dependent_task_list=[downstream_norm_hab_bene_task, align_task],
            target_path_list=[
                prescaled_hab_normalized_beneficiaries_path],
            task_name=f'mask {prescaled_hab_normalized_beneficiaries_path}')

        task_graph.add_task(
            func=rescale_by_base,
            args=(
                aligned_pop_raster_path,
                prescaled_hab_normalized_beneficiaries_path,
                target_hab_normalized_beneficiaries_path),
            target_path_list=[target_hab_normalized_beneficiaries_path],
            dependent_task_list=[mask_downstream_norm_bene_task],
            task_name=f'rescale {target_hab_normalized_beneficiaries_path}')

        task_graph.join()
        stitch_queue_tuple[0].put(
            (target_beneficiaries_path, working_dir, job_id))
        stitch_queue_tuple[1].put(
            (target_normalized_beneficiaries_path, working_dir, job_id))
        stitch_queue_tuple[2].put(
            (target_hab_normalized_beneficiaries_path, working_dir, job_id))

    task_graph.close()
    task_graph.join()
    task_graph = None
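# A hedged sketch of a single ``process_watershed`` call; every path below
# is a hypothetical stand-in, with one population raster, a parallel trio
# of target paths, and a three-queue stitch tuple as the function expects.
def _example_process_watershed():
    import multiprocessing
    stitch_queues = tuple(multiprocessing.Queue() for _ in range(3))
    process_watershed(
        'ws_0001', 'watersheds.gpkg', 1, 'dem.tif', 'habitat_mask.tif',
        ['population.tif'],
        ['downstream_bene.tif'],
        ['downstream_bene_norm.tif'],
        ['downstream_bene_hab_norm.tif'],
        [stitch_queues])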