def test_habitat_rank(self):
    """CV: habitat rank table matches the stored regression result."""
    workspace = self.workspace_dir
    shore_points_path = os.path.join(
        INPUT_DATA, "wwiii_shore_points_5000m.gpkg")
    habitat_csv_path = os.path.join(
        INPUT_DATA, "natural_habitats_wcvi.csv")
    result_csv_path = os.path.join(workspace, 'habitat_protection.csv')
    suffix = ''

    # -1 workers: the habitat tasks are scheduled on a single-process graph.
    graph = taskgraph.TaskGraph(
        os.path.join(workspace, 'taskgraph_dir'), -1)

    # Only the pickle paths are needed here; the task objects are unused.
    _, habitat_pickle_list = coastal_vulnerability._schedule_habitat_tasks(
        shore_points_path, habitat_csv_path, workspace, suffix, graph)
    coastal_vulnerability.calculate_habitat_rank(
        habitat_pickle_list, result_csv_path)

    # Compare the freshly written table against the regression table.
    actual_df = pandas.read_csv(result_csv_path)
    expected_df = pandas.read_csv(
        os.path.join(REGRESSION_DATA, 'expected_habitat_protection.csv'))
    pandas.testing.assert_frame_equal(actual_df, expected_df)
def __init__(self, download_dir, taskgraph_object_or_dir, n_workers=0):
    """Construct TaskGraphDownloader object.

    Parameters:
        download_dir (str): the base directory which files will be
            downloaded into. It is created if it does not exist yet.
        taskgraph_object_or_dir (str/TaskGraph): either an existing
            TaskGraph object, which is used as-is, or a path to the
            taskgraph workspace database used to construct an internal
            TaskGraph object. This directory should not be used for any
            other file storage.
        n_workers (int): number of processes to use to simultaneously
            download ecoshards. Only used when an internal TaskGraph is
            constructed from a directory path.

    Raises:
        OSError: if ``download_dir`` cannot be created for a reason other
            than it already existing as a directory.

    """
    # exist_ok only tolerates "directory already exists"; any other
    # failure (permissions, a regular file in the way) surfaces here
    # instead of at the first download.
    os.makedirs(download_dir, exist_ok=True)

    if isinstance(taskgraph_object_or_dir, taskgraph.TaskGraph):
        LOGGER.debug('got taskgraph object')
        self.task_graph = taskgraph_object_or_dir
    else:
        LOGGER.debug('no taskgraph object, creating internal one')
        self.task_graph = taskgraph.TaskGraph(
            taskgraph_object_or_dir, n_workers)

    self.download_dir = download_dir

    # this will be a dictionary indexed by ecoshard key to a dict
    # containing fields:
    #   'url': the original url
    #   'local_path': path to local file/dir
    #   'download_task': the taskgraph.Task object used to fetch the
    #       ecoshard
    self.key_to_path_task_map = {}
def main():
    """Entry point.

    For every raster matched by the ``base_rasters`` glob patterns this
    calculates the raster sum through a TaskGraph task, writes that sum to
    ``<target_dir>/<raster_id>/sum_of_<raster_id>`` and then runs
    ``pygeoprocessing.raster_optimization`` on the raster. When
    ``--target_val`` is given the goal-met cutoffs are four evenly spaced
    fractions ending at ``target_val / raster_sum``; otherwise they run
    from 1% to 100% in 1% steps.

    Returns:
        None
    """
    parser = argparse.ArgumentParser(
        description='Carbon regression scenario maker')
    # Required: every output path below is derived from target_dir, so a
    # missing value would otherwise only fail later with an unrelated error.
    parser.add_argument(
        '--target_dir', required=True, help="path to output dir")
    parser.add_argument(
        'base_rasters', nargs='+', help=("glob to base rasters to optimize"))
    parser.add_argument(
        '--sum', action='store_true', help='if set, report sum of raster')
    parser.add_argument(
        '--target_val', type=float, default=None,
        help='if set use this as the goal met cutoff')
    args = parser.parse_args()

    task_graph = taskgraph.TaskGraph(args.target_dir, -1)
    churn_dir = os.path.join(args.target_dir, 'churn')
    os.makedirs(churn_dir, exist_ok=True)

    raster_path_list = [
        raster_path
        for glob_pattern in args.base_rasters
        for raster_path in glob.glob(glob_pattern)]

    for raster_path in raster_path_list:
        LOGGER.debug(raster_path)
        raster_sum_task = task_graph.add_task(
            func=calc_raster_sum,
            args=(raster_path,),
            task_name=f'calc sum for {raster_path}')
        raster_sum = raster_sum_task.get()
        if args.sum:
            LOGGER.info(f'{raster_path}: {raster_sum}')

        raster_id = os.path.basename(os.path.splitext(raster_path)[0])
        output_dir = os.path.join(args.target_dir, raster_id)
        os.makedirs(output_dir, exist_ok=True)
        sum_file_path = os.path.join(output_dir, f'sum_of_{raster_id}')
        with open(sum_file_path, 'w') as sum_file:
            sum_file.write(f'{raster_sum}\n')

        # Only the cutoffs differ between the two modes; the optimization
        # call itself is shared.
        if args.target_val is not None:
            LOGGER.info(f'optimize to {args.target_val}')
            target_threshold = args.target_val / raster_sum
            # drop the leading 0 so four non-zero cutoffs remain
            goal_met_cutoffs = numpy.linspace(0, target_threshold, 5)[1:]
        else:
            LOGGER.info('running to 100%')
            goal_met_cutoffs = [float(x)/100.0 for x in range(1, 101)]

        pygeoprocessing.raster_optimization(
            [(raster_path, 1)], churn_dir, output_dir,
            target_suffix=raster_id,
            goal_met_cutoffs=goal_met_cutoffs,
            heap_buffer_size=2**28, ffi_buffer_size=2**10)

    # Release the graph once every raster has been processed.
    task_graph.close()
    task_graph.join()
def main():
    """Calculate percentiles for every carbon raster and write a CSV table.

    Every ``*.tif`` in the base directory is scheduled through TaskGraph to
    ``calculate_percentile``, which stores its result as a pickle in the
    percentile working directory. Once all tasks finish, the pickled
    results are written to the table file, one section per raster: the
    raster filename, a header row, then one
    ``percentile,percentile_value,percentile_sum`` row per percentile.

    Returns:
        None
    """
    percentile_working_dir = r"C:\Users\Becky\Documents\raster_calculations\CNC_workspace\percentile_working_dir"
    os.makedirs(percentile_working_dir, exist_ok=True)
    table_path = r"C:\Users\Becky\Documents\cnc_project\carbon_percentiles_table.csv"
    # this is the directory the loop will search through
    base_directory = r"C:\Users\Becky\Documents\cnc_project\original_rasters\carbon"

    # you can modify this list and the rest of the code will adapt
    percentiles_list = list(range(0, 101, 1))

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, N_CPUS, 5.0)

    # Sort once and reuse the same list for scheduling AND for writing the
    # table, so each pickle is always paired with the raster it came from
    # (glob order is arbitrary).
    raster_path_list = sorted(
        glob.glob(os.path.join(base_directory, '*.tif')))
    pickle_path_list = []
    for raster_path in raster_path_list:
        LOGGER.debug('processing %s', raster_path)
        raster_stem = os.path.splitext(os.path.basename(raster_path))[0]
        result_pickle_path = os.path.join(
            percentile_working_dir, '%s.pickle' % raster_stem)
        pickle_path_list.append(result_pickle_path)
        _ = task_graph.add_task(
            func=calculate_percentile,
            args=(
                raster_path, percentiles_list, percentile_working_dir,
                result_pickle_path),
            target_path_list=[result_pickle_path],
            task_name='%s percentile' % raster_path)

    LOGGER.debug('waiting for pipeline to process')
    task_graph.join()
    task_graph.close()

    LOGGER.debug('saving results to a csv table')
    # `with` guarantees the table is flushed and closed even if a pickle
    # fails to load part-way through.
    with open(table_path, 'w') as table_file:
        for result_pickle_path, raster_path in zip(
                pickle_path_list, raster_path_list):
            raster_filename = os.path.basename(raster_path)
            LOGGER.debug('loading: %s', result_pickle_path)
            # NOTE: pickle is only safe here because these files were
            # written by this pipeline; never point this at untrusted data.
            with open(result_pickle_path, 'rb') as result_pickle_file:
                result_dict = pickle.load(result_pickle_file)
            LOGGER.debug(result_dict)
            table_file.write('%s\n' % raster_filename)
            table_file.write('percentile,percentile_value,percentile_sum\n')
            pixel_stats_string = '\n'.join(
                '%f,%.10e,%.10e' % (
                    percentile, percentile_value, percentile_sum)
                for percentile, percentile_value, percentile_sum in zip(
                    result_dict['percentiles_list'],
                    result_dict['percentile_values_list'],
                    result_dict['percentile_sum_list']))
            table_file.write(pixel_stats_string)
            table_file.write('\n')
def initialize():
    """Entry point.

    Creates the working directories, then schedules and runs four tasks:
    download the country borders, download the watersheds zip, unzip the
    watersheds, and create the status database. The graph is joined and
    closed before returning.
    """
    for required_dir in (WORKSPACE_DIR, ECOSHARD_DIR, CHURN_DIR):
        try:
            os.makedirs(required_dir)
        except OSError:
            pass

    graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1)

    # country borders download
    borders_path = os.path.join(
        ECOSHARD_DIR, os.path.basename(COUNTRY_BORDERS_URL))
    fetch_borders_task = graph.add_task(
        func=ecoshard.download_url,
        args=(COUNTRY_BORDERS_URL, borders_path),
        target_path_list=[borders_path],
        task_name='download country borders')

    # watersheds download
    zip_path = os.path.join(ECOSHARD_DIR, os.path.basename(WATERSHEDS_URL))
    LOGGER.debug('scheduing download of watersheds: %s', WATERSHEDS_URL)
    fetch_zip_task = graph.add_task(
        func=ecoshard.download_url,
        args=(WATERSHEDS_URL, zip_path),
        target_path_list=[zip_path],
        task_name='download watersheds zip')

    # The unzip directory is named after the zip file with '.zip' removed;
    # a token file marks the unzip as complete for the taskgraph.
    unzip_dir_name = os.path.basename(zip_path.replace('.zip', ''))
    unzip_dir = os.path.join(CHURN_DIR, unzip_dir_name)
    token_path = os.path.join(
        CHURN_DIR, '%s.UNZIPTOKEN' % os.path.basename(unzip_dir))
    LOGGER.debug('scheduing unzip of: %s', zip_path)
    unzip_task = graph.add_task(
        func=unzip_file,
        args=(zip_path, unzip_dir, token_path),
        target_path_list=[token_path],
        dependent_task_list=[fetch_zip_task],
        task_name='unzip %s' % zip_path)

    # status database: needs both the borders and the unzipped watersheds
    db_token_path = os.path.join(
        CHURN_DIR, 'create_status_database.COMPLETE')
    graph.add_task(
        func=create_status_database,
        args=(
            STATUS_DATABASE_PATH, unzip_dir, borders_path, db_token_path),
        target_path_list=[db_token_path],
        ignore_path_list=[STATUS_DATABASE_PATH],
        dependent_task_list=[fetch_borders_task, unzip_task],
        task_name='create status database')

    graph.join()
    graph.close()
def _calculate_modeled_biomass_from_mask(
        base_lulc_raster_path, new_forest_mask_raster_path,
        target_biomass_raster_path):
    """Model biomass for a base LULC with a new-forest mask applied.

    Two tasks are run in a TaskGraph whose working directory is named after
    the target raster (without extension) and sits next to it: first the
    masked pixels of the base LULC are replaced with ``FOREST_CODE``, then
    ``dnn_model.run_model`` is run on that converted LULC.

    Args:
        base_lulc_raster_path (str): path to base ESA LULC raster.
        new_forest_mask_raster_path (str): path to raster that indicates
            where new forest is applied with a 1.
        target_biomass_raster_path (str): created by this function, a
            raster that has biomass per pixel for the scenario given by
            new_forest_mask_raster_path from base_lulc_raster_path.

    Returns:
        None
    """
    target_dir = os.path.dirname(target_biomass_raster_path)
    target_stem = os.path.basename(
        os.path.splitext(target_biomass_raster_path)[0])
    work_dir = os.path.join(target_dir, target_stem)
    graph = taskgraph.TaskGraph(work_dir, -1)

    # base LULC with the new forest burned in
    converted_lulc_raster_path = os.path.join(work_dir, 'converted_lulc.tif')
    LOGGER.info(
        f'creating converted LULC off of {base_lulc_raster_path} to '
        f'{converted_lulc_raster_path}')
    convert_task = graph.add_task(
        func=_replace_value_by_mask,
        args=(
            base_lulc_raster_path, FOREST_CODE,
            new_forest_mask_raster_path, converted_lulc_raster_path),
        target_path_list=[converted_lulc_raster_path],
        task_name=f'replace by mask to {converted_lulc_raster_path}')

    # modeled biomass of the converted LULC
    model_task_name = (
        f'calculated modeled biomass for {target_biomass_raster_path}')
    graph.add_task(
        func=dnn_model.run_model,
        args=(
            converted_lulc_raster_path, MODEL_PATH,
            target_biomass_raster_path),
        dependent_task_list=[convert_task],
        target_path_list=[target_biomass_raster_path],
        task_name=model_task_name)

    graph.close()
    graph.join()
def test_existing_regression_coef(self):
    """Recreation test regression coefficients handle existing output."""
    from natcap.invest.recreation import recmodel_client

    workspace = self.workspace_dir

    # -1 workers means single process mode.
    graph = taskgraph.TaskGraph(
        os.path.join(workspace, '_taskgraph_working_dir'), -1)

    aoi_copy_path = os.path.join(workspace, 'no_grid_vector_path.shp')
    polygons_lookup_path = os.path.join(
        workspace, 'response_polygons_lookup.pickle')
    recmodel_client._copy_aoi_no_grid(
        os.path.join(SAMPLE_DATA, 'andros_aoi.shp'), aoi_copy_path)

    predictor_csv_path = os.path.join(SAMPLE_DATA, 'predictors.csv')

    # Pre-create empty outputs so the run below has something to overwrite:
    # one json per predictor id plus the coefficient vector itself.
    predictor_lookup = utils.build_lookup_from_csv(predictor_csv_path, 'id')
    scratch_dir = tempfile.mkdtemp(dir=workspace)
    placeholder_json_paths = []
    for predictor_id in predictor_lookup.keys():
        placeholder_json_paths.append(
            os.path.join(scratch_dir, predictor_id + '.json'))
    coefficient_vector_path = os.path.join(
        workspace, 'out_coefficient_vector.shp')
    _make_empty_files([coefficient_vector_path] + placeholder_json_paths)

    polygons_lookup_task = graph.add_task(
        func=recmodel_client._prepare_response_polygons_lookup,
        args=(aoi_copy_path, polygons_lookup_path),
        target_path_list=[polygons_lookup_path],
        task_name='prepare response polygons for geoprocessing')

    # build again to test against overwriting output
    recmodel_client._schedule_predictor_data_processing(
        aoi_copy_path, polygons_lookup_path, polygons_lookup_task,
        predictor_csv_path, coefficient_vector_path, scratch_dir, graph)

    expected_vector_path = os.path.join(
        REGRESSION_DATA, 'test_regression_coefficients.shp')
    pygeoprocessing.testing.assert_vectors_equal(
        coefficient_vector_path, expected_vector_path, 1E-6)
def main():
    """Entry point: compress every raster given on the command line.

    Each input ``<name>.<ext>`` is passed to ``compress_to`` together with
    the chosen resample method and the target path ``<name>_compressed.tif``.

    Returns:
        None
    """
    parser = argparse.ArgumentParser(
        description='Compress and build overview for raster.')
    parser.add_argument(
        'filepath', nargs='+', help='Raster files to compress.')
    parser.add_argument(
        '--resample_method', default='near',
        help='A gdal valid interpolation method (e.g. near, bilinear, etc.)')
    args = parser.parse_args()

    # Create the TaskGraph only after the arguments parsed successfully so
    # `--help` or a usage error does not leave a stray working directory.
    task_graph = taskgraph.TaskGraph('compression_taskgraph_dir', -1)

    for file_path in args.filepath:
        target_path = f'{os.path.splitext(file_path)[0]}_compressed.tif'
        LOGGER.info(f'starting {file_path} to {target_path}')
        compress_to(task_graph, file_path, args.resample_method, target_path)

    # Make sure everything scheduled by compress_to has finished and the
    # graph is released before exiting.
    task_graph.close()
    task_graph.join()
def main():
    """Main.

    Creates the working directories, runs the download-and-unzip task for
    the global DEM, then logs every ``*.tif`` found in the DEM directory.
    """
    dem_dir = os.path.join(CHURN_DIR, 'dem_dir')
    for needed_dir in (WORKSPACE_DIR, CHURN_DIR, ECOSHARD_DIR, dem_dir):
        try:
            os.makedirs(needed_dir)
        except OSError:
            pass

    graph = taskgraph.TaskGraph(CHURN_DIR, -1)
    fetch_dem_task = graph.add_task(
        func=download_and_unzip,
        args=(GLOBAL_DEM_ECOSHARD_URL, dem_dir),
        task_name='unzip and download dem')
    # the tifs are only listed after the download task has been joined
    fetch_dem_task.join()

    dem_tif_pattern = os.path.join(dem_dir, '*.tif')
    for dem_tif_path in glob.glob(dem_tif_pattern):
        LOGGER.debug(dem_tif_path)

    graph.join()
    graph.close()
def main():
    """Run ``my_func_is_rawk`` over a fixed set of rasters with TaskGraph.

    One ``pg.raster_calculator`` task is scheduled per input raster, each
    writing a Float32 raster with nodata -9999 of the same basename into
    ``c:/temp``. After those finish, a task with no function is added that
    depends on all of them, and the graph is joined and closed.

    Returns:
        None
    """
    path = 'c:/temp'
    task_graph = taskgraph.TaskGraph(path, 4)
    uris = [
        "C:\\test_data\\lulc_rgb.tif",
        "C:\\test_data\\clay_percent.tif",
        "C:\\test_data\\lulc_modis_2012.tif",
        "C:\\test_data\\lulc_modis_2012_dupe.tif",
    ]
    r_tasks = []
    for raster_path in uris:
        out_path = os.path.join(path, os.path.basename(raster_path))
        r_task = task_graph.add_task(
            func=pg.raster_calculator,
            args=(
                [(raster_path, 1)], my_func_is_rawk, out_path,
                gdal.GDT_Float32, -9999),
            target_path_list=[out_path])
        r_tasks.append(r_task)
        print('hi rich', out_path)
    task_graph.join()
    print('sup', 'yo')

    # Task with no function that depends on every raster task.
    task_graph.add_task(dependent_task_list=r_tasks)

    # The graph was created with 4 workers: wait for the last task and
    # release the workers instead of leaving them to interpreter teardown.
    task_graph.close()
    task_graph.join()
def execute(args):
    """GLOBIO.

    The model operates in two modes.  Mode (a) generates a landcover map
    based on a base landcover map and information about crop yields,
    infrastructure, and more.  Mode (b) assumes the globio landcover map is
    generated.  These modes are used below to describe input parameters.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['predefined_globio'] (boolean): if True then "mode (b)" else
            "mode (a)"
        args['results_suffix'] (string): (optional) string to append to any
            output files
        args['lulc_path'] (string): used in "mode (a)" path to a base
            landcover map with integer codes
        args['lulc_to_globio_table_path'] (string): used in "mode (a)" path
            to table that translates the land-cover args['lulc_path'] to
            intermediate GLOBIO classes, from which they will be further
            differentiated using the additional data in the model.
            Contains at least the following fields:

            * 'lucode': Land use and land cover class code of the dataset
              used. LULC codes match the 'values' column in the LULC
              raster of mode (b) and must be numeric and unique.
            * 'globio_lucode': The LULC code corresponding to the GLOBIO
              class to which it should be converted, using intermediate
              codes described in the example below.

        args['infrastructure_dir'] (string): used in "mode (a) and (b)" a
            path to a folder containing maps of either gdal compatible
            rasters or OGR compatible shapefiles.  These data will be used
            in the infrastructure calculation of MSA.
        args['pasture_path'] (string): used in "mode (a)" path to pasture
            raster
        args['potential_vegetation_path'] (string): used in "mode (a)" path
            to potential vegetation raster
        args['pasture_threshold'] (float): used in "mode (a)"
        args['intensification_fraction'] (float): used in "mode (a)"; a
            value between 0 and 1 denoting proportion of total agriculture
            that should be classified as 'high input'
        args['primary_threshold'] (float): used in "mode (a)"
        args['msa_parameters_path'] (string): path to MSA classification
            parameters
        args['aoi_path'] (string): (optional) if present and non-empty then
            the final MSA raster is summarized by AOI
        args['globio_lulc_path'] (string): used in "mode (b)" path to
            predefined globio raster.
        args['n_workers'] (int): (optional) The number of worker processes
            to use for processing this model.  If omitted, computation will
            take place in the current process.

    Returns:
        None

    """
    msa_parameter_table = load_msa_parameter_table(
        args['msa_parameters_path'], float(args['intensification_fraction']))
    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    output_dir = os.path.join(args['workspace_dir'])
    # For intermediate files that users may want to explore:
    intermediate_dir = os.path.join(
        args['workspace_dir'], 'intermediate_outputs')
    # For intermediate files that users probably don't need to see,
    # but should persist for taskgraph purposes:
    tmp_dir = os.path.join(intermediate_dir, 'tmp')
    utils.make_directories([output_dir, intermediate_dir, tmp_dir])

    # Initialize a TaskGraph
    taskgraph_db_dir = os.path.join(
        intermediate_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # single process mode.
    task_graph = taskgraph.TaskGraph(taskgraph_db_dir, n_workers)

    gaussian_kernel_path = os.path.join(
        tmp_dir, 'gaussian_kernel%s.tif' % file_suffix)
    make_gaussian_kernel_task = task_graph.add_task(
        func=make_gaussian_kernel_path,
        args=(SIGMA, gaussian_kernel_path),
        target_path_list=[gaussian_kernel_path],
        task_name='gaussian_kernel')

    calculate_globio_task_list = []
    # get base raster cell size and nodata from whichever lulc is
    # provided in args
    if not args['predefined_globio']:
        globio_lulc_path = os.path.join(
            intermediate_dir, 'globio_lulc%s.tif' % file_suffix)
        base_lulc_info = pygeoprocessing.get_raster_info(args['lulc_path'])
        # mean of the absolute x and y pixel dimensions
        out_pixel_size = (abs(base_lulc_info['pixel_size'][0]) +
                          abs(base_lulc_info['pixel_size'][1])) / 2
        globio_nodata = -1
        globio_lulc_task = _calculate_globio_lulc_map(
            args['lulc_to_globio_table_path'], args['lulc_path'],
            args['potential_vegetation_path'], args['pasture_path'],
            gaussian_kernel_path, float(args['pasture_threshold']),
            float(args['primary_threshold']), file_suffix, tmp_dir,
            globio_lulc_path, globio_nodata, task_graph)
        calculate_globio_task_list.append(globio_lulc_task)
    else:
        LOGGER.info('no need to calculate GLOBIO LULC because it is passed in')
        globio_lulc_path = args['globio_lulc_path']
        globio_lulc_info = pygeoprocessing.get_raster_info(globio_lulc_path)
        # mean of the absolute x and y pixel dimensions
        out_pixel_size = (abs(globio_lulc_info['pixel_size'][0]) +
                          abs(globio_lulc_info['pixel_size'][1])) / 2
        globio_nodata = globio_lulc_info['nodata'][0]

    infrastructure_path = os.path.join(
        tmp_dir, 'combined_infrastructure%s.tif' % file_suffix)
    combine_infrastructure_task = task_graph.add_task(
        func=_collapse_infrastructure_layers,
        args=(args['infrastructure_dir'], globio_lulc_path,
              infrastructure_path, tmp_dir),
        target_path_list=[infrastructure_path],
        dependent_task_list=calculate_globio_task_list,
        task_name='combine_infrastructure')

    # calc_msa_f
    primary_veg_mask_path = os.path.join(
        tmp_dir, 'primary_veg_mask%s.tif' % file_suffix)
    primary_veg_mask_nodata = -1

    LOGGER.info("create mask of primary veg areas")
    # lucodes for primary veg are hardcoded in the local_op
    mask_primary_veg_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(globio_lulc_path, 1), (globio_nodata, 'raw'),
               (primary_veg_mask_nodata, 'raw')],
              _primary_veg_mask_op, primary_veg_mask_path,
              gdal.GDT_Int16, primary_veg_mask_nodata),
        target_path_list=[primary_veg_mask_path],
        dependent_task_list=calculate_globio_task_list,
        task_name='mask_primary_veg')

    LOGGER.info('smooth primary veg areas with gaussian filter')
    smoothed_primary_veg_mask_path = os.path.join(
        tmp_dir, 'smoothed_primary_veg_mask%s.tif' % file_suffix)
    smooth_primary_veg_mask_task = task_graph.add_task(
        func=pygeoprocessing.convolve_2d,
        args=((primary_veg_mask_path, 1), (gaussian_kernel_path, 1),
              smoothed_primary_veg_mask_path),
        target_path_list=[smoothed_primary_veg_mask_path],
        dependent_task_list=[
            mask_primary_veg_task, make_gaussian_kernel_task],
        task_name='smooth_primary_veg_mask')

    LOGGER.info('calculate primary_veg_smooth')
    # Passing the filter over the veg mask means veg has bled outside the
    # mask, so mask it again to get the final ffqi
    primary_veg_smooth_path = os.path.join(
        intermediate_dir, 'primary_veg_smooth%s.tif' % file_suffix)
    smooth_primary_veg_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(primary_veg_mask_path, 1),
               (smoothed_primary_veg_mask_path, 1),
               (primary_veg_mask_nodata, 'raw')],
              _ffqi_op, primary_veg_smooth_path,
              gdal.GDT_Float32, primary_veg_mask_nodata),
        target_path_list=[primary_veg_smooth_path],
        dependent_task_list=[smooth_primary_veg_mask_task],
        task_name='smooth_primary_veg')

    LOGGER.info('calculate msa_f')
    msa_nodata = -1
    msa_f_table = msa_parameter_table['msa_f']
    msa_f_path = os.path.join(output_dir, 'msa_f%s.tif' % file_suffix)
    calculate_msa_f_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(primary_veg_smooth_path, 1),
               (primary_veg_mask_nodata, 'raw'),
               (msa_f_table, 'raw'), (msa_nodata, 'raw')],
              _msa_f_op, msa_f_path, gdal.GDT_Float32, msa_nodata),
        target_path_list=[msa_f_path],
        dependent_task_list=[smooth_primary_veg_task],
        task_name='calculate_msa_f')

    # calc_msa_i
    msa_i_other_table = msa_parameter_table['msa_i_other']
    msa_i_primary_table = msa_parameter_table['msa_i_primary']

    LOGGER.info('distance transform infrasture raster')
    distance_to_infrastructure_path = os.path.join(
        intermediate_dir, 'distance_to_infrastructure%s.tif' % file_suffix)
    distance_to_infrastructure_task = task_graph.add_task(
        func=pygeoprocessing.distance_transform_edt,
        args=((infrastructure_path, 1), distance_to_infrastructure_path),
        target_path_list=[distance_to_infrastructure_path],
        dependent_task_list=[combine_infrastructure_task],
        task_name='distance_to_infrastructure')

    LOGGER.info('calculate msa_i')
    msa_i_path = os.path.join(output_dir, 'msa_i%s.tif' % file_suffix)
    calculate_msa_i_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(globio_lulc_path, 1), (distance_to_infrastructure_path, 1),
               (out_pixel_size, 'raw'), (msa_i_primary_table, 'raw'),
               (msa_i_other_table, 'raw')],
              _msa_i_op, msa_i_path, gdal.GDT_Float32, msa_nodata),
        target_path_list=[msa_i_path],
        dependent_task_list=[distance_to_infrastructure_task],
        task_name='calculate_msa_i')

    # calc_msa_lu
    msa_lu_path = os.path.join(output_dir, 'msa_lu%s.tif' % file_suffix)
    LOGGER.info('calculate msa_lu')
    calculate_msa_lu_task = task_graph.add_task(
        func=pygeoprocessing.reclassify_raster,
        args=((globio_lulc_path, 1), msa_parameter_table['msa_lu'],
              msa_lu_path, gdal.GDT_Float32, globio_nodata),
        target_path_list=[msa_lu_path],
        dependent_task_list=calculate_globio_task_list,
        task_name='calculate_msa_lu')

    LOGGER.info('calculate msa')
    msa_path = os.path.join(output_dir, 'msa%s.tif' % file_suffix)
    calculate_msa_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=([(msa_f_path, 1), (msa_lu_path, 1), (msa_i_path, 1),
               (globio_nodata, 'raw')],
              _msa_op, msa_path, gdal.GDT_Float32, msa_nodata),
        target_path_list=[msa_path],
        dependent_task_list=[
            calculate_msa_f_task, calculate_msa_i_task,
            calculate_msa_lu_task],
        task_name='calculate_msa')

    LOGGER.info('summarize msa result in AOI polygons')
    # the AOI is an optional argument: a missing key, None, or an empty
    # string all mean "no AOI summary requested"
    if args.get('aoi_path'):
        summary_aoi_path = os.path.join(
            output_dir, 'aoi_summary%s.shp' % file_suffix)
        task_graph.add_task(
            func=_summarize_results_in_aoi,
            args=(args['aoi_path'], summary_aoi_path, msa_path),
            target_path_list=[summary_aoi_path],
            dependent_task_list=[calculate_msa_task],
            task_name='summarize_msa_in_aoi')

    task_graph.close()
    task_graph.join()
def execute(args): """Annual Water Yield: Reservoir Hydropower Production. Executes the hydropower/water_yield model Parameters: args['workspace_dir'] (string): a path to the directory that will write output and other temporary files during calculation. (required) args['lulc_path'] (string): a path to a land use/land cover raster whose LULC indexes correspond to indexes in the biophysical table input. Used for determining soil retention and other biophysical properties of the landscape. (required) args['depth_to_root_rest_layer_path'] (string): a path to an input raster describing the depth of "good" soil before reaching this restrictive layer (required) args['precipitation_path'] (string): a path to an input raster describing the average annual precipitation value for each cell (mm) (required) args['pawc_path'] (string): a path to an input raster describing the plant available water content value for each cell. Plant Available Water Content fraction (PAWC) is the fraction of water that can be stored in the soil profile that is available for plants' use. PAWC is a fraction from 0 to 1 (required) args['eto_path'] (string): a path to an input raster describing the annual average evapotranspiration value for each cell. Potential evapotranspiration is the potential loss of water from soil by both evaporation from the soil and transpiration by healthy Alfalfa (or grass) if sufficient water is available (mm) (required) args['watersheds_path'] (string): a path to an input shapefile of the watersheds of interest as polygons. (required) args['sub_watersheds_path'] (string): a path to an input shapefile of the subwatersheds of interest that are contained in the ``args['watersheds_path']`` shape provided as input. (optional) args['biophysical_table_path'] (string): a path to an input CSV table of land use/land cover classes, containing data on biophysical coefficients such as root_depth (mm) and Kc, which are required. 
A column with header LULC_veg is also required which should have values of 1 or 0, 1 indicating a land cover type of vegetation, a 0 indicating non vegetation or wetland, water. NOTE: these data are attributes of each LULC class rather than attributes of individual cells in the raster map (required) args['seasonality_constant'] (float): floating point value between 1 and 30 corresponding to the seasonal distribution of precipitation (required) args['results_suffix'] (string): a string that will be concatenated onto the end of file names (optional) args['demand_table_path'] (string): (optional) if a non-empty string, a path to an input CSV table of LULC classes, showing consumptive water use for each landuse / land-cover type (cubic meters per year) to calculate water scarcity. args['valuation_table_path'] (string): (optional) if a non-empty string, a path to an input CSV table of hydropower stations with the following fields to calculate valuation: ('ws_id', 'time_span', 'discount', 'efficiency', 'fraction', 'cost', 'height', 'kw_price') Required if ``calculate_valuation`` is True. args['n_workers'] (int): (optional) The number of worker processes to use for processing this model. If omitted, computation will take place in the current process. Returns: None """ LOGGER.info('Validating arguments') invalid_parameters = validate(args) if invalid_parameters: raise ValueError("Invalid parameters passed: %s" % invalid_parameters) # valuation_params is passed to create_vector_output() # which computes valuation if valuation_params is not None. 
valuation_params = None if 'valuation_table_path' in args and args['valuation_table_path'] != '': LOGGER.info( 'Checking that watersheds have entries for every `ws_id` in the ' 'valuation table.') # Open/read in valuation parameters from CSV file valuation_params = utils.build_lookup_from_csv( args['valuation_table_path'], 'ws_id') watershed_vector = gdal.OpenEx(args['watersheds_path'], gdal.OF_VECTOR) watershed_layer = watershed_vector.GetLayer() missing_ws_ids = [] for watershed_feature in watershed_layer: watershed_ws_id = watershed_feature.GetField('ws_id') if watershed_ws_id not in valuation_params: missing_ws_ids.append(watershed_ws_id) watershed_feature = None watershed_layer = None watershed_vector = None if missing_ws_ids: raise ValueError( 'The following `ws_id`s exist in the watershed vector file ' 'but are not found in the valuation table. Check your ' 'valuation table to see if they are missing: "%s"' % (', '.join(str(x) for x in sorted(missing_ws_ids)))) # Construct folder paths workspace_dir = args['workspace_dir'] output_dir = os.path.join(workspace_dir, 'output') per_pixel_output_dir = os.path.join(output_dir, 'per_pixel') intermediate_dir = os.path.join(workspace_dir, 'intermediate') pickle_dir = os.path.join(intermediate_dir, '_tmp_zonal_stats') utils.make_directories([ workspace_dir, output_dir, per_pixel_output_dir, intermediate_dir, pickle_dir ]) # Append a _ to the suffix if it's not empty and doesn't already have one file_suffix = utils.make_suffix_string(args, 'results_suffix') # Paths for targets of align_and_resize_raster_stack clipped_lulc_path = os.path.join(intermediate_dir, 'clipped_lulc%s.tif' % file_suffix) eto_path = os.path.join(intermediate_dir, 'eto%s.tif' % file_suffix) precip_path = os.path.join(intermediate_dir, 'precip%s.tif' % file_suffix) depth_to_root_rest_layer_path = os.path.join( intermediate_dir, 'depth_to_root_rest_layer%s.tif' % file_suffix) pawc_path = os.path.join(intermediate_dir, 'pawc%s.tif' % file_suffix) 
tmp_pet_path = os.path.join(intermediate_dir, 'pet%s.tif' % file_suffix) # Paths for output rasters fractp_path = os.path.join(per_pixel_output_dir, 'fractp%s.tif' % file_suffix) wyield_path = os.path.join(per_pixel_output_dir, 'wyield%s.tif' % file_suffix) aet_path = os.path.join(per_pixel_output_dir, 'aet%s.tif' % file_suffix) demand_path = os.path.join(intermediate_dir, 'demand%s.tif' % file_suffix) watersheds_path = args['watersheds_path'] watershed_results_vector_path = os.path.join( output_dir, 'watershed_results_wyield%s.shp' % file_suffix) watershed_paths_list = [(watersheds_path, 'ws_id', watershed_results_vector_path)] sub_watersheds_path = None if 'sub_watersheds_path' in args and args['sub_watersheds_path'] != '': sub_watersheds_path = args['sub_watersheds_path'] subwatershed_results_vector_path = os.path.join( output_dir, 'subwatershed_results_wyield%s.shp' % file_suffix) watershed_paths_list.append((sub_watersheds_path, 'subws_id', subwatershed_results_vector_path)) seasonality_constant = float(args['seasonality_constant']) # Initialize a TaskGraph work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir') try: n_workers = int(args['n_workers']) except (KeyError, ValueError, TypeError): # KeyError when n_workers is not present in args # ValueError when n_workers is an empty string. # TypeError when n_workers is None. n_workers = -1 # single process mode. 
graph = taskgraph.TaskGraph(work_token_dir, n_workers) base_raster_path_list = [ args['eto_path'], args['precipitation_path'], args['depth_to_root_rest_layer_path'], args['pawc_path'], args['lulc_path'] ] aligned_raster_path_list = [ eto_path, precip_path, depth_to_root_rest_layer_path, pawc_path, clipped_lulc_path ] target_pixel_size = pygeoprocessing.get_raster_info( args['lulc_path'])['pixel_size'] align_raster_stack_task = graph.add_task( pygeoprocessing.align_and_resize_raster_stack, args=(base_raster_path_list, aligned_raster_path_list, ['near'] * len(base_raster_path_list), target_pixel_size, 'intersection'), kwargs={ 'raster_align_index': 4, 'base_vector_path_list': [watersheds_path] }, target_path_list=aligned_raster_path_list, task_name='align_raster_stack') # Joining now since this task will always be the root node # and it's useful to have the raster info available. align_raster_stack_task.join() nodata_dict = { 'out_nodata': -1.0, 'precip': pygeoprocessing.get_raster_info(precip_path)['nodata'][0], 'eto': pygeoprocessing.get_raster_info(eto_path)['nodata'][0], 'depth_root': pygeoprocessing.get_raster_info(depth_to_root_rest_layer_path) ['nodata'][0], 'pawc': pygeoprocessing.get_raster_info(pawc_path)['nodata'][0], 'lulc': pygeoprocessing.get_raster_info(clipped_lulc_path)['nodata'][0] } # Open/read in the csv file into a dictionary and add to arguments bio_dict = utils.build_lookup_from_csv(args['biophysical_table_path'], 'lucode', to_lower=True) bio_lucodes = set(bio_dict.keys()) bio_lucodes.add(nodata_dict['lulc']) LOGGER.debug('bio_lucodes %s', bio_lucodes) if 'demand_table_path' in args and args['demand_table_path'] != '': demand_dict = utils.build_lookup_from_csv(args['demand_table_path'], 'lucode') demand_reclassify_dict = dict([(lucode, demand_dict[lucode]['demand']) for lucode in demand_dict]) demand_lucodes = set(demand_dict.keys()) demand_lucodes.add(nodata_dict['lulc']) LOGGER.debug('demand_lucodes %s', demand_lucodes) else: demand_lucodes = 
None valid_lulc_txt_path = os.path.join(intermediate_dir, 'valid_lulc_values.txt') check_missing_lucodes_task = graph.add_task( _check_missing_lucodes, args=(clipped_lulc_path, demand_lucodes, bio_lucodes, valid_lulc_txt_path), target_path_list=[valid_lulc_txt_path], dependent_task_list=[align_raster_stack_task], task_name='check_missing_lucodes') # Break the bio_dict into three separate dictionaries based on # Kc, root_depth, and LULC_veg fields to use for reclassifying Kc_dict = {} root_dict = {} vegetated_dict = {} for lulc_code in bio_dict: Kc_dict[lulc_code] = bio_dict[lulc_code]['kc'] # Catch invalid LULC_veg values with an informative error. lulc_veg_value = bio_dict[lulc_code]['lulc_veg'] try: vegetated_dict[lulc_code] = int(lulc_veg_value) if vegetated_dict[lulc_code] not in set([0, 1]): raise ValueError() except ValueError: # If the user provided an invalid LULC_veg value, raise an # informative error. raise ValueError('LULC_veg value must be either 1 or 0, not %s', lulc_veg_value) # If LULC_veg value is 1 get root depth value if vegetated_dict[lulc_code] == 1.0: root_dict[lulc_code] = bio_dict[lulc_code]['root_depth'] # If LULC_veg value is 0 then we do not care about root # depth value so will just substitute in a 1.0 . This # value will not end up being used. 
else: root_dict[lulc_code] = 1.0 # Create Kc raster from table values to use in future calculations LOGGER.info("Reclassifying temp_Kc raster") tmp_Kc_raster_path = os.path.join(intermediate_dir, 'kc_raster.tif') create_Kc_raster_task = graph.add_task( func=pygeoprocessing.reclassify_raster, args=((clipped_lulc_path, 1), Kc_dict, tmp_Kc_raster_path, gdal.GDT_Float32, nodata_dict['out_nodata']), target_path_list=[tmp_Kc_raster_path], dependent_task_list=[ align_raster_stack_task, check_missing_lucodes_task ], task_name='create_Kc_raster') # Create root raster from table values to use in future calculations LOGGER.info("Reclassifying tmp_root raster") tmp_root_raster_path = os.path.join(intermediate_dir, 'root_depth.tif') create_root_raster_task = graph.add_task( func=pygeoprocessing.reclassify_raster, args=((clipped_lulc_path, 1), root_dict, tmp_root_raster_path, gdal.GDT_Float32, nodata_dict['out_nodata']), target_path_list=[tmp_root_raster_path], dependent_task_list=[ align_raster_stack_task, check_missing_lucodes_task ], task_name='create_root_raster') # Create veg raster from table values to use in future calculations # of determining which AET equation to use LOGGER.info("Reclassifying tmp_veg raster") tmp_veg_raster_path = os.path.join(intermediate_dir, 'veg.tif') create_veg_raster_task = graph.add_task( func=pygeoprocessing.reclassify_raster, args=((clipped_lulc_path, 1), vegetated_dict, tmp_veg_raster_path, gdal.GDT_Float32, nodata_dict['out_nodata']), target_path_list=[tmp_veg_raster_path], dependent_task_list=[ align_raster_stack_task, check_missing_lucodes_task ], task_name='create_veg_raster') dependent_tasks_for_watersheds_list = [] LOGGER.info('Calculate PET from Ref Evap times Kc') calculate_pet_task = graph.add_task( func=pygeoprocessing.raster_calculator, args=([(eto_path, 1), (tmp_Kc_raster_path, 1), (nodata_dict['eto'], 'raw'), (nodata_dict['out_nodata'], 'raw')], pet_op, tmp_pet_path, gdal.GDT_Float32, nodata_dict['out_nodata']), 
target_path_list=[tmp_pet_path], dependent_task_list=[create_Kc_raster_task], task_name='calculate_pet') dependent_tasks_for_watersheds_list.append(calculate_pet_task) # List of rasters to pass into the vectorized fractp operation raster_list = [ tmp_Kc_raster_path, eto_path, precip_path, tmp_root_raster_path, depth_to_root_rest_layer_path, pawc_path, tmp_veg_raster_path ] LOGGER.debug('Performing fractp operation') calculate_fractp_task = graph.add_task( func=pygeoprocessing.raster_calculator, args=([(x, 1) for x in raster_list] + [(nodata_dict, 'raw'), (seasonality_constant, 'raw')], fractp_op, fractp_path, gdal.GDT_Float32, nodata_dict['out_nodata']), target_path_list=[fractp_path], dependent_task_list=[ create_Kc_raster_task, create_veg_raster_task, create_root_raster_task, align_raster_stack_task ], task_name='calculate_fractp') LOGGER.info('Performing wyield operation') calculate_wyield_task = graph.add_task( func=pygeoprocessing.raster_calculator, args=([(fractp_path, 1), (precip_path, 1), (nodata_dict['precip'], 'raw'), (nodata_dict['out_nodata'], 'raw')], wyield_op, wyield_path, gdal.GDT_Float32, nodata_dict['out_nodata']), target_path_list=[wyield_path], dependent_task_list=[calculate_fractp_task, align_raster_stack_task], task_name='calculate_wyield') dependent_tasks_for_watersheds_list.append(calculate_wyield_task) LOGGER.debug('Performing aet operation') calculate_aet_task = graph.add_task(func=pygeoprocessing.raster_calculator, args=([ (fractp_path, 1), (precip_path, 1), (nodata_dict['precip'], 'raw'), (nodata_dict['out_nodata'], 'raw') ], aet_op, aet_path, gdal.GDT_Float32, nodata_dict['out_nodata']), target_path_list=[aet_path], dependent_task_list=[ calculate_fractp_task, create_veg_raster_task, align_raster_stack_task ], task_name='calculate_aet') dependent_tasks_for_watersheds_list.append(calculate_aet_task) # list of rasters that will always be summarized with zonal stats raster_names_paths_list = [('precip_mn', precip_path), ('PET_mn', 
tmp_pet_path), ('AET_mn', aet_path), ('wyield_mn', wyield_path)] if 'demand_table_path' in args and args['demand_table_path'] != '': # Create demand raster from table values to use in future calculations create_demand_raster_task = graph.add_task( func=pygeoprocessing.reclassify_raster, args=((clipped_lulc_path, 1), demand_reclassify_dict, demand_path, gdal.GDT_Float32, nodata_dict['out_nodata']), target_path_list=[demand_path], dependent_task_list=[ align_raster_stack_task, check_missing_lucodes_task ], task_name='create_demand_raster') dependent_tasks_for_watersheds_list.append(create_demand_raster_task) raster_names_paths_list.append(('demand', demand_path)) # Aggregate results to watershed polygons, and do the optional # scarcity and valuation calculations. for base_ws_path, ws_id_name, target_ws_path in watershed_paths_list: zonal_stats_task_list = [] zonal_stats_pickle_list = [] # Do zonal stats with the input shapefiles provided by the user # and store results dictionaries in pickles for key_name, rast_path in raster_names_paths_list: target_stats_pickle = os.path.join( pickle_dir, '%s_%s%s.pickle' % (ws_id_name, key_name, file_suffix)) zonal_stats_pickle_list.append((target_stats_pickle, key_name)) zonal_stats_task_list.append( graph.add_task( func=zonal_stats_tofile, args=(base_ws_path, rast_path, target_stats_pickle), target_path_list=[target_stats_pickle], dependent_task_list=dependent_tasks_for_watersheds_list, task_name='%s_%s_zonalstats' % (ws_id_name, key_name))) # Create copies of the input shapefiles in the output workspace. # Add the zonal stats data to the attribute tables. 
# Compute optional scarcity and valuation create_output_vector_task = graph.add_task( func=create_vector_output, args=(base_ws_path, target_ws_path, ws_id_name, zonal_stats_pickle_list, valuation_params), target_path_list=[target_ws_path], dependent_task_list=zonal_stats_task_list, task_name='create_%s_vector_output' % ws_id_name) # Export a CSV with all the fields present in the output vector target_basename = os.path.splitext(target_ws_path)[0] target_csv_path = target_basename + '.csv' create_output_table_task = graph.add_task( func=convert_vector_to_csv, args=(target_ws_path, target_csv_path), target_path_list=[target_csv_path], dependent_task_list=[create_output_vector_task], task_name='create_%s_table_output' % ws_id_name) graph.join()
if __name__ == '__main__':
    # Command line: one or more coastal-vulnerability risk vector paths
    # (or patterns matching such files).
    parser = argparse.ArgumentParser(description='Calculate risk from reefs')
    parser.add_argument(
        'cv_risk_vector_pattern', nargs='+',
        help='Can be a pattern to a file.')
    args = parser.parse_args()

    # Make sure every working directory exists before any task touches it.
    for dir_path in [WORKSPACE_DIR, ECOSHARD_DIR, CHURN_DIR]:
        try:
            os.makedirs(dir_path)
        except OSError:
            # An already-existing directory is fine; anything else (e.g.
            # permission denied, or the path is a regular file) is a real
            # error and must not be silently ignored.
            if not os.path.isdir(dir_path):
                raise

    # 2 worker processes, progress reported every 5.0 seconds.
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, 2, 5.0)

    # Schedule the download of all base datasets into ECOSHARD_DIR.  The
    # second argument is presumably the key used to look the ecoshard up
    # later -- confirm against TaskGraphDownloader.download_ecoshard.
    tdd_downloader = taskgraph_downloader_pnn.TaskGraphDownloader(
        ECOSHARD_DIR, task_graph)
    tdd_downloader.download_ecoshard(GLOBAL_REEFS_RASTER_URL, 'reefs')
    tdd_downloader.download_ecoshard(
        LS_POPULATION_RASTER_URL, 'total_pop', decompress='unzip',
        local_path='lspop2017')
    tdd_downloader.download_ecoshard(POVERTY_POPULATION_RASTER_URL, 'poor_pop')
    tdd_downloader.download_ecoshard(GLOBAL_DEM_RASTER_URL, 'global_dem')

    # Target pixel size (x, y) in degrees; y is negative (north-up raster).
    reef_degree_pixel_size = [0.004, -0.004]
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)  # EPSG:4326 is WGS84 lat/lng
    projected_reef_raster_path = os.path.join(CHURN_DIR, 'wgs84_reefs.tif')
def _execute(args):
    """Execute the seasonal water yield model.

    Validates the tabular inputs, creates the workspace directories and the
    file registry, then schedules every step of the model on a
    ``taskgraph.TaskGraph`` and blocks until all tasks have completed.

    Parameters:
        args (dict): model arguments.  See the parameters for
            `natcap.invest.seasonal_water_yield.seasonal_wateryield.execute`.

    Returns:
        None

    Raises:
        ValueError: if the biophysical table contains a ``cn_*``/``kc_*``
            value that cannot be parsed as a float, if the AOI path is the
            same as the output aggregate vector path, or if the monthly
            et0/precipitation raster for some month is missing or ambiguous.
    """
    LOGGER.info('prepare and test inputs for common errors')

    # fail early on a missing required rain events table
    if (not args['user_defined_local_recharge'] and
            not args['user_defined_climate_zones']):
        rain_events_lookup = (
            utils.build_lookup_from_csv(
                args['rain_events_table_path'], 'month'))

    biophysical_table = utils.build_lookup_from_csv(
        args['biophysical_table_path'], 'lucode')

    # Every curve number and monthly Kc column must hold a float-like value.
    required_float_fields = ['cn_a', 'cn_b', 'cn_c', 'cn_d'] + [
        'kc_%d' % (month_index+1) for month_index in range(N_MONTHS)]
    bad_value_list = []
    for lucode, value in biophysical_table.items():
        for biophysical_id in required_float_fields:
            try:
                float(value[biophysical_id])
            except ValueError:
                bad_value_list.append(
                    (biophysical_id, lucode, value[biophysical_id]))

    if bad_value_list:
        raise ValueError(
            'biophysical_table at %s seems to have the following incorrect '
            'values (expecting all floating point numbers): %s' % (
                args['biophysical_table_path'], ','.join(
                    ['%s(lucode %d): "%s"' % (
                        biophysical_id, lucode, bad_value)
                     for biophysical_id, lucode, bad_value in
                     bad_value_list])))

    if args['monthly_alpha']:
        # parse out the alpha lookup table of the form (month_id: alpha_val)
        alpha_month_map = {
            key: val['alpha'] for key, val in
            utils.build_lookup_from_csv(
                args['monthly_alpha_path'], 'month').items()}
    else:
        # make all 12 entries equal to args['alpha_m']
        alpha_m = float(fractions.Fraction(args['alpha_m']))
        alpha_month_map = {
            month_index+1: alpha_m for month_index in range(N_MONTHS)}

    # Fraction() lets these be given either as decimals or as 'a/b' strings.
    beta_i = float(fractions.Fraction(args['beta_i']))
    gamma = float(fractions.Fraction(args['gamma']))
    threshold_flow_accumulation = float(args['threshold_flow_accumulation'])
    pixel_size = pygeoprocessing.get_raster_info(
        args['dem_raster_path'])['pixel_size']
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    intermediate_output_dir = os.path.join(
        args['workspace_dir'], 'intermediate_outputs')
    cache_dir = os.path.join(args['workspace_dir'], 'cache_dir')
    output_dir = args['workspace_dir']
    utils.make_directories([intermediate_output_dir, cache_dir, output_dir])

    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous mode.
    task_graph = taskgraph.TaskGraph(
        cache_dir, n_workers, reporting_interval=5.0)

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
         (_TMP_BASE_FILES, cache_dir)], file_suffix)

    LOGGER.info('Checking that the AOI is not the output aggregate vector')
    if (os.path.normpath(args['aoi_path']) ==
            os.path.normpath(file_registry['aggregate_vector_path'])):
        raise ValueError(
            "The input AOI is the same as the output aggregate vector, "
            "please choose a different workspace or move the AOI file "
            "out of the current workspace %s" %
            file_registry['aggregate_vector_path'])

    LOGGER.info('Aligning and clipping dataset list')
    input_align_list = [args['lulc_raster_path'], args['dem_raster_path']]
    output_align_list = [
        file_registry['lulc_aligned_path'], file_registry['dem_aligned_path']]
    if not args['user_defined_local_recharge']:
        precip_path_list = []
        et0_path_list = []

        et0_dir_list = [
            os.path.join(args['et0_dir'], f) for f in os.listdir(
                args['et0_dir'])]
        precip_dir_list = [
            os.path.join(args['precip_dir'], f) for f in os.listdir(
                args['precip_dir'])]

        for month_index in range(1, N_MONTHS + 1):
            # Match "<anything><non-digit><month>.<ext>" so that e.g. month
            # 1 does not also match a file ending in "11.tif".
            month_file_match = re.compile(r'.*[^\d]%d\.[^.]+$' % month_index)

            for data_type, dir_list, path_list in [
                    ('et0', et0_dir_list, et0_path_list),
                    ('Precip', precip_dir_list, precip_path_list)]:
                file_list = [
                    month_file_path for month_file_path in dir_list
                    if month_file_match.match(month_file_path)]
                if len(file_list) == 0:
                    raise ValueError(
                        "No %s found for month %d" % (data_type, month_index))
                if len(file_list) > 1:
                    raise ValueError(
                        "Ambiguous set of files found for month %d: %s" %
                        (month_index, file_list))
                path_list.append(file_list[0])

        # The LULC and DEM stay at the end so the DEM remains the last entry.
        input_align_list = (
            precip_path_list + [args['soil_group_path']] + et0_path_list +
            input_align_list)
        output_align_list = (
            file_registry['precip_path_aligned_list'] +
            [file_registry['soil_group_aligned_path']] +
            file_registry['et0_path_aligned_list'] + output_align_list)

    align_index = len(input_align_list) - 1  # this aligns with the DEM
    if args['user_defined_local_recharge']:
        input_align_list.append(args['l_path'])
        output_align_list.append(file_registry['l_aligned_path'])
    elif args['user_defined_climate_zones']:
        input_align_list.append(args['climate_zone_raster_path'])
        output_align_list.append(
            file_registry['cz_aligned_raster_path'])
    interpolate_list = ['near'] * len(input_align_list)

    align_task = task_graph.add_task(
        func=pygeoprocessing.align_and_resize_raster_stack,
        args=(
            input_align_list, output_align_list, interpolate_list,
            pixel_size, 'intersection'),
        kwargs={
            'base_vector_path_list': (args['aoi_path'],),
            'raster_align_index': align_index},
        target_path_list=output_align_list,
        task_name='align rasters')

    fill_pit_task = task_graph.add_task(
        func=pygeoprocessing.routing.fill_pits,
        args=(
            (file_registry['dem_aligned_path'], 1),
            file_registry['dem_pit_filled_path']),
        kwargs={'working_dir': cache_dir},
        target_path_list=[file_registry['dem_pit_filled_path']],
        dependent_task_list=[align_task],
        task_name='fill dem pits')

    flow_dir_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_dir_mfd,
        args=(
            (file_registry['dem_pit_filled_path'], 1),
            file_registry['flow_dir_mfd_path']),
        kwargs={'working_dir': cache_dir},
        target_path_list=[file_registry['flow_dir_mfd_path']],
        dependent_task_list=[fill_pit_task],
        task_name='flow dir mfd')

    flow_accum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=(
            (file_registry['flow_dir_mfd_path'], 1),
            file_registry['flow_accum_path']),
        target_path_list=[file_registry['flow_accum_path']],
        dependent_task_list=[flow_dir_task],
        task_name='flow accum task')

    stream_threshold_task = task_graph.add_task(
        func=pygeoprocessing.routing.extract_streams_mfd,
        args=(
            (file_registry['flow_accum_path'], 1),
            (file_registry['flow_dir_mfd_path'], 1),
            threshold_flow_accumulation,
            file_registry['stream_path']),
        target_path_list=[file_registry['stream_path']],
        dependent_task_list=[flow_accum_task],
        task_name='stream threshold')

    LOGGER.info('quick flow')
    if args['user_defined_local_recharge']:
        # Downstream tasks read 'l_path'; point it at the aligned user raster.
        file_registry['l_path'] = file_registry['l_aligned_path']

        l_avail_task = task_graph.add_task(
            func=_calculate_l_avail,
            args=(
                file_registry['l_path'], gamma,
                file_registry['l_avail_path']),
            target_path_list=[file_registry['l_avail_path']],
            dependent_task_list=[align_task],
            task_name='l avail task')
    else:
        # user didn't predefine local recharge so calculate it
        LOGGER.info('loading number of monthly events')
        if args['user_defined_climate_zones']:
            # Parse the climate zone table once rather than once per month.
            cz_rain_events_lookup = (
                utils.build_lookup_from_csv(
                    args['climate_zone_table_path'], 'cz_id'))
        reclassify_n_events_task_list = []
        for month_id in range(N_MONTHS):
            if args['user_defined_climate_zones']:
                month_label = MONTH_ID_TO_LABEL[month_id]
                climate_zone_rain_events_month = {
                    cz_id: cz_rain_events_lookup[cz_id][month_label]
                    for cz_id in cz_rain_events_lookup}
                n_events_nodata = -1
                n_events_task = task_graph.add_task(
                    func=pygeoprocessing.reclassify_raster,
                    args=(
                        (file_registry['cz_aligned_raster_path'], 1),
                        climate_zone_rain_events_month,
                        file_registry['n_events_path_list'][month_id],
                        gdal.GDT_Float32, n_events_nodata),
                    kwargs={'values_required': True},
                    target_path_list=[
                        file_registry['n_events_path_list'][month_id]],
                    dependent_task_list=[align_task],
                    task_name='n_events for month %d' % month_id)
                reclassify_n_events_task_list.append(n_events_task)
            else:
                # rain_events_lookup defined near entry point of execute
                n_events = rain_events_lookup[month_id+1]['events']
                n_events_task = task_graph.add_task(
                    func=pygeoprocessing.new_raster_from_base,
                    args=(
                        file_registry['dem_aligned_path'],
                        file_registry['n_events_path_list'][month_id],
                        gdal.GDT_Float32, [TARGET_NODATA]),
                    kwargs={'fill_value_list': (n_events,)},
                    target_path_list=[
                        file_registry['n_events_path_list'][month_id]],
                    dependent_task_list=[align_task],
                    hash_algorithm='md5',
                    copy_duplicate_artifact=True,
                    task_name=(
                        'n_events as a constant raster month %d' % month_id))
                reclassify_n_events_task_list.append(n_events_task)

        curve_number_task = task_graph.add_task(
            func=_calculate_curve_number_raster,
            args=(
                file_registry['lulc_aligned_path'],
                file_registry['soil_group_aligned_path'],
                biophysical_table, file_registry['cn_path']),
            target_path_list=[file_registry['cn_path']],
            dependent_task_list=[align_task],
            task_name='calculate curve number')

        si_task = task_graph.add_task(
            func=_calculate_si_raster,
            args=(
                file_registry['cn_path'], file_registry['stream_path'],
                file_registry['si_path']),
            target_path_list=[file_registry['si_path']],
            dependent_task_list=[curve_number_task, stream_threshold_task],
            task_name='calculate Si raster')

        quick_flow_task_list = []
        for month_index in range(N_MONTHS):
            LOGGER.info('calculate quick flow for month %d', month_index+1)
            monthly_quick_flow_task = task_graph.add_task(
                func=_calculate_monthly_quick_flow,
                args=(
                    file_registry['precip_path_aligned_list'][month_index],
                    file_registry['lulc_aligned_path'],
                    file_registry['cn_path'],
                    file_registry['n_events_path_list'][month_index],
                    file_registry['stream_path'],
                    file_registry['si_path'],
                    file_registry['qfm_path_list'][month_index]),
                target_path_list=[
                    file_registry['qfm_path_list'][month_index]],
                dependent_task_list=[
                    align_task, reclassify_n_events_task_list[month_index],
                    si_task, stream_threshold_task],
                hash_algorithm='md5',
                copy_duplicate_artifact=True,
                task_name='calculate quick flow for month %d' % (
                    month_index+1))
            quick_flow_task_list.append(monthly_quick_flow_task)

        qf_task = task_graph.add_task(
            func=_calculate_annual_qfi,
            args=(file_registry['qfm_path_list'], file_registry['qf_path']),
            target_path_list=[file_registry['qf_path']],
            dependent_task_list=quick_flow_task_list,
            task_name='calculate QFi')

        LOGGER.info('calculate local recharge')
        kc_task_list = []
        for month_index in range(N_MONTHS):
            kc_lookup = {
                lucode: biophysical_table[lucode]['kc_%d' % (month_index+1)]
                for lucode in biophysical_table}
            kc_nodata = -1  # a reasonable nodata value
            kc_task = task_graph.add_task(
                func=pygeoprocessing.reclassify_raster,
                args=(
                    (file_registry['lulc_aligned_path'], 1), kc_lookup,
                    file_registry['kc_path_list'][month_index],
                    gdal.GDT_Float32, kc_nodata),
                target_path_list=[file_registry['kc_path_list'][month_index]],
                dependent_task_list=[align_task],
                hash_algorithm='md5',
                copy_duplicate_artifact=True,
                task_name='classify kc month %d' % month_index)
            kc_task_list.append(kc_task)

        # call through to a cython function that does the necessary routing
        # between AET and L.sum.avail in equation [7], [4], and [3]
        calculate_local_recharge_task = task_graph.add_task(
            func=seasonal_water_yield_core.calculate_local_recharge,
            args=(
                file_registry['precip_path_aligned_list'],
                file_registry['et0_path_aligned_list'],
                file_registry['qfm_path_list'],
                file_registry['flow_dir_mfd_path'],
                file_registry['kc_path_list'],
                alpha_month_map,
                beta_i, gamma, file_registry['stream_path'],
                file_registry['l_path'],
                file_registry['l_avail_path'],
                file_registry['l_sum_avail_path'],
                file_registry['aet_path']),
            target_path_list=[
                file_registry['l_path'],
                file_registry['l_avail_path'],
                file_registry['l_sum_avail_path'],
                file_registry['aet_path']],
            # The monthly Kc rasters are inputs to this task, so it must
            # also wait on the tasks that create them; otherwise it can run
            # before they exist when the graph executes in parallel.
            dependent_task_list=[
                align_task, flow_dir_task, stream_threshold_task,
                fill_pit_task, qf_task] + quick_flow_task_list +
            kc_task_list,
            task_name='calculate local recharge')

    # calculate Qb as the sum of local_recharge_avail over the AOI, Eq [9]
    if args['user_defined_local_recharge']:
        vri_dependent_task_list = [l_avail_task]
    else:
        vri_dependent_task_list = [calculate_local_recharge_task]

    vri_task = task_graph.add_task(
        func=_calculate_vri,
        args=(file_registry['l_path'], file_registry['vri_path']),
        target_path_list=[file_registry['vri_path']],
        dependent_task_list=vri_dependent_task_list,
        task_name='calculate vri')

    aggregate_recharge_task = task_graph.add_task(
        func=_aggregate_recharge,
        args=(
            args['aoi_path'], file_registry['l_path'],
            file_registry['vri_path'],
            file_registry['aggregate_vector_path']),
        target_path_list=[file_registry['aggregate_vector_path']],
        dependent_task_list=[vri_task],
        task_name='aggregate recharge')

    LOGGER.info('calculate L_sum')  # Eq. [12]
    l_sum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=(
            (file_registry['flow_dir_mfd_path'], 1),
            file_registry['l_sum_path']),
        kwargs={'weight_raster_path_band': (file_registry['l_path'], 1)},
        target_path_list=[file_registry['l_sum_path']],
        dependent_task_list=vri_dependent_task_list + [
            fill_pit_task, flow_dir_task, stream_threshold_task],
        task_name='calculate l sum')

    if args['user_defined_local_recharge']:
        b_sum_dependent_task_list = [l_avail_task]
    else:
        b_sum_dependent_task_list = [calculate_local_recharge_task]

    b_sum_task = task_graph.add_task(
        func=seasonal_water_yield_core.route_baseflow_sum,
        args=(
            file_registry['flow_dir_mfd_path'],
            file_registry['l_path'],
            file_registry['l_avail_path'],
            file_registry['l_sum_path'],
            file_registry['stream_path'],
            file_registry['b_path'],
            file_registry['b_sum_path']),
        target_path_list=[
            file_registry['b_sum_path'], file_registry['b_path']],
        dependent_task_list=b_sum_dependent_task_list + [l_sum_task],
        task_name='calculate B_sum')

    # No more tasks will be added; wait for everything scheduled to finish.
    task_graph.close()
    task_graph.join()

    LOGGER.info(' (\\w/) SWY Complete!')
    LOGGER.info(' (.. \\ ')
    LOGGER.info(' _/ ) \\______')
    LOGGER.info('(oo /\'\\ )`,')
    LOGGER.info(' `--\' (v __( / ||')
    LOGGER.info(' ||| ||| ||')
    LOGGER.info(' //_| //_|')
def execute(args):
    """Urban Flood Risk Mitigation model.

    The model computes the peak flow attenuation for each pixel, delineates
    areas benefiting from this service, then calculates the monetary value of
    potential avoided damage to built infrastructure.

    Parameters:
        args['workspace_dir'] (string): a path to the directory that will
            write output and other temporary files during calculation.
        args['results_suffix'] (string): appended to any output file name.
        args['aoi_watersheds_path'] (string): path to a shapefile of
            (sub)watersheds or sewersheds used to indicate spatial area of
            interest.
        args['rainfall_depth'] (float): depth of rainfall in mm.
        args['lulc_path'] (string): path to a landcover raster.
        args['soils_hydrological_group_raster_path'] (string): Raster with
            values equal to 1, 2, 3, 4, corresponding to soil hydrologic group
            A, B, C, or D, respectively (used to derive the CN number).
        args['curve_number_table_path'] (string): path to a CSV table that
            contains at least the headers 'lucode', 'CN_A', 'CN_B', 'CN_C',
            'CN_D'.
        args['built_infrastructure_vector_path'] (string): (optional) path to
            a vector with built infrastructure footprints. Attribute table
            contains a column 'Type' with integers (e.g. 1=residential,
            2=office, etc.).  A missing key, ``''`` or ``None`` all mean
            "not provided"; in that case only the raster outputs are
            produced.
        args['infrastructure_damage_loss_table_path'] (string): (optional)
            path to a CSV table with columns 'Type' and 'Damage' with values
            of built infrastructure type from the 'Type' field in
            `args['built_infrastructure_vector_path']` and potential damage
            loss (in $/m^2).
        args['n_workers'] (int): (optional) if present, indicates how many
            worker processes should be used in parallel processing. -1
            indicates single process mode, 0 is single process but
            non-blocking mode, and >= 1 is number of processes.

    Returns:
        None.

    """
    # Decide once whether the optional infrastructure vector was supplied so
    # that the damage table lookup and the early return further down agree.
    has_built_infrastructure = (
        'built_infrastructure_vector_path' in args and
        args['built_infrastructure_vector_path'] not in ('', None))
    if has_built_infrastructure:
        infrastructure_damage_loss_table_path = (
            args['infrastructure_damage_loss_table_path'])
    else:
        infrastructure_damage_loss_table_path = None

    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    temporary_working_dir = os.path.join(
        args['workspace_dir'], 'temp_working_dir_not_for_humans')
    intermediate_dir = os.path.join(
        args['workspace_dir'], 'intermediate_files')
    utils.make_directories(
        [args['workspace_dir'], intermediate_dir, temporary_working_dir])

    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous mode.
    task_graph = taskgraph.TaskGraph(temporary_working_dir, n_workers)

    # Align LULC with soils
    aligned_lulc_path = os.path.join(
        temporary_working_dir, 'aligned_lulc%s.tif' % file_suffix)
    aligned_soils_path = os.path.join(
        temporary_working_dir,
        'aligned_soils_hydrological_group%s.tif' % file_suffix)

    lulc_raster_info = pygeoprocessing.get_raster_info(args['lulc_path'])
    target_pixel_size = lulc_raster_info['pixel_size']
    pixel_area = abs(target_pixel_size[0] * target_pixel_size[1])
    target_sr_wkt = lulc_raster_info['projection']

    soil_raster_info = pygeoprocessing.get_raster_info(
        args['soils_hydrological_group_raster_path'])

    align_raster_stack_task = task_graph.add_task(
        func=pygeoprocessing.align_and_resize_raster_stack,
        args=(
            [args['lulc_path'],
             args['soils_hydrological_group_raster_path']],
            [aligned_lulc_path, aligned_soils_path],
            ['mode', 'mode'],
            target_pixel_size, 'intersection'),
        kwargs={
            'target_sr_wkt': target_sr_wkt,
            'base_vector_path_list': [args['aoi_watersheds_path']],
            'raster_align_index': 0},
        target_path_list=[aligned_lulc_path, aligned_soils_path],
        task_name='align raster stack')

    # Load CN table
    cn_table = utils.build_lookup_from_csv(
        args['curve_number_table_path'], 'lucode')

    # make cn_table into a 2d array where first dim is lucode, second is
    # 0..3 to correspond to CN_A..CN_D
    data = []
    row_ind = []
    for lucode in cn_table:
        data.extend([
            cn_table[lucode]['cn_%s' % soil_id]
            for soil_id in ['a', 'b', 'c', 'd']])
        row_ind.extend([int(lucode)] * 4)
    # four columns (soil groups A..D) for every lucode row added above
    col_ind = [0, 1, 2, 3] * (len(row_ind) // 4)
    lucode_to_cn_table = scipy.sparse.csr_matrix((data, (row_ind, col_ind)))

    cn_nodata = -1
    lucode_nodata = lulc_raster_info['nodata'][0]
    soil_type_nodata = soil_raster_info['nodata'][0]

    cn_raster_path = os.path.join(
        temporary_working_dir, 'cn_raster%s.tif' % file_suffix)
    align_raster_stack_task.join()

    cn_raster_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(aligned_lulc_path, 1), (aligned_soils_path, 1),
             (lucode_nodata, 'raw'), (soil_type_nodata, 'raw'),
             (cn_nodata, 'raw'), (lucode_to_cn_table, 'raw')],
            _lu_to_cn_op, cn_raster_path, gdal.GDT_Float32, cn_nodata),
        target_path_list=[cn_raster_path],
        dependent_task_list=[align_raster_stack_task],
        task_name='create cn raster')

    # Generate S_max
    s_max_nodata = -9999
    s_max_raster_path = os.path.join(
        temporary_working_dir, 's_max%s.tif' % file_suffix)
    s_max_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(cn_raster_path, 1), (cn_nodata, 'raw'),
             (s_max_nodata, 'raw')],
            _s_max_op, s_max_raster_path, gdal.GDT_Float32, s_max_nodata),
        target_path_list=[s_max_raster_path],
        dependent_task_list=[cn_raster_task],
        task_name='create s_max')

    # Generate Qpi
    q_pi_nodata = -9999.
    q_pi_raster_path = os.path.join(
        intermediate_dir, 'Q_mm%s.tif' % file_suffix)
    q_pi_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(float(args['rainfall_depth']), 'raw'),
             (s_max_raster_path, 1), (s_max_nodata, 'raw'),
             (q_pi_nodata, 'raw')],
            _q_pi_op, q_pi_raster_path, gdal.GDT_Float32, q_pi_nodata),
        target_path_list=[q_pi_raster_path],
        dependent_task_list=[s_max_task],
        task_name='create q_pi')

    # Generate Runoff Retention
    runoff_retention_nodata = -9999.
    runoff_retention_raster_path = os.path.join(
        args['workspace_dir'], 'Runoff_retention%s.tif' % file_suffix)
    runoff_retention_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(q_pi_raster_path, 1), (float(args['rainfall_depth']), 'raw'),
             (q_pi_nodata, 'raw'), (runoff_retention_nodata, 'raw')],
            _runoff_retention_op, runoff_retention_raster_path,
            gdal.GDT_Float32, runoff_retention_nodata),
        target_path_list=[runoff_retention_raster_path],
        dependent_task_list=[q_pi_task],
        task_name='generate runoff retention')

    # calculate runoff retention volume
    runoff_retention_ret_vol_raster_path = os.path.join(
        args['workspace_dir'], 'Runoff_retention_m3%s.tif' % file_suffix)
    runoff_retention_ret_vol_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(runoff_retention_raster_path, 1),
             (runoff_retention_nodata, 'raw'),
             (float(args['rainfall_depth']), 'raw'),
             (pixel_area, 'raw'),
             (runoff_retention_nodata, 'raw')],
            _runoff_retention_ret_vol_op,
            runoff_retention_ret_vol_raster_path, gdal.GDT_Float32,
            runoff_retention_nodata),
        target_path_list=[runoff_retention_ret_vol_raster_path],
        dependent_task_list=[runoff_retention_task],
        task_name='calculate runoff retention vol')

    # calculate flood vol raster
    flood_vol_raster_path = os.path.join(
        intermediate_dir, 'Q_m3%s.tif' % file_suffix)
    flood_vol_nodata = -1
    flood_vol_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(float(args['rainfall_depth']), 'raw'),
             (q_pi_raster_path, 1), (q_pi_nodata, 'raw'),
             (pixel_area, 'raw'), (flood_vol_nodata, 'raw')],
            _flood_vol_op, flood_vol_raster_path, gdal.GDT_Float32,
            flood_vol_nodata),
        target_path_list=[flood_vol_raster_path],
        dependent_task_list=[q_pi_task],
        task_name='calculate service built raster')

    # Without built infrastructure there is nothing to aggregate onto, so
    # finish the raster tasks and stop here.
    if not has_built_infrastructure:
        task_graph.close()
        task_graph.join()
        return

    # intersect built_infrastructure_vector_path with aoi_watersheds_path
    intermediate_target_watershed_result_vector_path = os.path.join(
        temporary_working_dir,
        'intermediate_flood_risk_service%s.gpkg' % file_suffix)

    # this is the field name that can be used to uniquely identify a feature
    intermediate_affected_vector_task = task_graph.add_task(
        func=_build_affected_vector,
        args=(
            args['aoi_watersheds_path'], target_sr_wkt,
            infrastructure_damage_loss_table_path,
            args['built_infrastructure_vector_path'],
            intermediate_target_watershed_result_vector_path),
        target_path_list=[intermediate_target_watershed_result_vector_path],
        task_name='build affected vector')

    # do the pickle
    runoff_retention_pickle_path = os.path.join(
        temporary_working_dir,
        'runoff_retention_stats%s.pickle' % file_suffix)
    runoff_retention_pickle_task = task_graph.add_task(
        func=_pickle_zonal_stats,
        args=(
            intermediate_target_watershed_result_vector_path,
            runoff_retention_raster_path, runoff_retention_pickle_path),
        dependent_task_list=[
            intermediate_affected_vector_task, runoff_retention_task],
        target_path_list=[runoff_retention_pickle_path],
        task_name='pickle runoff index stats')

    runoff_retention_ret_vol_pickle_path = os.path.join(
        temporary_working_dir,
        'runoff_retention_ret_vol_stats%s.pickle' % file_suffix)
    runoff_retention_ret_vol_pickle_task = task_graph.add_task(
        func=_pickle_zonal_stats,
        args=(
            intermediate_target_watershed_result_vector_path,
            runoff_retention_ret_vol_raster_path,
            runoff_retention_ret_vol_pickle_path),
        dependent_task_list=[
            intermediate_affected_vector_task,
            runoff_retention_ret_vol_task],
        target_path_list=[runoff_retention_ret_vol_pickle_path],
        task_name='pickle runoff retention volume stats')

    flood_vol_pickle_path = os.path.join(
        temporary_working_dir, 'flood_vol_stats%s.pickle' % file_suffix)
    flood_vol_pickle_task = task_graph.add_task(
        func=_pickle_zonal_stats,
        args=(
            intermediate_target_watershed_result_vector_path,
            flood_vol_raster_path, flood_vol_pickle_path),
        dependent_task_list=[
            intermediate_affected_vector_task, flood_vol_task],
        target_path_list=[flood_vol_pickle_path],
        task_name='pickle flood volume stats')

    # Join the three pickled statistics onto the final output vector.
    target_watershed_result_vector_path = os.path.join(
        args['workspace_dir'], 'flood_risk_service%s.shp' % file_suffix)
    task_graph.add_task(
        func=_add_zonal_stats,
        args=(
            runoff_retention_pickle_path,
            runoff_retention_ret_vol_pickle_path,
            flood_vol_pickle_path,
            intermediate_target_watershed_result_vector_path,
            target_watershed_result_vector_path),
        target_path_list=[target_watershed_result_vector_path],
        dependent_task_list=[
            flood_vol_pickle_task, runoff_retention_ret_vol_pickle_task,
            runoff_retention_pickle_task,
            intermediate_affected_vector_task],
        task_name='add zonal stats')

    task_graph.close()
    task_graph.join()
def main():
    """Entry point: fetch global inputs and schedule one SDR run per watershed.

    The function:

    1. creates the workspace, churn, and ecoshard directories;
    2. schedules ``url_fetch_and_validate`` tasks for the LULC, erosivity and
       erodibility rasters and the biophysical table, and
       ``download_validate_and_unzip`` tasks for the DEM and watersheds
       archives;
    3. schedules ``make_vrt`` over the ``global_dem_3s/*.tif`` tiles;
    4. waits for all of the above, then iterates over every feature of every
       watershed shapefile.  Features whose area is below 0.03 square degrees
       are skipped; for each remaining feature it schedules a
       ``make_local_watershed`` task, a clip/align of the four base rasters
       to that watershed, and a ``natcap.invest.sdr.execute`` run on the
       clipped rasters.

    Returns:
        None

    Raises:
        ValueError: if the same watershed prefix (shapefile basename plus
            feature FID) is encountered more than once.
    """
    for dir_path in [WORKSPACE_DIR, CHURN_DIR, ECOSHARD_DIR]:
        # exist_ok tolerates reruns while still surfacing real failures
        # (e.g. permissions) instead of silently swallowing every OSError.
        os.makedirs(dir_path, exist_ok=True)

    task_graph = taskgraph.TaskGraph(
        os.path.join(WORKSPACE_DIR, 'taskgraph_cache'), N_CPUS,
        TASKGRAPH_REPORTING_FREQUENCY)
    root_logger = logging.getLogger()
    root_logger.setLevel(LOGGING_LEVEL)

    lulc_path = os.path.join(ECOSHARD_DIR, os.path.basename(LULC_URL))
    fetch_lulc_task = task_graph.add_task(
        func=url_fetch_and_validate,
        args=(LULC_URL, lulc_path),
        target_path_list=[lulc_path],
        task_name='fetch lulc raster')

    erosivity_path = os.path.join(
        ECOSHARD_DIR, os.path.basename(EROSIVITY_URL))
    fetch_erosivity_task = task_graph.add_task(
        func=url_fetch_and_validate,
        args=(EROSIVITY_URL, erosivity_path),
        target_path_list=[erosivity_path],
        task_name='fetch erosivity raster')

    erodibility_path = os.path.join(
        ECOSHARD_DIR, os.path.basename(ERODIBILITY_URL))
    fetch_erodibility_task = task_graph.add_task(
        func=url_fetch_and_validate,
        args=(ERODIBILITY_URL, erodibility_path),
        target_path_list=[erodibility_path],
        task_name='fetch erodibility raster')

    # No handle is kept for this task: the task_graph.join() below guarantees
    # the table is on disk before any SDR task that reads it is scheduled.
    biophysical_table_path = os.path.join(
        ECOSHARD_DIR, os.path.basename(BIOPHYSICAL_TABLE_URL))
    task_graph.add_task(
        func=url_fetch_and_validate,
        args=(BIOPHYSICAL_TABLE_URL, biophysical_table_path),
        target_path_list=[biophysical_table_path],
        task_name='fetch biophysical_table raster')

    # The archive tasks target a ".COMPLETE" token file rather than the
    # unzipped contents.
    dem_token_path = os.path.join(
        ECOSHARD_DIR, '%s.COMPLETE' % os.path.basename(DEM_URL))
    fetch_dem_task = task_graph.add_task(
        func=download_validate_and_unzip,
        args=(DEM_URL, ECOSHARD_DIR, dem_token_path),
        target_path_list=[dem_token_path],
        task_name='fetch dem raster')

    watersheds_token_path = os.path.join(
        ECOSHARD_DIR, '%s.COMPLETE' % os.path.basename(WATERSHEDS_URL))
    fetch_watersheds_task = task_graph.add_task(
        func=download_validate_and_unzip,
        args=(WATERSHEDS_URL, ECOSHARD_DIR, watersheds_token_path),
        target_path_list=[watersheds_token_path],
        task_name='fetch watersheds shapefile')

    dem_vrt_path = os.path.join(CHURN_DIR, 'global_dem.vrt')
    dem_vrt_token_path = os.path.join(
        CHURN_DIR, '%s.COMPLETE' % os.path.basename(dem_vrt_path))
    base_raster_pattern = os.path.join(
        ECOSHARD_DIR, 'global_dem_3s', '*.tif')
    make_dem_task = task_graph.add_task(
        func=make_vrt,
        args=(base_raster_pattern, DEM_TARGET_NODATA, dem_vrt_path,
              dem_vrt_token_path),
        dependent_task_list=[fetch_dem_task],
        ignore_path_list=[dem_vrt_path],
        target_path_list=[dem_vrt_token_path],
        task_name='make dem vrt')

    scheduled_watershed_prefixes = set()
    # Everything scheduled so far must exist on disk before the watershed
    # shapefiles and the DEM VRT are opened below.
    task_graph.join()
    fetch_watersheds_task.join()

    # The DEM VRT is the same for every watershed, so read its metadata once
    # instead of once per feature.
    dem_info = pygeoprocessing.get_raster_info(dem_vrt_path)
    dem_pixel_size = dem_info['pixel_size']

    LOGGER.debug('iterating over hydrosheds')
    for watershed_path in glob.glob(
            os.path.join(
                ECOSHARD_DIR, 'watersheds_globe_HydroSHEDS_15arcseconds',
                '*.shp')):
        LOGGER.debug(watershed_path)
        watershed_basename = os.path.splitext(
            os.path.basename(watershed_path))[0]
        watershed_vector = gdal.OpenEx(watershed_path, gdal.OF_VECTOR)
        watershed_layer = watershed_vector.GetLayer()
        for watershed_feature in watershed_layer:
            watershed_fid = watershed_feature.GetFID()
            ws_prefix = 'ws_%s_%d' % (watershed_basename, watershed_fid)
            if ws_prefix in scheduled_watershed_prefixes:
                # Format the message here; passing ws_prefix as a second
                # exception argument would leave the '%s' unexpanded.
                raise ValueError(
                    '%s has already been scheduled' % ws_prefix)
            scheduled_watershed_prefixes.add(ws_prefix)

            watershed_geom = watershed_feature.GetGeometryRef()
            watershed_area = watershed_geom.GetArea()
            if watershed_area < 0.03:
                # 0.03 square degrees is a healthy underapproximation of
                # 100 sq km which is about the minimum watershed size we'd
                # want.
                continue
            LOGGER.info('processing %s', ws_prefix)

            # make a few subdirectories so we don't explode on number of
            # files per directory. The largest watershed is 726k
            last_digits = '%.4d' % watershed_fid
            local_workspace_dir = os.path.join(
                SDR_WORKSPACES_DIR, last_digits[-1], last_digits[-2],
                last_digits[-3], last_digits[-4], "%s" % ws_prefix)
            os.makedirs(local_workspace_dir, exist_ok=True)

            # find EPSG code and pass that/modify SDR for it.  The code is
            # built as 326ZZ (centroid latitude > 0) or 327ZZ (otherwise),
            # where ZZ is the 6-degree UTM zone of the centroid longitude.
            centroid_geom = watershed_geom.Centroid()
            utm_code = (
                math.floor((centroid_geom.GetX() + 180) / 6) % 60) + 1
            lat_code = 6 if centroid_geom.GetY() > 0 else 7
            epsg_code = int('32%d%02d' % (lat_code, utm_code))

            local_watershed_vector_path = os.path.join(
                local_workspace_dir, '%s.gpkg' % ws_prefix)
            make_local_watershed_task = task_graph.add_task(
                func=make_local_watershed,
                args=(watershed_path, watershed_fid, epsg_code,
                      local_watershed_vector_path),
                target_path_list=[local_watershed_vector_path],
                task_name='make local watershed for %s' % ws_prefix)

            # clip dem (and the other base rasters) to the watershed
            clipped_dir = os.path.join(local_workspace_dir, 'pre_clipped')
            os.makedirs(clipped_dir, exist_ok=True)

            # Order here must match base_raster_path_list below and the
            # indexes used when building sdr_args.
            target_raster_path_list = [
                os.path.join(
                    clipped_dir,
                    '%s_clipped%s.tif' % (raster_type, ws_prefix))
                for raster_type in [
                    'dem', 'erosivity', 'erodibility', 'lulc']
            ]
            base_raster_path_list = [
                dem_vrt_path, erosivity_path, erodibility_path, lulc_path
            ]

            pre_align_task = task_graph.add_task(
                func=pygeoprocessing.align_and_resize_raster_stack,
                args=(base_raster_path_list, target_raster_path_list,
                      ['near'] * len(base_raster_path_list),
                      dem_pixel_size, 'intersection'),
                kwargs={
                    'base_vector_path_list': [local_watershed_vector_path],
                    'target_sr_wkt': dem_info['projection']
                },
                dependent_task_list=[
                    fetch_lulc_task, fetch_erosivity_task,
                    fetch_erodibility_task, make_dem_task,
                    make_local_watershed_task
                ],
                target_path_list=target_raster_path_list,
                task_name='pre-clip for %s' % ws_prefix)

            # Scale the DEM's pixel size by length_of_degree() at the
            # centroid latitude (presumably meters per degree -- confirm
            # against length_of_degree).
            m_per_deg = length_of_degree(centroid_geom.GetY())
            target_pixel_size = (
                m_per_deg * dem_pixel_size[0],
                m_per_deg * dem_pixel_size[1])

            sdr_args = {
                'workspace_dir': local_workspace_dir,
                'results_suffix': ws_prefix,
                'dem_path': target_raster_path_list[0],
                'erosivity_path': target_raster_path_list[1],
                'erodibility_path': target_raster_path_list[2],
                'lulc_path': target_raster_path_list[3],
                'watersheds_path': local_watershed_vector_path,
                'biophysical_table_path': biophysical_table_path,
                'threshold_flow_accumulation': 1000,
                'biophysical_table_lucode_header_id': 'ID',
                'k_param': '2',
                'sdr_max': '0.8',
                'ic_0_param': '0.5',
                'local_projection_epsg': epsg_code,
                'target_pixel_size': target_pixel_size,
                'biophysical_table_lucode_field': 'id',
            }

            LOGGER.debug('adding %s', ws_prefix)
            task_graph.add_task(
                func=natcap.invest.sdr.execute,
                args=(sdr_args, ),
                target_path_list=[
                    os.path.join(
                        local_workspace_dir,
                        'sed_export_%s.tif' % ws_prefix)
                ],
                dependent_task_list=[pre_align_task],
                task_name='sdr for %s' % ws_prefix)

    task_graph.close()
    task_graph.join()
def execute(args):
    """Scenic Quality.

    Reprojects the AOI and viewpoint structures to the DEM's projection,
    clips the structures and DEM to the AOI, computes one viewshed raster per
    valid viewpoint, optionally computes a valuation raster per viewshed, and
    finally derives a visual quality raster from either the summed valuation
    rasters or the weighted count of visible structures.

    Args:
        args['workspace_dir'] (string): (required) output directory for
            intermediate, temporary, and final files.
        args['results_suffix'] (string): (optional) string to append to any
            output file.
        args['aoi_path'] (string): (required) path to a vector that
            indicates the area over which the model should be run.
        args['structure_path'] (string): (required) path to a point vector
            that has the features for the viewpoints.  Optional fields:
            'WEIGHT', 'RADIUS' / 'RADIUS2', 'HEIGHT'
        args['dem_path'] (string): (required) path to a digital elevation
            model raster.
        args['refraction'] (float): (required) number indicating the
            refraction coefficient to use for calculating curvature of the
            earth.
        args['do_valuation'] (bool): (optional) indicates whether to compute
            valuation.  If ``False``, per-viewpoint value will not be
            computed, and the summation of valuation rasters
            (vshed_value.tif) will not be created.  Additionally, the
            Viewshed Quality raster will represent the weighted sum of
            viewsheds.  Default: ``False``.
        args['valuation_function'] (string): The type of economic function
            to use for valuation.  One of "linear", "logarithmic", or
            "exponential".
        args['a_coef'] (float): The "a" coefficient for valuation.  Required
            if ``args['do_valuation']`` is ``True``.
        args['b_coef'] (float): The "b" coefficient for valuation.  Required
            if ``args['do_valuation']`` is ``True``.
        args['max_valuation_radius'] (float): Past this distance from the
            viewpoint, the valuation raster's pixel values will be set to 0.
            Required if ``args['do_valuation']`` is ``True``.
        args['n_workers'] (int): (optional) The number of worker processes
            to use for processing this model.  If omitted, computation will
            take place in the current process.

    Returns:
        ``None``

    Raises:
        ValueError: if the valuation function is not one of the options in
            ``ARGS_SPEC`` or if no valid viewpoints are found.

    """
    LOGGER.info("Starting Scenic Quality Model")
    dem_raster_info = pygeoprocessing.get_raster_info(args['dem_path'])

    # A missing 'do_valuation' key means valuation is off.
    do_valuation = bool(args.get('do_valuation', False))

    if do_valuation:
        coefficient_a = float(args['a_coef'])
        coefficient_b = float(args['b_coef'])
        valuation_coefficients = {'a': coefficient_a, 'b': coefficient_b}

        known_functions = ARGS_SPEC['args']['valuation_function']['options']
        if args['valuation_function'] not in known_functions:
            raise ValueError('Valuation function type %s not recognized' %
                             args['valuation_function'])
        max_valuation_radius = float(args['max_valuation_radius'])

    # Set up the output and intermediate directories.
    workspace = args['workspace_dir']
    output_dir = os.path.join(workspace, 'output')
    intermediate_dir = os.path.join(workspace, 'intermediate')
    utils.make_directories([output_dir, intermediate_dir])

    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_dir)],
        file_suffix)

    work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # Missing key, empty string, or None all fall back to synchronous
        # execution.
        n_workers = -1
    graph = taskgraph.TaskGraph(work_token_dir, n_workers)

    dem_projection_wkt = dem_raster_info['projection_wkt']
    aoi_reprojected_path = file_registry['aoi_reprojected']
    structures_reprojected_path = file_registry['structures_reprojected']
    structures_clipped_path = file_registry['structures_clipped']
    clipped_dem_path = file_registry['clipped_dem']

    # phase 1: bring the AOI and structures into the DEM's projection and
    # clip everything to the AOI.
    reprojected_aoi_task = graph.add_task(
        func=pygeoprocessing.reproject_vector,
        args=(args['aoi_path'], dem_projection_wkt, aoi_reprojected_path),
        target_path_list=[aoi_reprojected_path],
        task_name='reproject_aoi_to_dem')

    reprojected_viewpoints_task = graph.add_task(
        func=pygeoprocessing.reproject_vector,
        args=(args['structure_path'], dem_projection_wkt,
              structures_reprojected_path),
        target_path_list=[structures_reprojected_path],
        task_name='reproject_structures_to_dem')

    clipped_viewpoints_task = graph.add_task(
        func=_clip_vector,
        args=(structures_reprojected_path, aoi_reprojected_path,
              structures_clipped_path),
        target_path_list=[structures_clipped_path],
        dependent_task_list=[
            reprojected_aoi_task, reprojected_viewpoints_task],
        task_name='clip_reprojected_structures_to_aoi')

    clipped_dem_task = graph.add_task(
        func=_clip_and_mask_dem,
        args=(args['dem_path'], aoi_reprojected_path, clipped_dem_path,
              intermediate_dir),
        target_path_list=[clipped_dem_path],
        dependent_task_list=[reprojected_aoi_task],
        task_name='clip_dem_to_aoi')

    # The viewshed phase needs both the clipped DEM and clipped structures.
    LOGGER.info('Waiting for clipping to finish')
    clipped_dem_task.join()
    clipped_viewpoints_task.join()

    # phase 2: calculate viewsheds.
    valid_viewpoints_task = graph.add_task(
        func=_determine_valid_viewpoints,
        args=(clipped_dem_path, structures_clipped_path),
        store_result=True,
        dependent_task_list=[clipped_viewpoints_task, clipped_dem_task],
        task_name='determine_valid_viewpoints')

    viewpoint_tuples = valid_viewpoints_task.get()
    if not viewpoint_tuples:
        raise ValueError('No valid viewpoints found. This may happen if '
                         'viewpoints are beyond the edge of the DEM or are '
                         'over nodata pixels.')

    # Sorting by viewpoint outside of the vector keeps the ordering stable,
    # which avoids needless taskgraph recomputation when, for example, a
    # repacked ESRI Shapefile returns its points in a different order.
    ordered_viewpoints = sorted(viewpoint_tuples, key=lambda item: item[0])

    viewshed_files = []
    viewshed_tasks = []
    valuation_tasks = []
    valuation_filepaths = []
    weights = []
    for feature_index, viewpoint_record in enumerate(ordered_viewpoints):
        viewpoint, max_radius, weight, viewpoint_height = viewpoint_record
        weights.append(weight)

        visibility_filepath = file_registry['visibility_pattern'].format(
            id=feature_index)
        viewshed_files.append(visibility_filepath)

        viewshed_kwargs = {
            'curved_earth': True,  # SQ model always assumes this.
            'refraction_coeff': float(args['refraction']),
            'max_distance': max_radius,
            'viewpoint_height': viewpoint_height,
            'aux_filepath': None,  # Remove aux filepath after run
        }
        viewshed_task = graph.add_task(
            func=viewshed,
            args=((clipped_dem_path, 1),  # DEM
                  viewpoint,
                  visibility_filepath),
            kwargs=viewshed_kwargs,
            target_path_list=[visibility_filepath],
            dependent_task_list=[clipped_dem_task, clipped_viewpoints_task],
            task_name=f'calculate_visibility_{feature_index}')
        viewshed_tasks.append(viewshed_task)

        if not do_valuation:
            continue

        # calculate valuation for this viewshed
        viewshed_valuation_path = file_registry['value_pattern'].format(
            id=feature_index)
        valuation_task = graph.add_task(
            func=_calculate_valuation,
            args=(visibility_filepath,
                  viewpoint,
                  weight,  # user defined, from WEIGHT field in vector
                  args['valuation_function'],
                  valuation_coefficients,  # a, b from args, a dict.
                  max_valuation_radius,
                  viewshed_valuation_path),
            target_path_list=[viewshed_valuation_path],
            dependent_task_list=[viewshed_task],
            task_name='calculate_valuation_for_viewshed_%s' % feature_index)
        valuation_tasks.append(valuation_task)
        valuation_filepaths.append(viewshed_valuation_path)

    # The weighted visible structures raster is a leaf node
    n_visible_structures_path = file_registry['n_visible_structures']
    weighted_visible_structures_task = graph.add_task(
        func=_count_and_weight_visible_structures,
        args=(viewshed_files, weights, clipped_dem_path,
              n_visible_structures_path),
        target_path_list=[n_visible_structures_path],
        dependent_task_list=sorted(viewshed_tasks),
        task_name='sum_visibility_for_all_structures')

    # Visual quality is derived from the summed valuation rasters when
    # valuation is on; otherwise the weighted visible structures raster
    # stands in for it.
    if do_valuation:
        viewshed_value_path = file_registry['viewshed_value']
        parent_visual_quality_task = graph.add_task(
            func=_sum_valuation_rasters,
            args=(clipped_dem_path, valuation_filepaths,
                  viewshed_value_path),
            target_path_list=[viewshed_value_path],
            dependent_task_list=sorted(valuation_tasks),
            task_name='add_up_valuation_rasters')
        parent_visual_quality_raster_path = viewshed_value_path
    else:
        parent_visual_quality_task = weighted_visible_structures_task
        parent_visual_quality_raster_path = n_visible_structures_path

    # visual quality is one of the leaf nodes on the task graph.
    graph.add_task(
        func=_calculate_visual_quality,
        args=(parent_visual_quality_raster_path,
              intermediate_dir,
              file_registry['viewshed_quality']),
        dependent_task_list=[parent_visual_quality_task],
        target_path_list=[file_registry['viewshed_quality']],
        task_name='calculate_visual_quality')

    LOGGER.info('Waiting for Scenic Quality tasks to complete.')
    graph.join()
kernel_raster.SetProjection(srs.ExportToWkt()) kernel_band = kernel_raster.GetRasterBand(1) kernel_band.SetNoDataValue(127) kernel_array = numpy.array([[1, 1, 1], [1, 0, 1], [1, 1, 1]]) kernel_array = kernel_array / numpy.sum(kernel_array) kernel_band.WriteArray(kernel_array) if __name__ == '__main__': try: os.makedirs(WARP_DIR) except OSError: pass task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1) tdd_downloader = taskgraph_downloader_pnn.TaskGraphDownloader( ECOSHARD_DIR, task_graph) tdd_downloader.download_ecoshard( WATERSHEDS_URL, 'watersheds', decompress='unzip', local_path='watersheds_globe_HydroSHEDS_15arcseconds') raster_path_base_list = [ #'n_export.tif', 'intermediate_outputs/stream.tif', ] global_raster_info_map = {} for raster_path_pattern in raster_path_base_list:
def execute(args):
    """Forest Carbon Edge Effect.

    InVEST Carbon Edge Model calculates the carbon due to edge effects in
    tropical forest pixels.

    Args:
        args['workspace_dir'] (string): a path to the directory that will
            write output and other temporary files during calculation.
            (required)
        args['results_suffix'] (string): a string to append to any output
            file name (optional)
        args['n_nearest_model_points'] (int): number of nearest neighbor
            model points to search for
        args['aoi_vector_path'] (string): (optional) if present, a path to a
            shapefile that will be used to aggregate carbon stock results at
            the end of the run.
        args['biophysical_table_path'] (string): a path to a CSV table that
            has at least the fields 'lucode' and 'c_above'.  If
            ``args['compute_forest_edge_effects'] == True``, table must
            also contain an 'is_tropical_forest' field.  If
            ``args['pools_to_calculate'] == 'all'``, this table must contain
            the fields 'c_below', 'c_dead', and 'c_soil'.

                * ``lucode``: an integer that corresponds to landcover codes
                  in the raster ``args['lulc_raster_path']``
                * ``is_tropical_forest``: either 0 or 1 indicating whether
                  the landcover type is forest (1) or not (0).  If 1, the
                  value in ``c_above`` is ignored and instead calculated
                  from the edge regression model.
                * ``c_above``: floating point number indicating tons of
                  above ground carbon per hectare for that landcover type
                * ``{'c_below', 'c_dead', 'c_soil'}``: three other optional
                  carbon pools that will statically map landcover types to
                  the carbon densities in the table.

                Example::

                    lucode,is_tropical_forest,c_above,c_soil,c_dead,c_below
                    0,0,32.8,5,5.2,2.1
                    1,1,n/a,2.5,0.0,0.0
                    2,1,n/a,1.8,1.0,0.0
                    16,0,28.1,4.3,0.0,2.0

                Note the "n/a" in ``c_above`` are optional since that field
                is ignored when ``is_tropical_forest==1``.
        args['lulc_raster_path'] (string): path to a integer landcover code
            raster
        args['pools_to_calculate'] (string): if "all" then all carbon pools
            will be calculated.  If any other value only above ground carbon
            pools will be calculated and expect only a 'c_above' header in
            the biophysical table.  If "all" model expects 'c_above',
            'c_below', 'c_dead', 'c_soil' in header of biophysical_table and
            will make a translated carbon map for each based off the
            landcover map.
        args['compute_forest_edge_effects'] (boolean): if True, requires
            biophysical table to have 'is_tropical_forest' forest field, and
            any landcover codes that have a 1 in this column calculate
            carbon stocks using the Chaplin-Kramer et al. method and ignore
            'c_above'.
        args['tropical_forest_edge_carbon_model_vector_path'] (string):
            path to a shapefile that defines the regions for the local
            carbon edge models.  Has at least the fields 'method', 'theta1',
            'theta2', 'theta3'.  Where 'method' is an int between 1..3
            describing the biomass regression model, and the thetas are
            floating point numbers that have different meanings depending
            on the 'method' parameter.  Specifically,

                * method 1 (asymptotic model)::

                    biomass = theta1 - theta2 * exp(-theta3 * edge_dist_km)

                * method 2 (logarithmic model)::

                    # NOTE: theta3 is ignored for this method
                    biomass = theta1 + theta2 * numpy.log(edge_dist_km)

                * method 3 (linear regression)::

                    biomass = theta1 + theta2 * edge_dist_km
        args['biomass_to_carbon_conversion_factor'] (string/float): Number
            by which to multiply forest biomass to convert to carbon in the
            edge effect calculation.
        args['n_workers'] (int): (optional) The number of worker processes
            to use for processing this model.  If omitted, computation will
            take place in the current process.

    Returns:
        None

    Raises:
        ValueError: if an AOI is given but cannot be opened, or if the AOI
            and the landcover raster bounding boxes do not intersect.

    """
    # just check that the AOI exists (and overlaps the landcover raster)
    # since it wouldn't crash until the end of the whole model run if it
    # didn't.  The whole check lives inside this branch because the AOI is
    # optional and every step below reads args['aoi_vector_path'].
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        aoi_vector = gdal.OpenEx(args['aoi_vector_path'], gdal.OF_VECTOR)
        if not aoi_vector:
            raise ValueError(
                "Unable to open aoi at: %s" % args['aoi_vector_path'])
        # Only the ability to open the AOI was being tested; drop the
        # reference to the dataset.
        aoi_vector = None

        lulc_raster_bb = pygeoprocessing.get_raster_info(
            args['lulc_raster_path'])['bounding_box']
        aoi_vector_bb = pygeoprocessing.get_vector_info(
            args['aoi_vector_path'])['bounding_box']
        try:
            merged_bb = pygeoprocessing.merge_bounding_box_list(
                [lulc_raster_bb, aoi_vector_bb], 'intersection')
            LOGGER.debug("merged bounding boxes: %s", merged_bb)
        except ValueError as error:
            # Re-raise with a message naming both inputs, keeping the
            # original error attached as the explicit cause.
            raise ValueError(
                "The landcover raster %s and AOI %s do not touch each "
                "other." % (args['lulc_raster_path'],
                            args['aoi_vector_path'])) from error

    output_dir = args['workspace_dir']
    intermediate_dir = os.path.join(
        args['workspace_dir'], 'intermediate_outputs')
    utils.make_directories([output_dir, intermediate_dir])
    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    # Initialize a TaskGraph
    taskgraph_working_dir = os.path.join(
        intermediate_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # single process mode.
    task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)

    # used to keep track of files generated by this module
    output_file_registry = {
        'c_above_map': os.path.join(
            intermediate_dir, 'c_above_carbon_stocks%s.tif' % file_suffix),
        'carbon_map': os.path.join(
            output_dir, 'carbon_map%s.tif' % file_suffix),
        'aggregated_result_vector': os.path.join(
            output_dir, 'aggregated_carbon_stocks%s.shp' % file_suffix)
    }

    if args['pools_to_calculate'] == 'all':
        output_file_registry['c_below_map'] = os.path.join(
            intermediate_dir, 'c_below_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_soil_map'] = os.path.join(
            intermediate_dir, 'c_soil_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_dead_map'] = os.path.join(
            intermediate_dir, 'c_dead_carbon_stocks%s.tif' % file_suffix)

    if args['compute_forest_edge_effects']:
        output_file_registry['spatial_index_pickle'] = os.path.join(
            intermediate_dir, 'spatial_index%s.pickle' % file_suffix)
        output_file_registry['edge_distance'] = os.path.join(
            intermediate_dir, 'edge_distance%s.tif' % file_suffix)
        output_file_registry['tropical_forest_edge_carbon_map'] = (
            os.path.join(
                intermediate_dir,
                'tropical_forest_edge_carbon_stocks%s.tif' % file_suffix))
        output_file_registry['non_forest_mask'] = os.path.join(
            intermediate_dir, 'non_forest_mask%s.tif' % file_suffix)

    # Map non-forest landcover codes to carbon biomasses
    LOGGER.info('Calculating direct mapped carbon stocks')
    carbon_maps = []
    biophysical_table = utils.build_lookup_from_csv(
        args['biophysical_table_path'], 'lucode', to_lower=False)
    # Column names are taken from the first row of the lookup and lowercased
    # so pool names can be matched case-insensitively.
    biophysical_keys = [
        x.lower() for x in list(biophysical_table.values())[0]
    ]
    # Each entry is (pool name, ignore_tropical_type flag passed on to
    # _calculate_lulc_carbon_map).
    pool_list = [('c_above', True)]
    if args['pools_to_calculate'] == 'all':
        pool_list.extend([
            ('c_below', False), ('c_soil', False), ('c_dead', False)])
    for carbon_pool_type, ignore_tropical_type in pool_list:
        if carbon_pool_type in biophysical_keys:
            carbon_maps.append(
                output_file_registry[carbon_pool_type + '_map'])
            task_graph.add_task(
                func=_calculate_lulc_carbon_map,
                args=(args['lulc_raster_path'],
                      args['biophysical_table_path'],
                      carbon_pool_type, ignore_tropical_type,
                      args['compute_forest_edge_effects'],
                      carbon_maps[-1]),
                target_path_list=[carbon_maps[-1]],
                task_name='calculate_lulc_%s_map' % carbon_pool_type)

    if args['compute_forest_edge_effects']:
        # generate a map of pixel distance to forest edge from the landcover
        # map
        LOGGER.info('Calculating distance from forest edge')
        map_distance_task = task_graph.add_task(
            func=_map_distance_from_tropical_forest_edge,
            args=(args['lulc_raster_path'],
                  args['biophysical_table_path'],
                  output_file_registry['edge_distance'],
                  output_file_registry['non_forest_mask']),
            target_path_list=[
                output_file_registry['edge_distance'],
                output_file_registry['non_forest_mask']
            ],
            task_name='map_distance_from_forest_edge')

        # Build spatial index for gridded global model for closest 3 points
        LOGGER.info('Building spatial index for forest edge models.')
        build_spatial_index_task = task_graph.add_task(
            func=_build_spatial_index,
            args=(args['lulc_raster_path'], intermediate_dir,
                  args['tropical_forest_edge_carbon_model_vector_path'],
                  output_file_registry['spatial_index_pickle']),
            target_path_list=[output_file_registry['spatial_index_pickle']],
            task_name='build_spatial_index')

        # calculate the carbon edge effect on forests
        LOGGER.info('Calculating forest edge carbon')
        task_graph.add_task(
            func=_calculate_tropical_forest_edge_carbon_map,
            args=(output_file_registry['edge_distance'],
                  output_file_registry['spatial_index_pickle'],
                  int(args['n_nearest_model_points']),
                  float(args['biomass_to_carbon_conversion_factor']),
                  output_file_registry['tropical_forest_edge_carbon_map']),
            target_path_list=[
                output_file_registry['tropical_forest_edge_carbon_map']
            ],
            task_name='calculate_forest_edge_carbon_map',
            dependent_task_list=[
                map_distance_task, build_spatial_index_task])

        # This is also a carbon stock
        carbon_maps.append(
            output_file_registry['tropical_forest_edge_carbon_map'])

    # combine maps into a single output
    LOGGER.info('combining carbon maps into single raster')
    carbon_maps_band_list = [(path, 1) for path in carbon_maps]

    # Join here since the raster calculation depends on the target datasets
    # from all the tasks above
    task_graph.join()

    combine_carbon_maps_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(carbon_maps_band_list, combine_carbon_maps,
              output_file_registry['carbon_map'], gdal.GDT_Float32,
              NODATA_VALUE),
        target_path_list=[output_file_registry['carbon_map']],
        task_name='combine_carbon_maps')

    # generate report (optional) by aoi if they exist
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        LOGGER.info('aggregating carbon map by aoi')
        task_graph.add_task(
            func=_aggregate_carbon_map,
            args=(args['aoi_vector_path'],
                  output_file_registry['carbon_map'],
                  output_file_registry['aggregated_result_vector']),
            target_path_list=[
                output_file_registry['aggregated_result_vector']
            ],
            # Distinct from 'combine_carbon_maps' above so the two tasks can
            # be told apart in taskgraph's logging.
            task_name='aggregate_carbon_map',
            dependent_task_list=[combine_carbon_maps_task])

    # close taskgraph
    task_graph.close()
    task_graph.join()
def main():
    """Entry point: derive D8 routing products from a sample DEM.

    Schedules, in dependency order, a chain of taskgraph tasks that write
    into ``WORKSPACE_DIR``:

    1. ``scrubbed_dem.tif`` -- the DEM passed through
       ``scrub_invalid_values`` with a new nodata value;
    2. ``filled_pits_dem.tif`` -- pit-filled version of the scrubbed DEM;
    3. ``flow_dir_d8.tif`` -- D8 flow direction;
    4. ``flow_accum_d8.tif`` -- D8 flow accumulation;
    5. ``stream_segments_<threshold>.gpkg`` -- Strahler stream segments;
    6. ``watershed_boundary.gpkg`` -- watershed boundaries.

    Returns:
        None
    """
    DEM_PATH = (
        'sample_data/Inspring Data/Inputs/DEM/'
        'MERIT DEM Pro Agua Purus Acre clip2.tif')

    # Make sure the workspace exists before any target path inside it is
    # handed to a task.
    os.makedirs(WORKSPACE_DIR, exist_ok=True)

    dem_info = pygeoprocessing.get_raster_info(DEM_PATH)
    dem_type = dem_info['numpy_type']
    scrubbed_dem_path = os.path.join(WORKSPACE_DIR, 'scrubbed_dem.tif')
    nodata = dem_info['nodata'][0]
    # numpy.finfo only accepts floating-point types, so this assumes the DEM
    # is a float raster (an integer DEM would raise ValueError here).
    new_nodata = float(numpy.finfo(dem_type).min)

    # Report the value pixels are scrubbed *to*: new_nodata is the nodata of
    # the target raster, while ``nodata`` is the source raster's value.
    LOGGER.info('scrub invalid values to %s', new_nodata)

    # n_workers=-1: same value the other entry points in this file use for
    # synchronous / single-process execution.
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1)
    scrub_dem_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(DEM_PATH, 1), (nodata, 'raw'), (new_nodata, 'raw')],
            scrub_invalid_values, scrubbed_dem_path,
            dem_info['datatype'], new_nodata),
        target_path_list=[scrubbed_dem_path],
        task_name='scrub dem')

    LOGGER.info('fill pits')
    filled_pits_path = os.path.join(WORKSPACE_DIR, 'filled_pits_dem.tif')
    fill_pits_task = task_graph.add_task(
        func=pygeoprocessing.routing.fill_pits,
        args=((scrubbed_dem_path, 1), filled_pits_path),
        target_path_list=[filled_pits_path],
        dependent_task_list=[scrub_dem_task],
        task_name='fill pits')

    LOGGER.info('flow dir d8')
    flow_dir_d8_path = os.path.join(WORKSPACE_DIR, 'flow_dir_d8.tif')
    flow_dir_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_dir_d8,
        args=((filled_pits_path, 1), flow_dir_d8_path),
        kwargs={'working_dir': WORKSPACE_DIR},
        target_path_list=[flow_dir_d8_path],
        dependent_task_list=[fill_pits_task],
        task_name='flow dir d8')

    LOGGER.info('flow accum d8')
    flow_accum_d8_path = os.path.join(WORKSPACE_DIR, 'flow_accum_d8.tif')
    flow_accum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_d8,
        args=((flow_dir_d8_path, 1), flow_accum_d8_path),
        target_path_list=[flow_accum_d8_path],
        dependent_task_list=[flow_dir_task],
        task_name='flow accum d8')

    flow_threshold = 100
    stream_vector_path = os.path.join(
        WORKSPACE_DIR, f'stream_segments_{flow_threshold}.gpkg')
    extract_stream_task = task_graph.add_task(
        func=pygeoprocessing.routing.extract_strahler_streams_d8,
        args=(
            (flow_dir_d8_path, 1), (flow_accum_d8_path, 1),
            (filled_pits_path, 1), stream_vector_path),
        kwargs={
            'min_flow_accum_threshold': flow_threshold,
            'river_order': 7},
        target_path_list=[stream_vector_path],
        hash_target_files=False,
        dependent_task_list=[flow_accum_task],
        task_name='stream extraction')

    target_watershed_boundary_vector_path = os.path.join(
        WORKSPACE_DIR, 'watershed_boundary.gpkg')
    # transient_run=True is passed through to taskgraph unchanged; the task
    # handle is not needed because nothing depends on it.
    task_graph.add_task(
        func=pygeoprocessing.routing.calculate_watershed_boundary,
        args=(
            (flow_dir_d8_path, 1), stream_vector_path,
            target_watershed_boundary_vector_path, -100),
        target_path_list=[target_watershed_boundary_vector_path],
        transient_run=True,
        dependent_task_list=[extract_stream_task],
        task_name='watershed boundary')

    # Shut the graph down explicitly, matching the other entry points in
    # this file.
    task_graph.close()
    task_graph.join()
def execute(args):
    """RouteDEM: Hydrological routing.

    This model exposes the pygeoprocessing D8 and Multiple Flow Direction
    routing functionality as an InVEST model.

    This tool will always fill pits on the input DEM.

    Args:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['dem_path'] (string): path to a digital elevation raster
        args['dem_band_index'] (int): Optional. The band index to operate on.
            If not provided, band index 1 is assumed.
        args['algorithm'] (string): The routing algorithm to use.  Must be
            one of 'D8' or 'MFD' (case-insensitive). Required when
            calculating flow direction, flow accumulation, stream threshold,
            and downstream distance.
        args['calculate_flow_direction'] (bool): If True, model will
            calculate flow direction for the filled DEM.
        args['calculate_flow_accumulation'] (bool): If True, model will
            calculate a flow accumulation raster. Only applies when
            args['calculate_flow_direction'] is True.
        args['calculate_stream_threshold'] (bool): if True, model will
            calculate a stream classification layer by thresholding flow
            accumulation to the provided value in
            ``args['threshold_flow_accumulation']``.  Only applies when
            args['calculate_flow_accumulation'] and
            args['calculate_flow_direction'] are True.
        args['threshold_flow_accumulation'] (int): The number of upstream
            cells that must flow into a cell before it's classified as a
            stream.
        args['calculate_downstream_distance'] (bool): If True, and a stream
            threshold is calculated, model will calculate a downstream
            distance raster in units of pixels. Only applies when
            args['calculate_flow_accumulation'],
            args['calculate_flow_direction'], and
            args['calculate_stream_threshold'] are all True.
        args['calculate_slope'] (bool): If True, model will calculate a
            slope raster from the DEM.
        args['n_workers'] (int): The ``n_workers`` parameter to pass to
            the task graph.  The default is ``-1`` if not provided.

    Returns:
        ``None``

    Raises:
        RuntimeError: if flow direction is requested and
            ``args['algorithm']`` is not a key of ``_ROUTING_FUNCS``.
    """
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    task_cache_dir = os.path.join(args['workspace_dir'],
                                  '_taskgraph_working_dir')
    utils.make_directories([args['workspace_dir'], task_cache_dir])

    if ('calculate_flow_direction' in args and
            bool(args['calculate_flow_direction'])):
        # All routing functions depend on this one task.
        # Check the algorithm early so we can fail quickly, but only if we're
        # doing some sort of hydrological routing
        algorithm = args['algorithm'].upper()
        try:
            routing_funcs = _ROUTING_FUNCS[algorithm]
        except KeyError:
            raise RuntimeError(
                'Invalid algorithm specified (%s). Must be one of %s' % (
                    args['algorithm'],
                    ', '.join(sorted(_ROUTING_FUNCS.keys()))))

    if 'dem_band_index' in args and args['dem_band_index'] not in (None, ''):
        band_index = int(args['dem_band_index'])
    else:
        band_index = 1
    LOGGER.info('Using DEM band index %s', band_index)

    dem_raster_path_band = (args['dem_path'], band_index)

    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous mode.
    graph = taskgraph.TaskGraph(task_cache_dir, n_workers=n_workers)

    # Calculate slope.  This is intentionally on the original DEM, not
    # on the pitfilled DEM.  If the user really wants the slope of the filled
    # DEM, they can pass it back through RouteDEM.
    if 'calculate_slope' in args and bool(args['calculate_slope']):
        target_slope_path = os.path.join(
            args['workspace_dir'], _TARGET_SLOPE_FILE_PATTERN % file_suffix)
        graph.add_task(
            pygeoprocessing.calculate_slope,
            args=(dem_raster_path_band, target_slope_path),
            task_name='calculate_slope',
            target_path_list=[target_slope_path])

    dem_filled_pits_path = os.path.join(
        args['workspace_dir'],
        _TARGET_FILLED_PITS_FILED_PATTERN % file_suffix)
    filled_pits_task = graph.add_task(
        pygeoprocessing.routing.fill_pits,
        args=(dem_raster_path_band,
              dem_filled_pits_path,
              args['workspace_dir']),
        task_name='fill_pits',
        target_path_list=[dem_filled_pits_path])

    if ('calculate_flow_direction' in args and
            bool(args['calculate_flow_direction'])):
        LOGGER.info("calculating flow direction")
        flow_dir_path = os.path.join(
            args['workspace_dir'],
            _TARGET_FLOW_DIRECTION_FILE_PATTERN % file_suffix)
        flow_direction_task = graph.add_task(
            routing_funcs['flow_direction'],
            args=((dem_filled_pits_path, 1),  # PGP>1.9.0 creates 1-band fills
                  flow_dir_path,
                  args['workspace_dir']),
            target_path_list=[flow_dir_path],
            dependent_task_list=[filled_pits_task],
            task_name='flow_dir_%s' % algorithm)

        if ('calculate_flow_accumulation' in args and
                bool(args['calculate_flow_accumulation'])):
            LOGGER.info("calculating flow accumulation")
            flow_accumulation_path = os.path.join(
                args['workspace_dir'],
                _FLOW_ACCUMULATION_FILE_PATTERN % file_suffix)
            flow_accum_task = graph.add_task(
                routing_funcs['flow_accumulation'],
                args=((flow_dir_path, 1),
                      flow_accumulation_path),
                target_path_list=[flow_accumulation_path],
                task_name='flow_accumulation_%s' % algorithm,
                dependent_task_list=[flow_direction_task])

            if ('calculate_stream_threshold' in args and
                    bool(args['calculate_stream_threshold'])):
                stream_mask_path = os.path.join(
                    args['workspace_dir'],
                    _STREAM_MASK_FILE_PATTERN % file_suffix)
                if algorithm == 'D8':
                    # The accumulation raster must exist before its nodata
                    # value can be read for the thresholding call.
                    flow_accum_task.join()
                    flow_accum_info = pygeoprocessing.get_raster_info(
                        flow_accumulation_path)
                    stream_threshold_task = graph.add_task(
                        pygeoprocessing.raster_calculator,
                        args=(((flow_accumulation_path, 1),
                               (float(args['threshold_flow_accumulation']),
                                'raw'),
                               (flow_accum_info['nodata'][0], 'raw'),
                               (255, 'raw')),
                              _threshold_flow,
                              stream_mask_path,
                              gdal.GDT_Byte,
                              255),
                        target_path_list=[stream_mask_path],
                        task_name='stream_thresholding_D8',
                        dependent_task_list=[flow_accum_task])
                else:  # MFD
                    stream_threshold_task = graph.add_task(
                        routing_funcs['threshold_flow'],
                        args=((flow_accumulation_path, 1),
                              (flow_dir_path, 1),
                              float(args['threshold_flow_accumulation']),
                              stream_mask_path),
                        target_path_list=[stream_mask_path],
                        # A plain string, like every other task name here
                        # (this was previously a one-element list).
                        task_name='stream_extraction_MFD',
                        dependent_task_list=[flow_accum_task])

                if ('calculate_downstream_distance' in args and
                        bool(args['calculate_downstream_distance'])):
                    distance_path = os.path.join(
                        args['workspace_dir'],
                        _DOWNSTREAM_DISTANCE_FILE_PATTERN % file_suffix)
                    graph.add_task(
                        routing_funcs['distance_to_channel'],
                        args=((flow_dir_path, 1),
                              (stream_mask_path, 1),
                              distance_path),
                        target_path_list=[distance_path],
                        task_name='downstream_distance_%s' % algorithm,
                        dependent_task_list=[stream_threshold_task])

    # No further tasks will be scheduled; close the graph so it is shut
    # down once the outstanding work completes.
    graph.close()
    graph.join()
def execute(args):
    """Run the Scenic Quality Model.

    Parameters:
        args['workspace_dir'] (string): (required) output directory for
            intermediate, temporary, and final files.
        args['results_suffix'] (string): (optional) string to append to any
            output file.
        args['aoi_path'] (string): (required) path to a vector that
            indicates the area over which the model should be run.
        args['structure_path'] (string): (required) path to a point vector
            that has the features for the viewpoints. Optional fields:
            'WEIGHT', 'RADIUS' / 'RADIUS2', 'HEIGHT'
        args['dem_path'] (string): (required) path to a digital elevation model
            raster.
        args['refraction'] (float): (required) number indicating the refraction
            coefficient to use for calculating curvature of the earth.
        args['do_valuation'] (bool): (optional) indicates whether to compute
            valuation.  If ``False``, per-viewpoint value will not be computed,
            and the summation of valuation rasters (vshed_value.tif) will not
            be created.  Additionally, the Viewshed Quality raster will
            represent the weighted sum of viewsheds. Default: ``False``.
        args['valuation_function'] (string): The type of economic
            function to use for valuation.  One of "linear", "logarithmic",
            or "exponential".
        args['a_coef'] (float): The "a" coefficient for valuation.  Required
            if ``args['do_valuation']`` is ``True``.
        args['b_coef'] (float): The "b" coefficient for valuation.  Required
            if ``args['do_valuation']`` is ``True``.
        args['max_valuation_radius'] (float): Past this distance
            from the viewpoint, the valuation raster's pixel values will be set
            to 0.  Required if ``args['do_valuation']`` is ``True``.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        ``None``

    Raises:
        ValueError: if ``args['valuation_function']`` is not recognized, or
            if no viewpoint lies both within the clipped DEM and over a
            valid (non-nodata) pixel.
    """
    LOGGER.info("Starting Scenic Quality Model")
    dem_raster_info = pygeoprocessing.get_raster_info(args['dem_path'])

    try:
        do_valuation = bool(args['do_valuation'])
    except KeyError:
        do_valuation = False

    if do_valuation:
        valuation_coefficients = {
            'a': float(args['a_coef']),
            'b': float(args['b_coef']),
        }
        if args['valuation_function'].startswith('linear'):
            valuation_method = 'linear'
        elif args['valuation_function'].startswith('logarithmic'):
            valuation_method = 'logarithmic'
        elif args['valuation_function'].startswith('exponential'):
            valuation_method = 'exponential'
        else:
            raise ValueError('Valuation function type %s not recognized' %
                             args['valuation_function'])
        max_valuation_radius = float(args['max_valuation_radius'])

    # Create output and intermediate directory
    output_dir = os.path.join(args['workspace_dir'], 'output')
    intermediate_dir = os.path.join(args['workspace_dir'], 'intermediate')
    utils.make_directories([output_dir, intermediate_dir])

    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_dir)],
        file_suffix)

    work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous execution
    graph = taskgraph.TaskGraph(work_token_dir, n_workers)

    reprojected_aoi_task = graph.add_task(
        pygeoprocessing.reproject_vector,
        args=(args['aoi_path'],
              dem_raster_info['projection'],
              file_registry['aoi_reprojected']),
        target_path_list=[file_registry['aoi_reprojected']],
        task_name='reproject_aoi_to_dem')

    reprojected_viewpoints_task = graph.add_task(
        pygeoprocessing.reproject_vector,
        args=(args['structure_path'],
              dem_raster_info['projection'],
              file_registry['structures_reprojected']),
        target_path_list=[file_registry['structures_reprojected']],
        task_name='reproject_structures_to_dem')

    clipped_viewpoints_task = graph.add_task(
        _clip_vector,
        args=(file_registry['structures_reprojected'],
              file_registry['aoi_reprojected'],
              file_registry['structures_clipped']),
        target_path_list=[file_registry['structures_clipped']],
        dependent_task_list=[
            reprojected_aoi_task, reprojected_viewpoints_task],
        task_name='clip_reprojected_structures_to_aoi')

    clipped_dem_task = graph.add_task(
        _clip_and_mask_dem,
        args=(args['dem_path'],
              file_registry['aoi_reprojected'],
              file_registry['clipped_dem'],
              intermediate_dir),
        target_path_list=[file_registry['clipped_dem']],
        dependent_task_list=[reprojected_aoi_task],
        task_name='clip_dem_to_aoi')

    # viewshed calculation requires that the DEM and structures are all
    # finished.
    LOGGER.info('Waiting for clipping to finish')
    clipped_dem_task.join()
    clipped_viewpoints_task.join()

    # phase 2: calculate viewsheds.
    LOGGER.info('Setting up viewshed tasks')
    viewpoint_tuples = []
    # NOTE(review): viewpoints are read from the reprojected (not the
    # clipped) structures vector; points outside the DEM are filtered by
    # the checks below -- confirm this is intended.
    structures_vector = gdal.OpenEx(file_registry['structures_reprojected'],
                                    gdal.OF_VECTOR)
    for structures_layer_index in range(structures_vector.GetLayerCount()):
        structures_layer = structures_vector.GetLayer(structures_layer_index)
        layer_name = structures_layer.GetName()
        LOGGER.info('Layer %s has %s features', layer_name,
                    structures_layer.GetFeatureCount())

        for point in structures_layer:
            # Coordinates in map units to pass to viewshed algorithm
            geometry = point.GetGeometryRef()
            viewpoint = (geometry.GetX(), geometry.GetY())

            if not _viewpoint_within_raster(viewpoint,
                                            file_registry['clipped_dem']):
                # Arguments follow the order of the format string: feature
                # id first, then layer name.
                LOGGER.info(
                    ('Feature %s in layer %s is outside of the DEM bounding '
                     'box. Skipping.'), point.GetFID(), layer_name)
                continue

            if _viewpoint_over_nodata(viewpoint,
                                      file_registry['clipped_dem']):
                LOGGER.info(
                    'Feature %s in layer %s is over nodata; skipping.',
                    point.GetFID(), layer_name)
                continue

            # RADIUS is the suggested value for InVEST Scenic Quality
            # RADIUS2 is for users coming from ArcGIS's viewshed.
            # Assume positive infinity if neither field is provided.
            # Positive infinity is represented in our viewshed by None.
            max_radius = None
            for fieldname in ('RADIUS', 'RADIUS2'):
                try:
                    max_radius = math.fabs(point.GetField(fieldname))
                    break
                except (ValueError, KeyError):
                    # When this field is not present.
                    # ValueError was changed to KeyError between GDAL 2.2 and
                    # 2.4.
                    pass

            try:
                viewpoint_height = math.fabs(point.GetField('HEIGHT'))
            except (ValueError, KeyError):
                # When height field is not present, assume height of 0.0
                # ValueError was changed to KeyError between GDAL 2.2 and 2.4.
                viewpoint_height = 0.0

            try:
                weight = float(point.GetField('WEIGHT'))
            except (ValueError, KeyError):
                # When no weight provided, set scale to 1
                # ValueError was changed to KeyError between GDAL 2.2 and 2.4.
                weight = 1.0

            viewpoint_tuples.append(
                (viewpoint, max_radius, weight, viewpoint_height))
    structures_vector = None

    if not viewpoint_tuples:
        raise ValueError('No valid viewpoints found. This may happen if '
                         'viewpoints are beyond the edge of the DEM or are '
                         'over nodata pixels.')

    # These are sorted outside the vector to ensure consistent ordering.  This
    # helps avoid unnecessary recomputation in taskgraph for when an ESRI
    # Shapefile, for example, returns a different order of points because
    # someone decided to repack it.
    viewshed_files = []
    viewshed_tasks = []
    valuation_tasks = []
    valuation_filepaths = []
    weights = []
    sorted_viewpoints = sorted(viewpoint_tuples, key=lambda x: x[0])
    for feature_index, (viewpoint, max_radius, weight,
                        viewpoint_height) in enumerate(sorted_viewpoints):
        weights.append(weight)
        visibility_filepath = file_registry['visibility_pattern'].format(
            id=feature_index)
        viewshed_files.append(visibility_filepath)
        viewshed_task = graph.add_task(
            viewshed,
            args=((file_registry['clipped_dem'], 1),  # DEM
                  viewpoint,
                  visibility_filepath),
            kwargs={
                'curved_earth': True,  # SQ model always assumes this.
                'refraction_coeff': float(args['refraction']),
                'max_distance': max_radius,
                'viewpoint_height': viewpoint_height,
                'aux_filepath': None},  # Remove aux filepath after run
            target_path_list=[visibility_filepath],
            dependent_task_list=[clipped_dem_task,
                                 clipped_viewpoints_task],
            task_name='calculate_visibility_%s' % feature_index)
        viewshed_tasks.append(viewshed_task)

        if do_valuation:
            # calculate valuation
            viewshed_valuation_path = file_registry['value_pattern'].format(
                id=feature_index)
            valuation_task = graph.add_task(
                _calculate_valuation,
                args=(visibility_filepath,
                      viewpoint,
                      weight,  # user defined, from WEIGHT field in vector
                      valuation_method,
                      valuation_coefficients,  # a, b from args, a dict.
                      max_valuation_radius,
                      viewshed_valuation_path),
                target_path_list=[viewshed_valuation_path],
                dependent_task_list=[viewshed_task],
                task_name=(
                    'calculate_valuation_for_viewshed_%s' % feature_index))
            valuation_tasks.append(valuation_task)
            valuation_filepaths.append(viewshed_valuation_path)

    # The weighted visible structures raster is a leaf node
    weighted_visible_structures_task = graph.add_task(
        _count_and_weight_visible_structures,
        args=(viewshed_files,
              weights,
              file_registry['clipped_dem'],
              file_registry['n_visible_structures']),
        target_path_list=[file_registry['n_visible_structures']],
        dependent_task_list=sorted(viewshed_tasks),
        task_name='sum_visibility_for_all_structures')

    # If we're not doing valuation, we can still compute visual quality,
    # we'll just use the weighted visible structures raster instead of the
    # sum of the valuation rasters.
    if not do_valuation:
        parent_visual_quality_task = weighted_visible_structures_task
        parent_visual_quality_raster_path = (
            file_registry['n_visible_structures'])
    else:
        parent_visual_quality_task = graph.add_task(
            _sum_valuation_rasters,
            args=(file_registry['clipped_dem'],
                  valuation_filepaths,
                  file_registry['viewshed_value']),
            target_path_list=[file_registry['viewshed_value']],
            dependent_task_list=sorted(valuation_tasks),
            task_name='add_up_valuation_rasters')
        parent_visual_quality_raster_path = file_registry['viewshed_value']

    # visual quality is one of the leaf nodes on the task graph.
    graph.add_task(
        _calculate_visual_quality,
        args=(parent_visual_quality_raster_path,
              intermediate_dir,
              file_registry['viewshed_quality']),
        dependent_task_list=[parent_visual_quality_task],
        target_path_list=[file_registry['viewshed_quality']],
        task_name='calculate_visual_quality')

    LOGGER.info('Waiting for Scenic Quality tasks to complete.')
    # No further tasks will be scheduled; close the graph so it is shut
    # down once the outstanding work completes.
    graph.close()
    graph.join()
def execute(args):
    """DelineateIt: Watershed Delineation.

    An InVEST-based wrapper around the pygeoprocessing routing API for
    watershed delineation.

    Upon successful completion, the following files are written to the
    output workspace:

        * ``snapped_outlets.gpkg`` - A GeoPackage with the points snapped
          to a nearby stream.
        * ``watersheds.gpkg`` - a GeoPackage of watersheds determined
          by the D8 routing algorithm.
        * ``stream.tif`` - a GeoTiff representing detected streams based on
          the provided ``flow_threshold`` parameter.  Values of 1 are
          streams, values of 0 are not.

    Args:
        args['workspace_dir'] (string): (required) The folder that all
            intermediate and output files are written to. If it does not
            exist, it will be created. If datasets already exist in the
            selected folder, they will be overwritten.
        args['results_suffix'] (string): (optional) Text appended to the
            end of output file names to help separate multiple runs.
        args['dem_path'] (string): (required) A GDAL-supported raster file
            with an elevation for each cell. Make sure the DEM is corrected
            by filling in sinks, and if necessary burning hydrographic
            features into the elevation model (recommended when unusual
            streams are observed.) See the 'Working with the DEM' section
            of the InVEST User's Guide for more information.
        args['outlet_vector_path'] (string): A vector of the geometries
            that the watersheds should be built around. Required if
            ``args['detect_pour_points']`` is False; not used otherwise.
        args['snap_points'] (bool): Whether to snap point geometries to the
            nearest stream pixel.  If ``True``, ``args['flow_threshold']``
            and ``args['snap_distance']`` must also be defined.
        args['flow_threshold'] (int): The number of upslope cells that must
            flow into a cell before it's considered part of a stream such
            that retention stops and the remaining export is exported to the
            stream.  Used to define streams from the DEM.
        args['snap_distance'] (int): Pixel Distance to Snap Outlet Points
        args['skip_invalid_geometry'] (bool): Whether to crash when an
            invalid geometry is passed or skip it, including all valid
            geometries in the vector to be passed to delineation.
            If ``False``, this tool will crash if an invalid geometry is
            found.  If ``True``, invalid geometries will be left out of
            the vector to be delineated.  Default: True
        args['detect_pour_points'] (bool): Whether to run the pour point
            detection algorithm. If True, detected pour points are used
            instead of outlet_vector_path geometries. Default: False
        args['n_workers'] (int): The number of worker processes to use with
            taskgraph. Defaults to -1 (no parallelism).

    Returns:
        ``None``

    """
    workspace = args['workspace_dir']
    utils.make_directories([workspace])

    suffix = utils.make_suffix_string(args, 'results_suffix')
    registry = utils.build_file_registry(
        [(_OUTPUT_FILES, workspace)], suffix)

    token_dir = os.path.join(workspace, '_work_tokens')

    # Use the requested worker count when one is given; otherwise fall back
    # to -1 so that everything happens in the same thread.
    try:
        worker_count = int(args['n_workers'])
    except (KeyError, TypeError, ValueError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        worker_count = -1
    task_graph = taskgraph.TaskGraph(token_dir, n_workers=worker_count)

    filled_dem_band = (registry['filled_dem'], 1)
    flow_dir_band = (registry['flow_dir_d8'], 1)

    pit_fill_task = task_graph.add_task(
        pygeoprocessing.routing.fill_pits,
        args=((args['dem_path'], 1), registry['filled_dem']),
        kwargs={'working_dir': workspace},
        target_path_list=[registry['filled_dem']],
        task_name='fill_pits')

    flow_direction_task = task_graph.add_task(
        pygeoprocessing.routing.flow_dir_d8,
        args=(filled_dem_band, registry['flow_dir_d8']),
        kwargs={'working_dir': workspace},
        target_path_list=[registry['flow_dir_d8']],
        dependent_task_list=[pit_fill_task],
        task_name='flow_direction')

    if args.get('detect_pour_points'):
        # Detect pour points automatically and use them instead of
        # user-provided geometries
        outlets_path = registry['pour_points']
        outlets_task = task_graph.add_task(
            detect_pour_points,
            args=(flow_dir_band, outlets_path),
            dependent_task_list=[flow_direction_task],
            target_path_list=[outlets_path],
            task_name='detect_pour_points')
    else:
        # Validate the user-provided outlet geometries against the filled
        # DEM before delineating around them.
        outlets_path = registry['preprocessed_geometries']
        outlets_task = task_graph.add_task(
            check_geometries,
            args=(args['outlet_vector_path'],
                  registry['filled_dem'],
                  outlets_path,
                  args.get('skip_invalid_geometry', True)),
            dependent_task_list=[pit_fill_task],
            target_path_list=[outlets_path],
            task_name='check_geometries')

    prerequisite_tasks = [flow_direction_task, outlets_task]

    if args.get('snap_points'):
        accumulation_task = task_graph.add_task(
            pygeoprocessing.routing.flow_accumulation_d8,
            args=(flow_dir_band, registry['flow_accumulation']),
            target_path_list=[registry['flow_accumulation']],
            dependent_task_list=[flow_direction_task],
            task_name='flow_accumulation')
        prerequisite_tasks.append(accumulation_task)

        snap_distance = int(args['snap_distance'])
        flow_threshold = int(args['flow_threshold'])

        stream_nodata = 255
        # wait so we can read the nodata value
        accumulation_task.join()
        accumulation_info = pygeoprocessing.get_raster_info(
            registry['flow_accumulation'])
        # NOTE(review): this passes the whole ``'nodata'`` entry rather than
        # its first element -- confirm ``_threshold_streams`` expects that.
        accumulation_nodata = accumulation_info['nodata']

        threshold_task = task_graph.add_task(
            pygeoprocessing.raster_calculator,
            args=([(registry['flow_accumulation'], 1),
                   (accumulation_nodata, 'raw'),
                   (stream_nodata, 'raw'),
                   (flow_threshold, 'raw')],
                  _threshold_streams,
                  registry['streams'],
                  gdal.GDT_Byte,
                  stream_nodata),
            target_path_list=[registry['streams']],
            dependent_task_list=[accumulation_task],
            task_name='threshold_streams')

        snapping_task = task_graph.add_task(
            snap_points_to_nearest_stream,
            args=(outlets_path,
                  registry['streams'],
                  registry['flow_accumulation'],
                  snap_distance,
                  registry['snapped_outlets']),
            target_path_list=[registry['snapped_outlets']],
            dependent_task_list=[threshold_task, outlets_task],
            task_name='snapped_outflow_points')
        prerequisite_tasks.append(snapping_task)
        # Delineate around the snapped points from here on.
        outlets_path = registry['snapped_outlets']

    # The target layer is named after the watersheds file, minus extension.
    watersheds_basename = os.path.basename(registry['watersheds'])
    watersheds_layer_name = os.path.splitext(watersheds_basename)[0]

    task_graph.add_task(
        pygeoprocessing.routing.delineate_watersheds_d8,
        args=(flow_dir_band, outlets_path, registry['watersheds']),
        kwargs={
            'working_dir': workspace,
            'target_layer_name': watersheds_layer_name,
        },
        target_path_list=[registry['watersheds']],
        dependent_task_list=prerequisite_tasks,
        task_name='delineate_watersheds_single_worker')

    task_graph.close()
    task_graph.join()
def main():
    """Entry point.

    Download the rasters listed in ``BASE_RASTER_URL_MAP``, convert the
    ``'ppl_fed'`` raster's nodata to zero, convolve it with a flat radial
    kernel, mask the result by the ``'hab_mask'`` raster and write it to
    ``REALIZED_POLLINATION_RASTER_PATH``, then compress that raster to
    ``REALIZED_POLLINATION_COMPRESSED_RASTER_PATH``.

    Returns:
        None
    """
    for dir_path in [WORKSPACE_DIR, CHURN_DIR, ECOSHARD_DIR]:
        # ``exist_ok`` tolerates an existing directory without hiding other
        # failures such as permission errors.
        os.makedirs(dir_path, exist_ok=True)

    task_graph = taskgraph.TaskGraph(CHURN_DIR, -1, 5.0)
    kernel_raster_path = os.path.join(CHURN_DIR, 'radial_kernel.tif')
    kernel_task = task_graph.add_task(
        func=create_flat_radial_convolution_mask,
        args=(0.00277778, 2000., kernel_raster_path),
        target_path_list=[kernel_raster_path],
        task_name='make convolution kernel')

    hab_fetch_path_map = {}
    # download hab mask and ppl fed equivalent raster
    for raster_id, raster_url in BASE_RASTER_URL_MAP.items():
        raster_path = os.path.join(ECOSHARD_DIR, os.path.basename(raster_url))
        task_graph.add_task(
            func=raster_calculations_core.download_url,
            args=(raster_url, raster_path),
            target_path_list=[raster_path],
            task_name='fetch hab mask')
        hab_fetch_path_map[raster_id] = raster_path
    # The downloads must be complete before the raster info can be read.
    task_graph.join()

    hab_mask_raster_info = pygeoprocessing.get_raster_info(
        hab_fetch_path_map['hab_mask'])
    ppl_fed_raster_info = pygeoprocessing.get_raster_info(
        hab_fetch_path_map['ppl_fed'])

    ppl_fed_nodata_to_zero_path = os.path.join(
        CHURN_DIR, 'ppl_fed__nodata_to_zero.tif')
    task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(
            [(hab_fetch_path_map['ppl_fed'], 1),
             (ppl_fed_raster_info['nodata'][0], 'raw')],
            _nodata_to_zero_op, ppl_fed_nodata_to_zero_path,
            gdal.GDT_Float32, None),
        target_path_list=[ppl_fed_nodata_to_zero_path],
        task_name='hab mask nodata to zero')
    task_graph.join()

    # calculate extent of ppl fed by 2km.
    ppl_fed_reach_raster_path = os.path.join(CHURN_DIR, 'ppl_fed_reach.tif')
    task_graph.add_task(
        func=pygeoprocessing.convolve_2d,
        args=[
            (ppl_fed_nodata_to_zero_path, 1), (kernel_raster_path, 1),
            ppl_fed_reach_raster_path],
        kwargs={
            'working_dir': CHURN_DIR,
            'mask_nodata': False,
            'raster_driver_creation_tuple': (
                'GTiff', (
                    'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=ZSTD',
                    'PREDICTOR=1', 'BLOCKXSIZE=256', 'BLOCKYSIZE=256',
                    'NUM_THREADS=2')),
            'n_threads': 4},
        dependent_task_list=[kernel_task],
        target_path_list=[ppl_fed_reach_raster_path],
        task_name=(
            'calculate natural hab proportion'
            f' {os.path.basename(ppl_fed_reach_raster_path)}'))

    # mask ppl fed reach by the hab mask.
    raster_calculations_core.evaluate_calculation(
        {
            'expression': 'ppl_fed_reach*(hab_mask>0.0)',
            'symbol_to_path_map': {
                'ppl_fed_reach': ppl_fed_reach_raster_path,
                'hab_mask': hab_fetch_path_map['hab_mask'],
            },
            'target_pixel_size': hab_mask_raster_info['pixel_size'],
            'target_nodata': TARGET_NODATA,
            'target_raster_path': REALIZED_POLLINATION_RASTER_PATH,
        }, task_graph, CHURN_DIR)
    task_graph.join()

    compress_and_overview.compress_to(
        task_graph, REALIZED_POLLINATION_RASTER_PATH, 'bilinear',
        REALIZED_POLLINATION_COMPRESSED_RASTER_PATH)

    # Close, then wait for the compression work so the function does not
    # return while tasks are still outstanding.
    task_graph.close()
    task_graph.join()
if args.bounding_box: target_bounding_box = args.bounding_box else: target_bounding_box = pygeoprocessing.merge_bounding_box_list( bounding_box_list, 'intersection') if args.pixel_size: target_pixel_size = (args.pixel_size, -args.pixel_size) else: target_pixel_size = (min_size, -min_size) LOGGER.info(f'target pixel size: {target_pixel_size}') LOGGER.info(f'target bounding box: {target_bounding_box}') LOGGER.debug('align rasters, this might take a while') task_graph = taskgraph.TaskGraph(args.workspace_dir, N_CPUS, 5.0) align_dir = os.path.join(args.workspace_dir, 'aligned_rasters') try: os.makedirs(align_dir) except OSError: pass # align rasters and cast to list because we'll rewrite # raster_id_to_path_map object for raster_id in raster_id_to_info_map: raster_path = raster_id_to_info_map[raster_id]['path'] raster_basename = os.path.splitext(os.path.basename(raster_path))[0] aligned_raster_path = os.path.join( align_dir, f'{raster_basename}_{target_bounding_box}_{target_pixel_size}.tif') raster_id_to_info_map[raster_id]['aligned_path'] = \
def main():
    """Entry point.

    Download the world borders vector and the raster at ``RASTER_URL``,
    mask the raster by the borders, compute the percentiles in
    ``PERCENTILE_LIST`` and accumulate, for each percentile value, the sum
    of the valid pixels at or above it. The threshold is interpolated at
    90% of ``cdf_array[2]`` and written, with a plot, as a single
    "Global" row:

        * ``COUNTRY_WORKSPACES/Global_cdf.png`` - the plotted curve.
        * ``WORKSPACE_DIR/country_threshold.csv`` - the threshold table.

    A per-country loop used to follow an unconditional ``return``; it was
    unreachable and referenced names that are only defined in
    commented-out code, so it has been removed.

    Returns:
        None
    """
    # The plot is saved into COUNTRY_WORKSPACES, so make sure it exists.
    for dir_path in (WORKSPACE_DIR, COUNTRY_WORKSPACES):
        os.makedirs(dir_path, exist_ok=True)

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_wb_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')
    raster_path = os.path.join(WORKSPACE_DIR, os.path.basename(RASTER_URL))
    download_raster_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(RASTER_URL, raster_path),
        target_path_list=[raster_path],
        task_name='download raster')

    # mask out everything that's not a country
    # ``os.path.splitext`` keeps the leading dot on the extension, so the
    # pattern must not add another one.
    masked_raster_path = os.path.join(
        WORKSPACE_DIR, '%s_masked%s' % os.path.splitext(
            os.path.basename(raster_path)))
    # we need to define this because otherwise no nodata value is defined
    mask_nodata = -1
    mask_task = task_graph.add_task(
        func=pygeoprocessing.mask_raster,
        args=(
            (raster_path, 1), world_borders_path, masked_raster_path),
        kwargs={
            'raster_driver_creation_tuple': GTIFF_CREATION_TUPLE_OPTIONS,
            'target_mask_value': mask_nodata,
        },
        target_path_list=[masked_raster_path],
        dependent_task_list=[download_wb_task, download_raster_task],
        task_name='mask raster')

    download_raster_task.join()
    raster_info = pygeoprocessing.get_raster_info(raster_path)

    country_name = "Global"
    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')

    target_percentile_pickle_path = os.path.join(
        WORKSPACE_DIR, '%s.pkl' % (
            os.path.basename(os.path.splitext(raster_path)[0])))
    # NOTE(review): the percentiles and the sums below are computed on the
    # unmasked ``raster_path`` even though this task waits on the mask
    # task -- confirm whether ``masked_raster_path`` was intended.
    calculate_percentiles_task = task_graph.add_task(
        func=calculate_percentiles,
        args=(
            raster_path, PERCENTILE_LIST, target_percentile_pickle_path),
        target_path_list=[target_percentile_pickle_path],
        dependent_task_list=[mask_task],
        task_name='calculate percentiles')
    calculate_percentiles_task.join()
    with open(target_percentile_pickle_path, 'rb') as pickle_file:
        percentile_values = pickle.load(pickle_file)
    LOGGER.debug(
        "len percentile_values: %d len PERCENTILE_LIST: %d",
        len(percentile_values), len(PERCENTILE_LIST))

    cdf_array = [0.0] * len(percentile_values)
    nodata = raster_info['nodata'][0]
    valid_pixel_count = 0
    total_pixel_count = 0
    total_pixels = (
        raster_info['raster_size'][0] * raster_info['raster_size'][1])
    for _, data_block in pygeoprocessing.iterblocks(
            (raster_path, 1), largest_block=2**28):
        # Count every block, including all-nodata ones, so the reported
        # progress can reach 100%.
        total_pixel_count += data_block.size
        if nodata is None:
            # No nodata value defined: every pixel is valid.
            valid_mask = numpy.ones(data_block.shape, dtype=bool)
        else:
            valid_mask = ~numpy.isclose(data_block, nodata)
        block_valid_count = numpy.count_nonzero(valid_mask)
        if block_valid_count == 0:
            continue
        valid_pixel_count += block_valid_count
        for index, percentile_value in enumerate(percentile_values):
            cdf_array[index] += numpy.sum((data_block[
                valid_mask & (data_block >= percentile_value)]).astype(
                numpy.float32))
        LOGGER.debug('%.2f%% complete', (100.0*total_pixel_count)/total_pixels)
        LOGGER.debug('current cdf array: %s', cdf_array)

    # threshold is at 90% says Becky
    threshold_limit = 0.9 * cdf_array[2]

    LOGGER.debug(cdf_array)
    fig, ax = matplotlib.pyplot.subplots()
    ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
    f = scipy.interpolate.interp1d(
        cdf_array, list(reversed(PERCENTILE_LIST)))
    try:
        cdf_threshold = f(threshold_limit)
    except ValueError:
        # ``interp1d`` rejects values outside the range of ``cdf_array``.
        LOGGER.exception(
            "error when passing threshold_limit: %s\ncdf_array: %s" % (
                threshold_limit, cdf_array))
        cdf_threshold = cdf_array[2]

    ax.plot([0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
    ax.plot([cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]],
            'k:', linewidth=2)

    ax.grid(True, linestyle='-.')
    ax.set_title(
        '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
            country_name, threshold_limit, cdf_threshold, valid_pixel_count))
    ax.set_ylabel('Sum of %s up to 100-percentile' % os.path.basename(
        raster_path))
    # The percentiles are plotted on the x axis; this label previously
    # overwrote the y label set just above.
    ax.set_xlabel('100-percentile')
    ax.tick_params(labelcolor='r', labelsize='medium', width=3)
    matplotlib.pyplot.autoscale(enable=True, tight=True)
    matplotlib.pyplot.savefig(
        os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
    # Release the figure so it does not linger in pyplot's global state.
    matplotlib.pyplot.close(fig)

    # The context manager guarantees the table is flushed and closed even
    # if a write fails.
    with open(country_threshold_table_path, 'w') as threshold_table_file:
        threshold_table_file.write(
            'country,percentile at 90% max,pixel count\n')
        threshold_table_file.write(
            '%s, %f, %d\n' % (country_name, cdf_threshold, valid_pixel_count))

    task_graph.close()
    task_graph.join()
parser.add_argument( '--watershed_fid_scenario_immediates', type=str, nargs='+', default=None, help=( 'list of `(watershed)_(fid)_(scenario_id)` identifiers to run ' 'instead of database')) args = parser.parse_args() for dir_path in [ WORKSPACE_DIR, ECOSHARD_DIR, CHURN_DIR, TILE_DIR]: try: os.makedirs(dir_path) except OSError: pass task_graph = taskgraph.TaskGraph(CHURN_DIR, -1) task_graph.add_task( func=create_status_database, args=(STATUS_DATABASE_PATH, DATABASE_TOKEN_PATH), target_path_list=[DATABASE_TOKEN_PATH], ignore_path_list=[STATUS_DATABASE_PATH], task_name='create status database') LOGGER.debug( 'scheduling download of watersheds: %s', WATERSHEDS_URL) watersheds_zip_path = os.path.join( ECOSHARD_DIR, os.path.basename(WATERSHEDS_URL)) download_watersheds_task = task_graph.add_task( func=ecoshard.download_url, args=(WATERSHEDS_URL, watersheds_zip_path),
def execute(args):
    """Carbon.

    Calculate the amount of carbon stocks given a landscape, or the difference
    due to a future change, and/or the tradeoffs between that and a REDD
    scenario, and calculate economic valuation on those scenarios.

    The model can operate on a single scenario, a combined present and future
    scenario, as well as an additional REDD scenario.

    Args:
        args['workspace_dir'] (string): a path to the directory that will
            write output and other temporary files during calculation.
        args['results_suffix'] (string): appended to any output file name.
        args['lulc_cur_path'] (string): a path to a raster representing the
            current carbon stocks.
        args['calc_sequestration'] (bool): if true, sequestration should
            be calculated and 'lulc_fut_path' and 'do_redd' should be defined.
        args['lulc_fut_path'] (string): a path to a raster representing future
            landcover scenario.  Optional, but if present and well defined
            will trigger a sequestration calculation.
        args['do_redd'] (bool): if true, REDD analysis should be calculated
            and 'lulc_redd_path' should be defined.
        args['lulc_redd_path'] (string): a path to a raster representing the
            alternative REDD scenario which is only possible if the
            args['lulc_fut_path'] is present and well defined.
        args['carbon_pools_path'] (string): path to CSV that indexes carbon
            storage density to lulc codes.  Always read by this function; it
            must contain a 'lucode' column plus the 'c_above', 'c_below',
            'c_soil' and 'c_dead' pool columns.
        args['lulc_cur_year'] (int/string): an integer representing the year
            of `args['lulc_cur_path']` used if `args['do_valuation']`
            is True.
        args['lulc_fut_year'] (int/string): an integer representing the year
            of `args['lulc_fut_path']` used in valuation if it exists.
            Required if `args['do_valuation']` is True and
            `args['lulc_fut_path']` is present and well defined.
        args['do_valuation'] (bool): if true then run the valuation model on
            available outputs.  Calculate NPV for a future scenario or a REDD
            scenario and report in final HTML document.
        args['price_per_metric_ton_of_c'] (float): Is the present value of
            carbon per metric ton. Used if `args['do_valuation']` is present
            and True.
        args['discount_rate'] (float): Discount rate used if NPV calculations
            are required.  Used if `args['do_valuation']` is present and
            True.
        args['rate_change'] (float): Annual rate of change in price of carbon
            as a percentage.  Used if `args['do_valuation']` is present and
            True.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None.

    Raises:
        ValueError: if the provided landcover rasters do not all share the
            same pixel size or the same raster dimensions.
    """
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    intermediate_output_dir = os.path.join(
        args['workspace_dir'], 'intermediate_outputs')
    output_dir = args['workspace_dir']
    utils.make_directories([intermediate_output_dir, output_dir])

    LOGGER.info('Building file registry')
    file_registry = utils.build_file_registry(
        [(_OUTPUT_BASE_FILES, output_dir),
         (_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
         (_TMP_BASE_FILES, output_dir)], file_suffix)

    carbon_pool_table = utils.build_lookup_from_csv(
        args['carbon_pools_path'], 'lucode')

    work_token_dir = os.path.join(
        intermediate_output_dir, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Synchronous mode.
    graph = taskgraph.TaskGraph(work_token_dir, n_workers)

    cell_size_set = set()
    raster_size_set = set()
    valid_lulc_keys = []
    valid_scenarios = []
    tifs_to_summarize = set()  # passed to _generate_report()

    # Collect the scenarios that were actually provided and record their
    # geometry so mismatched inputs can be rejected before scheduling work.
    for scenario_type in ['cur', 'fut', 'redd']:
        lulc_key = "lulc_%s_path" % (scenario_type)
        if args.get(lulc_key):
            raster_info = pygeoprocessing.get_raster_info(args[lulc_key])
            cell_size_set.add(raster_info['pixel_size'])
            raster_size_set.add(raster_info['raster_size'])
            valid_lulc_keys.append(lulc_key)
            valid_scenarios.append(scenario_type)
    if len(cell_size_set) > 1:
        raise ValueError(
            "the pixel sizes of %s are not equivalent. Here are the "
            "different sets that were found in processing: %s" % (
                valid_lulc_keys, cell_size_set))
    if len(raster_size_set) > 1:
        raise ValueError(
            "the raster dimensions of %s are not equivalent. Here are the "
            "different sizes that were found in processing: %s" % (
                valid_lulc_keys, raster_size_set))

    # calculate total carbon storage
    LOGGER.info('Map all carbon pools to carbon storage rasters.')
    carbon_map_task_lookup = {}
    sum_rasters_task_lookup = {}
    for scenario_type in valid_scenarios:
        carbon_map_task_lookup[scenario_type] = []
        storage_path_list = []
        for pool_type in ['c_above', 'c_below', 'c_soil', 'c_dead']:
            # lucode -> carbon density for this single pool
            carbon_pool_by_type = {
                lucode: float(carbon_pool_table[lucode][pool_type])
                for lucode in carbon_pool_table}

            lulc_key = 'lulc_%s_path' % scenario_type
            storage_key = '%s_%s' % (pool_type, scenario_type)
            LOGGER.info(
                "Mapping carbon from '%s' to '%s' scenario.",
                lulc_key, storage_key)

            carbon_map_task = graph.add_task(
                _generate_carbon_map,
                args=(args[lulc_key], carbon_pool_by_type,
                      file_registry[storage_key]),
                target_path_list=[file_registry[storage_key]],
                task_name='carbon_map_%s' % storage_key)
            storage_path_list.append(file_registry[storage_key])
            carbon_map_task_lookup[scenario_type].append(carbon_map_task)

        output_key = 'tot_c_' + scenario_type
        LOGGER.info("Calculate carbon storage for '%s'", output_key)

        sum_rasters_task = graph.add_task(
            _sum_rasters,
            args=(storage_path_list, file_registry[output_key]),
            target_path_list=[file_registry[output_key]],
            dependent_task_list=carbon_map_task_lookup[scenario_type],
            task_name='sum_rasters_for_total_c_%s' % output_key)
        sum_rasters_task_lookup[scenario_type] = sum_rasters_task
        tifs_to_summarize.add(file_registry[output_key])

    # calculate sequestration
    diff_rasters_task_lookup = {}
    for scenario_type in ['fut', 'redd']:
        if scenario_type not in valid_scenarios:
            continue
        output_key = 'delta_cur_' + scenario_type
        LOGGER.info("Calculate sequestration scenario '%s'", output_key)
        storage_path_list = [
            file_registry['tot_c_cur'],
            file_registry['tot_c_' + scenario_type]]

        diff_rasters_task = graph.add_task(
            _diff_rasters,
            args=(storage_path_list, file_registry[output_key]),
            target_path_list=[file_registry[output_key]],
            dependent_task_list=[
                sum_rasters_task_lookup['cur'],
                sum_rasters_task_lookup[scenario_type]],
            task_name='diff_rasters_for_%s' % output_key)
        diff_rasters_task_lookup[scenario_type] = diff_rasters_task
        tifs_to_summarize.add(file_registry[output_key])

    # calculate net present value
    calculate_npv_tasks = []
    if args.get('do_valuation'):
        LOGGER.info('Constructing valuation formula.')
        valuation_constant = _calculate_valuation_constant(
            int(args['lulc_cur_year']), int(args['lulc_fut_year']),
            float(args['discount_rate']), float(args['rate_change']),
            float(args['price_per_metric_ton_of_c']))

        for scenario_type in ['fut', 'redd']:
            if scenario_type not in valid_scenarios:
                continue
            output_key = 'npv_%s' % scenario_type
            LOGGER.info("Calculating NPV for scenario '%s'", output_key)

            calculate_npv_task = graph.add_task(
                _calculate_npv,
                args=(file_registry['delta_cur_%s' % scenario_type],
                      valuation_constant, file_registry[output_key]),
                target_path_list=[file_registry[output_key]],
                dependent_task_list=[diff_rasters_task_lookup[scenario_type]],
                task_name='calculate_%s' % output_key)
            calculate_npv_tasks.append(calculate_npv_task)
            tifs_to_summarize.add(file_registry[output_key])

    # Report aggregate results
    tasks_to_report = (
        list(sum_rasters_task_lookup.values()) +
        list(diff_rasters_task_lookup.values()) +
        calculate_npv_tasks)
    graph.add_task(
        _generate_report,
        args=(tifs_to_summarize, args, file_registry),
        target_path_list=[file_registry['html_report']],
        dependent_task_list=tasks_to_report,
        task_name='generate_report')

    # No further tasks are scheduled: close the graph before joining so its
    # workers are released once everything has finished.
    graph.close()
    graph.join()

    # Best-effort cleanup of temporary files; a failure is only logged.
    for tmp_filename_key in _TMP_BASE_FILES:
        try:
            tmp_filename = file_registry[tmp_filename_key]
            if os.path.exists(tmp_filename):
                os.remove(tmp_filename)
        except OSError as os_error:
            LOGGER.warning(
                "Can't remove temporary file: %s\nOriginal Exception:\n%s",
                file_registry[tmp_filename_key], os_error)
def execute(args):
    """Crop Production Percentile.

    This model will take a landcover (crop cover?) map and produce yields,
    production, and observed crop yields, a nutrient table, and a clipped
    observed map.

    Args:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.

        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path:

            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)

            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None.

    Raises:
        ValueError: if a crop name in the landcover-to-crop table has no
            matching climate bin raster under `args['model_data_path']`.
    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)

    # Fail early, listing every crop whose climate bin raster is missing.
    bad_crop_name_list = []
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            bad_crop_name_list.append(crop_name)
    if bad_crop_name_list:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" % (
                args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories(
        [output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    # numpy.prod replaces the numpy.product alias removed in NumPy 2.0.
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000
    landcover_nodata = landcover_raster_info['nodata'][0]
    if landcover_nodata is None:
        LOGGER.warning(
            "%s does not have nodata value defined; "
            "assuming all pixel values are valid",
            args['landcover_raster_path'])

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection_wkt'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    # Initialize a TaskGraph
    work_token_dir = os.path.join(
        output_dir, _INTERMEDIATE_OUTPUT_DIR, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Single process mode.
    task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
    # Every raster-producing task is collected here so the final tabulation
    # and aggregation tasks can depend on all of them.
    dependent_task_list = []

    crop_lucode = None
    observed_yield_nodata = None
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (
                crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        crop_climate_bin_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(crop_climate_bin_raster_path,
                  crop_climate_bin_raster_info['pixel_size'],
                  clipped_climate_bin_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_climate_bin_raster_path],
            task_name='crop_climate_bin')
        dependent_task_list.append(crop_climate_bin_task)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path, 'climate_bin', to_lower=True)
        # The percentile columns are every header of the first table row
        # except the key column itself.
        yield_percentile_headers = [
            x for x in list(crop_climate_percentile_table.values())[0]
            if x != 'climate_bin']

        reclassify_error_details = {
            'raster_name': f'{crop_name} Climate Bin',
            'column_name': 'climate_bin',
            'table_name': f'Climate {crop_name} Percentile Yield'
        }
        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir,
                _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = {
                bin_id: crop_climate_percentile_table[bin_id][
                    yield_percentile_id]
                for bin_id in crop_climate_percentile_table}
            # reclassify nodata to a valid value of 0
            # we're assuming that the crop doesn't exist where there is no data
            # this is more likely than assuming the crop does exist, esp.
            # in the context of the provided climate bins map
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            create_coarse_yield_percentile_task = task_graph.add_task(
                func=utils.reclassify_raster,
                args=((clipped_climate_bin_raster_path, 1),
                      bin_to_percentile_yield,
                      coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                      _NODATA_YIELD, reclassify_error_details),
                target_path_list=[coarse_yield_percentile_raster_path],
                dependent_task_list=[crop_climate_bin_task],
                task_name='create_coarse_yield_percentile_%s_%s' % (
                    crop_name, yield_percentile_id))
            dependent_task_list.append(create_coarse_yield_percentile_task)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            create_interpolated_yield_percentile_task = task_graph.add_task(
                func=pygeoprocessing.warp_raster,
                args=(coarse_yield_percentile_raster_path,
                      landcover_raster_info['pixel_size'],
                      interpolated_yield_percentile_raster_path,
                      'cubicspline'),
                kwargs={
                    'target_projection_wkt':
                        landcover_raster_info['projection_wkt'],
                    'target_bb': landcover_raster_info['bounding_box']
                },
                target_path_list=[interpolated_yield_percentile_raster_path],
                dependent_task_list=[create_coarse_yield_percentile_task],
                task_name='create_interpolated_yield_percentile_%s_%s' % (
                    crop_name, yield_percentile_id))
            dependent_task_list.append(
                create_interpolated_yield_percentile_task)

            LOGGER.info(
                "Calculate yield for %s at %s", crop_name,
                yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))

            create_percentile_production_task = task_graph.add_task(
                func=calculate_crop_production,
                args=(args['landcover_raster_path'],
                      interpolated_yield_percentile_raster_path,
                      crop_lucode, pixel_area_ha,
                      percentile_crop_production_raster_path),
                target_path_list=[percentile_crop_production_raster_path],
                dependent_task_list=[
                    create_interpolated_yield_percentile_task],
                task_name='create_percentile_production_%s_%s' % (
                    crop_name, yield_percentile_id))
            dependent_task_list.append(create_percentile_production_task)

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))

        clipped_observed_yield_raster_path = os.path.join(
            output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        clip_global_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(global_observed_yield_raster_path,
                  global_observed_yield_raster_info['pixel_size'],
                  clipped_observed_yield_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_observed_yield_raster_path],
            task_name='clip_global_observed_yield_%s_' % crop_name)
        dependent_task_list.append(clip_global_observed_yield_task)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        nodata_to_zero_for_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(clipped_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw')],
                  _zero_observed_yield_op, zeroed_observed_yield_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[zeroed_observed_yield_raster_path],
            dependent_task_list=[clip_global_observed_yield_task],
            task_name='nodata_to_zero_for_observed_yield_%s_' % crop_name)
        dependent_task_list.append(nodata_to_zero_for_observed_yield_task)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        interpolate_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(zeroed_observed_yield_raster_path,
                  landcover_raster_info['pixel_size'],
                  interpolated_observed_yield_raster_path, 'cubicspline'),
            kwargs={
                'target_projection_wkt':
                    landcover_raster_info['projection_wkt'],
                'target_bb': landcover_raster_info['bounding_box']
            },
            target_path_list=[interpolated_observed_yield_raster_path],
            dependent_task_list=[nodata_to_zero_for_observed_yield_task],
            task_name='interpolate_observed_yield_to_lulc_%s' % crop_name)
        dependent_task_list.append(interpolate_observed_yield_task)

        observed_production_raster_path = os.path.join(
            output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))

        calculate_observed_production_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(args['landcover_raster_path'], 1),
                   (interpolated_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw'),
                   (landcover_nodata, 'raw'),
                   (crop_lucode, 'raw'),
                   (pixel_area_ha, 'raw')],
                  _mask_observed_yield_op, observed_production_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[observed_production_raster_path],
            dependent_task_list=[interpolate_observed_yield_task],
            task_name='calculate_observed_production_%s' % crop_name)
        dependent_task_list.append(calculate_observed_production_task)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)

    # NOTE: yield_percentile_headers below is whatever the last crop in the
    # loop above produced.
    task_graph.add_task(
        func=tabulate_results,
        args=(nutrient_table, yield_percentile_headers,
              crop_to_landcover_table, pixel_area_ha,
              args['landcover_raster_path'], landcover_nodata,
              output_dir, file_suffix, result_table_path),
        target_path_list=[result_table_path],
        dependent_task_list=dependent_task_list,
        task_name='tabulate_results')

    if args.get('aggregate_polygon_path') not in ('', None):
        LOGGER.info("aggregating result over query polygon")
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        aggregate_results_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        task_graph.add_task(
            func=aggregate_to_polygons,
            args=(args['aggregate_polygon_path'],
                  target_aggregate_vector_path,
                  landcover_raster_info['projection_wkt'],
                  crop_to_landcover_table, nutrient_table,
                  yield_percentile_headers, output_dir, file_suffix,
                  aggregate_results_table_path),
            target_path_list=[target_aggregate_vector_path,
                              aggregate_results_table_path],
            dependent_task_list=dependent_task_list,
            task_name='aggregate_results_to_polygons')

    task_graph.close()
    task_graph.join()
def main(): """Entry point.""" parser = argparse.ArgumentParser( description='Global carbon regression scenario maker') parser.add_argument( '--bounding_box', type=float, nargs=4, default=[-180, -90, 180, 90], help=( "manual bounding box in the form of four consecutive floats: " "min_lng, min_lat, max_lng, max_lat, ex: " "-180.0, -58.3, 180.0, 81.5")) parser.add_argument( '--keyfile', help='path to keyfile that authorizes bucket access') parser.add_argument( '--n_workers', type=int, default=multiprocessing.cpu_count(), help='how many workers to allocate to taskgraph') args = parser.parse_args() if args.keyfile: subprocess.run( f'/usr/local/gcloud-sdk/google-cloud-sdk/bin/gcloud auth ' f'activate-service-account --key-file={args.keyfile}', shell=True, check=True) for dir_path in [WORKSPACE_DIR, ECOSHARD_DIR, CHURN_DIR, DATA_DIR]: try: os.makedirs(dir_path) except OSError: pass bounding_box_str = ','.join([str(x) for x in args.bounding_box]) clipped_data_dir = os.path.join(DATA_DIR, bounding_box_str) # Step 0: Download data task_graph = taskgraph.TaskGraph(CHURN_DIR, args.n_workers, 5.0) LOGGER.info("Step 0: Download data") fetch_data(args.bounding_box, clipped_data_dir, task_graph) # IPCC Approach # Create carbon stocks for ESA 2014 and restoration scenario rasterize_carbon_zone_task = None ipcc_carbon_scenario_raster_map = {} IPCC_CARBON_DIR = os.path.join(WORKSPACE_DIR, 'ipcc_carbon') try: os.makedirs(IPCC_CARBON_DIR) except OSError: pass for scenario_id, lulc_raster_path in LULC_SCENARIO_RASTER_PATH_MAP.items(): if rasterize_carbon_zone_task is None: rasterized_zones_raster_path = os.path.join( clipped_data_dir, 'carbon_zones.tif') rasterize_carbon_zone_task = task_graph.add_task( func=rasterize_carbon_zones, args=( lulc_raster_path, CARBON_ZONES_VECTOR_PATH, rasterized_zones_raster_path), target_path_list=[rasterized_zones_raster_path], task_name='rasterize carbon zones') zone_lucode_to_carbon_map = parse_carbon_lulc_table( IPCC_CARBON_TABLE_PATH) 
ipcc_carbon_scenario_raster_map[scenario_id] = os.path.join( IPCC_CARBON_DIR, f'ipcc_carbon_{scenario_id}_{bounding_box_str}.tif') # Units are in Mg/Ha but pixel area is in degrees^2 so multiply result # by (111120 m/deg)**2*1 ha / 10000m^2 and C into CO2 # TODO: I can convert this to varying area later if we want conversion_factor = ( pygeoprocessing.get_raster_info( lulc_raster_path)['pixel_size'][0]**2 * 111120**2 * (1/10000) * (15.9992*2+12.011)/12.011) task_graph.add_task( func=pygeoprocessing.raster_calculator, args=( [(lulc_raster_path, 1), (rasterized_zones_raster_path, 1), (zone_lucode_to_carbon_map, 'raw'), (conversion_factor, 'raw')], ipcc_carbon_op, ipcc_carbon_scenario_raster_map[scenario_id], gdal.GDT_Float32, MULT_BY_COLUMNS_NODATA), dependent_task_list=[rasterize_carbon_zone_task], target_path_list=[ipcc_carbon_scenario_raster_map[scenario_id]], task_name=f'''create carbon for { ipcc_carbon_scenario_raster_map[scenario_id]}''') # FOREST REGRESSION # 1) Make convolutions with custom kernel of 1, 2, 3, 5, 10, 20, 30, 50, # and 100 pixels for not_forest (see forest lulc codes), is_cropland # (classes 10-40), and is_urban (class 190) for LULC maps LOGGER.info("Forest Regression step 1") mask_path_task_map = collections.defaultdict(dict) for scenario_id, lulc_scenario_raster_path in \ LULC_SCENARIO_RASTER_PATH_MAP.items(): for mask_type, lulc_codes, inverse_mode in MASK_TYPES: scenario_lulc_mask_raster_path = os.path.join( clipped_data_dir, f'mask_of_{mask_type}_{scenario_id}.tif') mask_task = task_graph.add_task( func=mask_ranges, args=( lulc_scenario_raster_path, lulc_codes, inverse_mode == 'inv', scenario_lulc_mask_raster_path), target_path_list=[scenario_lulc_mask_raster_path], task_name=f'make {mask_type}_{scenario_id}') mask_path_task_map[scenario_id][mask_type] = ( scenario_lulc_mask_raster_path, mask_task) LOGGER.debug( f'this is the scenario lulc mask target: ' f'{scenario_lulc_mask_raster_path}') kernel_raster_path_map = {} for pixel_radius in 
reversed(sorted(CONVOLUTION_PIXEL_DIST_LIST)): kernel_raster_path = os.path.join( CHURN_DIR, f'{pixel_radius}_kernel.tif') kernel_task = task_graph.add_task( func=make_kernel_raster, args=(pixel_radius, kernel_raster_path), target_path_list=[kernel_raster_path], task_name=f'make kernel of radius {pixel_radius}') kernel_raster_path_map[pixel_radius] = kernel_raster_path convolution_task_list = [] for mask_type in mask_path_task_map[scenario_id]: scenario_mask_path, mask_task = \ mask_path_task_map[scenario_id][mask_type] LOGGER.debug( f'this is the scenario mask about to convolve: ' f'{scenario_mask_path} {mask_task}') convolution_mask_raster_path = os.path.join( clipped_data_dir, f'{scenario_id}_{mask_type}_gs{pixel_radius}.tif') convolution_task = task_graph.add_task( func=pygeoprocessing.convolve_2d, args=( (scenario_mask_path, 1), (kernel_raster_path, 1), convolution_mask_raster_path), dependent_task_list=[mask_task, kernel_task], target_path_list=[convolution_mask_raster_path], task_name=( f'convolve {pixel_radius} {mask_type}_' f'{scenario_id}')) convolution_task_list.append(convolution_task) task_graph.join() # 2) Evalute the forest regression for each scenario LOGGER.info("Forest Regression step 2") mult_by_columns_workspace = os.path.join( WORKSPACE_DIR, 'mult_by_columns_workspace', bounding_box_str) try: os.makedirs(mult_by_columns_workspace) except OSError: pass task_graph.join() FOREST_REGRESSION_RESULT_DIR = os.path.join( WORKSPACE_DIR, 'forest_regression_rasters') try: os.makedirs(FOREST_REGRESSION_RESULT_DIR) except OSError: pass forest_regression_scenario_raster_map = {} for scenario_id, lulc_scenario_raster_path in \ LULC_SCENARIO_RASTER_PATH_MAP.items(): conversion_factor = ( pygeoprocessing.get_raster_info( lulc_scenario_raster_path)['pixel_size'][0]**2 * 111120**2 * (1/10000) * 0.47 * # IPCC value to convert BM to C (15.9992*2+12.011)/12.011) # C into CO2 forest_regression_scenario_raster_map[scenario_id] = os.path.join( 
FOREST_REGRESSION_RESULT_DIR, f'forest_regression_{scenario_id}_{bounding_box_str}.tif') mult_by_columns_library.mult_by_columns( FOREST_REGRESSION_LASSO_TABLE_PATH, clipped_data_dir, mult_by_columns_workspace, 'lulc_esa_smoothed_2014_10sec', scenario_id, args.bounding_box, TARGET_PIXEL_SIZE, forest_regression_scenario_raster_map[scenario_id], task_graph, zero_nodata_symbols=ZERO_NODATA_SYMBOLS, target_nodata=MULT_BY_COLUMNS_NODATA, conversion_factor=conversion_factor) # NON-FOREST BIOMASS BACCINI_CO2_RESULT_DIR = os.path.join( WORKSPACE_DIR, 'baccini_co2_rasters') try: os.makedirs(BACCINI_CO2_RESULT_DIR) except OSError: pass LOGGER.info('convert baccini non forest into CO2') conversion_factor = ( pygeoprocessing.get_raster_info( lulc_raster_path)['pixel_size'][0]**2 * 111120**2 * (1/10000) * 0.47 * (15.9992*2+12.011)/12.011) # TODO: mult baccini by this conversion factor baccini_nodata = pygeoprocessing.get_raster_info( BACCINI_10s_2014_BIOMASS_RASTER_PATH)['nodata'][0] baccini_co2_raster_path = os.path.join( BACCINI_CO2_RESULT_DIR, f'baccini_co2_{bounding_box_str}.tif') task_graph.add_task( func=pygeoprocessing.raster_calculator, args=( [(BACCINI_10s_2014_BIOMASS_RASTER_PATH, 1), (conversion_factor, 'raw'), (baccini_nodata, 'raw'), (MULT_BY_COLUMNS_NODATA, 'raw')], mult_by_const_op, baccini_co2_raster_path, gdal.GDT_Float32, MULT_BY_COLUMNS_NODATA), target_path_list=[baccini_co2_raster_path], task_name='convert baccini biomass density to co2') task_graph.join() # combine both the non-forest and forest into one map for each # scenario based on their masks regression_carbon_scenario_path_map = {} REGRESSION_TOTAL_DIR = os.path.join(WORKSPACE_DIR, 'regression_total') try: os.makedirs(REGRESSION_TOTAL_DIR) except OSError: pass for scenario_id in LULC_SCENARIO_RASTER_PATH_MAP: regression_carbon_scenario_path_map[scenario_id] = os.path.join( REGRESSION_TOTAL_DIR, f'regression_carbon_{scenario_id}_{bounding_box_str}.tif') task_graph.add_task( func=raster_where, args=( 
mask_path_task_map[scenario_id]['forest_10sec'][0], forest_regression_scenario_raster_map[scenario_id], baccini_co2_raster_path, regression_carbon_scenario_path_map[scenario_id]), target_path_list=[ regression_carbon_scenario_path_map[scenario_id]], task_name=f'combine forest/nonforest for {scenario_id}') task_graph.join() # SCENARIOS/OPTIMIZATION # 1) Standard approach: the IPCC approach will be applied for ESA 2014 and # to the forest pixels only of a Potential Natural Vegetation (PNV) map. # An IPCC-based marginal value map will be created as the difference # between the two, and pixels selected by the largest marginal value # until the 3 Pg target is reached. # mask ipcc_carbon_scenario_raster_map to forest only from # restoration scenario masked_ipcc_carbon_raster_map = {} ipcc_mask_task_list = [] for scenario_id in LULC_SCENARIO_RASTER_PATH_MAP: masked_ipcc_carbon_raster_map[scenario_id] = os.path.join( WORKSPACE_DIR, f'ipcc_carbon_forest_only_{scenario_id}_{bounding_box_str}.tif') # specifically masking to 'restoration limited' mask_task = task_graph.add_task( func=pygeoprocessing.raster_calculator, args=( [(ipcc_carbon_scenario_raster_map[scenario_id], 1), (mask_path_task_map['restoration_limited']['forest_10sec'][0], 1), (MULT_BY_COLUMNS_NODATA, 'raw'), (MASK_NODATA, 'raw'), (MULT_BY_COLUMNS_NODATA, 'raw')], mult_rasters_op, masked_ipcc_carbon_raster_map[scenario_id], gdal.GDT_Float32, MULT_BY_COLUMNS_NODATA), target_path_list=[masked_ipcc_carbon_raster_map[scenario_id]], task_name=f'mask out forest only ipcc {scenario_id}') ipcc_mask_task_list.append(mask_task) # subtract # masked_ipcc_carbon_raster_map[esa2014] # masked_ipcc_carbon_raster_map[restoration_limited] marginal_value_dir = os.path.join(WORKSPACE_DIR, 'marginal_values') try: os.makedirs(marginal_value_dir) except OSError: pass for marginal_value_id, (target_id, base_id) in MARGINAL_VALUE_MAPS.items(): marginal_value_raster = os.path.join( marginal_value_dir, 
f'marginal_value_{marginal_value_id}_{bounding_box_str}.tif') task_graph.add_task( func=pygeoprocessing.raster_calculator, args=([ (masked_ipcc_carbon_raster_map[target_id], 1), (masked_ipcc_carbon_raster_map[base_id], 1), ], sub_pos_op, marginal_value_raster, gdal.GDT_Float32, MULT_BY_COLUMNS_NODATA), dependent_task_list=ipcc_mask_task_list, target_path_list=[marginal_value_raster], task_name=f'make {marginal_value_id} marginal value raster') # TODO: mask out forest from IPCC to have a forest only map # TODO: set up raster calculation to subtract IPCC forest only from # 2) For the regression approach, the forest regression model will be # applied to the forest pixels and the non-forest regression model will # be applied to the non-forest pixels. The regression will also be # applied to the same PNV map for forest pixels only. The difference # between the two will create a regression-based marginal value map. In # this case, because the aim is to select for areas not only of high # marginal value for reforestation but also regeneration, a 30 km # resolution grid will be used to summarize values with edge effects # (since 30 km was the largest scale over which edge effects were seen to # operate). The marginal values will be summed and divided by the # difference in the number of forest pixels between PNV and ESA 2014-this # ratio can be seen as the "efficiency" of intervention in that 30 km # grid cell. Highest efficiency grid cells will be selected first, with # all viable non-forest pixels within them restored, until the 3 Pg # target is reached. 
# Regression approach, final stage: restrict every scenario's combined
# regression carbon raster with the 'restoration_limited' forest mask, then
# build the regression marginal value raster from the masked results.
masked_regression_carbon_raster_map = {}
regression_mask_task_list = []
for scenario_id in LULC_SCENARIO_RASTER_PATH_MAP:
    masked_raster_path = os.path.join(
        WORKSPACE_DIR,
        f'regression_carbon_forest_only_{scenario_id}_{bounding_box_str}.tif')
    masked_regression_carbon_raster_map[scenario_id] = masked_raster_path

    # Every scenario is masked with the 'restoration_limited' forest mask
    # rather than with its own scenario mask.
    # NOTE(review): the 'raw' entries look like (base nodata, mask nodata,
    # target nodata) for mult_rasters_op -- confirm against its signature.
    mask_op_input_list = [
        (regression_carbon_scenario_path_map[scenario_id], 1),
        (mask_path_task_map['restoration_limited']['forest_10sec'][0], 1),
        (MULT_BY_COLUMNS_NODATA, 'raw'),
        (MASK_NODATA, 'raw'),
        (MULT_BY_COLUMNS_NODATA, 'raw'),
    ]
    # Keep each task so the marginal value step can declare it as a
    # dependency.
    regression_mask_task_list.append(
        task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=(
                mask_op_input_list, mult_rasters_op, masked_raster_path,
                gdal.GDT_Float32, MULT_BY_COLUMNS_NODATA),
            target_path_list=[masked_raster_path],
            task_name=f'mask out forest only regression {scenario_id}'))

# Marginal value raster for the regression approach, computed by sub_pos_op
# from the masked 'restoration_limited' and 'esa2014' rasters (in that
# argument order).
# NOTE(review): presumably a positive-only difference -- confirm against
# sub_pos_op.
regression_carbon_marginal_value_raster = os.path.join(
    marginal_value_dir, f'marginal_value_regression_{bounding_box_str}.tif')
marginal_value_input_list = [
    (masked_regression_carbon_raster_map['restoration_limited'], 1),
    (masked_regression_carbon_raster_map['esa2014'], 1),
]
task_graph.add_task(
    func=pygeoprocessing.raster_calculator,
    args=(
        marginal_value_input_list, sub_pos_op,
        regression_carbon_marginal_value_raster, gdal.GDT_Float32,
        MULT_BY_COLUMNS_NODATA),
    dependent_task_list=regression_mask_task_list,
    target_path_list=[regression_carbon_marginal_value_raster],
    task_name='make regression marginal value raster')

# No more tasks will be scheduled; close the graph, then join it.
task_graph.close()
task_graph.join()