def test_transform_bounding_box(self):
    """PyGeoprocessing: test bounding box transform."""
    import pygeoprocessing

    # Source and target spatial references: the test goes from UTM 10N
    # (EPSG:26910) to WGS84 (EPSG:4326).
    utm10n_srs = osr.SpatialReference()
    utm10n_srs.ImportFromEPSG(26910)
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)

    # Bounding box expressed in the source reference.
    utm_bounding_box = [
        440446.6938076447695494, 4800590.4052893081679940,
        606196.6938076447695494, 5087540.4052893081679940,
    ]
    # Extents the transform is expected to produce in the target reference.
    wgs84_bounding_box = [
        -123.76825632966793, 43.350664712678984,
        -121.63016515055192, 45.941400531740214,
    ]

    transformed_bounding_box = pygeoprocessing.transform_bounding_box(
        utm_bounding_box,
        utm10n_srs.ExportToWkt(),
        wgs84_srs.ExportToWkt(),
        edge_samples=11)

    numpy.testing.assert_array_almost_equal(
        wgs84_bounding_box, transformed_bounding_box)
def check_spatial_overlap(spatial_filepaths_list,
                          different_projections_ok=False):
    """Check that the given spatial files spatially overlap.

    Each file is opened first as a raster and, if that raises
    ``ValueError``, as a vector. Files whose bounding box is infinite in
    every coordinate are logged and left out of the comparison.

    Args:
        spatial_filepaths_list (list): A list of files that can be opened
            with GDAL.  Must be on the local filesystem.
        different_projections_ok=False (bool): Whether it's OK for the input
            spatial files to have different projections.  If ``True``, all
            projections will be converted to WGS84 before overlap is checked.

    Returns:
        A string error message if an error is found (a file without a
        projection, or bounding boxes that do not intersect).  ``None``
        otherwise, including when no bounding box is left to compare.

    """
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)
    wgs84_wkt = wgs84_srs.ExportToWkt()

    bounding_boxes = []
    checked_file_list = []
    for filepath in spatial_filepaths_list:
        try:
            info = pygeoprocessing.get_raster_info(filepath)
        except ValueError:
            # Not readable as a raster; fall back to reading it as a vector.
            info = pygeoprocessing.get_vector_info(filepath)

        if info['projection_wkt'] is None:
            return f'Spatial file {filepath} has no projection'

        if different_projections_ok:
            # Bring every box into a common reference so they are comparable.
            bounding_box = pygeoprocessing.transform_bounding_box(
                info['bounding_box'], info['projection_wkt'], wgs84_wkt)
        else:
            bounding_box = info['bounding_box']

        if all(numpy.isinf(coord) for coord in bounding_box):
            LOGGER.warning(
                'Skipping infinite bounding box for file %s', filepath)
            continue

        bounding_boxes.append(bounding_box)
        checked_file_list.append(filepath)

    # Nothing survived the filtering above (or nothing was passed in), so
    # there is no pair of boxes that could fail to intersect. Returning here
    # also avoids handing an empty list to merge_bounding_box_list, whose
    # only handled failure below is ValueError.
    if not bounding_boxes:
        return None

    try:
        pygeoprocessing.merge_bounding_box_list(bounding_boxes, 'intersection')
    except ValueError as error:
        LOGGER.debug(error)
        formatted_lists = ' | '.join(
            f'{path}: {bbox}'
            for path, bbox in zip(checked_file_list, bounding_boxes))
        return f"Bounding boxes do not intersect: {formatted_lists}"
    return None
def execute(args):
    """Crop Production Percentile.

    This model will take a landcover (crop cover?) map and produce yields,
    production, and observed crop yields, a nutrient table, and a clipped
    observed map.

    All raster work is scheduled on a TaskGraph; the function returns after
    the graph has been closed and joined.

    Args:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.

        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None or '', then skipped)
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path:

            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)

            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None.

    Raises:
        ValueError: if one or more crop names in the landcover-to-crop table
            have no climate bin raster under ``args['model_data_path']``.

    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)

    # Collect every unknown crop first so the error lists all of them.
    bad_crop_name_list = []
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            bad_crop_name_list.append(crop_name)
    if bad_crop_name_list:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" % (
                args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories([
        output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    # numpy.prod rather than the numpy.product alias, which NumPy 2.0 removed.
    # NOTE(review): dividing by 10000 presumes pixel sizes are in meters so
    # the result is hectares -- confirm against the expected input data.
    pixel_area_ha = numpy.prod([
        abs(x) for x in landcover_raster_info['pixel_size']]) / 10000
    landcover_nodata = landcover_raster_info['nodata'][0]
    if landcover_nodata is None:
        LOGGER.warning(
            "%s does not have nodata value defined; "
            "assuming all pixel values are valid",
            args['landcover_raster_path'])

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection_wkt'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    # Initialize a TaskGraph
    work_token_dir = os.path.join(
        output_dir, _INTERMEDIATE_OUTPUT_DIR, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Single process mode.
    task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
    # Every per-crop task is recorded here so the final tabulation and
    # aggregation tasks can depend on all of them.
    dependent_task_list = []

    crop_lucode = None
    observed_yield_nodata = None
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (
                crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        crop_climate_bin_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(crop_climate_bin_raster_path,
                  crop_climate_bin_raster_info['pixel_size'],
                  clipped_climate_bin_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_climate_bin_raster_path],
            task_name='crop_climate_bin')
        dependent_task_list.append(crop_climate_bin_task)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path, 'climate_bin', to_lower=True)
        # The percentile column names are read from the first table row.
        yield_percentile_headers = [
            x for x in list(crop_climate_percentile_table.values())[0]
            if x != 'climate_bin']

        reclassify_error_details = {
            'raster_name': f'{crop_name} Climate Bin',
            'column_name': 'climate_bin',
            'table_name': f'Climate {crop_name} Percentile Yield'}
        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir,
                _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = {
                bin_id: crop_climate_percentile_table[bin_id][
                    yield_percentile_id]
                for bin_id in crop_climate_percentile_table}
            # reclassify nodata to a valid value of 0
            # we're assuming that the crop doesn't exist where there is no data
            # this is more likely than assuming the crop does exist, esp.
            # in the context of the provided climate bins map
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            create_coarse_yield_percentile_task = task_graph.add_task(
                func=utils.reclassify_raster,
                args=((clipped_climate_bin_raster_path, 1),
                      bin_to_percentile_yield,
                      coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                      _NODATA_YIELD, reclassify_error_details),
                target_path_list=[coarse_yield_percentile_raster_path],
                dependent_task_list=[crop_climate_bin_task],
                task_name='create_coarse_yield_percentile_%s_%s' % (
                    crop_name, yield_percentile_id))
            dependent_task_list.append(create_coarse_yield_percentile_task)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            create_interpolated_yield_percentile_task = task_graph.add_task(
                func=pygeoprocessing.warp_raster,
                args=(coarse_yield_percentile_raster_path,
                      landcover_raster_info['pixel_size'],
                      interpolated_yield_percentile_raster_path,
                      'cubicspline'),
                kwargs={
                    'target_projection_wkt': landcover_raster_info[
                        'projection_wkt'],
                    'target_bb': landcover_raster_info['bounding_box']},
                target_path_list=[interpolated_yield_percentile_raster_path],
                dependent_task_list=[create_coarse_yield_percentile_task],
                task_name='create_interpolated_yield_percentile_%s_%s' % (
                    crop_name, yield_percentile_id))
            dependent_task_list.append(
                create_interpolated_yield_percentile_task)

            LOGGER.info(
                "Calculate yield for %s at %s", crop_name,
                yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))

            create_percentile_production_task = task_graph.add_task(
                func=calculate_crop_production,
                args=(args['landcover_raster_path'],
                      interpolated_yield_percentile_raster_path,
                      crop_lucode, pixel_area_ha,
                      percentile_crop_production_raster_path),
                target_path_list=[percentile_crop_production_raster_path],
                dependent_task_list=[
                    create_interpolated_yield_percentile_task],
                task_name='create_percentile_production_%s_%s' % (
                    crop_name, yield_percentile_id))
            dependent_task_list.append(create_percentile_production_task)

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(
                global_observed_yield_raster_path))

        clipped_observed_yield_raster_path = os.path.join(
            output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        clip_global_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(global_observed_yield_raster_path,
                  global_observed_yield_raster_info['pixel_size'],
                  clipped_observed_yield_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_observed_yield_raster_path],
            task_name='clip_global_observed_yield_%s_' % crop_name)
        dependent_task_list.append(clip_global_observed_yield_task)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        nodata_to_zero_for_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(clipped_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw')],
                  _zero_observed_yield_op, zeroed_observed_yield_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[zeroed_observed_yield_raster_path],
            dependent_task_list=[clip_global_observed_yield_task],
            task_name='nodata_to_zero_for_observed_yield_%s_' % crop_name)
        dependent_task_list.append(nodata_to_zero_for_observed_yield_task)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        interpolate_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(zeroed_observed_yield_raster_path,
                  landcover_raster_info['pixel_size'],
                  interpolated_observed_yield_raster_path, 'cubicspline'),
            kwargs={
                'target_projection_wkt': landcover_raster_info[
                    'projection_wkt'],
                'target_bb': landcover_raster_info['bounding_box']},
            target_path_list=[interpolated_observed_yield_raster_path],
            dependent_task_list=[nodata_to_zero_for_observed_yield_task],
            task_name='interpolate_observed_yield_to_lulc_%s' % crop_name)
        dependent_task_list.append(interpolate_observed_yield_task)

        observed_production_raster_path = os.path.join(
            output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))
        calculate_observed_production_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(args['landcover_raster_path'], 1),
                   (interpolated_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw'),
                   (landcover_nodata, 'raw'),
                   (crop_lucode, 'raw'),
                   (pixel_area_ha, 'raw')],
                  _mask_observed_yield_op, observed_production_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[observed_production_raster_path],
            dependent_task_list=[interpolate_observed_yield_task],
            task_name='calculate_observed_production_%s' % crop_name)
        dependent_task_list.append(calculate_observed_production_task)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)

    # NOTE: yield_percentile_headers is whatever the last crop processed
    # above produced; it is passed on as-is to the summary tasks.
    tabulate_results_task = task_graph.add_task(
        func=tabulate_results,
        args=(nutrient_table, yield_percentile_headers,
              crop_to_landcover_table, pixel_area_ha,
              args['landcover_raster_path'], landcover_nodata,
              output_dir, file_suffix, result_table_path),
        target_path_list=[result_table_path],
        dependent_task_list=dependent_task_list,
        task_name='tabulate_results')

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] not in ['', None]):
        LOGGER.info("aggregating result over query polygon")
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        aggregate_results_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        aggregate_results_task = task_graph.add_task(
            func=aggregate_to_polygons,
            args=(args['aggregate_polygon_path'],
                  target_aggregate_vector_path,
                  landcover_raster_info['projection_wkt'],
                  crop_to_landcover_table, nutrient_table,
                  yield_percentile_headers, output_dir, file_suffix,
                  aggregate_results_table_path),
            target_path_list=[target_aggregate_vector_path,
                              aggregate_results_table_path],
            dependent_task_list=dependent_task_list,
            task_name='aggregate_results_to_polygons')

    task_graph.close()
    task_graph.join()
def execute(args):
    """Crop Production Regression Model.

    This model will take a landcover (crop cover?), N, P, and K map and
    produce modeled yields, and a nutrient table.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_regression_yield_tables/[cropname]_*
              A ValueError is raised if strings don't match.
        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the simulation,
            though it can contain additional crops not used in the simulation.
            The headers must be 'crop_name', 'nitrogen_rate',
            'phosphorous_rate', and 'potassium_rate', where 'crop_name' is the
            name string used to identify crops in the
            'landcover_to_crop_table_path', and rates are in units kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None or '', then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.

    Raises:
        ValueError: if a crop in the landcover-to-crop table has no climate
            bin raster under ``args['model_data_path']``.

    """
    LOGGER.info(
        "Calculating total land area and warning if the landcover raster "
        "is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)

    crop_to_fertlization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)

    # .values() (not .itervalues()) so this runs on Python 2 and Python 3.
    crop_lucodes = [
        x[_EXPECTED_LUCODE_TABLE_HEADER]
        for x in crop_to_landcover_table.values()]

    # Gather the set of landcover codes actually present in the raster.
    unique_lucodes = numpy.array([])
    for _, lu_band_data in pygeoprocessing.iterblocks(
            args['landcover_raster_path']):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(numpy.concatenate(
            (unique_lucodes, unique_block)))

    missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes))
    if missing_lucodes:
        LOGGER.warning(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            # Exceptions do not interpolate logging-style arguments, so the
            # message has to be formatted explicitly.
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s" % (
                    crop_climate_bin_raster_path, crop_name,
                    args['landcover_to_crop_table_path']))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories([
        output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    # numpy.prod rather than the numpy.product alias, which NumPy 2.0 removed.
    # NOTE(review): dividing by 10000 presumes pixel sizes are in meters so
    # the result is hectares -- confirm against the expected input data.
    pixel_area_ha = numpy.prod([
        abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (
                crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(
            crop_climate_bin_raster_path,
            crop_climate_bin_raster_info['pixel_size'],
            clipped_climate_bin_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)

        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path, 'climate_bin',
            to_lower=True, numerical_cast=True, warn_if_missing=False)
        # Blank cells in the expected columns are treated as 0.0.
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        # Column names are read from the first row of the table.
        yield_regression_headers = [
            x for x in next(iter(crop_regression_table.values()))
            if x != 'climate_bin']

        clipped_climate_bin_raster_path_info = (
            pygeoprocessing.get_raster_info(
                clipped_climate_bin_raster_path))

        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir,
                    _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN % (
                        crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = {
                bin_id: crop_regression_table[bin_id][yield_regression_id]
                for bin_id in crop_regression_table}
            # Climate-bin nodata maps to a regression value of 0.0.
            bin_to_regression_value[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN % (
                    crop_name, yield_regression_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1), bin_to_regression_value,
                coarse_regression_parameter_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s parameter to landcover resolution.",
                crop_name, yield_regression_id)
            pygeoprocessing.warp_raster(
                coarse_regression_parameter_raster_path,
                landcover_raster_info['pixel_size'],
                regression_parameter_raster_path_lookup[yield_regression_id],
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

        # the regression model has identical mathematical equations for
        # the nitrogen, phosphorous, and potassium.  The only difference is
        # the scalars in the equation.  So making a closure below to simplify
        # this coding so I don't repeat the same function 3 times for 3
        # almost identical raster_calculator calls.
        def _x_yield_op_gen(fert_rate):
            """Create a raster calc op given the fertilization rate."""
            def _x_yield_op(y_max, b_x, c_x, lulc_array):
                """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC))"""
                result = numpy.empty(b_x.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = (
                    (b_x != _NODATA_YIELD) & (c_x != _NODATA_YIELD) &
                    (lulc_array == crop_lucode))
                # NOTE(review): pixel_area_ha multiplies only the
                # b*exp(...) term here, not the whole yield; left as in the
                # original -- confirm this is the intended formula.
                result[valid_mask] = y_max[valid_mask] * (
                    1 - b_x[valid_mask] * numpy.exp(
                        -c_x[valid_mask] * fert_rate) * pixel_area_ha)
                return result
            return _x_yield_op

        LOGGER.info('Calc nitrogen yield')
        nitrogen_yield_raster_path = os.path.join(
            output_dir, _NITROGEN_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_n'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name]['nitrogen_rate']),
            nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc phosphorous yield')
        phosphorous_yield_raster_path = os.path.join(
            output_dir, _PHOSPHOROUS_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_p2o5'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name][
                    'phosphorous_rate']),
            phosphorous_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc potassium yield')
        potassium_yield_raster_path = os.path.join(
            output_dir, _POTASSIUM_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_k2o'], 1),
             (regression_parameter_raster_path_lookup['c_k2o'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name]['potassium_rate']),
            potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc the min of N, K, and P')
        crop_production_raster_path = os.path.join(
            output_dir, _CROP_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))

        def _min_op(y_n, y_p, y_k):
            """Calculate the element-wise min of the three inputs."""
            result = numpy.empty(y_n.shape, dtype=numpy.float32)
            result[:] = _NODATA_YIELD
            valid_mask = (
                (y_n != _NODATA_YIELD) & (y_k != _NODATA_YIELD) &
                (y_p != _NODATA_YIELD))
            result[valid_mask] = (
                numpy.min(
                    [y_n[valid_mask], y_k[valid_mask], y_p[valid_mask]],
                    axis=0))
            return result

        pygeoprocessing.raster_calculator(
            [(nitrogen_yield_raster_path, 1),
             (phosphorous_yield_raster_path, 1),
             (potassium_yield_raster_path, 1)],
            _min_op, crop_production_raster_path,
            gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        # Convert the pixel count accumulated above into hectares.
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(
                global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(
                observed_yield_array.shape, dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)],
            _zero_observed_yield_op, zeroed_observed_yield_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path, 'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (
                observed_yield_array[lulc_mask] * pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)
    nutrient_headers = [
        nutrient_id + '_' + mode
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for mode in ['modeled', 'observed']]
    # Text mode: every row below is written as str, which a binary-mode
    # handle rejects on Python 3.
    with open(result_table_path, 'w') as result_table:
        result_table.write(
            'crop,area (ha),' + 'production_observed,production_modeled,' +
            ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}

            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # Rebuild the modeled-production path for THIS crop; otherwise
            # the path left over from the processing loop above would be
            # summed for every row.
            crop_production_raster_path = os.path.join(
                output_dir, _CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            yield_sum = 0.0
            for _, yield_block in pygeoprocessing.iterblocks(
                    crop_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[_NODATA_YIELD != yield_block])
            production_lookup['modeled'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                total_nutrient = (
                    nutrient_factor *
                    production_lookup['modeled'] *
                    nutrient_table[crop_name][nutrient_id])
                result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (
                        nutrient_factor *
                        production_lookup['observed'] *
                        nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        # Count of non-nodata landcover pixels, converted to hectares below.
        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write(
            '\n,total area (both crop and non-crop)\n,%f\n' % (
                total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] not in ['', None]):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(
            args['aggregate_polygon_path'],
            landcover_raster_info['projection'],
            target_aggregate_vector_path, layer_index=0,
            driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        # Indexed as [nutrient_id][model_type][polygon id] -> running total.
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            LOGGER.info("Calculating zonal stats for %s", crop_name)
            crop_production_raster_path = os.path.join(
                output_dir, _CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            total_yield_lookup['%s_modeled' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (crop_production_raster_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_modeled' % crop_name]:
                    total_nutrient_table[nutrient_id]['modeled'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_modeled' % crop_name][
                                id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            total_yield_lookup['%s_observed' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_observed' % crop_name]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_observed' % crop_name][
                                id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            # next(iter(d.values())) replaces d.itervalues().next() so the
            # "first entry" lookups work on both Python 2 and Python 3.
            aggregate_table.write(
                ','.join([
                    '%s_%s' % (nutrient_id, model_type)
                    for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                    for model_type in sorted(
                        next(iter(total_nutrient_table.values())))]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')
def execute(args):
    """Crop Production Percentile Model.

    This model will take a landcover (crop cover?) map and produce yields,
    production, and observed crop yields, a nutrient table, and a clipped
    observed map.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.

    Raises:
        ValueError: if any crop name in
            ``args['landcover_to_crop_table_path']`` has no matching climate
            bin raster under ``args['model_data_path']``.

    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)
    # Fail early, and report every unknown crop at once.
    bad_crop_name_list = [
        crop_name for crop_name in crop_to_landcover_table
        if not os.path.exists(os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name))]
    if bad_crop_name_list:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" % (
                args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories(
        [output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    # NOTE(review): other code in this file reads 'projection_wkt' from
    # get_raster_info while this function reads 'projection'; confirm which
    # key the installed pygeoprocessing version provides.
    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    # numpy.prod replaces the numpy.product alias removed in NumPy 2.0.
    # Dividing by 10000 presumably converts square meters to hectares, which
    # assumes the raster's linear unit is meters -- TODO confirm.
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)

    # The raster operations below are closures over ``crop_lucode`` and
    # ``observed_yield_nodata``.  They are defined once here and read the
    # current loop values when ``raster_calculator`` invokes them.
    def _crop_production_op(lulc_array, yield_array):
        """Mask in yields that overlap with `crop_lucode`."""
        result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
        result[:] = _NODATA_YIELD
        valid_mask = lulc_array != landcover_nodata
        lulc_mask = lulc_array == crop_lucode
        result[valid_mask] = 0
        result[lulc_mask] = yield_array[lulc_mask] * pixel_area_ha
        return result

    def _zero_observed_yield_op(observed_yield_array):
        """Calculate observed 'actual' yield."""
        result = numpy.empty(
            observed_yield_array.shape, dtype=numpy.float32)
        result[:] = 0.0
        valid_mask = observed_yield_array != observed_yield_nodata
        result[valid_mask] = observed_yield_array[valid_mask]
        return result

    def _mask_observed_yield(lulc_array, observed_yield_array):
        """Mask total observed yield to crop lulc type."""
        result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
        result[:] = observed_yield_nodata
        valid_mask = lulc_array != landcover_nodata
        lulc_mask = lulc_array == crop_lucode
        result[valid_mask] = 0
        result[lulc_mask] = observed_yield_array[lulc_mask] * pixel_area_ha
        return result

    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (
                crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(
            crop_climate_bin_raster_path,
            crop_climate_bin_raster_info['pixel_size'],
            clipped_climate_bin_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path, 'climate_bin',
            to_lower=True, numerical_cast=True)
        # Every row shares the same columns, so the first row gives the
        # percentile headers.  ``dict.itervalues().next()`` is Python 2
        # only; ``next(iter(...))`` is the Python 3 equivalent.
        yield_percentile_headers = [
            x for x in next(iter(crop_climate_percentile_table.values()))
            if x != 'climate_bin']

        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir,
                _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = {
                bin_id:
                    crop_climate_percentile_table[bin_id][yield_percentile_id]
                for bin_id in crop_climate_percentile_table}
            # climate bin nodata maps to zero yield
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1),
                bin_to_percentile_yield,
                coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            pygeoprocessing.warp_raster(
                coarse_yield_percentile_raster_path,
                landcover_raster_info['pixel_size'],
                interpolated_yield_percentile_raster_path, 'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

            LOGGER.info(
                "Calculate yield for %s at %s", crop_name,
                yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))

            pygeoprocessing.raster_calculator(
                [(args['landcover_raster_path'], 1),
                 (interpolated_yield_percentile_raster_path, 1)],
                _crop_production_op, percentile_crop_production_raster_path,
                gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop, assuming that
        # all the percentile rasters have non-zero production so it's okay to
        # use just one of the percentile rasters
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                percentile_crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(
                global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)],
            _zero_observed_yield_op, zeroed_observed_yield_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path, 'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        observed_production_raster_path = os.path.join(
            output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)
    # NOTE: the percentile headers come from the last crop processed above;
    # the report assumes every crop table shares the same percentile columns.
    sorted_percentile_headers = sorted(yield_percentile_headers)
    production_percentile_headers = [
        'production_' + re.match(
            _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for yield_percentile_id in sorted_percentile_headers]
    nutrient_headers = [
        nutrient_id + '_' + re.match(
            _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for yield_percentile_id in (
            sorted_percentile_headers + ['yield_observed'])]
    # Text mode: the table is written as ``str``, which a binary-mode file
    # rejects on Python 3.
    with open(result_table_path, 'w') as result_table:
        result_table.write(
            'crop,area (ha),' + 'production_observed,' +
            ','.join(production_percentile_headers) + ',' +
            ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            for yield_percentile_id in sorted_percentile_headers:
                yield_percentile_raster_path = os.path.join(
                    output_dir,
                    _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                        crop_name, yield_percentile_id, file_suffix))
                yield_sum = 0.0
                for _, yield_block in pygeoprocessing.iterblocks(
                        yield_percentile_raster_path):
                    yield_sum += numpy.sum(
                        yield_block[_NODATA_YIELD != yield_block])
                production_lookup[yield_percentile_id] = yield_sum
                result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for yield_percentile_id in sorted_percentile_headers:
                    total_nutrient = (
                        nutrient_factor *
                        production_lookup[yield_percentile_id] *
                        nutrient_table[crop_name][nutrient_id])
                    result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (
                        nutrient_factor *
                        production_lookup['observed'] *
                        nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write(
            '\n,total area (both crop and non-crop)\n,%f\n' % (
                total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(
            args['aggregate_polygon_path'],
            landcover_raster_info['projection'],
            target_aggregate_vector_path, layer_index=0,
            driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        # nutrient id -> model type (percentile id / 'observed')
        #     -> polygon id -> accumulated nutrient total
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            # loop over percentiles
            for yield_percentile_id in yield_percentile_headers:
                percentile_crop_production_raster_path = os.path.join(
                    output_dir,
                    _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                        crop_name, yield_percentile_id, file_suffix))
                LOGGER.info(
                    "Calculating zonal stats for %s  %s", crop_name,
                    yield_percentile_id)
                percentile_stats = pygeoprocessing.zonal_statistics(
                    (percentile_crop_production_raster_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id']))
                total_yield_lookup[
                    '%s_%s' % (crop_name, yield_percentile_id)] = (
                        percentile_stats)

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for id_index in percentile_stats:
                        total_nutrient_table[nutrient_id][
                            yield_percentile_id][id_index] += (
                                nutrient_factor *
                                percentile_stats[id_index]['sum'] *
                                nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            observed_stats = pygeoprocessing.zonal_statistics(
                (observed_yield_path, 1),
                target_aggregate_vector_path,
                str(args['aggregate_polygon_id']))
            total_yield_lookup['%s_observed' % crop_name] = observed_stats
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in observed_stats:
                    total_nutrient_table[
                        nutrient_id]['observed'][id_index] += (
                            nutrient_factor *
                            observed_stats[id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        sorted_yield_headers = sorted(total_yield_lookup)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted_yield_headers) + ',')
            # Every nutrient carries the same model types, so the first
            # entry is representative.
            model_type_list = sorted(
                next(iter(total_nutrient_table.values())))
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in model_type_list]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted_yield_headers]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in model_type_list:
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')
def _suppress_non_max_boxes(box_score_tuple_list):
    """Drop every box that intersects a strictly higher scoring box.

    Each box is compared against all other candidates, so the result does
    not depend on the order the candidates were collected in.

    Args:
        box_score_tuple_list (list): list of ``(box, score)`` tuples where
            ``box`` is the four values passed to ``shapely.geometry.box``.

    Returns:
        list of surviving boxes, last candidate first.

    """
    geometry_score_list = [
        (shapely.geometry.box(*box), score)
        for box, score in box_score_tuple_list]
    surviving_box_list = []
    # Walk from the end so survivors come out in the same order that
    # popping candidates off the end of the list would produce.
    for (box, score), (geometry, _) in zip(
            reversed(box_score_tuple_list), reversed(geometry_score_list)):
        # A box never suppresses itself because the comparison is strict;
        # equal scoring overlapping boxes are all kept.
        suppressed = any(
            other_score > score and other_geometry.intersects(geometry)
            for other_geometry, other_score in geometry_score_list)
        if not suppressed:
            surviving_box_list.append(box)
    return surviving_box_list


def do_inference_worker(
        model, quad_offset_queue, quad_file_path_queue, inference_lock):
    """Calculate inference on data coming in on the URI_TO_PROCESS_LIST.

    Other notable global variable is QUAD_AVAILBLE_EVENT that's an event for
    waiting for new work that gets set when new work is received.

    For each quad URI the worker downloads the raster, schedules fixed size
    clips of it on ``quad_offset_queue``, runs the model on every prepared
    clip received from ``quad_file_path_queue``, suppresses overlapping
    lower scoring boxes, and stores the surviving boxes as lat/lng bounding
    boxes in ``QUAD_URI_TO_STATUS_MAP[quad_uri]``.

    Args:
        model (keras model): model used for bounding box prediction
        quad_offset_queue (queue): send to queue for quad processing
        quad_file_path_queue (queue): used for receiving quads that need to
            be inferenced.
        inference_lock (threading.Lock): used to ensure one shot of inference
            goes at a time.

    Returns:
        never

    Raises:
        Exception: any error is logged, the quad being processed (if any) is
            marked ``'error'``, ``HEALTHY`` is set to ``False`` and the
            exception is re-raised.

    """
    global HEALTHY
    # Bound before the ``try`` so the error handler can always read them.
    quad_uri = None
    subprocess_result = None
    try:
        wgs84_srs = osr.SpatialReference()
        wgs84_srs.ImportFromEPSG(4326)
        wgs84_wkt = wgs84_srs.ExportToWkt()
        while True:
            QUAD_AVAILBLE_EVENT.wait(5.0)
            if not URI_TO_PROCESS_LIST:
                continue
            start_time = time.time()
            quad_uri = URI_TO_PROCESS_LIST.pop()
            QUAD_URI_TO_STATUS_MAP[quad_uri] = 'processing'
            quad_raster_path = os.path.join(
                WORKSPACE_DIR, os.path.basename(quad_uri))
            LOGGER.info('download %s to %s', quad_uri, quad_raster_path)
            # Argument list rather than a shell string so characters in the
            # URI are never interpreted by a shell.
            subprocess_result = subprocess.run(
                ['/usr/local/gcloud-sdk/google-cloud-sdk/bin/gsutil', 'cp',
                 quad_uri, quad_raster_path],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            quad_info = pygeoprocessing.get_raster_info(quad_raster_path)
            n_cols, n_rows = quad_info['raster_size']
            quad_id = os.path.basename(os.path.splitext(quad_raster_path)[0])
            quad_slice_index = 0
            LOGGER.info('schedule clip of %s', quad_id)
            # NOTE(review): windows that reach the raster edge are shifted
            # back by ``size + 1``, which appears to leave the last row and
            # column unsampled and goes negative for rasters smaller than
            # the training dimensions -- confirm against the queue consumer
            # before changing.
            for xoff in range(0, n_cols, TRAINING_IMAGE_DIMS[0]):
                win_xsize = TRAINING_IMAGE_DIMS[0]
                if xoff + win_xsize >= n_cols:
                    xoff = n_cols - win_xsize - 1
                for yoff in range(0, n_rows, TRAINING_IMAGE_DIMS[1]):
                    win_ysize = TRAINING_IMAGE_DIMS[1]
                    if yoff + win_ysize >= n_rows:
                        yoff = n_rows - win_ysize - 1
                    quad_png_path = os.path.join(
                        WORKSPACE_DIR,
                        '%s_%d.png' % (quad_id, quad_slice_index))
                    quad_slice_index += 1
                    quad_offset_queue.put(
                        (quad_png_path, quad_raster_path,
                         xoff, yoff, win_xsize, win_ysize))

            LOGGER.info('schedule inference of %s', quad_id)
            box_score_tuple_list = []
            with inference_lock:
                # one result is expected for every clip scheduled above
                while quad_slice_index > 0:
                    quad_slice_index -= 1
                    xoff, yoff, scale, image = quad_file_path_queue.get()
                    result = model.predict_on_batch(image)
                    # correct boxes for image scale
                    boxes, scores, labels = result
                    boxes /= scale
                    # convert box to a list from a numpy array and score to
                    # a value from a single element array; box coordinates
                    # are shifted from clip space to quad pixel space
                    box_score_tuple_list.extend([
                        ([box[0] + xoff, box[1] + yoff,
                          box[2] + xoff, box[3] + yoff], score)
                        for box, score in zip(boxes[0], scores[0])
                        if score > 0.3])

            # quad is now processed, it can be removed
            os.remove(quad_raster_path)

            non_max_supression_box_list = _suppress_non_max_boxes(
                box_score_tuple_list)

            lat_lng_bb_list = []
            for bounding_box in non_max_supression_box_list:
                local_coord_bb = []
                # offsets 0 and 2 select the two (x, y) corner pairs
                for offset in [0, 2]:
                    coords = list(gdal.ApplyGeoTransform(
                        quad_info['geotransform'],
                        bounding_box[0 + offset],
                        bounding_box[1 + offset]))
                    local_coord_bb.extend(coords)
                transformed_bb = pygeoprocessing.transform_bounding_box(
                    local_coord_bb, quad_info['projection_wkt'], wgs84_wkt)
                lat_lng_bb_list.append(transformed_bb)
            QUAD_URI_TO_STATUS_MAP[quad_uri] = lat_lng_bb_list
            LOGGER.info(
                'done processing quad %s took %ss',
                quad_raster_path, str(time.time() - start_time))
            # This quad is finished; a later failure must not overwrite its
            # result with 'error'.
            quad_uri = None
            if not URI_TO_PROCESS_LIST:
                QUAD_AVAILBLE_EVENT.clear()
    except Exception:
        LOGGER.exception('error occured on inference worker')
        if subprocess_result:
            LOGGER.error(subprocess_result)
        if quad_uri is not None:
            QUAD_URI_TO_STATUS_MAP[quad_uri] = 'error'
        HEALTHY = False
        raise
def execute(args):
    """Crop Production Regression.

    This model will take a landcover (crop cover?), N, P, and K map and
    produce modeled yields, and a nutrient table.

    Args:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_regression_yield_tables/[cropname]_*
              A ValueError is raised if strings don't match.

        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the
            simulation, though it can contain additional crops not used in
            the simulation.  The headers must be 'crop_name',
            'nitrogen_rate',  'phosphorous_rate', and 'potassium_rate', where
            'crop_name' is the name string used to identify crops in the
            'landcover_to_crop_table_path', and rates are in units kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon vector
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path:

            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)

            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.
        args['n_workers'] (int): (optional) worker count handed to the
            TaskGraph.  When missing, empty or ``None`` the model runs with
            ``-1`` (single process mode).

    Returns:
        None.

    Raises:
        ValueError: if a crop in ``args['landcover_to_crop_table_path']``
            has no climate bin raster under ``args['model_data_path']``.

    """
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories([
        output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    # Initialize a TaskGraph
    work_token_dir = os.path.join(
        output_dir, _INTERMEDIATE_OUTPUT_DIR, '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Single process mode.
    task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
    # Accumulates every task added so far; the later tasks depend on it.
    dependent_task_list = []

    LOGGER.info(
        "Checking if the landcover raster is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)

    crop_to_fertlization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'], 'crop_name', to_lower=True)

    crop_lucodes = [
        x[_EXPECTED_LUCODE_TABLE_HEADER]
        for x in crop_to_landcover_table.values()]

    unique_lucodes = numpy.array([])
    for _, lu_band_data in pygeoprocessing.iterblocks(
            (args['landcover_raster_path'], 1)):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(numpy.concatenate(
            (unique_lucodes, unique_block)))

    missing_lucodes = set(crop_lucodes).difference(
        set(unique_lucodes))
    if missing_lucodes:
        LOGGER.warning(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            # Interpolate here: exceptions do not apply logging-style lazy
            # arguments, so passing them separately leaves the raw "%s"
            # placeholders in the message.
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s" % (
                    crop_climate_bin_raster_path, crop_name,
                    args['landcover_to_crop_table_path']))

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    # numpy.prod replaces the numpy.product alias removed in NumPy 2.0.
    # Dividing by 10000 presumably converts square meters to hectares, which
    # assumes the raster's linear unit is meters -- TODO confirm.
    pixel_area_ha = numpy.prod([
        abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]
    if landcover_nodata is None:
        LOGGER.warning(
            "%s does not have nodata value defined; "
            "assuming all pixel values are valid",
            args['landcover_raster_path'])

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection_wkt'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    # (nutrient name, b-parameter column, c-parameter column, output file
    # pattern) for each of the three limiting-nutrient yield rasters.  The
    # nutrient name also selects the '<name>_rate' fertilization column.
    nutrient_yield_spec_list = [
        ('nitrogen', 'b_nut', 'c_n', _NITROGEN_YIELD_FILE_PATTERN),
        ('phosphorous', 'b_nut', 'c_p2o5', _PHOSPHOROUS_YIELD_FILE_PATTERN),
        ('potassium', 'b_k2o', 'c_k2o', _POTASSIUM_YIELD_FILE_PATTERN),
    ]

    crop_lucode = None
    observed_yield_nodata = None
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (
                crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        crop_climate_bin_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(crop_climate_bin_raster_path,
                  crop_climate_bin_raster_info['pixel_size'],
                  clipped_climate_bin_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_climate_bin_raster_path],
            task_name='crop_climate_bin')
        dependent_task_list.append(crop_climate_bin_task)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)

        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path, 'climate_bin', to_lower=True)
        # blank regression parameters are treated as 0.0
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        yield_regression_headers = [
            x for x in list(crop_regression_table.values())[0]
            if x != 'climate_bin']

        reclassify_error_details = {
            'raster_name': f'{crop_name} Climate Bin',
            'column_name': 'climate_bin',
            'table_name': f'Climate {crop_name} Regression Yield'}
        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir,
                    _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN % (
                        crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = {
                bin_id: crop_regression_table[bin_id][yield_regression_id]
                for bin_id in crop_regression_table}
            # climate bin nodata maps to a zero regression value
            bin_to_regression_value[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN % (
                    crop_name, yield_regression_id, file_suffix))
            create_coarse_regression_parameter_task = task_graph.add_task(
                func=utils.reclassify_raster,
                args=((clipped_climate_bin_raster_path, 1),
                      bin_to_regression_value,
                      coarse_regression_parameter_raster_path,
                      gdal.GDT_Float32, _NODATA_YIELD,
                      reclassify_error_details),
                target_path_list=[coarse_regression_parameter_raster_path],
                dependent_task_list=[crop_climate_bin_task],
                task_name='create_coarse_regression_parameter_%s_%s' % (
                    crop_name, yield_regression_id))
            dependent_task_list.append(
                create_coarse_regression_parameter_task)

            LOGGER.info(
                "Interpolate %s %s parameter to landcover resolution.",
                crop_name, yield_regression_id)
            create_interpolated_parameter_task = task_graph.add_task(
                func=pygeoprocessing.warp_raster,
                args=(coarse_regression_parameter_raster_path,
                      landcover_raster_info['pixel_size'],
                      regression_parameter_raster_path_lookup[
                          yield_regression_id],
                      'cubicspline'),
                kwargs={
                    'target_projection_wkt':
                        landcover_raster_info['projection_wkt'],
                    'target_bb': landcover_raster_info['bounding_box']},
                target_path_list=[
                    regression_parameter_raster_path_lookup[
                        yield_regression_id]],
                dependent_task_list=[
                    create_coarse_regression_parameter_task],
                task_name='create_interpolated_parameter_%s_%s' % (
                    crop_name, yield_regression_id))
            dependent_task_list.append(create_interpolated_parameter_task)

        # One yield raster per nutrient; the three share every argument
        # except the regression parameters and the fertilization rate.
        nutrient_yield_raster_path_list = []
        nutrient_yield_task_list = []
        for (nutrient_name, b_parameter_id, c_parameter_id,
                yield_file_pattern) in nutrient_yield_spec_list:
            LOGGER.info('Calc %s yield', nutrient_name)
            nutrient_yield_raster_path = os.path.join(
                output_dir, yield_file_pattern % (crop_name, file_suffix))
            nutrient_yield_task = task_graph.add_task(
                func=pygeoprocessing.raster_calculator,
                args=([
                    (regression_parameter_raster_path_lookup[
                        'yield_ceiling'], 1),
                    (regression_parameter_raster_path_lookup[
                        b_parameter_id], 1),
                    (regression_parameter_raster_path_lookup[
                        c_parameter_id], 1),
                    (args['landcover_raster_path'], 1),
                    (crop_to_fertlization_rate_table[crop_name][
                        '%s_rate' % nutrient_name], 'raw'),
                    (crop_lucode, 'raw'),
                    (pixel_area_ha, 'raw')],
                    _x_yield_op, nutrient_yield_raster_path,
                    gdal.GDT_Float32, _NODATA_YIELD),
                target_path_list=[nutrient_yield_raster_path],
                dependent_task_list=dependent_task_list,
                task_name='calculate_%s_yield_%s' % (
                    nutrient_name, crop_name))
            nutrient_yield_raster_path_list.append(
                nutrient_yield_raster_path)
            nutrient_yield_task_list.append(nutrient_yield_task)

        dependent_task_list.extend(nutrient_yield_task_list)

        LOGGER.info('Calc the min of N, K, and P')
        crop_production_raster_path = os.path.join(
            output_dir, _CROP_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))

        calc_min_NKP_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(path, 1) for path in nutrient_yield_raster_path_list],
                  _min_op, crop_production_raster_path,
                  gdal.GDT_Float32, _NODATA_YIELD),
            target_path_list=[crop_production_raster_path],
            dependent_task_list=dependent_task_list,
            task_name='calc_min_of_NKP')
        dependent_task_list.append(calc_min_NKP_task)

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(
                global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        clip_global_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(global_observed_yield_raster_path,
                  global_observed_yield_raster_info['pixel_size'],
                  clipped_observed_yield_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_observed_yield_raster_path],
            task_name='clip_global_observed_yield_%s_' % crop_name)
        dependent_task_list.append(clip_global_observed_yield_task)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        nodata_to_zero_for_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(clipped_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw')],
                  _zero_observed_yield_op, zeroed_observed_yield_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[zeroed_observed_yield_raster_path],
            dependent_task_list=[clip_global_observed_yield_task],
            task_name='nodata_to_zero_for_observed_yield_%s_' % crop_name)
        dependent_task_list.append(nodata_to_zero_for_observed_yield_task)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        interpolate_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(zeroed_observed_yield_raster_path,
                  landcover_raster_info['pixel_size'],
                  interpolated_observed_yield_raster_path, 'cubicspline'),
            kwargs={
                'target_projection_wkt':
                    landcover_raster_info['projection_wkt'],
                'target_bb': landcover_raster_info['bounding_box']},
            target_path_list=[interpolated_observed_yield_raster_path],
            dependent_task_list=[nodata_to_zero_for_observed_yield_task],
            task_name='interpolate_observed_yield_to_lulc_%s' % crop_name)
        dependent_task_list.append(interpolate_observed_yield_task)

        observed_production_raster_path = os.path.join(
            output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))
        calculate_observed_production_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(args['landcover_raster_path'], 1),
                   (interpolated_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw'),
                   (landcover_nodata, 'raw'),
                   (crop_lucode, 'raw'),
                   (pixel_area_ha, 'raw')],
                  _mask_observed_yield_op, observed_production_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[observed_production_raster_path],
            dependent_task_list=[interpolate_observed_yield_task],
            task_name='calculate_observed_production_%s' % crop_name)
        dependent_task_list.append(calculate_observed_production_task)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)
    _ = task_graph.add_task(
        func=tabulate_regression_results,
        args=(nutrient_table,
              crop_to_landcover_table, pixel_area_ha,
              args['landcover_raster_path'], landcover_nodata,
              output_dir, file_suffix, result_table_path),
        target_path_list=[result_table_path],
        dependent_task_list=dependent_task_list,
        task_name='tabulate_results')

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] not in ['', None]):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        aggregate_results_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        _ = task_graph.add_task(
            func=aggregate_regression_results_to_polygons,
            args=(args['aggregate_polygon_path'],
                  target_aggregate_vector_path,
                  landcover_raster_info['projection_wkt'],
                  crop_to_landcover_table, nutrient_table,
                  output_dir, file_suffix,
                  aggregate_results_table_path),
            target_path_list=[target_aggregate_vector_path,
                              aggregate_results_table_path],
            dependent_task_list=dependent_task_list,
            task_name='aggregate_results_to_polygons')

    task_graph.close()
    task_graph.join()
def single_run_ndr(watershed_basename, watershed_fid, bucket_uri_prefix,
                   scenario_id, error_queue):
    """Run a single instance of NDR and upload the zipped workspace to S3.

    The watershed feature is reprojected to the SRS returned by
    ``get_utm_epsg_srs``, a DEM VRT is built around the watershed envelope,
    NDR is executed, and the resulting workspace is zipped, copied to S3 and
    removed locally. Finally a HEAD request checks the uploaded URL.

    Args:
        watershed_basename (str): basename (without extension) of the
            watershed shapefile inside the HydroSHEDS ecoshard directory.
        watershed_fid (int): FID of the watershed feature to process.
        bucket_uri_prefix (str): upload prefix; it is parsed with the
            pattern ``s3://<bucket>/<subdir>``.
        scenario_id (str): key into ``PATH_MAP`` selecting the LULC and
            fertilizer rasters; also used as a path component of the upload.
        error_queue (Queue): receives the formatted traceback when this
            function fails.

    Returns:
        None.

    Raises:
        Exception: any failure is logged, reported on ``error_queue`` and
            then re-raised.
    """
    try:
        LOGGER.debug('running %s %d', watershed_basename, watershed_fid)

        # create local workspace
        ws_prefix = '%s_%d' % (watershed_basename, watershed_fid)
        local_workspace = os.path.join(WORKSPACE_DIR, ws_prefix)
        try:
            os.makedirs(local_workspace)
        except OSError:
            # best effort: keep going (e.g. the directory already exists)
            LOGGER.exception('unable to create %s', local_workspace)

        # extract the watershed to workspace/data
        watershed_root_path = os.path.join(
            ECOSHARD_DIR,
            'watersheds_globe_HydroSHEDS_15arcseconds_'
            'blake2b_14ac9c77d2076d51b0258fd94d9378d4',
            'watersheds_globe_HydroSHEDS_15arcseconds',
            '%s.shp' % watershed_basename)
        epsg_srs = get_utm_epsg_srs(watershed_root_path, watershed_fid)
        # exported once; reused for the bounding box, reprojection and args
        epsg_wkt = epsg_srs.ExportToWkt()
        local_watershed_path = os.path.join(
            local_workspace, '%s.gpkg' % ws_prefix)

        # the dem is in lat/lng and is also a big set of tiles. Make a
        # VRT which is the bounds of the lat/lng of the watershed and
        # use that as the dem path argument
        watershed_vector = gdal.OpenEx(watershed_root_path, gdal.OF_VECTOR)
        watershed_layer = watershed_vector.GetLayer()
        watershed_feature = watershed_layer.GetFeature(watershed_fid)
        watershed_geom = watershed_feature.GetGeometryRef()
        x1, x2, y1, y2 = watershed_geom.GetEnvelope()
        # release the GDAL objects before doing anything else
        watershed_geom = None
        watershed_feature = None
        watershed_layer = None
        watershed_vector = None

        x_min, x_max = min(x1, x2), max(x1, x2)
        y_min, y_max = min(y1, y2), max(y1, y2)
        watershed_bounding_box = [x_min, y_min, x_max, y_max]

        # pad the VRT bounds by 0.1 units on every side of the envelope
        vrt_options = gdal.BuildVRTOptions(
            outputBounds=(
                x_min - 0.1, y_min - 0.1, x_max + 0.1, y_max + 0.1))
        dem_dir_path = os.path.join(PATH_MAP['dem_path'], 'global_dem_3s')
        dem_vrt_path = os.path.join(
            dem_dir_path,
            '%s_%s_vrt.vrt' % (watershed_basename, watershed_fid))
        gdal.BuildVRT(
            dem_vrt_path, glob.glob(os.path.join(dem_dir_path, '*.tif')),
            options=vrt_options)

        wgs84_sr = osr.SpatialReference()
        wgs84_sr.ImportFromEPSG(4326)
        target_bounding_box = pygeoprocessing.transform_bounding_box(
            watershed_bounding_box, wgs84_sr.ExportToWkt(), epsg_wkt)

        reproject_geometry_to_target(
            watershed_root_path, watershed_fid, epsg_wkt,
            local_watershed_path)

        args = {
            'workspace_dir': local_workspace,
            'dem_path': dem_vrt_path,
            'lulc_path': PATH_MAP[scenario_id]['lulc_path'],
            'runoff_proxy_path': PATH_MAP['precip_path'],
            'ag_load_path': PATH_MAP[scenario_id]['fertilizer_path'],
            'watersheds_path': local_watershed_path,
            'biophysical_table_path': (PATH_MAP['biophysical_table_path']),
            'calc_n': True,
            'calc_p': False,
            'results_suffix': '',
            'threshold_flow_accumulation': (
                GLOBAL_NDR_ARGS['threshold_flow_accumulation']),
            'k_param': GLOBAL_NDR_ARGS['k_param'],
            'n_workers': -1,
            'target_sr_wkt': epsg_wkt,
            'target_pixel_size': TARGET_PIXEL_SIZE,
            'target_bounding_box': target_bounding_box
        }
        inspring.ndr.ndr.execute(args)

        zipfile_path = '%s.zip' % ws_prefix
        LOGGER.debug(
            "zipping %s to %s", args['workspace_dir'], zipfile_path)
        zipdir(args['workspace_dir'], zipfile_path)
        zipfile_s3_uri = (
            "%s/%s/%s" % (
                bucket_uri_prefix, scenario_id,
                os.path.basename(zipfile_path)))
        # Pass the command as an argument list instead of a shell string so
        # paths with spaces or shell metacharacters cannot break (or inject
        # into) the command line.
        subprocess.run(
            ['/usr/local/bin/aws', 's3', 'cp', zipfile_path, zipfile_s3_uri],
            check=True)
        shutil.rmtree(args['workspace_dir'])
        os.remove(dem_vrt_path)

        # strip off the "s3://" part of the uri prefix
        bucket_id, bucket_subdir = re.match(
            's3://([^/]*)/(.*)', bucket_uri_prefix).groups()
        workspace_url = (
            'https://%s.s3-us-west-1.amazonaws.com/'
            '%s/%s/%s' % (
                bucket_id, bucket_subdir, scenario_id,
                os.path.basename(zipfile_path)))
        os.remove(zipfile_path)
        try:
            head_request = requests.head(workspace_url)
            # a Response is falsy when its status code signals an error
            if not head_request:
                raise RuntimeError(
                    "something bad happened when checking if url "
                    "workspace was live: %s %s" % (
                        workspace_url, str(head_request)))
        except (ConnectionError, requests.exceptions.ConnectionError):
            # requests raises its own ConnectionError class, which is not a
            # subclass of the builtin one, so both are listed here.
            LOGGER.exception(
                'a connection error when checking live url '
                'workspace')
            raise
    except Exception:
        LOGGER.exception('something bad happened when running ndr')
        error_queue.put(traceback.format_exc())
        raise
"""Demo some clipping.""" import logging import pygeoprocessing logging.basicConfig( level=logging.DEBUG, format=( '%(asctime)s (%(relativeCreated)d) %(processName)s %(levelname)s ' '%(name)s [%(funcName)s:%(lineno)d] %(message)s')) LOGGER = logging.getLogger(__name__) if __name__ == '__main__': raster_path = '../session2/DEM_md5_53d4998eec75d803a318fafd28c40a3e.tif' aoi_vector_path = './session2/aoi.gpkg' raster_info = pygeoprocessing.get_raster_info(raster_path) vector_info = pygeoprocessing.get_vector_info(aoi_vector_path) raster_projected_bounding_box = pygeoprocessing.transform_bounding_box( vector_info['bounding_box'], vector_info['projection_wkt'], raster_info['projection_wkt']) target_clipped_raster_path = 'DEM_clip.tif' pygeoprocessing.warp_raster( raster_path, raster_info['pixel_size'], target_clipped_raster_path, 'near', target_bb=raster_projected_bounding_box)
def postprocessing_worker(
        postprocessing_queue, country_borders_vector_path, work_database_path,
        grid_done_queue):
    """Consume detection payloads, filter boxes, and record them in the db.

    Runs until the string ``'STOP'`` is read from ``postprocessing_queue``;
    the sentinel is put back on the queue before returning so that other
    consumers of the same queue also see it.

    Args:
        postprocessing_queue (Queue): yields ``'STOP'`` or a tuple
            ``(grid_id, boxes, scores, image_path, xoff, yoff, quad_info)``.
        country_borders_vector_path (str): passed to
            ``get_country_intersection_list`` for each detected box.
        work_database_path (str): database receiving ``detected_dams`` rows.
        grid_done_queue (Queue): receives ``(grid_id, -1)`` once a payload
            has been handled.

    Returns:
        None.
    """
    try:
        while True:
            payload = postprocessing_queue.get()
            if payload == 'STOP':
                postprocessing_queue.put('STOP')
                break
            grid_id, boxes, scores, image_path, xoff, yoff, quad_info = payload

            # convert box to a list from a numpy array and score to a value
            # from a single element array
            candidates = [
                (list(box), score)
                for box, score in zip(boxes[0], scores[0])
                if score > 0.3]

            # a popped box survives unless one of the boxes still waiting in
            # ``candidates`` intersects it with a strictly higher score
            kept_boxes = []
            while candidates:
                box, score = candidates.pop()
                candidate_geom = shapely.geometry.box(*box)
                suppressed = any(
                    shapely.geometry.box(*other_box).intersects(
                        candidate_geom) and other_score > score
                    for other_box, other_score in candidates)
                if not suppressed:
                    kept_boxes.append((box, score))

            if not kept_boxes:
                # no dams detected
                os.remove(image_path)
                grid_done_queue.put((grid_id, -1))
                continue

            # if non_max_supression_box_list:
            #     LOGGER.debug('found %d dams', len(non_max_supression_box_list))
            #     raw_image = read_image_bgr(image_path)
            #     for box, score in non_max_supression_box_list:
            #         detected_box = shapely.geometry.box(*box)
            #         color = (255, 102, 179)
            #         draw_box(raw_image, detected_box.bounds, color, 1)
            #         draw_caption(raw_image, detected_box.bounds, str(score))
            #     cv2.imwrite(image_path, raw_image)
            # else:
            #     # no dams detected
            #     os.remove(image_path)
            #     grid_done_queue.put((grid_id, -1))
            #     continue

            # transform local bbs so they're relative to the png
            geotransform = quad_info['geotransform']
            detected_rows = []
            for bounding_box, score in kept_boxes:
                global_bounding_box = [
                    bounding_box[0] + xoff,
                    bounding_box[1] + yoff,
                    bounding_box[2] + xoff,
                    bounding_box[3] + yoff]

                # convert to lat/lng
                x_a, y_a = gdal.ApplyGeoTransform(
                    geotransform,
                    global_bounding_box[0], global_bounding_box[1])
                x_b, y_b = gdal.ApplyGeoTransform(
                    geotransform,
                    global_bounding_box[2], global_bounding_box[3])
                x_y_bounding_box = [
                    min(x_a, x_b), min(y_a, y_b),
                    max(x_a, x_b), max(y_a, y_b)]

                LOGGER.debug('original bounding box: %s', bounding_box)
                LOGGER.debug('xoff: %s yoff: %s', xoff, yoff)
                LOGGER.debug('global_bounding_box: %s', global_bounding_box)
                LOGGER.debug('xy bounding box: %s', x_y_bounding_box)

                lng_lat_bounding_box = pygeoprocessing.transform_bounding_box(
                    x_y_bounding_box, quad_info['projection'], WGS84_WKT)
                LOGGER.debug('lng_lat_bounding_box: %s', lng_lat_bounding_box)

                # get country intersection list
                countries = get_country_intersection_list(
                    shapely.geometry.box(*lng_lat_bounding_box),
                    country_borders_vector_path)

                detected_rows.append(
                    lng_lat_bounding_box + [
                        float(score), ','.join(countries), image_path])

            # upload .pngs to bucket this is old code but i want to keep it
            # try:
            #     quad_uri = (
            #         'gs://natgeo-dams-data/detected_dam_data/'
            #         'annotated_imagery/%s' % os.path.basename(
            #             image_path))
            #     subprocess.run(
            #         'gsutil mv %s %s'
            #         % (image_path, quad_uri), shell=True,
            #         check=True)
            # except subprocess.CalledProcessError:
            #     LOGGER.warning(
            #         'file might already exist -- not uploading')
            # if os.path.exists(image_path):
            #     os.remove(image_path)

            _execute_sqlite(
                """
                INSERT INTO
                detected_dams
                    (lng_min, lat_min, lng_max, lat_max, probability,
                     country_list, image_uri)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """, work_database_path,
                argument_list=detected_rows, mode='modify', execute='many')
            grid_done_queue.put((grid_id, -1))
            try:
                os.remove(image_path)
            except Exception:
                LOGGER.exception(
                    "couldn't remove %s after postprocessing", image_path)
    except Exception:
        LOGGER.exception('error occured')
        raise
def process_quad(quad_uri, quad_id, dams_database_path):
    """Process quad into bounding box annotated chunks.

    The quad is downloaded, the bounding boxes stored for ``quad_id`` are
    converted to pixel coordinates and merged where they intersect, and the
    quad is walked in ``TRAINING_IMAGE_DIMS`` sized windows. Every window
    intersecting at least one merged box is written as a PNG and one
    annotation record per contained box is stored in the database. The
    downloaded quad is deleted at the end.

    Parameters:
        quad_uri (str): gs:// path to quad to download.
        quad_id (str): ID in the database so work can be updated.
        dams_database_path (str): path to the database that can be
            updated to include the processing state complete and the
            quad processed.

    Returns:
        None.

    Raises:
        ValueError: if a stored bounding box lies entirely outside of the
            quad raster once converted to pixel coordinates.

    """
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1)
    quad_raster_path = os.path.join(
        TRAINING_IMAGERY_DIR, os.path.basename(quad_uri))
    download_quad_task = task_graph.add_task(
        func=copy_from_gs,
        args=(quad_uri, quad_raster_path),
        target_path_list=[quad_raster_path],
        task_name='download %s' % quad_uri)
    download_quad_task.join()
    quad_info = pygeoprocessing.get_raster_info(quad_raster_path)
    n_cols, n_rows = quad_info['raster_size']

    # extract the bounding boxes
    bb_srs = osr.SpatialReference()
    bb_srs.ImportFromEPSG(4326)
    # both values are the same for every bounding box, so compute them once
    bb_wkt = bb_srs.ExportToWkt()
    inv_gt = gdal.InvGeoTransform(quad_info['geotransform'])

    bounding_box_blob_list = _execute_sqlite(
        '''
        SELECT bounding_box
        FROM quad_bounding_box_uri_table
        WHERE quad_id=?
        ''', dams_database_path, argument_list=[quad_id], fetch='all')
    working_dam_bb_list = []  # will be used to collapse duplicates later
    for (bounding_box_blob, ) in bounding_box_blob_list:
        # NOTE(review): pickle.loads can execute arbitrary code; this is
        # only safe while the database contents are trusted.
        bounding_box = pickle.loads(bounding_box_blob)
        LOGGER.debug('%s: %s', quad_uri, bounding_box)
        local_bb = pygeoprocessing.transform_bounding_box(
            bounding_box, bb_wkt, quad_info['projection'],
            edge_samples=11)

        # convert the projected corners to integer pixel coordinates
        ul_i, ul_j = [
            int(x) for x in gdal.ApplyGeoTransform(
                inv_gt, local_bb[0], local_bb[1])]
        lr_i, lr_j = [
            int(x) for x in gdal.ApplyGeoTransform(
                inv_gt, local_bb[2], local_bb[3])]
        ul_i, lr_i = sorted([ul_i, lr_i])
        ul_j, lr_j = sorted([ul_j, lr_j])

        # possible the dam may lie outside of the quad, if so clip to the
        # edge of the quad
        if ul_j < 0:
            ul_j = 0
        if ul_i < 0:
            ul_i = 0
        if lr_i >= n_cols:
            lr_i = n_cols - 1
        if lr_j >= n_rows:
            lr_j = n_rows - 1

        # if < 0.5 ratio, bump up to 0.5 ratio
        bb_xsize = max(1, lr_i - ul_i)
        bb_ysize = max(1, lr_j - ul_j)
        if bb_xsize / bb_ysize < 0.5:
            delta_xsize = max(2, 0.5 * bb_ysize - bb_xsize)
            ul_i -= delta_xsize / 2
            lr_i += delta_xsize / 2
        elif bb_ysize / bb_xsize < 0.5:
            delta_ysize = max(2, 0.5 * bb_xsize - bb_ysize)
            ul_j -= delta_ysize / 2
            lr_j += delta_ysize / 2
        dam_bb = [ul_i, ul_j, lr_i, lr_j]

        # this is a sanity check
        if ul_i >= n_cols or ul_j >= n_rows or lr_i < 0 or lr_j < 0:
            raise ValueError(
                'transformed coordinates outside of raster bounds: '
                'lat/lng: %s\nlocal: %s\nraster_bb: %s\ntransformed: %s' % (
                    bounding_box, local_bb, quad_info['bounding_box'],
                    dam_bb))

        working_dam_bb_list.append(dam_bb)

    # merge every group of intersecting boxes into one and index the
    # bounds of the merged shapes in an r-tree
    bounding_box_rtree = rtree.index.Index()
    index_to_bb_list = []
    while working_dam_bb_list:
        current_bb = shapely.geometry.box(*working_dam_bb_list.pop())
        # walk backwards so deleting an entry does not shift unvisited ones
        for index in range(len(working_dam_bb_list) - 1, -1, -1):
            test_bb = shapely.geometry.box(*working_dam_bb_list[index])
            if current_bb.intersects(test_bb):
                current_bb = current_bb.union(test_bb)
                del working_dam_bb_list[index]
        LOGGER.debug(
            'going to insert this: %s',
            str((len(index_to_bb_list), current_bb.bounds)))
        bounding_box_rtree.insert(len(index_to_bb_list), current_bb.bounds)
        index_to_bb_list.append(current_bb.bounds)

    quad_slice_index = 0
    annotation_string_list = []
    for xoff in range(0, n_cols, TRAINING_IMAGE_DIMS[0]):
        win_xsize = TRAINING_IMAGE_DIMS[0]
        # pull the last window back so it does not run off the raster
        if xoff + win_xsize >= n_cols:
            xoff = n_cols - win_xsize - 1
        for yoff in range(0, n_rows, TRAINING_IMAGE_DIMS[1]):
            win_ysize = TRAINING_IMAGE_DIMS[1]
            if yoff + win_ysize >= n_rows:
                yoff = n_rows - win_ysize - 1

            bb_indexes = list(bounding_box_rtree.intersection(
                (xoff, yoff, xoff + win_xsize, yoff + win_ysize)))

            # only windows that touch at least one merged box are exported
            if bb_indexes:
                LOGGER.debug(
                    'these local bbs at %d %d: %s', xoff, yoff,
                    str(bb_indexes))
                # clip out the png and name after number of bbs per image
                quad_png_path = os.path.join(
                    TRAINING_IMAGERY_DIR, '%d_%s_%d.png' % (
                        len(bb_indexes), quad_id, quad_slice_index))
                quad_slice_index += 1
                try:
                    make_quad_png(
                        quad_raster_path, quad_png_path,
                        xoff, yoff, win_xsize, win_ysize)
                    # transform local bbs so they're relative to the png
                    for bb_index in bb_indexes:
                        base_bb = list(index_to_bb_list[bb_index])
                        # if the centroid is out of bounds, go with the
                        # other quad that contains it
                        bb_xcentroid = (
                            base_bb[0] + (base_bb[2] - base_bb[0]) / 2)
                        bb_ycentroid = (
                            base_bb[1] + (base_bb[3] - base_bb[1]) / 2)
                        if (bb_xcentroid - xoff < 0 or
                                bb_xcentroid - xoff >=
                                TRAINING_IMAGE_DIMS[0] or
                                bb_ycentroid - yoff < 0 or
                                bb_ycentroid - yoff >=
                                TRAINING_IMAGE_DIMS[1]):
                            continue

                        # make sure it's not tiny
                        if base_bb[2] - base_bb[0] < 16:
                            delta = 16 - (base_bb[2] - base_bb[0])
                            base_bb[0] -= delta // 2
                            base_bb[2] += delta // 2
                        if base_bb[3] - base_bb[1] < 16:
                            delta = 16 - (base_bb[3] - base_bb[1])
                            base_bb[1] -= delta // 2
                            base_bb[3] += delta // 2

                        # shift into window coordinates and clamp to it
                        base_bb[0] = max(0, base_bb[0] - xoff)
                        base_bb[1] = max(0, base_bb[1] - yoff)
                        base_bb[2] = min(
                            TRAINING_IMAGE_DIMS[0], base_bb[2] - xoff)
                        base_bb[3] = min(
                            TRAINING_IMAGE_DIMS[1], base_bb[3] - yoff)
                        # one single-element row per annotation, as needed
                        # by the "many" insert below
                        annotation_string_list.append([
                            '%s,%d,%d,%d,%d,dam' % (
                                quad_png_path, base_bb[0], base_bb[1],
                                base_bb[2], base_bb[3])])
                except Exception:
                    # a failed window is skipped; the remaining windows
                    # are still processed
                    LOGGER.exception('skipping %s', quad_raster_path)

    LOGGER.debug(
        'updating annotation table with this: %s',
        str(annotation_string_list))
    _execute_sqlite(
        '''
        INSERT OR REPLACE INTO annotation_table
            (record)
        VALUES (?);
        ''', dams_database_path,
        argument_list=annotation_string_list, execute='many', mode='modify')

    _execute_sqlite(
        '''
        UPDATE quad_processing_status
            SET processed=1
        WHERE quad_id=?
        ''', dams_database_path, argument_list=[quad_id], mode='modify')

    task_graph.join()
    task_graph.close()
    os.remove(quad_raster_path)
def fetch_quad(
        session, quad_database_path, mosaic_id, quad_id, cache_dir,
        to_copy_queue, global_report_queue, grid_id):
    """Fetch quad from planet DB unless the database already lists it.

    Args:
        session (Session): session object to use for authentication
        quad_database_path (str): path to quad database
        mosaic_id (str): Planet mosaic ID to search for
        quad_id (str): Planet quad ID in the given mosaic to fetch
        cache_dir (str): path to directory the quad is downloaded into
        to_copy_queue (Queue): after a download, receives the tuple
            ``(local_quad_path, quad_uri, sqlite_update_variables)`` where
            the last element is ``[quad_id, *lng/lat bounding box,
            file size in bytes, quad_uri]``.
        global_report_queue (Queue): receives ``grid_id`` when the quad is
            already in ``quad_cache_table`` and is therefore not fetched.
        grid_id (str): unique id to report to global report queue if
            quad is predownloaded.

    Returns:
        None.

    Raises:
        Exception: any failure is logged and re-raised.

    """
    try:
        LOGGER.debug(f'fetching {quad_id}')
        cached_count = _execute_sqlite(
            '''
            SELECT count(quad_id)
            FROM quad_cache_table
            WHERE quad_id=?;
            ''', quad_database_path, argument_list=[quad_id], fetch='one')
        LOGGER.debug(f'result of count query {cached_count}')
        if cached_count[0] > 0:
            LOGGER.debug('already fetched %s', quad_id)
            global_report_queue.put(grid_id)
            return

        # ask the basemaps API for the quad's metadata, which carries the
        # download link
        quad_response = session.get(
            f'https://api.planet.com/basemaps/v1/mosaics/'
            f'{mosaic_id}/quads/{quad_id}',
            timeout=REQUEST_TIMEOUT)
        download_url = quad_response.json()['_links']['download']

        local_quad_path = os.path.join(cache_dir, '%s.tif' % quad_id)
        quad_uri = (
            'gs://natgeo-dams-data/cached-planet-quads/%s' %
            os.path.basename(local_quad_path))
        LOGGER.debug(f'download {download_url} to {local_quad_path}')
        ecoshard.download_url(download_url, local_quad_path)

        quad_info = pygeoprocessing.get_raster_info(local_quad_path)
        lng_lat_bb = pygeoprocessing.transform_bounding_box(
            quad_info['bounding_box'], quad_info['projection_wkt'],
            WGS84_WKT)

        update_row = [
            quad_id,
            *lng_lat_bb,
            # file size in bytes
            pathlib.Path(local_quad_path).stat().st_size,
            quad_uri,
        ]
        LOGGER.debug(f'put {quad_uri} to copy from {update_row}')
        to_copy_queue.put((local_quad_path, quad_uri, update_row))
    except Exception:
        LOGGER.exception('error on quad %s', quad_id)
        raise
# Force x/y (lng/lat) axis order on both references so the TransformPoint
# calls below can be given (lng, lat).
base_ref.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
target_ref.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

# Create a coordinate transformation (and its inverse for a round trip)
transformer = osr.CreateCoordinateTransformation(base_ref, target_ref)
back_transformer = osr.CreateCoordinateTransformation(target_ref, base_ref)
trans_x, trans_y, _ = transformer.TransformPoint(args.lng, args.lat)
print(f'({trans_x}, {trans_y})')
back_lng, back_lat, _ = back_transformer.TransformPoint(trans_x, trans_y)
# note the round-tripped point is printed as (lat, lng), unlike the
# (x, y) order used for the forward result above
print(f'({back_lat}, {back_lng})')

world_borders_vector = gdal.OpenEx(
    'TM_WORLD_BORDERS-0.3_simplified_md5_47f2059be8d4016072aa6abe77762021.gpkg',
    gdal.OF_VECTOR)
world_borders_layer = world_borders_vector.GetLayer()
# case-insensitive match of the requested country against the NAME field
for country_feature in world_borders_layer:
    if country_feature.GetField(
            "NAME").lower() == args.country_name.lower():
        country_geometry = country_feature.GetGeometryRef()
        # reorder the envelope from (xmin, xmax, ymin, ymax) to
        # (xmin, ymin, xmax, ymax)
        country_bb = [
            country_geometry.GetEnvelope()[i] for i in [0, 2, 1, 3]
        ]
        print(country_bb)
        # NOTE(review): world_eckert_iv_wkt is defined outside this
        # excerpt; presumably an Eckert IV projection WKT -- confirm.
        transformbb = pygeoprocessing.transform_bounding_box(
            country_bb, osr.SRS_WKT_WGS84_LAT_LONG,
            world_eckert_iv_wkt, edge_samples=11)
        print(f'transformbb: {transformbb}')
def main():
    """Entry point.

    Parses the command line, downloads the rasters listed in
    ``RASTER_ECOSHARD_URL_MAP``, converts the selected population raster to
    a density raster, and then, for every country in the world borders
    vector (optionally limited by ``--countries``), projects and clips the
    friction, population and habitat rasters to Eckert IV and schedules
    ``people_access`` on them. The per-country results are finally stitched
    into two global rasters (raw and normalized access).

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(description='People Travel Coverage')
    parser.add_argument(
        '--population_key', required=True,
        help='population ecoshard key to simulate')
    parser.add_argument(
        '--max_travel_time', required=True, type=float,
        help='travel time in minutes')
    # NOTE(review): --pixel_size_m is required but never read below; the
    # module constant TARGET_CELL_LENGTH_M is used instead. Confirm intent.
    parser.add_argument(
        '--pixel_size_m', required=True, type=float,
        help='pixel size in meters')
    # nargs='+' consumes whitespace separated values, so the help text
    # must not ask for a comma separated list
    parser.add_argument(
        '--countries', type=str, nargs='+',
        help='space separated list of countries to simulate')
    args = parser.parse_args()

    population_key = args.population_key
    max_travel_time = args.max_travel_time

    for dir_path in [WORKSPACE_DIR, CHURN_DIR, ECOSHARD_DIR]:
        os.makedirs(dir_path, exist_ok=True)

    task_graph = taskgraph.TaskGraph(
        CHURN_DIR, multiprocessing.cpu_count() // 4, 5.0)

    # download every input raster/vector, keyed by its ecoshard id
    ecoshard_path_map = {}
    for ecoshard_id, ecoshard_url in RASTER_ECOSHARD_URL_MAP.items():
        ecoshard_path = os.path.join(
            ECOSHARD_DIR, os.path.basename(ecoshard_url))
        _ = task_graph.add_task(
            func=ecoshard.download_url,
            args=(ecoshard_url, ecoshard_path),
            target_path_list=[ecoshard_path],
            task_name=f'fetch {ecoshard_url}')
        ecoshard_path_map[ecoshard_id] = ecoshard_path
    task_graph.join()

    # from here on the population key refers to the density raster
    target_population_density_raster_path = os.path.join(
        CHURN_DIR,
        f'density_{os.path.basename(ecoshard_path_map[population_key])}')
    population_density_task = task_graph.add_task(
        func=create_population_density,
        args=(
            ecoshard_path_map[population_key],
            target_population_density_raster_path),
        target_path_list=[target_population_density_raster_path],
        task_name=f'create population density for {population_key}')
    population_density_task.join()
    ecoshard_path_map[population_key] = target_population_density_raster_path

    world_borders_vector = gdal.OpenEx(
        ecoshard_path_map['world_borders'], gdal.OF_VECTOR)
    world_borders_layer = world_borders_vector.GetLayer()

    area_fid_list = []

    world_eckert_iv_wkt = """PROJCRS["unknown",
    BASEGEOGCRS["GCS_unknown",
        DATUM["World Geodetic System 1984",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]],
            ID["EPSG",6326]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["Degree",0.0174532925199433]]],
    CONVERSION["unnamed",
        METHOD["Eckert IV"],
        PARAMETER["Longitude of natural origin",0,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["False easting",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["(E)",east,
            ORDER[1],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]],
        AXIS["(N)",north,
            ORDER[2],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]]]"""

    for country_feature in world_borders_layer:
        country_name = country_feature.GetField('NAME')
        if country_name in SKIP_THESE_COUNTRIES:
            LOGGER.debug('skipping %s', country_name)
            continue
        LOGGER.debug(country_name)
        country_geom = country_feature.GetGeometryRef()

        # area leads the tuple so sorting the list orders countries by area
        area_fid_list.append((
            country_geom.GetArea(), world_eckert_iv_wkt, country_name,
            country_feature.GetFID()))

    world_borders_layer.ResetReading()

    allowed_country_set = None
    if args.countries is not None:
        allowed_country_set = {name.lower() for name in args.countries}

    people_access_path_list = []
    normalized_people_access_path_list = []

    # largest countries are scheduled first
    for country_index, (
            country_area, target_wkt, country_name, country_fid) in enumerate(
                sorted(area_fid_list, reverse=True)):
        # put the index on there so we can see which one is done first
        if allowed_country_set is not None and (
                country_name.lower() not in allowed_country_set):
            continue
        country_workspace = os.path.join(
            COUNTRY_WORKSPACE_DIR, f'{country_index}_{country_name}')
        os.makedirs(country_workspace, exist_ok=True)
        base_raster_path_list = [
            ecoshard_path_map['friction_surface'],
            ecoshard_path_map[population_key],
            ecoshard_path_map['habitat_mask'],
        ]

        # swizzle so it's xmin, ymin, xmax, ymax
        country_feature = world_borders_layer.GetFeature(country_fid)
        LOGGER.debug(f'country name: {country_feature.GetField("NAME")}')
        country_geometry = country_feature.GetGeometryRef()
        country_bb = [
            country_geometry.GetEnvelope()[i] for i in [0, 2, 1, 3]]

        LOGGER.debug(f'lat/lng country_bb: {country_bb}')
        target_bounding_box = pygeoprocessing.transform_bounding_box(
            country_bb, world_borders_layer.GetSpatialRef().ExportToWkt(),
            target_wkt, edge_samples=11)

        # make sure the bounding coordinates snap to pixel grid: floor the
        # minimum edges and ceil the maximum edges to a multiple of the
        # cell length so the box only ever grows. ``(-x) % cell`` is the
        # distance from ``x`` up to the next multiple of ``cell``.
        target_bounding_box[0] -= (
            target_bounding_box[0] % TARGET_CELL_LENGTH_M)
        target_bounding_box[1] -= (
            target_bounding_box[1] % TARGET_CELL_LENGTH_M)
        target_bounding_box[2] += (
            (-target_bounding_box[2]) % TARGET_CELL_LENGTH_M)
        target_bounding_box[3] += (
            (-target_bounding_box[3]) % TARGET_CELL_LENGTH_M)

        LOGGER.debug(f'projected country_bb: {target_bounding_box}')

        # NOTE: the "sinusoidal" names are historical; the rasters are
        # warped to ``world_eckert_iv_wkt`` below.
        sinusoidal_friction_path = os.path.join(
            country_workspace, f'{country_name}_friction.tif')
        sinusoidal_population_path = os.path.join(
            country_workspace,
            f'{country_name}_population_{population_key}.tif')
        sinusoidal_hab_path = os.path.join(
            country_workspace, f'sinusoidal_{country_name}_hab.tif')
        sinusoidal_raster_path_list = [
            sinusoidal_friction_path, sinusoidal_population_path,
            sinusoidal_hab_path
        ]

        projection_task = task_graph.add_task(
            func=pygeoprocessing.align_and_resize_raster_stack,
            args=(
                base_raster_path_list, sinusoidal_raster_path_list,
                ['near'] * len(base_raster_path_list),
                (TARGET_CELL_LENGTH_M, -TARGET_CELL_LENGTH_M),
                target_bounding_box),
            kwargs={
                'target_projection_wkt': world_eckert_iv_wkt,
                'vector_mask_options': {
                    'mask_vector_path': ecoshard_path_map['world_borders'],
                    'mask_vector_where_filter': f'"fid"={country_fid}'
                }
            },
            target_path_list=sinusoidal_raster_path_list,
            task_name=f'project and clip rasters for {country_name}')

        people_access_path = os.path.join(
            country_workspace,
            f'people_access_{country_name}_{population_key}_{max_travel_time}m.tif'
        )
        normalized_people_access_path = os.path.join(
            country_workspace,
            f'norm_people_access_{country_name}_{max_travel_time}m.tif')

        _ = task_graph.add_task(
            func=people_access,
            args=(
                country_name, sinusoidal_friction_path,
                sinusoidal_population_path, sinusoidal_hab_path,
                max_travel_time, people_access_path,
                normalized_people_access_path),
            target_path_list=[
                people_access_path, normalized_people_access_path
            ],
            dependent_task_list=[projection_task],
            task_name='calculating people access for %s' % country_name)

        people_access_path_list.append((people_access_path, 1))
        normalized_people_access_path_list.append(
            (normalized_people_access_path, 1))

    LOGGER.debug('create target global population layers')
    # warp population layer to target projection
    warped_pop_raster_path = os.path.join(
        WORKSPACE_DIR,
        f"warped_{os.path.basename(ecoshard_path_map[population_key])}")
    _ = task_graph.add_task(
        func=pygeoprocessing.warp_raster,
        args=(
            ecoshard_path_map[population_key],
            (TARGET_CELL_LENGTH_M, -TARGET_CELL_LENGTH_M),
            warped_pop_raster_path, 'near'),
        kwargs={
            'target_projection_wkt': world_eckert_iv_wkt,
            'target_bb': [
                -16921202.923, -8460601.461, 16921797.077, 8461398.539],
            'working_dir': WORKSPACE_DIR
        },
        target_path_list=[warped_pop_raster_path],
        task_name=f'warp {warped_pop_raster_path}')

    # every per-country task must be finished before stitching
    task_graph.close()
    task_graph.join()

    # create access and normalized access paths
    target_people_global_access_path = os.path.join(
        WORKSPACE_DIR,
        f'global_people_access_{population_key}_{max_travel_time}m.tif')
    pygeoprocessing.new_raster_from_base(
        warped_pop_raster_path, target_people_global_access_path,
        gdal.GDT_Float32, [-1])

    target_normalized_people_global_access_path = os.path.join(
        WORKSPACE_DIR,
        f'global_normalized_people_access_{population_key}_{max_travel_time}m.tif'
    )
    pygeoprocessing.new_raster_from_base(
        warped_pop_raster_path, target_normalized_people_global_access_path,
        gdal.GDT_Float32, [-1])

    pygeoprocessing.stitch_rasters(
        people_access_path_list,
        ['near'] * len(people_access_path_list),
        (target_people_global_access_path, 1),
        overlap_algorithm='etch')

    people_global_access_raster = gdal.OpenEx(
        target_people_global_access_path, gdal.OF_RASTER | gdal.GA_Update)
    people_global_access_band = people_global_access_raster.GetRasterBand(1)
    people_global_access_band.ComputeStatistics(0)
    # drop the band and then the dataset so the file handle is released
    people_global_access_band = None
    people_global_access_raster = None

    pygeoprocessing.stitch_rasters(
        normalized_people_access_path_list,
        ['near'] * len(normalized_people_access_path_list),
        (target_normalized_people_global_access_path, 1),
        overlap_algorithm='etch')

    normalized_people_global_access_raster = gdal.OpenEx(
        target_normalized_people_global_access_path,
        gdal.OF_RASTER | gdal.GA_Update)
    normalized_people_global_access_band = (
        normalized_people_global_access_raster.GetRasterBand(1))
    normalized_people_global_access_band.ComputeStatistics(0)
    normalized_people_global_access_band = None
    normalized_people_global_access_raster = None
def process_watershed(
        job_id, watershed_vector_path, watershed_fid, dem_path, hab_path,
        pop_raster_path_list, target_beneficiaries_path_list,
        target_normalized_beneficiaries_path_list,
        target_hab_normalized_beneficiaries_path_list,
        target_stitch_work_queue_list):
    """Calculate downstream beneficiaries for this watershed.

    The DEM and habitat rasters are warped into a UTM zone chosen from the
    watershed centroid and clipped to the watershed feature. Flow direction,
    outlets, flow accumulation and habitat-weighted accumulation are derived
    once; then, for each population raster, three rasters are produced: the
    routed population, the routed population normalized by flow
    accumulation, and the routed population normalized by upstream habitat
    (masked by habitat). Intermediate files are written to the directory of
    the first target beneficiaries path.

    Args:
        job_id (str): unique ID identifying this job; used as the prefix of
            every intermediate file name.
        watershed_vector_path (str): path to watershed vector.
        watershed_fid (int): FID of the watershed feature to process.
        dem_path (str): path to DEM raster.
        hab_path (str): path to habitat mask raster.
        pop_raster_path_list (list): population rasters to route.
        target_beneficiaries_path_list (list): target downstream
            beneficiary rasters, parallel with ``pop_raster_path_list``.
        target_normalized_beneficiaries_path_list (list): target
            normalized downstream beneficiary rasters, parallel with the
            other lists.
        target_hab_normalized_beneficiaries_path_list (list): target
            habitat-normalized downstream beneficiary rasters, parallel
            with the other lists.
        target_stitch_work_queue_list (list): parallel list of 3-element
            queue sequences. Once one population raster is finished,
            ``(target_path, working_dir, job_id)`` is put on element 0 for
            the standard target, element 1 for the normalized target and
            element 2 for the habitat-normalized target.

    Return:
        None.
    """
    working_dir = os.path.dirname(target_beneficiaries_path_list[0])
    os.makedirs(working_dir, exist_ok=True)
    LOGGER.debug(f'create working directory for {job_id} at {working_dir}')

    def _job_path(suffix):
        """Return the path of ``<job_id>_<suffix>`` inside working_dir."""
        return os.path.join(working_dir, f'{job_id}_{suffix}')

    task_graph = taskgraph.TaskGraph(working_dir, -1)

    watershed_info = pygeoprocessing.get_vector_info(watershed_vector_path)

    watershed_vector = gdal.OpenEx(watershed_vector_path, gdal.OF_VECTOR)
    watershed_layer = watershed_vector.GetLayer()
    watershed_feature = watershed_layer.GetFeature(watershed_fid)
    watershed_geom = watershed_feature.GetGeometryRef()
    watershed_centroid = watershed_geom.Centroid()

    # EPSG 326xx / 327xx: hemisphere digit followed by the UTM zone number
    utm_zone = (math.floor((watershed_centroid.GetX() + 180) / 6) % 60) + 1
    hemisphere_digit = 6 if watershed_centroid.GetY() > 0 else 7
    epsg_code = int('32%d%02d' % (hemisphere_digit, utm_zone))
    epsg_sr = osr.SpatialReference()
    epsg_sr.ImportFromEPSG(epsg_code)

    # swizzle the envelope order that by default is xmin/xmax/ymin/ymax
    watershed_envelope = watershed_geom.GetEnvelope()
    lat_lng_watershed_bb = [watershed_envelope[i] for i in [0, 2, 1, 3]]
    target_watershed_bb = pygeoprocessing.transform_bounding_box(
        lat_lng_watershed_bb, watershed_info['projection_wkt'],
        epsg_sr.ExportToWkt())

    # release every GDAL handle before the tasks are scheduled
    watershed_vector = None
    watershed_layer = None
    watershed_feature = None
    watershed_geom = None
    watershed_centroid = None
    watershed_envelope = None

    target_pixel_size = (300, -300)

    # --- per-watershed hydrology, shared by every population raster ---
    warped_dem_raster_path = _job_path('dem.tif')
    warped_habitat_raster_path = _job_path('hab.tif')
    aligned_base_paths = [warped_dem_raster_path, warped_habitat_raster_path]
    align_task = task_graph.add_task(
        func=pygeoprocessing.align_and_resize_raster_stack,
        args=(
            [dem_path, hab_path], aligned_base_paths, ['near', 'mode'],
            target_pixel_size, target_watershed_bb),
        kwargs={
            'target_projection_wkt': epsg_sr.ExportToWkt(),
            'vector_mask_options': {
                'mask_vector_path': watershed_vector_path,
                'mask_vector_where_filter': f'"FID"={watershed_fid}'
            },
        },
        target_path_list=[
            warped_dem_raster_path, warped_habitat_raster_path],
        task_name=(
            f'align and clip and warp dem/hab to {warped_dem_raster_path} '
            f'{warped_habitat_raster_path}'))

    filled_dem_raster_path = _job_path('filled_dem.tif')
    fill_pits_task = task_graph.add_task(
        func=pygeoprocessing.routing.fill_pits,
        args=((warped_dem_raster_path, 1), filled_dem_raster_path),
        kwargs={
            'working_dir': working_dir,
            'max_pixel_fill_count': 1000000
        },
        dependent_task_list=[align_task],
        target_path_list=[filled_dem_raster_path],
        task_name=f'fill dem pits to {filled_dem_raster_path}')

    flow_dir_mfd_raster_path = _job_path('flow_dir_mfd.tif')
    flow_dir_band = (flow_dir_mfd_raster_path, 1)
    flow_dir_mfd_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_dir_mfd,
        args=((filled_dem_raster_path, 1), flow_dir_mfd_raster_path),
        kwargs={'working_dir': working_dir},
        dependent_task_list=[fill_pits_task],
        target_path_list=[flow_dir_mfd_raster_path],
        task_name=f'calc flow dir for {flow_dir_mfd_raster_path}')

    outlet_vector_path = _job_path('outlet_vector.gpkg')
    detect_outlets_task = task_graph.add_task(
        func=pygeoprocessing.routing.detect_outlets,
        args=(flow_dir_band, 'mfd', outlet_vector_path),
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[outlet_vector_path],
        task_name=f'detect outlets {outlet_vector_path}')

    outlet_raster_path = _job_path('outlet_raster.tif')
    outlet_band = (outlet_raster_path, 1)
    create_outlet_raster_task = task_graph.add_task(
        func=_create_outlet_raster,
        args=(
            outlet_vector_path, flow_dir_mfd_raster_path,
            outlet_raster_path),
        dependent_task_list=[detect_outlets_task],
        target_path_list=[outlet_raster_path],
        task_name=f'create outlet raster {outlet_raster_path}')

    flow_accum_mfd_raster_path = _job_path('flow_accum.tif')
    flow_accum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=(flow_dir_band, flow_accum_mfd_raster_path),
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[flow_accum_mfd_raster_path],
        task_name=f'calc upstream flow area for {flow_accum_mfd_raster_path}')

    hab_upstream_area_raster_path = _job_path('hab_upstream.tif')
    hab_upstream_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=(flow_dir_band, hab_upstream_area_raster_path),
        kwargs={'weight_raster_path_band': (warped_habitat_raster_path, 1)},
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[hab_upstream_area_raster_path],
        task_name=(
            f'calc upstream hab area for {hab_upstream_area_raster_path}'))

    # --- one routing pass per population raster ---
    per_population_inputs = zip(
        pop_raster_path_list, target_beneficiaries_path_list,
        target_normalized_beneficiaries_path_list,
        target_hab_normalized_beneficiaries_path_list,
        target_stitch_work_queue_list)
    for (pop_raster_path, target_beneficiaries_path,
            target_normalized_beneficiaries_path,
            target_hab_normalized_beneficiaries_path,
            stitch_queue_tuple) in per_population_inputs:
        LOGGER.debug(f'processing {target_beneficiaries_path} and normalized')

        pop_stem = os.path.basename(os.path.splitext(pop_raster_path)[0])
        aligned_pop_raster_path = _job_path(f'{pop_stem}.tif')
        aligned_base, aligned_ext = os.path.splitext(aligned_pop_raster_path)
        norm_base, norm_ext = os.path.splitext(
            target_normalized_beneficiaries_path)
        hab_norm_base, hab_norm_ext = os.path.splitext(
            target_hab_normalized_beneficiaries_path)

        pop_warp_task = task_graph.add_task(
            func=_warp_and_wgs84_area_scale,
            args=(
                pop_raster_path, warped_dem_raster_path,
                aligned_pop_raster_path, 'near', lat_lng_watershed_bb,
                watershed_vector_path, watershed_fid, working_dir),
            dependent_task_list=[align_task],
            target_path_list=[aligned_pop_raster_path],
            task_name=f'align {aligned_pop_raster_path}')

        # plain routed population
        downstream_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=(flow_dir_band, outlet_band, target_beneficiaries_path),
            kwargs={'weight_raster_path_band': (aligned_pop_raster_path, 1)},
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task, flow_dir_mfd_task
            ],
            target_path_list=[target_beneficiaries_path],
            task_name=(
                'calc downstream beneficiaries for '
                f'{target_beneficiaries_path}'))

        # divide aligned_pop_raster_path by flow accum to get normalized then
        # route it downstream
        pop_normal_by_upstream_raster_path = (
            f'{aligned_base}_norm{aligned_ext}')
        normalize_by_flow_task = task_graph.add_task(
            func=normalize,
            args=(
                aligned_pop_raster_path, flow_accum_mfd_raster_path,
                pop_normal_by_upstream_raster_path),
            dependent_task_list=[flow_accum_task, align_task],
            target_path_list=[pop_normal_by_upstream_raster_path],
            task_name=(
                f'normalized beneficiaries for '
                f'{pop_normal_by_upstream_raster_path}'))

        prescaled_normalized_beneficiaries_path = (
            f'{norm_base}_prescaled{norm_ext}')
        downstream_norm_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=(
                flow_dir_band, outlet_band,
                prescaled_normalized_beneficiaries_path),
            kwargs={
                'weight_raster_path_band':
                    (pop_normal_by_upstream_raster_path, 1)
            },
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task, flow_dir_mfd_task,
                normalize_by_flow_task
            ],
            target_path_list=[prescaled_normalized_beneficiaries_path],
            task_name=(
                'calc downstream normalized beneficiaries for '
                f'{prescaled_normalized_beneficiaries_path}'))

        task_graph.add_task(
            func=rescale_by_base,
            args=(
                aligned_pop_raster_path,
                prescaled_normalized_beneficiaries_path,
                target_normalized_beneficiaries_path),
            target_path_list=[target_normalized_beneficiaries_path],
            dependent_task_list=[downstream_norm_bene_task],
            task_name=f'rescale {target_normalized_beneficiaries_path}')

        # divide aligned_pop_raster_path by hab accum to get normalized by
        # hab then route it downstream
        pop_hab_normal_by_upstream_raster_path = (
            f'{aligned_base}_hab_norm{aligned_ext}')
        normalize_by_hab_task = task_graph.add_task(
            func=normalize,
            args=(
                aligned_pop_raster_path, hab_upstream_area_raster_path,
                pop_hab_normal_by_upstream_raster_path),
            dependent_task_list=[hab_upstream_task, align_task],
            target_path_list=[pop_hab_normal_by_upstream_raster_path],
            task_name=(
                f'normalized beneficiaries for '
                f'{pop_hab_normal_by_upstream_raster_path}'))

        hab_pre_mask_normalized_beneficiaries_path = (
            f'{hab_norm_base}_pre_mask{hab_norm_ext}')
        downstream_norm_hab_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=(
                flow_dir_band, outlet_band,
                hab_pre_mask_normalized_beneficiaries_path),
            kwargs={
                'weight_raster_path_band':
                    (pop_hab_normal_by_upstream_raster_path, 1)
            },
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task, flow_dir_mfd_task,
                normalize_by_hab_task
            ],
            target_path_list=[hab_pre_mask_normalized_beneficiaries_path],
            task_name=(
                'calc downstream normalized beneficiaries for '
                f'{hab_pre_mask_normalized_beneficiaries_path}'))

        # mask this result to the target
        prescaled_hab_normalized_beneficiaries_path = (
            f'{hab_norm_base}_prescaled{hab_norm_ext}')
        mask_downstream_norm_bene_task = task_graph.add_task(
            func=_mask_raster,
            args=(
                hab_pre_mask_normalized_beneficiaries_path,
                warped_habitat_raster_path,
                prescaled_hab_normalized_beneficiaries_path),
            dependent_task_list=[downstream_norm_hab_bene_task, align_task],
            target_path_list=[prescaled_hab_normalized_beneficiaries_path],
            task_name=f'mask {prescaled_hab_normalized_beneficiaries_path}')

        task_graph.add_task(
            func=rescale_by_base,
            args=(
                aligned_pop_raster_path,
                prescaled_hab_normalized_beneficiaries_path,
                target_hab_normalized_beneficiaries_path),
            target_path_list=[target_hab_normalized_beneficiaries_path],
            dependent_task_list=[mask_downstream_norm_bene_task],
            task_name=f'rescale {target_hab_normalized_beneficiaries_path}')

        # wait for this population raster's outputs before announcing them
        task_graph.join()
        finished_targets = (
            target_beneficiaries_path,
            target_normalized_beneficiaries_path,
            target_hab_normalized_beneficiaries_path)
        for queue_index, finished_path in enumerate(finished_targets):
            stitch_queue_tuple[queue_index].put(
                (finished_path, working_dir, job_id))

    task_graph.close()
    task_graph.join()
    task_graph = None
def main():
    """Stitch a set of rasters into one target raster from the command line.

    Parses the command line, collects the rasters named by either
    ``--raster_list`` (paths or glob wildcards) or ``--raster_pattern``
    (a directory to walk plus a filename pattern), creates an empty tiled
    GeoTIFF that covers the union of their bounding boxes in the target
    projection, stitches every raster into it with
    ``pygeoprocessing.stitch_rasters`` and finally builds overviews.

    The target raster takes its datatype and nodata value from the last
    unique raster found.

    Raises:
        ValueError: if neither or both of ``--raster_list`` and
            ``--raster_pattern`` are given.
        RuntimeError: if the search finds no rasters.
    """
    parser = argparse.ArgumentParser(
        description=('Search for matching rasters to stitch into one big '
                     'raster.'))
    parser.add_argument(
        '--target_projection_epsg', required=True,
        help='EPSG code of target projection')
    parser.add_argument(
        '--target_cell_size', required=True,
        help=('A single float indicating the desired square pixel size of '
              'the stitched raster.'))
    parser.add_argument(
        '--resample_method', default='near',
        help=('One of near|bilinear|cubic|cubicspline|lanczos|average|mode|'
              'max|min|med|q1|q3'))
    parser.add_argument(
        '--target_raster_path', required=True,
        help='Path to target raster.')
    parser.add_argument(
        '--raster_list', nargs='+',
        help='List of rasters or wildcards to stitch.')
    parser.add_argument(
        '--raster_pattern', nargs=2,
        help=('Recursive directory search for raster pattern such that '
              'the first argument is the directory to search and the second '
              'is the filename pattern.'))
    parser.add_argument(
        '--overlap_algorithm', default='replace',
        help=('can be one of etch|replace|add, default is replace'))
    parser.add_argument(
        '--_n_limit', type=int,
        help=('limit the number of stitches to this number, default is to '
              'stitch all found rasters'))
    parser.add_argument(
        '--area_weight_m2_to_wgs84', action='store_true',
        help=('if true, rescales values to be proportional to area change '
              'for wgs84 coordinates'))
    args = parser.parse_args()

    # Exactly one search mode must be chosen: comparing truthiness rejects
    # both "neither given" and "both given".
    if bool(args.raster_list) == bool(args.raster_pattern):
        raise ValueError(
            'only one of --raster_list or --raster_pattern must be '
            'specified: \n'
            f'args.raster_list={args.raster_list}\n'
            f'args.raster_pattern={args.raster_pattern}\n')

    LOGGER.info('searching for matching files')
    if args.raster_list:
        candidate_path_iter = (
            raster_path
            for raster_glob in args.raster_list
            for raster_path in glob.glob(raster_glob))
        not_found_message = (
            f'no rasters were found matching {args.raster_list}')
    else:
        base_dir, file_pattern = args.raster_pattern
        LOGGER.info(f'searching {base_dir} for {file_pattern}')
        candidate_path_iter = (
            raster_path
            for walk_info in os.walk(base_dir)
            for raster_path in glob.glob(
                os.path.join(walk_info[0], file_pattern)))
        not_found_message = (
            f'no rasters were found with the pattern "{file_pattern}"')

    # A stop of None (the default for --_n_limit) means "take everything".
    raster_path_list = list(
        itertools.islice(candidate_path_iter, args._n_limit))
    LOGGER.info(f'found {len(raster_path_list)} files that matched')
    if not raster_path_list:
        raise RuntimeError(not_found_message)

    target_projection = osr.SpatialReference()
    target_projection.ImportFromEPSG(int(args.target_projection_epsg))
    target_projection_wkt = target_projection.ExportToWkt()
    cell_size = float(args.target_cell_size)

    LOGGER.info('calculating target bounding box')
    target_bounding_box_list = []
    # Overlapping wildcards can yield the same path more than once; keep the
    # first occurrence only so a raster is never stitched twice.
    unique_raster_path_list = []
    raster_path_set = set()
    for raster_path in raster_path_list:
        if raster_path in raster_path_set:
            LOGGER.warning(f'{raster_path} already scheduled')
            continue
        raster_path_set.add(raster_path)
        unique_raster_path_list.append(raster_path)
        raster_info = pygeoprocessing.get_raster_info(raster_path)
        target_bounding_box_list.append(
            pygeoprocessing.transform_bounding_box(
                raster_info['bounding_box'], raster_info['projection_wkt'],
                target_projection_wkt))

    target_bounding_box = pygeoprocessing.merge_bounding_box_list(
        target_bounding_box_list, 'union')

    # Round the pixel counts up so the target fully covers the union box.
    n_cols = int(math.ceil(
        (target_bounding_box[2] - target_bounding_box[0]) / cell_size))
    n_rows = int(math.ceil(
        (target_bounding_box[3] - target_bounding_box[1]) / cell_size))

    # Origin is the upper-left corner; the negative y size makes rows run
    # from the top of the bounding box downwards.
    geotransform = (
        target_bounding_box[0], cell_size, 0.0,
        target_bounding_box[3], 0.0, -cell_size)

    gtiff_driver = gdal.GetDriverByName('GTiff')
    target_raster = gtiff_driver.Create(
        os.path.join('.', args.target_raster_path), n_cols, n_rows, 1,
        raster_info['datatype'],
        options=(
            'TILED=YES', 'BIGTIFF=YES', 'BLOCKXSIZE=256',
            'BLOCKYSIZE=256', 'COMPRESS=LZW', 'SPARSE_OK=TRUE'))
    target_raster.SetProjection(target_projection_wkt)
    target_raster.SetGeoTransform(geotransform)
    target_band = target_raster.GetRasterBand(1)
    # NOTE(review): the nodata entry is presumably None when the source band
    # defines no nodata value; skip the call then instead of passing None.
    target_nodata = raster_info['nodata'][0]
    if target_nodata is not None:
        target_band.SetNoDataValue(target_nodata)
    # Drop the GDAL references before stitch_rasters reopens the file.
    target_band = None
    target_raster = None

    LOGGER.info('calling stitch_rasters')
    pygeoprocessing.stitch_rasters(
        [(path, 1) for path in unique_raster_path_list],
        [args.resample_method] * len(unique_raster_path_list),
        (args.target_raster_path, 1),
        overlap_algorithm=args.overlap_algorithm,
        area_weight_m2_to_wgs84=args.area_weight_m2_to_wgs84)

    LOGGER.debug('build overviews...')
    ecoshard.build_overviews(args.target_raster_path)
    LOGGER.info('all done')