def aggregate_results(base_aggregate_areas_path, target_vector_path, srs_wkt,
                      aggregations):
    """Aggregate outputs into regions of interest.

    Args:
        base_aggregate_areas_path (str): path to vector of polygon(s) to
            aggregate over. This is the original input.
        target_vector_path (str): path to write out the results. This will be
            a copy of the base vector with added fields, reprojected to the
            target WKT and saved in geopackage format.
        srs_wkt (str): a Well-Known Text representation of the target spatial
            reference. The base vector is reprojected to this spatial
            reference before aggregating the rasters over it.
        aggregations (list[tuple(str,str,str)]): list of tuples describing the
            datasets to aggregate. Each tuple has 3 items. The first is the
            path to a raster to aggregate. The second is the field name for
            this aggregated data in the output vector. The third is either
            'mean' or 'sum' indicating the aggregation to perform.

    Returns:
        None
    """
    pygeoprocessing.reproject_vector(
        base_aggregate_areas_path, srs_wkt, target_vector_path,
        driver_name='GPKG')
    aggregate_vector = gdal.OpenEx(target_vector_path, gdal.GA_Update)
    aggregate_layer = aggregate_vector.GetLayer()

    for raster_path, field_id, aggregation_op in aggregations:
        # aggregate the raster by the vector region(s)
        aggregate_stats = pygeoprocessing.zonal_statistics(
            (raster_path, 1), target_vector_path)

        # set up the field to hold the aggregate data
        aggregate_field = ogr.FieldDefn(field_id, ogr.OFTReal)
        aggregate_field.SetWidth(24)
        aggregate_field.SetPrecision(11)
        aggregate_layer.CreateField(aggregate_field)
        aggregate_layer.ResetReading()

        # save the aggregate data to the field for each feature
        for feature in aggregate_layer:
            feature_id = feature.GetFID()
            if aggregation_op == 'mean':
                pixel_count = aggregate_stats[feature_id]['count']
                try:
                    value = (aggregate_stats[feature_id]['sum'] / pixel_count)
                except ZeroDivisionError:
                    LOGGER.warning(
                        f'Polygon {feature_id} does not overlap '
                        f'{raster_path}')
                    value = 0.0
            elif aggregation_op == 'sum':
                value = aggregate_stats[feature_id]['sum']
            feature.SetField(field_id, float(value))
            aggregate_layer.SetFeature(feature)

    # save the aggregate vector layer and clean up references
    aggregate_layer.SyncToDisk()
    aggregate_layer = None
    gdal.Dataset.__swig_destroy__(aggregate_vector)
    aggregate_vector = None
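# Example call (an illustrative sketch only; the file names below are
# hypothetical and LOGGER/pygeoprocessing/gdal/ogr are assumed to be imported
# at module level as elsewhere in this file). Each tuple in ``aggregations``
# is (raster path, output field name, 'mean' or 'sum'):
#
#     aggregate_results(
#         base_aggregate_areas_path='watersheds.shp',
#         target_vector_path='watersheds_results.gpkg',
#         srs_wkt=target_srs_wkt,
#         aggregations=[
#             ('runoff_retention.tif', 'rr_mean', 'mean'),
#             ('flood_volume.tif', 'flood_vol', 'sum')])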
def test_non_projected_layers(self):
    """HRA: test habitat and stressor layers that are not projected."""
    import natcap.invest.hra

    args = HraRegressionTests.generate_base_args(self.workspace_dir)
    _make_criteria_csv(args['criteria_table_path'], self.workspace_dir)
    _make_aoi_vector(args['aoi_vector_path'])

    # Make projected files and write their filepaths to info csv.
    info_table_path = os.path.join(self.workspace_dir, 'info.csv')
    _make_info_csv(
        info_table_path, self.workspace_dir, projected=True, rel_path=False)

    # create geographic spatial reference
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)
    wgs84_wkt = wgs84_srs.ExportToWkt()

    # move created habitat vector to a sub directory so the reprojected
    # file can be saved where the csv PATH expects it
    tmp_out = os.path.join(self.workspace_dir, 'tmp_move')
    os.mkdir(tmp_out)
    for filename in os.listdir(self.workspace_dir):
        if filename.startswith("habitat_0"):
            shutil.move(
                os.path.join(self.workspace_dir, filename),
                os.path.join(tmp_out, filename))

    habitat_path = os.path.join(tmp_out, 'habitat_0.shp')
    habitat_wgs84_path = os.path.join(self.workspace_dir, 'habitat_0.shp')
    # reproject habitat layer to geographic
    pygeoprocessing.reproject_vector(
        habitat_path, wgs84_wkt, habitat_wgs84_path)

    args['info_table_path'] = info_table_path

    with self.assertRaises(ValueError) as cm:
        natcap.invest.hra.execute(args)

    expected_message = "The following layer does not have a spatial"
    actual_message = str(cm.exception)
    self.assertTrue(expected_message in actual_message, actual_message)
def aggregate_to_polygons(
        base_aggregate_vector_path, target_aggregate_vector_path,
        landcover_raster_projection, crop_to_landcover_table, nutrient_table,
        yield_percentile_headers, output_dir, file_suffix,
        target_aggregate_table_path):
    """Write table with aggregate results of yield and nutrient values.

    Use zonal statistics to summarize total observed and interpolated
    production and nutrient information for each polygon in
    base_aggregate_vector_path.

    Args:
        base_aggregate_vector_path (string): path to polygon vector
        target_aggregate_vector_path (string): path to re-projected copy of
            polygon vector
        landcover_raster_projection (string): a WKT projection string
        crop_to_landcover_table (dict): landcover codes keyed by crop names
        nutrient_table (dict): a lookup of nutrient values by crop in the
            form of nutrient_table[<crop>][<nutrient>].
        yield_percentile_headers (list): list of strings indicating
            percentiles at which yield was calculated.
        output_dir (string): the file path to the output workspace.
        file_suffix (string): string to append to any output filenames.
        target_aggregate_table_path (string): path to 'aggregate_results.csv'
            in the output workspace

    Returns:
        None
    """
    # reproject polygon to LULC's projection
    pygeoprocessing.reproject_vector(
        base_aggregate_vector_path, landcover_raster_projection,
        target_aggregate_vector_path, driver_name='ESRI Shapefile')

    # loop over every crop and query with pgp function
    total_yield_lookup = {}
    total_nutrient_table = collections.defaultdict(
        lambda: collections.defaultdict(lambda: collections.defaultdict(
            float)))
    for crop_name in crop_to_landcover_table:
        # convert 100g to Mg and fraction left over from refuse
        nutrient_factor = 1e4 * (
            1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
        # loop over percentiles
        for yield_percentile_id in yield_percentile_headers:
            percentile_crop_production_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            LOGGER.info(
                "Calculating zonal stats for %s %s", crop_name,
                yield_percentile_id)
            total_yield_lookup['%s_%s' % (
                crop_name, yield_percentile_id)] = (
                    pygeoprocessing.zonal_statistics(
                        (percentile_crop_production_raster_path, 1),
                        target_aggregate_vector_path))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_%s' % (
                        crop_name, yield_percentile_id)]:
                    total_nutrient_table[nutrient_id][yield_percentile_id][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_%s' % (
                                crop_name,
                                yield_percentile_id)][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # process observed
        observed_yield_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
        total_yield_lookup['%s_observed' % crop_name] = (
            pygeoprocessing.zonal_statistics(
                (observed_yield_path, 1),
                target_aggregate_vector_path))
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
            for id_index in total_yield_lookup['%s_observed' % crop_name]:
                total_nutrient_table[nutrient_id]['observed'][id_index] += (
                    nutrient_factor *
                    total_yield_lookup[
                        '%s_observed' % crop_name][id_index]['sum'] *
                    nutrient_table[crop_name][nutrient_id])

    # report everything to a table
    with open(target_aggregate_table_path, 'w') as aggregate_table:
        # write header
        aggregate_table.write('FID,')
        aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
        aggregate_table.write(','.join([
            '%s_%s' % (nutrient_id, model_type)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
            for model_type in sorted(
                list(total_nutrient_table.values())[0])]))
        aggregate_table.write('\n')

        # iterate by polygon index
        for id_index in list(total_yield_lookup.values())[0]:
            aggregate_table.write('%s,' % id_index)
            aggregate_table.write(','.join([
                str(total_yield_lookup[yield_header][id_index]['sum'])
                for yield_header in sorted(total_yield_lookup)]))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for model_type in sorted(
                        list(total_nutrient_table.values())[0]):
                    aggregate_table.write(
                        ',%s' % total_nutrient_table[
                            nutrient_id][model_type][id_index])
            aggregate_table.write('\n')
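# Worked example of the nutrient_factor conversion used above (illustrative
# numbers only): nutrient contents in crop_nutrient.csv are per 100 g of
# edible crop, production is in Mg (1 Mg = 1e6 g = 1e4 * 100 g), and the
# 'Percentrefuse' column removes the inedible fraction.
#
#     percent_refuse = 13.0       # hypothetical value for one crop
#     nutrient_per_100g = 2.5     # hypothetical nutrient content per 100 g
#     production_mg = 120.0       # Mg of crop produced in a polygon
#     nutrient_factor = 1e4 * (1 - percent_refuse / 100)   # -> 8700.0
#     total_nutrient = nutrient_factor * production_mg * nutrient_per_100g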
def execute(args):
    """Crop Production Regression Model.

    This model will take a landcover (crop cover?), N, P, and K map and
    produce modeled yields, and a nutrient table.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_regression_yield_tables/[cropname]_*
              A ValueError is raised if strings don't match.

        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the
            simulation, though it can contain additional crops not used in
            the simulation. The headers must be 'crop_name', 'nitrogen_rate',
            'phosphorous_rate', and 'potassium_rate', where 'crop_name' is
            the name string used to identify crops in the
            'landcover_to_crop_table_path', and rates are in units kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results. If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory. This model expects that the following
            directories are subdirectories of this path

            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)

            Please see the InVEST user's guide chapter on crop production
            for details about how to download these data.

    Returns:
        None.
    """
    LOGGER.info(
        "Calculating total land area and warning if the landcover raster "
        "is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)
    crop_to_fertlization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)
    crop_lucodes = [
        x[_EXPECTED_LUCODE_TABLE_HEADER]
        for x in crop_to_landcover_table.itervalues()]

    unique_lucodes = numpy.array([])
    total_area = 0.0
    for _, lu_band_data in pygeoprocessing.iterblocks(
            args['landcover_raster_path']):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(
            numpy.concatenate((unique_lucodes, unique_block)))
        total_area += numpy.count_nonzero((lu_band_data != _NODATA_YIELD))

    missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes))
    if len(missing_lucodes) > 0:
        LOGGER.warn(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s", crop_climate_bin_raster_path, crop_name,
                args['landcover_to_crop_table_path'])

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories([
        output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.product(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (
                crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(
            crop_climate_bin_raster_path,
            crop_climate_bin_raster_info['pixel_size'],
            clipped_climate_bin_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)
        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path, 'climate_bin', to_lower=True,
            numerical_cast=True, warn_if_missing=False)
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        yield_regression_headers = [
            x for x in crop_regression_table.itervalues().next()
            if x != 'climate_bin']

        clipped_climate_bin_raster_path_info = (
            pygeoprocessing.get_raster_info(
                clipped_climate_bin_raster_path))

        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir,
                    _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN % (
                        crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = dict([
                (bin_id,
                 crop_regression_table[bin_id][yield_regression_id])
                for bin_id in crop_regression_table])
            bin_to_regression_value[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN % (
                    crop_name, yield_regression_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1),
                bin_to_regression_value,
                coarse_regression_parameter_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s parameter to landcover resolution.",
                crop_name, yield_regression_id)
            pygeoprocessing.warp_raster(
                coarse_regression_parameter_raster_path,
                landcover_raster_info['pixel_size'],
                regression_parameter_raster_path_lookup[yield_regression_id],
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

        # the regression model has identical mathematical equations for
        # the nitrogen, phosphorous, and potassium. The only difference is
        # the scalars in the equation. So making a closure below to simplify
        # this coding so I don't repeat the same function 3 times for 3
        # almost identical raster_calculator calls.
        def _x_yield_op_gen(fert_rate):
            """Create a raster calc op given the fertlization rate."""
            def _x_yield_op(y_max, b_x, c_x, lulc_array):
                """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC))"""
                result = numpy.empty(b_x.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = (
                    (b_x != _NODATA_YIELD) & (c_x != _NODATA_YIELD) &
                    (lulc_array == crop_lucode))
                result[valid_mask] = y_max[valid_mask] * (
                    1 - b_x[valid_mask] * numpy.exp(
                        -c_x[valid_mask] * fert_rate) * pixel_area_ha)
                return result
            return _x_yield_op

        LOGGER.info('Calc nitrogen yield')
        nitrogen_yield_raster_path = os.path.join(
            output_dir, _NITROGEN_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_n'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name]['nitrogen_rate']),
            nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc phosphorous yield')
        phosphorous_yield_raster_path = os.path.join(
            output_dir, _PHOSPHOROUS_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_p2o5'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name][
                    'phosphorous_rate']),
            phosphorous_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc potassium yield')
        potassium_yield_raster_path = os.path.join(
            output_dir, _POTASSIUM_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_k2o'], 1),
             (regression_parameter_raster_path_lookup['c_k2o'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name]['potassium_rate']),
            potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc the min of N, K, and P')
        crop_production_raster_path = os.path.join(
            output_dir, _CROP_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))

        def _min_op(y_n, y_p, y_k):
            """Calculate the min of the three inputs and multiply by Ymax."""
            result = numpy.empty(y_n.shape, dtype=numpy.float32)
            result[:] = _NODATA_YIELD
            valid_mask = (
                (y_n != _NODATA_YIELD) & (y_k != _NODATA_YIELD) &
                (y_p != _NODATA_YIELD))
            result[valid_mask] = numpy.min(
                [y_n[valid_mask], y_k[valid_mask], y_p[valid_mask]], axis=0)
            return result

        pygeoprocessing.raster_calculator(
            [(nitrogen_yield_raster_path, 1),
             (phosphorous_yield_raster_path, 1),
             (potassium_yield_raster_path, 1)],
            _min_op, crop_production_raster_path,
            gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(
                global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(
                observed_yield_array.shape, dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)],
            _zero_observed_yield_op, zeroed_observed_yield_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path, 'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (
                observed_yield_array[lulc_mask] * pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)
    nutrient_headers = [
        nutrient_id + '_' + mode
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for mode in ['modeled', 'observed']]
    with open(result_table_path, 'wb') as result_table:
        result_table.write(
            'crop,area (ha),' + 'production_observed,production_modeled,' +
            ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            yield_sum = 0.0
            for _, yield_block in pygeoprocessing.iterblocks(
                    crop_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[_NODATA_YIELD != yield_block])
            production_lookup['modeled'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                total_nutrient = (
                    nutrient_factor *
                    production_lookup['modeled'] *
                    nutrient_table[crop_name][nutrient_id])
                result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (
                        nutrient_factor *
                        production_lookup['observed'] *
                        nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write(
            '\n,total area (both crop and non-crop)\n,%f\n' % (
                total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(
            args['aggregate_polygon_path'],
            landcover_raster_info['projection'],
            target_aggregate_vector_path, layer_index=0,
            driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            LOGGER.info("Calculating zonal stats for %s", crop_name)
            crop_production_raster_path = os.path.join(
                output_dir, _CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            total_yield_lookup['%s_modeled' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (crop_production_raster_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_modeled' % crop_name]:
                    total_nutrient_table[
                        nutrient_id]['modeled'][id_index] += (
                            nutrient_factor *
                            total_yield_lookup[
                                '%s_modeled' % crop_name][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            total_yield_lookup['%s_observed' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_observed' % crop_name]:
                    total_nutrient_table[
                        nutrient_id]['observed'][id_index] += (
                            nutrient_factor *
                            total_yield_lookup[
                                '%s_observed' % crop_name][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'wb') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in sorted(
                    total_nutrient_table.itervalues().next())]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in total_yield_lookup.itervalues().next():
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            total_nutrient_table.itervalues().next()):
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')
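# Illustrative sketch of the limiting-nutrient yield calculation performed by
# _x_yield_op_gen and _min_op above (the parameter values are hypothetical;
# in the model they come from the interpolated regression-parameter rasters
# and the fertilization rate table, and the per-pixel area scaling and nodata
# handling are omitted here):
#
#     import numpy
#
#     y_max = 8.0                                   # yield ceiling, Mg/ha
#     b_nut, c_n, c_p2o5 = 0.9, 0.02, 0.015         # N and P parameters
#     b_k2o, c_k2o = 0.8, 0.01                      # K parameters
#     n_rate, p_rate, k_rate = 120.0, 60.0, 40.0    # fertilization, kg/ha
#
#     y_n = y_max * (1 - b_nut * numpy.exp(-c_n * n_rate))
#     y_p = y_max * (1 - b_nut * numpy.exp(-c_p2o5 * p_rate))
#     y_k = y_max * (1 - b_k2o * numpy.exp(-c_k2o * k_rate))
#     modeled_yield = min(y_n, y_p, y_k)            # limiting nutrient wins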
def _build_affected_vector(
        base_watershed_vector_path, target_wkt, damage_table_path,
        built_infrastructure_vector_path,
        target_watershed_result_vector_path):
    """Construct the affected area vector.

    The ``base_watershed_vector_path`` will be intersected with the
    ``built_infrastructure_vector_path`` to get the affected build area.

    Parameters:
        base_watershed_vector_path (str): path to base watershed vector,
        target_wkt (str): desired target projection.
        damage_table_path (None or str): path to a CSV table containing
            fields 'Type' and 'Damage'. For every value of 'Type' in the
            built_infrastructure_vector there must be a corresponding entry
            in this table. If None, this field is ignored.
        built_infrastructure_vector_path (str): path to infrastructure vector
            containing at least the integer field 'Type'.
        target_watershed_result_vector_path (str): path to desired target
            watershed result vector that will have an additional field
            called 'aff_bld'.

    Returns:
        None.
    """
    if damage_table_path is not None and damage_table_path != '':
        damage_type_map = utils.build_lookup_from_csv(
            damage_table_path, 'type', to_lower=True, warn_if_missing=True)
    else:
        damage_type_map = None

    if os.path.exists(target_watershed_result_vector_path):
        LOGGER.warn(
            '%s exists, removing to make a current one',
            target_watershed_result_vector_path)
        os.remove(target_watershed_result_vector_path)

    pygeoprocessing.reproject_vector(
        base_watershed_vector_path, target_wkt,
        target_watershed_result_vector_path, driver_name='GPKG')

    target_srs = osr.SpatialReference()
    target_srs.ImportFromWkt(target_wkt)

    infrastructure_rtree = rtree.index.Index()
    infrastructure_geometry_list = []
    infrastructure_vector = gdal.OpenEx(
        built_infrastructure_vector_path, gdal.OF_VECTOR)
    infrastructure_layer = infrastructure_vector.GetLayer()
    infrastructure_srs = infrastructure_layer.GetSpatialRef()
    infrastructure_to_target = osr.CoordinateTransformation(
        infrastructure_srs, target_srs)

    infrastructure_layer_defn = infrastructure_layer.GetLayerDefn()
    for field_name in ['type', 'Type', 'TYPE']:
        type_index = infrastructure_layer_defn.GetFieldIndex(field_name)
        if type_index != -1:
            break
    if type_index == -1:
        raise ValueError(
            "Could not find field 'Type' in %s",
            built_infrastructure_vector_path)

    LOGGER.info("building infrastructure lookup dict")
    for infrastructure_feature in infrastructure_layer:
        infrastructure_geom = infrastructure_feature.GetGeometryRef().Clone()
        infrastructure_geom.Transform(infrastructure_to_target)
        infrastructure_geometry_list.append({
            'geom': shapely.wkb.loads(infrastructure_geom.ExportToWkb()),
        })
        if damage_type_map is not None:
            infrastructure_geometry_list[-1]['damage'] = (
                damage_type_map[
                    infrastructure_feature.GetField(type_index)]['damage'])
        infrastructure_rtree.insert(
            len(infrastructure_geometry_list) - 1,
            infrastructure_geometry_list[-1]['geom'].bounds)

    infrastructure_vector = None
    infrastructure_layer = None

    watershed_vector = gdal.OpenEx(
        target_watershed_result_vector_path,
        gdal.OF_VECTOR | gdal.OF_UPDATE)
    watershed_layer = watershed_vector.GetLayer()
    watershed_layer.CreateField(ogr.FieldDefn('aff_bld', ogr.OFTReal))
    watershed_layer.SyncToDisk()

    last_time = time.time()
    for watershed_index, watershed_feature in enumerate(watershed_layer):
        current_time = time.time()
        if current_time - last_time > 5.0:
            LOGGER.info(
                "processing watershed result %.2f%%",
                (100.0 * (watershed_index + 1)) /
                watershed_layer.GetFeatureCount())
            last_time = current_time
        watershed_shapely = shapely.wkb.loads(
            watershed_feature.GetGeometryRef().ExportToWkb())
        watershed_prep_geom = shapely.prepared.prep(watershed_shapely)
        total_damage = 0.0
        for infrastructure_index in infrastructure_rtree.intersection(
                watershed_shapely.bounds):
            infrastructure_geom = infrastructure_geometry_list[
                infrastructure_index]['geom']
            if damage_type_map:
                if watershed_prep_geom.intersects(infrastructure_geom):
                    total_damage += (
                        watershed_shapely.intersection(
                            infrastructure_geom).area *
                        infrastructure_geometry_list[
                            infrastructure_index]['damage'])

        if damage_type_map:
            watershed_feature.SetField('aff_bld', total_damage)
        watershed_layer.SetFeature(watershed_feature)
    watershed_layer.SyncToDisk()
    watershed_layer = None
    watershed_vector = None
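# Minimal standalone sketch of the rtree + shapely prepared-geometry pattern
# used above: the rtree bounding-box query prefilters candidates, and the
# prepared geometry makes the repeated intersects() tests cheap. Toy data
# only; not part of the model.
#
#     import rtree.index
#     import shapely.geometry
#     import shapely.prepared
#
#     buildings = [shapely.geometry.box(0, 0, 1, 1),
#                  shapely.geometry.box(5, 5, 6, 6)]
#     spatial_index = rtree.index.Index()
#     for idx, geom in enumerate(buildings):
#         spatial_index.insert(idx, geom.bounds)
#
#     watershed = shapely.geometry.box(0.5, 0.5, 4, 4)
#     prepared = shapely.prepared.prep(watershed)
#     overlap_area = sum(
#         watershed.intersection(buildings[i]).area
#         for i in spatial_index.intersection(watershed.bounds)
#         if prepared.intersects(buildings[i]))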
def _build_spatial_index(
        base_raster_path, local_model_dir,
        tropical_forest_edge_carbon_model_vector_path,
        target_spatial_index_pickle_path):
    """Build a kd-tree index.

    Build a kd-tree index of the locally projected globally georeferenced
    carbon edge model parameters.

    Args:
        base_raster_path (string): path to a raster that is used to define
            the bounding box and projection of the local model.
        local_model_dir (string): path to a directory where we can write a
            shapefile of the locally projected global data model grid.
            Function will create a file called 'local_carbon_shape.shp' in
            that location and overwrite one if it exists.
        tropical_forest_edge_carbon_model_vector_path (string): a path to an
            OGR shapefile that has the parameters for the global carbon edge
            model. Each georeferenced feature should have fields 'theta1',
            'theta2', 'theta3', and 'method'
        target_spatial_index_pickle_path (string): path to the pickle file
            to store a tuple of:
                scipy.spatial.cKDTree (georeferenced locally projected model
                    points)
                theta_model_parameters (parallel Nx3 array of theta
                    parameters)
                method_model_parameter (parallel N array of model numbers
                    (1..3))

    Returns:
        None
    """
    # Reproject the global model into local coordinate system
    carbon_model_reproject_path = os.path.join(
        local_model_dir, 'local_carbon_shape.shp')
    lulc_projection_wkt = pygeoprocessing.get_raster_info(
        base_raster_path)['projection_wkt']
    pygeoprocessing.reproject_vector(
        tropical_forest_edge_carbon_model_vector_path, lulc_projection_wkt,
        carbon_model_reproject_path)

    model_vector = gdal.OpenEx(carbon_model_reproject_path)
    model_layer = model_vector.GetLayer()

    kd_points = []
    theta_model_parameters = []
    method_model_parameter = []

    # put all the polygons in the kd_tree because it's fast and simple
    for poly_feature in model_layer:
        poly_geom = poly_feature.GetGeometryRef()
        poly_centroid = poly_geom.Centroid()
        # put in row/col order since rasters are row/col indexed
        kd_points.append([poly_centroid.GetY(), poly_centroid.GetX()])

        theta_model_parameters.append([
            poly_feature.GetField(feature_id) for feature_id in
            ['theta1', 'theta2', 'theta3']])
        method_model_parameter.append(poly_feature.GetField('method'))

    method_model_parameter = numpy.array(
        method_model_parameter, dtype=numpy.int32)
    theta_model_parameters = numpy.array(
        theta_model_parameters, dtype=numpy.float32)

    LOGGER.info('Building kd_tree')
    kd_tree = scipy.spatial.cKDTree(kd_points)
    LOGGER.info('Done building kd_tree with %d points', len(kd_points))

    with open(target_spatial_index_pickle_path, 'wb') as picklefile:
        picklefile.write(
            pickle.dumps(
                (kd_tree, theta_model_parameters, method_model_parameter)))
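# Sketch of how a consumer of the pickle built above might load and query it
# (illustrative only; the coordinates and k are hypothetical, and the real
# model performs this lookup inside its carbon-map calculation):
#
#     import pickle
#
#     with open(target_spatial_index_pickle_path, 'rb') as picklefile:
#         kd_tree, theta_model_parameters, method_model_parameter = (
#             pickle.load(picklefile))
#
#     # query nearest model points for a pixel center, given in (y, x) order
#     # to match the row/col convention used when the tree was built
#     distances, indexes = kd_tree.query([[4500000.0, 350000.0]], k=3)
#     nearest_thetas = theta_model_parameters[indexes[0]]
#     nearest_methods = method_model_parameter[indexes[0]]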
def execute(args):
    """Crop Production Percentile Model.

    This model will take a landcover (crop cover?) map and produce yields,
    production, and observed crop yields, a nutrient table, and a clipped
    observed map.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.

        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results. If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory. This model expects that the following
            directories are subdirectories of this path

            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)

            Please see the InVEST user's guide chapter on crop production
            for details about how to download these data.

    Returns:
        None.
    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True,
        numerical_cast=True)
    bad_crop_name_list = []
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            bad_crop_name_list.append(crop_name)
    if len(bad_crop_name_list) > 0:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" % (
                args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories([
        output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.product(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'], wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir, _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (
                crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(
            crop_climate_bin_raster_path,
            crop_climate_bin_raster_info['pixel_size'],
            clipped_climate_bin_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path, 'climate_bin',
            to_lower=True, numerical_cast=True)
        yield_percentile_headers = [
            x for x in crop_climate_percentile_table.itervalues().next()
            if x != 'climate_bin']

        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir,
                _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = dict([
                (bin_id,
                 crop_climate_percentile_table[bin_id][yield_percentile_id])
                for bin_id in crop_climate_percentile_table])
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir,
                _COARSE_YIELD_PERCENTILE_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1),
                bin_to_percentile_yield,
                coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            pygeoprocessing.warp_raster(
                coarse_yield_percentile_raster_path,
                landcover_raster_info['pixel_size'],
                interpolated_yield_percentile_raster_path, 'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

            LOGGER.info(
                "Calculate yield for %s at %s", crop_name,
                yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir,
                _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                    crop_name, yield_percentile_id, file_suffix))

            def _crop_production_op(lulc_array, yield_array):
                """Mask in yields that overlap with `crop_lucode`."""
                result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = lulc_array != landcover_nodata
                lulc_mask = lulc_array == crop_lucode
                result[valid_mask] = 0
                result[lulc_mask] = (
                    yield_array[lulc_mask] * pixel_area_ha)
                return result

            pygeoprocessing.raster_calculator(
                [(args['landcover_raster_path'], 1),
                 (interpolated_yield_percentile_raster_path, 1)],
                _crop_production_op, percentile_crop_production_raster_path,
                gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop, assuming that
        # all the percentile rasters have non-zero production so it's okay to
        # use just one of the percentile rasters
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                percentile_crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(
                global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir, _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path, 'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir, _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(
                observed_yield_array.shape, dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)],
            _zero_observed_yield_op, zeroed_observed_yield_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN % (
                crop_name, file_suffix))

        LOGGER.info(
            "Interpolating observed %s raster to landcover.", crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path, 'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (
                observed_yield_array[lulc_mask] * pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(
        output_dir, 'result_table%s.csv' % file_suffix)
    production_percentile_headers = [
        'production_' + re.match(
            _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for yield_percentile_id in sorted(yield_percentile_headers)]
    nutrient_headers = [
        nutrient_id + '_' + re.match(
            _YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for yield_percentile_id in sorted(yield_percentile_headers) + [
            'yield_observed']]
    with open(result_table_path, 'wb') as result_table:
        result_table.write(
            'crop,area (ha),' + 'production_observed,' +
            ','.join(production_percentile_headers) + ',' +
            ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            for yield_percentile_id in sorted(yield_percentile_headers):
                yield_percentile_raster_path = os.path.join(
                    output_dir,
                    _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                        crop_name, yield_percentile_id, file_suffix))
                yield_sum = 0.0
                for _, yield_block in pygeoprocessing.iterblocks(
                        yield_percentile_raster_path):
                    yield_sum += numpy.sum(
                        yield_block[_NODATA_YIELD != yield_block])
                production_lookup[yield_percentile_id] = yield_sum
                result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for yield_percentile_id in sorted(yield_percentile_headers):
                    total_nutrient = (
                        nutrient_factor *
                        production_lookup[yield_percentile_id] *
                        nutrient_table[crop_name][nutrient_id])
                    result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (
                        nutrient_factor *
                        production_lookup['observed'] *
                        nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write(
            '\n,total area (both crop and non-crop)\n,%f\n' % (
                total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args and
            args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(
            args['aggregate_polygon_path'],
            landcover_raster_info['projection'],
            target_aggregate_vector_path, layer_index=0,
            driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            # loop over percentiles
            for yield_percentile_id in yield_percentile_headers:
                percentile_crop_production_raster_path = os.path.join(
                    output_dir,
                    _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN % (
                        crop_name, yield_percentile_id, file_suffix))
                LOGGER.info(
                    "Calculating zonal stats for %s %s", crop_name,
                    yield_percentile_id)
                total_yield_lookup['%s_%s' % (
                    crop_name, yield_percentile_id)] = (
                        pygeoprocessing.zonal_statistics(
                            (percentile_crop_production_raster_path, 1),
                            target_aggregate_vector_path,
                            str(args['aggregate_polygon_id'])))
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for id_index in total_yield_lookup['%s_%s' % (
                            crop_name, yield_percentile_id)]:
                        total_nutrient_table[nutrient_id][
                            yield_percentile_id][id_index] += (
                                nutrient_factor *
                                total_yield_lookup['%s_%s' % (
                                    crop_name,
                                    yield_percentile_id)][id_index]['sum'] *
                                nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
                    crop_name, file_suffix))
            total_yield_lookup['%s_observed' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_observed' % crop_name]:
                    total_nutrient_table[
                        nutrient_id]['observed'][id_index] += (
                            nutrient_factor *
                            total_yield_lookup[
                                '%s_observed' % crop_name][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'wb') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in sorted(
                    total_nutrient_table.itervalues().next())]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in total_yield_lookup.itervalues().next():
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            total_nutrient_table.itervalues().next()):
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')
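# Roughly the shape of the zonal_statistics result consumed in the
# aggregation loop above (values and crop/field names are made up; depending
# on the pygeoprocessing version the outer dict is keyed by the aggregate
# field id passed as the third argument or by feature FID):
#
#     total_yield_lookup = {
#         'wheat_yield_25th': {
#             0: {'sum': 1523.7, 'count': 812, 'min': 0.0, 'max': 4.1,
#                 'nodata_count': 3},
#             1: {'sum': 980.2, 'count': 640, 'min': 0.0, 'max': 3.8,
#                 'nodata_count': 0},
#         },
#     }
#
# The per-polygon nutrient totals then scale each polygon's 'sum' by the
# nutrient_factor and the crop's per-100 g nutrient content.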