Code Example #1
File: sdr.py Project: wsj-7416/invest
def _generate_report(watersheds_path, usle_path, sed_export_path,
                     sed_retention_path, watershed_results_sdr_path):
    """Create shapefile with USLE, sed export, and sed retention fields."""
    field_summaries = {
        'usle_tot':
        pygeoprocessing.zonal_statistics((usle_path, 1), watersheds_path),
        'sed_export':
        pygeoprocessing.zonal_statistics((sed_export_path, 1),
                                         watersheds_path),
        'sed_retent':
        pygeoprocessing.zonal_statistics((sed_retention_path, 1),
                                         watersheds_path),
    }

    original_datasource = gdal.OpenEx(watersheds_path, gdal.OF_VECTOR)
    driver = gdal.GetDriverByName('ESRI Shapefile')
    datasource_copy = driver.CreateCopy(watershed_results_sdr_path,
                                        original_datasource)
    layer = datasource_copy.GetLayer()

    for field_name in field_summaries:
        field_def = ogr.FieldDefn(field_name, ogr.OFTReal)
        field_def.SetWidth(24)
        field_def.SetPrecision(11)
        layer.CreateField(field_def)

    layer.ResetReading()
    for feature in layer:
        feature_id = feature.GetFID()
        for field_name in field_summaries:
            feature.SetField(
                field_name,
                float(field_summaries[field_name][feature_id]['sum']))
        layer.SetFeature(feature)
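
For reference, pygeoprocessing.zonal_statistics returns a dictionary indexed by feature FID, one summary per feature. A minimal sketch of inspecting that structure; the paths are hypothetical:

import pygeoprocessing

# Hypothetical inputs: any single-band raster and a polygon vector.
stats = pygeoprocessing.zonal_statistics(('usle.tif', 1), 'watersheds.shp')
for fid, summary in stats.items():
    # Each summary holds 'min', 'max', 'count', 'nodata_count', and 'sum'.
    if summary['count'] > 0:
        print(fid, summary['sum'], summary['sum'] / summary['count'])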
Code Example #2
def average_value_in_aoi(raster_list, aoi_path):
    """Calculate the average value in a list of rasters inside an aoi.

    Args:
        raster_list (list): list of paths to rasters that should be summarized
        aoi_path (string): path to polygon vector defining the aoi. Should have
            a single feature

    Returns:
        average value across pixels within the aoi across rasters in
            raster_list
    """
    running_sum = 0
    running_count = 0
    for path in raster_list:
        zonal_stat_dict = pygeoprocessing.zonal_statistics((path, 1), aoi_path)
        if len(zonal_stat_dict) > 1:
            raise ValueError("Vector path contains >1 feature")
        running_sum = running_sum + zonal_stat_dict[0]['sum']
        running_count = running_count + zonal_stat_dict[0]['count']
    try:
        mean_value = float(running_sum) / running_count
    except ZeroDivisionError:
        mean_value = 'NA'
    return mean_value
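
A hypothetical call, assuming two rasters and a single-feature AOI vector:

mean = average_value_in_aoi(
    ['biomass_2016.tif', 'biomass_2017.tif'], 'aoi.shp')
print(mean)  # a float, or the string 'NA' if no valid pixels were found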
Code Example #3
def zonal_stats_by_objectid(raster_path, band):
    """Calculate zonal stats inside watersheds by OBJECTID.

    Use the pygeoprocessing zonal_statistics() function to calculate zonal
    statistics from the given raster inside watershed features in the shapefile
    _BASIN_SHP_PATH. Re-map the zonal statistics to be indexed by the field
    OBJECTID.  Convert the zonal stats nested dictionary to a pandas dataframe.

    Parameters:
        raster_path (string): path to the base raster to analyze with zonal
            stats
        band (int): band index of the raster to analyze

    Returns:
        data frame where the index is OBJECTID of the watershed layer,
            containing the columns 'min', 'max', 'count', 'nodata_count',
            and 'sum'

    """
    fid_to_objectid = map_FID_to_field(_BASIN_SHP_PATH, "OBJECTID")
    zonal_stats_dict = pygeoprocessing.zonal_statistics(
        (raster_path, band), _BASIN_SHP_PATH)
    objectid_zonal_stats_dict = {
        objectid: zonal_stats_dict[fid] for (fid, objectid) in
        fid_to_objectid.items()
    }
    objectid_df = pandas.DataFrame(objectid_zonal_stats_dict)
    objectid_df_t = objectid_df.transpose()
    objectid_df_t['OBJECTID'] = objectid_df_t.index
    return objectid_df_t
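
map_FID_to_field is not shown on this page; a minimal sketch of what it presumably does, mapping each feature's FID to the value of a named field:

from osgeo import gdal

def map_FID_to_field(vector_path, field_name):
    """Return a dict mapping each feature's FID to its `field_name` value."""
    vector = gdal.OpenEx(vector_path, gdal.OF_VECTOR)
    layer = vector.GetLayer()
    fid_to_field = {
        feature.GetFID(): feature.GetField(field_name) for feature in layer}
    layer = None
    vector = None
    return fid_to_field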
Code Example #4
File: stormwater.py Project: natcap/invest
def aggregate_results(base_aggregate_areas_path, target_vector_path, srs_wkt,
                      aggregations):
    """Aggregate outputs into regions of interest.

    Args:
        base_aggregate_areas_path (str): path to vector of polygon(s) to
            aggregate over. This is the original input.
        target_vector_path (str): path to write out the results. This will be a
            copy of the base vector with added fields, reprojected to the
            target WKT and saved in geopackage format.
        srs_wkt (str): a Well-Known Text representation of the target spatial
            reference. The base vector is reprojected to this spatial reference
            before aggregating the rasters over it.
        aggregations (list[tuple(str,str,str)]): list of tuples describing the
            datasets to aggregate. Each tuple has 3 items. The first is the
            path to a raster to aggregate. The second is the field name for
            this aggregated data in the output vector. The third is either
            'mean' or 'sum' indicating the aggregation to perform.

    Returns:
        None
    """
    pygeoprocessing.reproject_vector(base_aggregate_areas_path, srs_wkt,
                                     target_vector_path, driver_name='GPKG')
    aggregate_vector = gdal.OpenEx(target_vector_path, gdal.GA_Update)
    aggregate_layer = aggregate_vector.GetLayer()

    for raster_path, field_id, aggregation_op in aggregations:
        # aggregate the raster by the vector region(s)
        aggregate_stats = pygeoprocessing.zonal_statistics(
            (raster_path, 1), target_vector_path)

        # set up the field to hold the aggregate data
        aggregate_field = ogr.FieldDefn(field_id, ogr.OFTReal)
        aggregate_field.SetWidth(24)
        aggregate_field.SetPrecision(11)
        aggregate_layer.CreateField(aggregate_field)
        aggregate_layer.ResetReading()

        # save the aggregate data to the field for each feature
        for feature in aggregate_layer:
            feature_id = feature.GetFID()
            if aggregation_op == 'mean':
                pixel_count = aggregate_stats[feature_id]['count']
                try:
                    value = (aggregate_stats[feature_id]['sum'] / pixel_count)
                except ZeroDivisionError:
                    LOGGER.warning(
                        f'Polygon {feature_id} does not overlap {raster_path}')
                    value = 0.0
            elif aggregation_op == 'sum':
                value = aggregate_stats[feature_id]['sum']
            feature.SetField(field_id, float(value))
            aggregate_layer.SetFeature(feature)

    # save the aggregate vector layer and clean up references
    aggregate_layer.SyncToDisk()
    aggregate_layer = None
    gdal.Dataset.__swig_destroy__(aggregate_vector)
    aggregate_vector = None
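
A hypothetical invocation; the paths, field names, and srs_wkt value are illustrative:

aggregate_results(
    'watersheds.gpkg', 'aggregate_data.gpkg', srs_wkt,
    [('retention_volume.tif', 'mean_ret', 'mean'),
     ('runoff_volume.tif', 'total_run', 'sum')])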
Code Example #5
def normalize(value_raster_path, target_path, aoi_path, weight):
    """Calculate a normalized raster according to values in boundary.

    Args:
        value_raster_path (string): path to raster containing values that
            should be normalized
        target_path (string): path to location where normalized raster should
            be created
        aoi_path (string): path to polygon vector defining the boundary
            within which the zonal min and max are calculated
        weight (float or int): weight by which normalized values are
            multiplied

    Side effects:
        creates or modifies a raster at the location of ``target_path``

    Returns:
        None

    """
    def normalize_op(raster, min_value, max_value, weight_value):
        """Normalize values inside a raster.

        Calculate normalized values that lie between 0 and 1 according to
        (value - min) / (max - min). Then, multiply these values by 100 and
        return an array of normalized integer values lying between 0 and 100.
        Then, multiply these by `weight` in case the normalized version should
        count for more than 1 relative to other normalized inputs.

        Args:
            raster (numpy.ndarray): values to be normalized
            min_value (float or int): minimum value in the raster
            max_value (float or int): maximum value in the raster
            weight_value (float or int): weight to apply to normalized values

        Returns:
            array of normalized integer values multiplied by weight

        """
        valid_mask = (~numpy.isclose(raster, raster_nodata))
        float_ar = numpy.empty(raster.shape, dtype=numpy.float32)
        float_ar[:] = _TARGET_NODATA
        float_ar[valid_mask] = ((raster[valid_mask] - min_value) /
                                (max_value - min_value))
        result = numpy.empty(raster.shape, dtype=numpy.int16)
        result[:] = _TARGET_NODATA
        result[valid_mask] = float_ar[valid_mask] * 100 * weight_value
        return result

    # calculate zonal max and min with zonal statistics inside boundary aoi
    zonal_stats = pygeoprocessing.zonal_statistics((value_raster_path, 1),
                                                   aoi_path)
    zonal_min = zonal_stats[0]['min']
    zonal_max = zonal_stats[0]['max']

    # calculate normalized raster according to zonal max and min
    raster_nodata = pygeoprocessing.get_raster_info(
        value_raster_path)['nodata'][0]
    pygeoprocessing.raster_calculator([(value_raster_path, 1),
                                       (zonal_min, 'raw'), (zonal_max, 'raw'),
                                       (weight, 'raw')], normalize_op,
                                      target_path, gdal.GDT_Int16,
                                      _TARGET_NODATA)
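
The (zonal_min, 'raw') style tuples tell pygeoprocessing.raster_calculator to pass those values straight through to normalize_op rather than reading them as raster bands. A hypothetical call, weighting the normalized raster double relative to other inputs:

normalize('threat_index.tif', 'threat_index_norm.tif', 'aoi.shp', 2)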
Code Example #6
def monthly_op(base_data):
    summary_dict = {
        'run_id': [],
        'year': [],
        'month': [],
        'output': [],
        'mean_val': [],
    }
    for run_id in base_data['run_list']:
        run_output_dir = os.path.join(base_data['outer_dir'], run_id,
                                      'output')
        for output_bn in ['standing_biomass', 'diet_sufficiency']:
            for year in base_data['year_list']:
                for month in range(1, 13):
                    raster_path = os.path.join(
                        run_output_dir,
                        '{}_{}_{}.tif'.format(output_bn, year, month))
                    try:
                        zstat_dict = pygeoprocessing.zonal_statistics(
                            (raster_path, 1), base_data['aoi_path'])
                    except ValueError:
                        continue
                    try:
                        mean_val = (float(zstat_dict[0]['sum']) /
                                    zstat_dict[0]['count'])
                    except ZeroDivisionError:
                        mean_val = 'NA'
                    summary_dict['run_id'].append(run_id)
                    summary_dict['year'].append(year)
                    summary_dict['month'].append(month)
                    summary_dict['output'].append(output_bn)
                    summary_dict['mean_val'].append(mean_val)
        # for debug_bn in ['intake', 'emaint']:
        #     for year in base_data['year_list']:
        #         for month in range(1, 13):
        #             raster_path = os.path.join(
        #                 run_output_dir,
        #                 '{}_{}.tif'.format(debug_bn, month))
        #             try:
        #                 zstat_dict = pygeoprocessing.zonal_statistics(
        #                     (raster_path, 1), base_data['aoi_path'])
        #             except ValueError:
        #                 continue
        #             try:
        #                 mean_val = (
        #                     float(zstat_dict[0]['sum']) /
        #                     zstat_dict[0]['count'])
        #             except ZeroDivisionError:
        #                 mean_val = 'NA'
        #             summary_dict['run_id'].append(run_id)
        #             summary_dict['year'].append(year)
        #             summary_dict['month'].append(month)
        #             summary_dict['output'].append(debug_bn)
        #             summary_dict['mean_val'].append(mean_val)
        summary_df = pandas.DataFrame(summary_dict)
        save_as = os.path.join(base_data['summary_output_dir'],
                               'monthly_value_summary.csv')
        summary_df.to_csv(save_as, index=False)
Code Example #7
def _generate_report(
        watersheds_path, usle_path, sed_export_path, sed_retention_path,
        watershed_results_sdr_path):
    """Create shapefile with USLE, sed export, and sed retention fields."""
    field_summaries = {
        'usle_tot': pygeoprocessing.zonal_statistics(
            (usle_path, 1), watersheds_path, 'ws_id'),
        'sed_export': pygeoprocessing.zonal_statistics(
            (sed_export_path, 1), watersheds_path, 'ws_id'),
        'sed_retent': pygeoprocessing.zonal_statistics(
            (sed_retention_path, 1), watersheds_path, 'ws_id'),
        }

    original_datasource = gdal.OpenEx(watersheds_path, gdal.OF_VECTOR)
    # Delete any existing shapefile with the same name and path
    if os.path.isfile(watershed_results_sdr_path):
        os.remove(watershed_results_sdr_path)
    driver = gdal.GetDriverByName('ESRI Shapefile')
    datasource_copy = driver.CreateCopy(
        watershed_results_sdr_path, original_datasource)
    layer = datasource_copy.GetLayer()

    for field_name in field_summaries:
        field_def = ogr.FieldDefn(field_name, ogr.OFTReal)
        field_def.SetWidth(24)
        field_def.SetPrecision(11)
        layer.CreateField(field_def)

    # write the zonal sums into each feature's new fields
    for feature_id in range(layer.GetFeatureCount()):
        feature = layer.GetFeature(feature_id)
        for field_name in field_summaries:
            ws_id = feature.GetFieldAsInteger('ws_id')
            feature.SetField(
                field_name, float(field_summaries[field_name][ws_id]['sum']))
        layer.SetFeature(feature)
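
Note: the three-argument zonal_statistics(..., watersheds_path, 'ws_id') form used here comes from an older pygeoprocessing API that keyed results by the values of the named aggregate field; current releases accept no aggregate field and key results by feature FID, as in Code Example #1.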
Code Example #8
def check_guam_rasters():
    """double check rasters uploaded to CREST."""
    target_raster_dir = "D:/NFWF_PhaseIII/Guam/FOR CREST/for_upload"
    input_path_list = [
        f for f in os.listdir(target_raster_dir) if f.endswith('.tif')]
    for input_bn in input_path_list:
        input_path = os.path.join(target_raster_dir, input_bn)
        raster_info = pygeoprocessing.get_raster_info(input_path)
        pixel_size = raster_info['pixel_size'][0]
        nodata = raster_info['nodata'][0]
        datatype = raster_info['datatype']
        max_val = pygeoprocessing.zonal_statistics(
            (input_path, 1), _BOUNDARY_SHP)[0]['max']
        print("stats for {}:".format(os.path.basename(input_path)))
        print("datatype: {}, pixelsize: {}, nodata: {}, max val: {}".format(
            datatype, pixel_size, nodata, max_val))
Code Example #9
def create_local_buffer_region(raster_to_sample_path, sample_point_lat_lng_wkt,
                               buffer_vector_path):
    """Creates a buffer geometry from a point and samples the raster.

    Parameters:
        raster_to_sample_path (str): path to a raster to sum the values on.
        buffer_vector_path (str): path to a target vector created by this call
            centered on `sample_point_lat_lng_wkt` in a local coordinate system.

    """
    sample_point = ogr.CreateGeometryFromWkt(sample_point_lat_lng_wkt)
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)

    target_srs = osr.SpatialReference()
    target_srs.ImportFromWkt(
        pygeoprocessing.get_raster_info(raster_to_sample_path)['projection'])

    coord_trans = osr.CoordinateTransformation(wgs84_srs, target_srs)
    sample_point.Transform(coord_trans)
    buffer_geom = sample_point.Buffer(10000)

    target_driver = gdal.GetDriverByName('GPKG')
    target_vector = target_driver.Create(buffer_vector_path, 0, 0, 0,
                                         gdal.GDT_Unknown)
    layer_name = os.path.splitext(os.path.basename(buffer_vector_path))[0]
    target_layer = target_vector.CreateLayer(layer_name, target_srs,
                                             ogr.wkbPolygon)
    target_layer.CreateField(ogr.FieldDefn('sum', ogr.OFTReal))
    feature_defn = target_layer.GetLayerDefn()
    buffer_feature = ogr.Feature(feature_defn)
    buffer_feature.SetGeometry(buffer_geom)
    target_layer.CreateFeature(buffer_feature)
    target_layer.SyncToDisk()

    buffer_stats = pygeoprocessing.zonal_statistics(
        (raster_to_sample_path, 1),
        buffer_vector_path,
        polygons_might_overlap=False,
        working_dir=CHURN_DIR)
    LOGGER.debug(buffer_stats)
    # GeoPackage FIDs start at 1, so the single buffer feature has FID 1
    buffer_feature.SetField('sum', buffer_stats[1]['sum'])
    target_layer.SetFeature(buffer_feature)
    target_layer.SyncToDisk()
    target_layer = None
    target_vector = None
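
A hypothetical call; the coordinates are illustrative, and the Buffer(10000) above assumes the raster's projection is in meters, giving a 10 km radius:

create_local_buffer_region(
    'bathymetry_utm.tif', 'POINT (-77.35 25.05)', 'buffer_sample.gpkg')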
Code Example #10
def calc_rankings(save_as):
    """Calculate zonal mean index inside community footprints.

    Args:
        save_as (string): the path to save zonal mean statistics.

    """
    zonal_dict = {
        'fid': [],
        # 'threat_mean': [],
        'exposure_mean': [],
    }
    
    # zonal mean of threat
    # print("calculating zonal threat")
    # threat_stats = pygeoprocessing.zonal_statistics(
    #     (_THREAT_PATH, 1), _FOOTPRINTS_PATH)
    # zonal mean of exposure
    print("calculating zonal exposure")
    exposure_stats = pygeoprocessing.zonal_statistics(
        (_EXPOSURE_PATH, 1), _FOOTPRINTS_PATH)
    
    for fid in exposure_stats:
        try:
            exposure_mean = (
                exposure_stats[fid]['sum'] / exposure_stats[fid]['count'])
        except ZeroDivisionError:
            # do not include communities that lie outside threat index
            continue
        zonal_dict['fid'].append(fid)
        # zonal_dict['threat_mean'].append(
        #     (threat_stats[fid]['sum'] / threat_stats[fid]['count']))
        zonal_dict['exposure_mean'].append(exposure_mean)
    zonal_df = pandas.DataFrame.from_dict(zonal_dict, orient='columns')

    # add NAMELSAD field so that these can be compared to STA table
    aggregate_vector = gdal.OpenEx(_FOOTPRINTS_PATH, gdal.OF_VECTOR)
    aggregate_layer = aggregate_vector.GetLayer()
    fid_list = [feature.GetFID() for feature in aggregate_layer]
    name_list = [feature.GetField('NAMELSAD') for feature in aggregate_layer]
    match_dict = {'fid': fid_list, 'NAMELSAD': name_list}
    match_df = pandas.DataFrame.from_dict(match_dict, orient='columns')
    zonal_plus_fid = zonal_df.merge(
        match_df, how='outer', on='fid', suffixes=(None, None))
    zonal_plus_fid.to_csv(save_as)
Code Example #11
def zonal_stats_tofile(base_vector_path, raster_path, target_stats_pickle):
    """Calculate zonal statistics for watersheds and write results to a file.

    Args:
        base_vector_path (string): Path to the watershed shapefile in the
            output workspace.
        raster_path (string): Path to raster to aggregate.
        target_stats_pickle (string): Path to pickle file to store dictionary
            returned by zonal stats.

    Returns:
        None

    """
    ws_stats_dict = pygeoprocessing.zonal_statistics(
        (raster_path, 1), base_vector_path, ignore_nodata=True)
    with open(target_stats_pickle, 'wb') as picklefile:
        pickle.dump(ws_stats_dict, picklefile)
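
Reading the statistics back is symmetric; a minimal sketch with a hypothetical path:

import pickle

with open('ws_stats.pickle', 'rb') as picklefile:
    ws_stats_dict = pickle.load(picklefile)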
Code Example #12
def _pickle_zonal_stats(base_vector_path, base_raster_path,
                        target_pickle_path):
    """Calculate Zonal Stats for a vector/raster pair and pickle result.

    Parameters:
        base_vector_path (str): path to vector file
        base_raster_path (str): path to raster file to aggregate over.
        target_pickle_path (str): path to desired target pickle file that will
            be a pickle of the pygeoprocessing.zonal_stats function.

    Returns:
        None.

    """
    zonal_stats = pygeoprocessing.zonal_statistics((base_raster_path, 1),
                                                   base_vector_path)
    with open(target_pickle_path, 'wb') as pickle_file:
        pickle.dump(zonal_stats, pickle_file)
Code Example #13
def num_nodata_pixels(raster_list, aoi_path):
    """Count number of nodata values across a list of rasters inside an aoi.

    Args:
        raster_list (list): list of paths to rasters that should be summarized
        aoi_path (string): path to polygon vector defining the aoi. Should have
            a single feature

    Returns:
        number of nodata pixels within the aoi across rasters in
            raster_list
    """
    """
    running_count = 0
    for path in raster_list:
        zonal_stat_dict = pygeoprocessing.zonal_statistics((path, 1), aoi_path)
        if len(zonal_stat_dict) > 1:
            raise ValueError("Vector path contains >1 feature")
        running_count = running_count + zonal_stat_dict[0]['nodata_count']
    return running_count
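
A hypothetical call, mirroring average_value_in_aoi from Code Example #2:

nodata_total = num_nodata_pixels(
    ['biomass_2016.tif', 'biomass_2017.tif'], 'aoi.shp')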
Code Example #14
File: globio.py Project: phargogh/invest
def _summarize_results_in_aoi(aoi_path, summary_aoi_path, msa_path):
    """Aggregate MSA results to AOI polygons with zonal statistics.

    Parameters:
        aoi_path (string): path to aoi shapefile containing polygons.
        summary_aoi_path (string):
            path to copy of aoi shapefile with summary stats added.
        msa_path (string): path to msa results raster to summarize.

    Returns:
        None

    """
    # copy the aoi to an output shapefile
    original_datasource = gdal.OpenEx(aoi_path, gdal.OF_VECTOR | gdal.GA_ReadOnly)
    # Delete if existing shapefile with the same name
    if os.path.isfile(summary_aoi_path):
        os.remove(summary_aoi_path)
    # Copy the input shapefile into the designated output folder
    driver = gdal.GetDriverByName('ESRI Shapefile')
    datasource_copy = driver.CreateCopy(
        summary_aoi_path, original_datasource)
    layer = datasource_copy.GetLayer()
    msa_summary_field_def = ogr.FieldDefn('msa_mean', ogr.OFTReal)
    msa_summary_field_def.SetWidth(24)
    msa_summary_field_def.SetPrecision(11)
    layer.CreateField(msa_summary_field_def)
    layer.SyncToDisk()

    msa_summary = pygeoprocessing.zonal_statistics(
        (msa_path, 1), summary_aoi_path)
    for feature in layer:
        feature_fid = feature.GetFID()
        # count == 0 if polygon outside raster bounds or only over nodata
        if msa_summary[feature_fid]['count'] != 0:
            field_val = (
                float(msa_summary[feature_fid]['sum'])
                / float(msa_summary[feature_fid]['count']))
            feature.SetField('msa_mean', field_val)
            layer.SetFeature(feature)
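
Unlike Code Examples #2 and #4, this example guards the mean computation by checking 'count' directly instead of catching ZeroDivisionError; features that overlap no valid pixels simply keep a NULL msa_mean.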
Code Example #15
def _aggregate_and_pickle_total(base_raster_path_band, aggregate_vector_path,
                                target_pickle_path):
    """Aggregate base raster path to vector path FIDs and pickle result.

    Parameters:
        base_raster_path_band (tuple): raster/path band to aggregate over.
        aggregate_vector_path (string): path to vector to use geometry to
            aggregate over.
        target_pickle_path (string): path to a file that will contain the
            result of a pygeoprocessing.zonal_statistics call over
            base_raster_path_band from aggregate_vector_path.

    Returns:
        None.

    """
    result = pygeoprocessing.zonal_statistics(
        base_raster_path_band,
        aggregate_vector_path,
        working_dir=os.path.dirname(target_pickle_path))

    with open(target_pickle_path, 'wb') as target_pickle_file:
        pickle.dump(result, target_pickle_file)
Code Example #16
def aggregate_to_polygons(base_aggregate_vector_path,
                          target_aggregate_vector_path,
                          landcover_raster_projection, crop_to_landcover_table,
                          nutrient_table, yield_percentile_headers, output_dir,
                          file_suffix, target_aggregate_table_path):
    """Write table with aggregate results of yield and nutrient values.

    Use zonal statistics to summarize total observed and interpolated
    production and nutrient information for each polygon in
    base_aggregate_vector_path.

    Args:
        base_aggregate_vector_path (string): path to polygon vector
        target_aggregate_vector_path (string):
            path to re-projected copy of polygon vector
        landcover_raster_projection (string): a WKT projection string
        crop_to_landcover_table (dict): landcover codes keyed by crop names
        nutrient_table (dict): a lookup of nutrient values by crop in the
            form of nutrient_table[<crop>][<nutrient>].
        yield_percentile_headers (list): list of strings indicating percentiles
            at which yield was calculated.
        output_dir (string): the file path to the output workspace.
        file_suffix (string): string to append to any output filenames.
        target_aggregate_table_path (string): path to 'aggregate_results.csv'
            in the output workspace

    Returns:
        None

    """
    # reproject polygon to LULC's projection
    pygeoprocessing.reproject_vector(base_aggregate_vector_path,
                                     landcover_raster_projection,
                                     target_aggregate_vector_path,
                                     driver_name='ESRI Shapefile')

    # loop over every crop and query with pgp function
    total_yield_lookup = {}
    total_nutrient_table = collections.defaultdict(
        lambda: collections.defaultdict(lambda: collections.defaultdict(float)
                                        ))
    for crop_name in crop_to_landcover_table:
        # convert 100g to Mg and fraction left over from refuse
        nutrient_factor = 1e4 * (
            1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
        # loop over percentiles
        for yield_percentile_id in yield_percentile_headers:
            percentile_crop_production_raster_path = os.path.join(
                output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            LOGGER.info("Calculating zonal stats for %s  %s", crop_name,
                        yield_percentile_id)
            total_yield_lookup['%s_%s' % (crop_name, yield_percentile_id)] = (
                pygeoprocessing.zonal_statistics(
                    (percentile_crop_production_raster_path, 1),
                    target_aggregate_vector_path))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_%s' %
                                                   (crop_name,
                                                    yield_percentile_id)]:
                    total_nutrient_table[nutrient_id][yield_percentile_id][
                        id_index] += (nutrient_factor * total_yield_lookup[
                            '%s_%s' %
                            (crop_name, yield_percentile_id)][id_index]['sum']
                                      * nutrient_table[crop_name][nutrient_id])

        # process observed
        observed_yield_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
        total_yield_lookup['%s_observed' %
                           crop_name] = (pygeoprocessing.zonal_statistics(
                               (observed_yield_path, 1),
                               target_aggregate_vector_path))
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
            for id_index in total_yield_lookup['%s_observed' % crop_name]:
                total_nutrient_table[nutrient_id]['observed'][id_index] += (
                    nutrient_factor *
                    total_yield_lookup['%s_observed' %
                                       crop_name][id_index]['sum'] *
                    nutrient_table[crop_name][nutrient_id])

    # report everything to a table
    with open(target_aggregate_table_path, 'w') as aggregate_table:
        # write header
        aggregate_table.write('FID,')
        aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
        aggregate_table.write(','.join([
            '%s_%s' % (nutrient_id, model_type)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
            for model_type in sorted(list(total_nutrient_table.values())[0])
        ]))
        aggregate_table.write('\n')

        # iterate by polygon index
        for id_index in list(total_yield_lookup.values())[0]:
            aggregate_table.write('%s,' % id_index)
            aggregate_table.write(','.join([
                str(total_yield_lookup[yield_header][id_index]['sum'])
                for yield_header in sorted(total_yield_lookup)
            ]))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for model_type in sorted(
                        list(total_nutrient_table.values())[0]):
                    aggregate_table.write(
                        ',%s' %
                        total_nutrient_table[nutrient_id][model_type][id_index]
                    )
            aggregate_table.write('\n')
Code Example #17
import shutil
import time

import pygeoprocessing
from osgeo import gdal, ogr

# `cv` below is assumed to be the InVEST coastal vulnerability module, e.g.
# `from natcap.invest import coastal_vulnerability as cv`, and
# fetch_ray_vector_path is assumed to point at an existing fetch ray vector.

vector_info = pygeoprocessing.get_vector_info(fetch_ray_vector_path)
model_resolution = 500
file_suffix = ''
base_bathy_path = 'C:/Users/dmf/projects/invest/data/invest-sample-data/Base_Data/Marine/DEMs/global_dem'
target_bathy_path = 'bathy_utm.tif'
working_dir = 'temp_zonal_stats'
target_fetch_depth_path = 'fetch_depth_bahamas.gpkg'

start = time.time()

cv.clip_and_project_raster(base_bathy_path, vector_info['bounding_box'],
                           vector_info['projection'], model_resolution,
                           working_dir, file_suffix, target_bathy_path)

result = pygeoprocessing.zonal_statistics((target_bathy_path, 1),
                                          fetch_ray_vector_path,
                                          polygons_might_overlap=False,
                                          working_dir=working_dir)

shutil.copy(fetch_ray_vector_path, target_fetch_depth_path)

target_vector = gdal.OpenEx(target_fetch_depth_path,
                            gdal.OF_VECTOR | gdal.GA_Update)
target_layer = target_vector.GetLayer()
target_layer.CreateField(ogr.FieldDefn('depth', ogr.OFTReal))
target_layer.StartTransaction()

for feature in target_layer:
    fid = feature.GetFID()
    depth = 9999
    if result[fid]['count'] > 0:
        depth = float(result[fid]['sum']) / float(result[fid]['count'])
    feature.SetField('depth', depth)
    target_layer.SetFeature(feature)
target_layer.CommitTransaction()

target_layer = None
target_vector = None
print('zonal stats finished in %.2fs' % (time.time() - start))
Code Example #18
def execute(args):
    """Crop Production Regression Model.

    This model will take a landcover (crop cover?), N, P, and K map and
    produce modeled yields and a nutrient table.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names and has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_regression_yield_tables/[cropname]_*
              A ValueError is raised if strings don't match.
        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the simulation,
            though it can contain additional crops not used in the simulation.
            The headers must be 'crop_name', 'nitrogen_rate',
            'phosphorous_rate', and 'potassium_rate', where 'crop_name' is the
            name string used to identify crops in the
            'landcover_to_crop_table_path', and rates are in units kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.
    """
    LOGGER.info(
        "Calculating total land area and warning if the landcover raster "
        "is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)

    crop_to_fertlization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)

    crop_lucodes = [
        x[_EXPECTED_LUCODE_TABLE_HEADER]
        for x in crop_to_landcover_table.values()
    ]

    unique_lucodes = numpy.array([])
    total_area = 0.0
    for _, lu_band_data in pygeoprocessing.iterblocks(
            args['landcover_raster_path']):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(
            numpy.concatenate((unique_lucodes, unique_block)))
        total_area += numpy.count_nonzero((lu_band_data != _NODATA_YIELD))

    missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes))
    if len(missing_lucodes) > 0:
        LOGGER.warning(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s", crop_climate_bin_raster_path, crop_name,
                args['landcover_to_crop_table_path'])

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(crop_climate_bin_raster_path,
                                    crop_climate_bin_raster_info['pixel_size'],
                                    clipped_climate_bin_raster_path,
                                    'nearest',
                                    target_bb=landcover_wgs84_bounding_box)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)

        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path,
            'climate_bin',
            to_lower=True,
            numerical_cast=True,
            warn_if_missing=False)
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        yield_regression_headers = [
            x for x in next(iter(crop_regression_table.values()))
            if x != 'climate_bin'
        ]

        clipped_climate_bin_raster_path_info = (
            pygeoprocessing.get_raster_info(clipped_climate_bin_raster_path))

        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir, _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN %
                    (crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = dict([
                (bin_id, crop_regression_table[bin_id][yield_regression_id])
                for bin_id in crop_regression_table
            ])
            bin_to_regression_value[crop_climate_bin_raster_info['nodata']
                                    [0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN %
                (crop_name, yield_regression_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1), bin_to_regression_value,
                coarse_regression_parameter_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info("Interpolate %s %s parameter to landcover resolution.",
                        crop_name, yield_regression_id)
            pygeoprocessing.warp_raster(
                coarse_regression_parameter_raster_path,
                landcover_raster_info['pixel_size'],
                regression_parameter_raster_path_lookup[yield_regression_id],
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

        # the regression model has identical mathematical equations for
        # nitrogen, phosphorous, and potassium.  The only difference is
        # the scalars in the equation, so making a closure below to simplify
        # this coding so I don't repeat the same function 3 times for 3
        # almost identical raster_calculator calls.
        def _x_yield_op_gen(fert_rate):
            """Create a raster calc op given the fertilization rate."""
            def _x_yield_op(y_max, b_x, c_x, lulc_array):
                """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC))"""
                result = numpy.empty(b_x.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = ((b_x != _NODATA_YIELD) & (c_x != _NODATA_YIELD) &
                              (lulc_array == crop_lucode))
                result[valid_mask] = y_max[valid_mask] * (
                    1 - b_x[valid_mask] *
                    numpy.exp(-c_x[valid_mask] * fert_rate) * pixel_area_ha)
                return result

            return _x_yield_op

        LOGGER.info('Calc nitrogen yield')
        nitrogen_yield_raster_path = os.path.join(
            output_dir,
            _NITROGEN_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_n'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name]['nitrogen_rate']),
            nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc phosphorous yield')
        phosphorous_yield_raster_path = os.path.join(
            output_dir,
            _PHOSPHOROUS_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_p2o5'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(crop_to_fertlization_rate_table[crop_name]
                            ['phosphorous_rate']),
            phosphorous_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc potassium yield')
        potassium_yield_raster_path = os.path.join(
            output_dir,
            _POTASSIUM_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_k2o'], 1),
             (regression_parameter_raster_path_lookup['c_k2o'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertlization_rate_table[crop_name]['potassium_rate']),
            potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc the min of N, K, and P')
        crop_production_raster_path = os.path.join(
            output_dir,
            _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        def _min_op(y_n, y_p, y_k):
            """Calculate the min of the three inputs and multiply by Ymax."""
            result = numpy.empty(y_n.shape, dtype=numpy.float32)
            result[:] = _NODATA_YIELD
            valid_mask = ((y_n != _NODATA_YIELD) & (y_k != _NODATA_YIELD) &
                          (y_p != _NODATA_YIELD))
            result[valid_mask] = (numpy.min(
                [y_n[valid_mask], y_k[valid_mask], y_p[valid_mask]], axis=0))
            return result

        pygeoprocessing.raster_calculator([(nitrogen_yield_raster_path, 1),
                                           (phosphorous_yield_raster_path, 1),
                                           (potassium_yield_raster_path, 1)],
                                          _min_op, crop_production_raster_path,
                                          gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path,
            'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(observed_yield_array.shape,
                                 dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op,
            zeroed_observed_yield_raster_path, gdal.GDT_Float32,
            observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path,
            'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (observed_yield_array[lulc_mask] *
                                 pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(os.path.join(
        args['model_data_path'], 'crop_nutrient.csv'),
                                                 'crop',
                                                 to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)
    nutrient_headers = [
        nutrient_id + '_' + mode
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for mode in ['modeled', 'observed']
    ]
    with open(result_table_path, 'w') as result_table:
        result_table.write('crop,area (ha),' +
                           'production_observed,production_modeled,' +
                           ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            yield_sum = 0.0
            for _, yield_block in pygeoprocessing.iterblocks(
                    crop_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[_NODATA_YIELD != yield_block])
            production_lookup['modeled'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                total_nutrient = (nutrient_factor *
                                  production_lookup['modeled'] *
                                  nutrient_table[crop_name][nutrient_id])
                result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (nutrient_factor * production_lookup['observed'] *
                             nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write('\n,total area (both crop and non-crop)\n,%f\n' %
                           (total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(args['aggregate_polygon_path'],
                                         landcover_raster_info['projection'],
                                         target_aggregate_vector_path,
                                         layer_index=0,
                                         driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            LOGGER.info("Calculating zonal stats for %s", crop_name)
            crop_production_raster_path = os.path.join(
                output_dir,
                _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            total_yield_lookup['%s_modeled' %
                               crop_name] = (pygeoprocessing.zonal_statistics(
                                   (crop_production_raster_path, 1),
                                   target_aggregate_vector_path,
                                   str(args['aggregate_polygon_id'])))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_modeled' % crop_name]:
                    total_nutrient_table[nutrient_id]['modeled'][id_index] += (
                        nutrient_factor *
                        total_yield_lookup['%s_modeled' %
                                           crop_name][id_index]['sum'] *
                        nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            total_yield_lookup['%s_observed' %
                               crop_name] = (pygeoprocessing.zonal_statistics(
                                   (observed_yield_path, 1),
                                   target_aggregate_vector_path,
                                   str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_observed' % crop_name]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_observed' %
                                               crop_name][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS for
                model_type in sorted(next(iter(total_nutrient_table.values())))
            ]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)
                ]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(',%s' %
                                              total_nutrient_table[nutrient_id]
                                              [model_type][id_index])
                aggregate_table.write('\n')
Code Example #19
def _aggregate_carbon_map(aoi_vector_path, carbon_map_path,
                          target_aggregate_vector_path):
    """Helper function to aggregate carbon values for the given serviceshed.

    Generates a new shapefile that's a copy of 'aoi_vector_path' in
    'workspace_dir' with mean and sum values from the raster at
    'carbon_map_path'

    Args:
        aoi_vector_path (string): path to shapefile that will be used to
            aggregate raster at'carbon_map_path'.
        workspace_dir (string): path to a directory that function can copy
            the shapefile at aoi_vector_path into.
        carbon_map_path (string): path to raster that will be aggregated by
            the given serviceshed polygons
        target_aggregate_vector_path (string): path to an ESRI shapefile that
            will be created by this function as the aggregating output.

    Returns:
        None

    """
    aoi_vector = gdal.OpenEx(aoi_vector_path, gdal.OF_VECTOR)
    driver = gdal.GetDriverByName('ESRI Shapefile')

    if os.path.exists(target_aggregate_vector_path):
        os.remove(target_aggregate_vector_path)
    target_aggregate_vector = driver.CreateCopy(target_aggregate_vector_path,
                                                aoi_vector)
    aoi_vector = None
    target_aggregate_layer = target_aggregate_vector.GetLayer()

    # make an identifying id per polygon that can be used for aggregation
    while True:
        serviceshed_defn = target_aggregate_layer.GetLayerDefn()
        poly_id_field = str(uuid.uuid4())[-8:]
        if serviceshed_defn.GetFieldIndex(poly_id_field) == -1:
            break
    layer_id_field = ogr.FieldDefn(poly_id_field, ogr.OFTInteger)
    target_aggregate_layer.CreateField(layer_id_field)
    target_aggregate_layer.StartTransaction()
    for poly_index, poly_feat in enumerate(target_aggregate_layer):
        poly_feat.SetField(poly_id_field, poly_index)
        target_aggregate_layer.SetFeature(poly_feat)
    target_aggregate_layer.CommitTransaction()
    target_aggregate_layer.SyncToDisk()

    # aggregate carbon stocks; zonal_statistics keys its results by FID
    serviceshed_stats = pygeoprocessing.zonal_statistics(
        (carbon_map_path, 1), target_aggregate_vector_path)

    # don't need a random poly id anymore
    target_aggregate_layer.DeleteField(
        serviceshed_defn.GetFieldIndex(poly_id_field))

    carbon_sum_field = ogr.FieldDefn('c_sum', ogr.OFTReal)
    carbon_sum_field.SetWidth(24)
    carbon_sum_field.SetPrecision(11)
    carbon_mean_field = ogr.FieldDefn('c_ha_mean', ogr.OFTReal)
    carbon_mean_field.SetWidth(24)
    carbon_mean_field.SetPrecision(11)

    target_aggregate_layer.CreateField(carbon_sum_field)
    target_aggregate_layer.CreateField(carbon_mean_field)

    target_aggregate_layer.ResetReading()
    target_aggregate_layer.StartTransaction()

    for poly_feat in target_aggregate_layer:
        poly_fid = poly_feat.GetFID()
        poly_feat.SetField('c_sum', serviceshed_stats[poly_fid]['sum'])
        # mean carbon per hectare: raster sum divided by polygon area
        poly_geom = poly_feat.GetGeometryRef()
        poly_area_ha = poly_geom.GetArea() / 1e4  # converts m^2 to hectare
        poly_geom = None
        poly_feat.SetField('c_ha_mean',
                           serviceshed_stats[poly_fid]['sum'] / poly_area_ha)

        target_aggregate_layer.SetFeature(poly_feat)
    target_aggregate_layer.CommitTransaction()
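A minimal invocation sketch for _aggregate_carbon_map, assuming a projected AOI shapefile and a carbon stock raster (all paths below are hypothetical):

# Hypothetical paths; the function copies the AOI and adds 'c_sum'
# (total carbon per polygon) and 'c_ha_mean' (carbon per hectare) fields.
_aggregate_carbon_map(
    'watersheds.shp',          # aoi_vector_path
    'tot_c_cur.tif',           # carbon_map_path
    'aggregated_carbon.shp')   # target_aggregate_vector_path (created)

Because 'c_ha_mean' divides the raster sum by the polygon's geometric area, the AOI should be in a projected coordinate system with linear units of meters.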
Code example #20
def input_rankings(save_as):
    """Calculate zonal mean of threat index inputs.

    Args:
        save_as (string): the path to save zonal mean statistics.

    """
    # erosion_path = "D:/NFWF_PhaseII/Alaska/Revise_threat_index/threat_v2_101421/AK_erosion_v2_rc.tif"
    # flooding_path = "D:/NFWF_PhaseII/Alaska/Revise_threat_index/threat_v2_101421/AK_floodprone_v2.tif"
    erosion_path = "E:/NFWF_PhaseII/Alaska/Revise_threat_index/threat_v3_110921/AK_erosion_v3.tif"
    flooding_path = "E:/NFWF_PhaseII/Alaska/Revise_threat_index/threat_v3_110921/AK_floodprone_v3.tif"
    permafrost_path = "E:/NFWF_PhaseII/Alaska/Revise_threat_index/threat_v2_101421/AK_permafrost_v2.tif"
    zonal_dict = {
        'fid': [],
        'threat_mean': [],
        # 'erosion_mean': [],
        # 'flooding_mean': [],
        # 'permafrost_mean': [],
    }
    
    # zonal mean of threat
    print("calculating zonal threat")
    threat_stats = pygeoprocessing.zonal_statistics(
        (_THREAT_PATH, 1), _FOOTPRINTS_PATH)
    # zonal mean of erosion
    # print("calculating zonal erosion")
    # erosion_stats = pygeoprocessing.zonal_statistics(
    #     (erosion_path, 1), _FOOTPRINTS_PATH)
    # # zonal mean of flooding
    # print("calculating zonal flooding")
    # flooding_stats = pygeoprocessing.zonal_statistics(
    #     (flooding_path, 1), _FOOTPRINTS_PATH)
    # # zonal mean permafrost
    # print("calculating zonal permafrost")
    # permafrost_stats = pygeoprocessing.zonal_statistics(
    #     (permafrost_path, 1), _FOOTPRINTS_PATH)
    
    for fid in threat_stats:
        try:
            threat_mean = threat_stats[fid]['sum'] / threat_stats[fid]['count']            
        except ZeroDivisionError:
            # do not include communities that lie outside threat index
            continue
        zonal_dict['fid'].append(fid)
        zonal_dict['threat_mean'].append(threat_mean)
        # zonal_dict['erosion_mean'].append(
        #     (erosion_stats[fid]['sum'] / erosion_stats[fid]['count']))
        # zonal_dict['flooding_mean'].append(
        #     (flooding_stats[fid]['sum'] / flooding_stats[fid]['count']))
        # zonal_dict['permafrost_mean'].append(
        #     (permafrost_stats[fid]['sum'] /
        #         permafrost_stats[fid]['count']))
    zonal_df = pandas.DataFrame.from_dict(zonal_dict, orient='columns')
    # zonal_df['threat_rank'] = zonal_df[
    #     'threat_mean'].rank(method='dense', ascending=False)
    # zonal_df['erosion_rank'] = zonal_df[
    #     'erosion_mean'].rank(method='dense', ascending=False)
    # zonal_df['flooding_rank'] = zonal_df[
    #     'flooding_mean'].rank(method='dense', ascending=False)
    # zonal_df['permafrost_rank'] = zonal_df[
    #     'permafrost_mean'].rank(method='dense', ascending=False)

    # add NAMELSAD field so that these can be compared to STA table
    aggregate_vector = gdal.OpenEx(_FOOTPRINTS_PATH, gdal.OF_VECTOR)
    aggregate_layer = aggregate_vector.GetLayer()
    fid_list = [feature.GetFID() for feature in aggregate_layer]
    name_list = [feature.GetField('NAMELSAD') for feature in aggregate_layer]
    match_dict = {'fid': fid_list, 'NAMELSAD': name_list}
    match_df = pandas.DataFrame.from_dict(match_dict, orient='columns')
    zonal_plus_fid = zonal_df.merge(
        match_df, how='outer', on='fid', suffixes=(None, None))
    zonal_plus_fid.to_csv(save_as)
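Note that _THREAT_PATH and _FOOTPRINTS_PATH are module-level constants pointing to the threat index raster and the community footprint polygons (which must carry a NAMELSAD field). A hedged usage sketch:

# Assumes _THREAT_PATH and _FOOTPRINTS_PATH are defined at module level;
# the output path is hypothetical. Footprints with no threat coverage are
# skipped via the ZeroDivisionError handler above.
input_rankings('threat_zonal_means.csv')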
Code example #21
def summarize_outputs(base_data):
    """Summarize outputs from runs of RPM."""
    def perc_change(baseline_ar, scenario_ar):
        """Calculate percent change from baseline."""
        valid_mask = ((~numpy.isclose(baseline_ar, input_nodata)) &
                      (~numpy.isclose(scenario_ar, input_nodata)))
        result = numpy.empty(baseline_ar.shape, dtype=numpy.float32)
        result[:] = input_nodata
        result[valid_mask] = (
            (scenario_ar[valid_mask] - baseline_ar[valid_mask]) /
            baseline_ar[valid_mask] * 100)
        return result

    mean_val_dict = {
        'run_id': [],
        'output': [],
        'year': [],
        'pixel_mean': [],
    }

    diet_sufficiency_summary_dict = {
        'run_id': [],
        'year': [],
        'month': [],
        'aggregation_method': [],
        'pixel_mean': [],
    }

    perc_change_summary_dict = {
        'run_id': [],
        'output': [],
        'year': [],
        'mean_perc_change': [],
        'min_perc_change': [],
        'max_perc_change': [],
    }
    for run_id in base_data['run_list']:
        run_output_dir = os.path.join(base_data['outer_dir'], run_id, 'output')
        for output_bn in base_data['output_list']:
            for year in base_data['year_list']:
                year_raster_list = [
                    os.path.join(
                        run_output_dir,
                        '{}_{}_{}.tif'.format(output_bn, year, month))
                    for month in range(1, 13)
                ]
                input_nodata = pygeoprocessing.get_raster_info(
                    year_raster_list[0])['nodata'][0]
                yearly_mean_path = os.path.join(
                    base_data['summary_output_dir'],
                    'yearly_mean_{}_{}_{}.tif'.format(output_bn, year, run_id))
                raster_list_mean(year_raster_list, input_nodata,
                                 yearly_mean_path, input_nodata)

                # descriptive statistics: monthly average across pixels
                stat_df = summarize_pixel_distribution(yearly_mean_path)
                mean_val_dict['run_id'].append(run_id)
                mean_val_dict['output'].append(output_bn)
                mean_val_dict['year'].append(year)
                mean_val_dict['pixel_mean'].append(stat_df['mean'])

        # number of months where average diet sufficiency across aoi was > 1
        for year in base_data['year_list']:
            for month in range(1, 13):
                output_path = os.path.join(
                    run_output_dir,
                    'diet_sufficiency_{}_{}.tif'.format(year, month))
                zonal_stat_dict = pygeoprocessing.zonal_statistics(
                    (output_path, 1), base_data['aoi_path'])
                try:
                    mean_value = (float(zonal_stat_dict[0]['sum']) /
                                  zonal_stat_dict[0]['count'])
                except ZeroDivisionError:
                    mean_value = 'NA'
                diet_sufficiency_summary_dict['run_id'].append(run_id)
                diet_sufficiency_summary_dict['year'].append(year)
                diet_sufficiency_summary_dict['month'].append(month)
                diet_sufficiency_summary_dict['aggregation_method'].append(
                    'average_across_pixels')
                diet_sufficiency_summary_dict['pixel_mean'].append(mean_value)

    # summarize percent change from baseline
    for run_id in base_data['run_list']:
        if run_id == 'A':
            continue
        run_output_dir = os.path.join(base_data['outer_dir'], run_id, 'output')
        for output_bn in base_data['output_list']:
            for year in base_data['year_list']:
                baseline_path = os.path.join(
                    base_data['summary_output_dir'],
                    'yearly_mean_{}_{}_A.tif'.format(output_bn, year))
                scenario_path = os.path.join(
                    base_data['summary_output_dir'],
                    'yearly_mean_{}_{}_{}.tif'.format(output_bn, year, run_id))
                perc_change_path = os.path.join(
                    base_data['summary_output_dir'],
                    'perc_change_yearly_mean_{}_{}_{}.tif'.format(
                        output_bn, year, run_id))
                pygeoprocessing.raster_calculator(
                    [(path, 1)
                     for path in [baseline_path, scenario_path]], perc_change,
                    perc_change_path, gdal.GDT_Float32, input_nodata)
                # descriptive statistics: monthly average across pixels
                stat_df = summarize_pixel_distribution(perc_change_path)
                perc_change_summary_dict['run_id'].append(run_id)
                perc_change_summary_dict['output'].append(output_bn)
                perc_change_summary_dict['year'].append(year)
                perc_change_summary_dict['mean_perc_change'].append(
                    stat_df['mean'])
                perc_change_summary_dict['min_perc_change'].append(
                    stat_df['min'])
                perc_change_summary_dict['max_perc_change'].append(
                    stat_df['max'])

    summary_df = pandas.DataFrame(mean_val_dict)
    save_as = os.path.join(base_data['summary_output_dir'],
                           'average_value_summary.csv')
    summary_df.to_csv(save_as, index=False)

    diet_suff_df = pandas.DataFrame(diet_sufficiency_summary_dict)
    save_as = os.path.join(base_data['summary_output_dir'],
                           'monthly_diet_suff_summary.csv')
    diet_suff_df.to_csv(save_as, index=False)

    perc_change_df = pandas.DataFrame(perc_change_summary_dict)
    save_as = os.path.join(base_data['summary_output_dir'],
                           'perc_change_summary.csv')
    perc_change_df.to_csv(save_as, index=False)
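raster_list_mean and summarize_pixel_distribution are helpers defined elsewhere in this module. From the call sites above, summarize_pixel_distribution evidently returns a mapping with at least 'mean', 'min', and 'max' keys; a minimal sketch under that assumption (not the module's actual implementation):

import numpy
import pygeoprocessing


def summarize_pixel_distribution(raster_path):
    """Sketch: mean/min/max of valid pixels in a raster (assumed interface)."""
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    running_sum = 0.0
    running_count = 0
    running_min = numpy.inf
    running_max = -numpy.inf
    for _, block in pygeoprocessing.iterblocks(raster_path):
        valid = block[~numpy.isclose(block, nodata)]
        if valid.size == 0:
            continue
        running_sum += float(valid.sum())
        running_count += valid.size
        running_min = min(running_min, float(valid.min()))
        running_max = max(running_max, float(valid.max()))
    mean_value = running_sum / running_count if running_count else 'NA'
    return {'mean': mean_value, 'min': running_min, 'max': running_max}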
Code example #22
def _aggregate_recharge(
        aoi_path, l_path, vri_path, aggregate_vector_path):
    """Aggregate recharge values for the provided watersheds/AOIs.

    Generates a new shapefile that is a copy of 'aoi_path' with the mean of
    L and the sum of Vri aggregated per feature.

    Parameters:
        aoi_path (string): path to shapefile that will be used to
            aggregate rasters
        l_path (string): path to (L) local recharge raster
        vri_path (string): path to Vri raster
        aggregate_vector_path (string): path to shapefile that will be created
            by this function as the aggregating output.  Will contain the
            fields 'qb' (mean of L) and 'vri_sum' (sum of Vri) per original
            feature in `aoi_path`.  If this file exists on disk prior to the
            call it is overwritten with the result of this call.

    Returns:
        None
    """
    if os.path.exists(aggregate_vector_path):
        LOGGER.warning(
            '%s exists, deleting and writing new output',
            aggregate_vector_path)
        os.remove(aggregate_vector_path)

    original_aoi_vector = gdal.OpenEx(aoi_path, gdal.OF_VECTOR)

    driver = gdal.GetDriverByName('ESRI Shapefile')
    driver.CreateCopy(aggregate_vector_path, original_aoi_vector)
    gdal.Dataset.__swig_destroy__(original_aoi_vector)
    original_aoi_vector = None
    aggregate_vector = gdal.OpenEx(aggregate_vector_path, 1)
    aggregate_layer = aggregate_vector.GetLayer()

    for raster_path, aggregate_field_id, op_type in [
            (l_path, 'qb', 'mean'), (vri_path, 'vri_sum', 'sum')]:

        # aggregate recharge values per feature; results are keyed by FID
        aggregate_stats = pygeoprocessing.zonal_statistics(
            (raster_path, 1), aggregate_vector_path)

        aggregate_field = ogr.FieldDefn(aggregate_field_id, ogr.OFTReal)
        aggregate_field.SetWidth(24)
        aggregate_field.SetPrecision(11)
        aggregate_layer.CreateField(aggregate_field)

        aggregate_layer.ResetReading()
        for poly_index, poly_feat in enumerate(aggregate_layer):
            if op_type == 'mean':
                pixel_count = aggregate_stats[poly_index]['count']
                if pixel_count != 0:
                    value = (aggregate_stats[poly_index]['sum'] / pixel_count)
                else:
                    LOGGER.warning(
                        "no coverage for polygon %s", ', '.join(
                            [str(poly_feat.GetField(_)) for _ in range(
                                poly_feat.GetFieldCount())]))
                    value = 0.0
            elif op_type == 'sum':
                value = aggregate_stats[poly_index]['sum']
            poly_feat.SetField(aggregate_field_id, float(value))
            aggregate_layer.SetFeature(poly_feat)

    aggregate_layer.SyncToDisk()
    aggregate_layer = None
    gdal.Dataset.__swig_destroy__(aggregate_vector)
    aggregate_vector = None
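A hedged invocation sketch with hypothetical paths; each feature in the copied AOI gains a 'qb' field (mean local recharge) and a 'vri_sum' field (summed Vri):

_aggregate_recharge(
    'watersheds.shp',           # aoi_path (hypothetical)
    'L.tif',                    # l_path: local recharge raster
    'Vri.tif',                  # vri_path
    'aggregated_recharge.shp')  # aggregate_vector_path (overwritten if present)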
Code example #23
def execute(args):
    """Crop Production Percentile Model.

    This model takes a landcover (crop cover) map and produces percentile
    yields, production, and observed crop yields, along with a nutrient
    table and a clipped observed yield map.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.
    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)
    bad_crop_name_list = []
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            bad_crop_name_list.append(crop_name)
    if len(bad_crop_name_list) > 0:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" %
            (args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = os.path.join(args['workspace_dir'])
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(crop_climate_bin_raster_path,
                                    crop_climate_bin_raster_info['pixel_size'],
                                    clipped_climate_bin_raster_path,
                                    'nearest',
                                    target_bb=landcover_wgs84_bounding_box)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path,
            'climate_bin',
            to_lower=True,
            numerical_cast=True)
        yield_percentile_headers = [
            x for x in next(iter(crop_climate_percentile_table.values()))
            if x != 'climate_bin'
        ]

        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir, _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = dict([
                (bin_id,
                 crop_climate_percentile_table[bin_id][yield_percentile_id])
                for bin_id in crop_climate_percentile_table
            ])
            bin_to_percentile_yield[crop_climate_bin_raster_info['nodata']
                                    [0]] = 0.0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1), bin_to_percentile_yield,
                coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            pygeoprocessing.warp_raster(
                coarse_yield_percentile_raster_path,
                landcover_raster_info['pixel_size'],
                interpolated_yield_percentile_raster_path,
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

            LOGGER.info("Calculate yield for %s at %s", crop_name,
                        yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))

            def _crop_production_op(lulc_array, yield_array):
                """Mask in yields that overlap with `crop_lucode`."""
                result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = lulc_array != landcover_nodata
                lulc_mask = lulc_array == crop_lucode
                result[valid_mask] = 0
                result[lulc_mask] = (yield_array[lulc_mask] * pixel_area_ha)
                return result

            pygeoprocessing.raster_calculator(
                [(args['landcover_raster_path'], 1),
                 (interpolated_yield_percentile_raster_path, 1)],
                _crop_production_op, percentile_crop_production_raster_path,
                gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop, assuming that
        # all the percentile rasters have non-zero production so it's okay to
        # use just one of the percentile rasters
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                percentile_crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))

        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path,
            'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(observed_yield_array.shape,
                                 dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op,
            zeroed_observed_yield_raster_path, gdal.GDT_Float32,
            observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path,
            'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (observed_yield_array[lulc_mask] *
                                 pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)
    production_percentile_headers = [
        'production_' +
        re.match(_YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for yield_percentile_id in sorted(yield_percentile_headers)
    ]
    nutrient_headers = [
        nutrient_id + '_' +
        re.match(_YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for yield_percentile_id in sorted(yield_percentile_headers) +
        ['yield_observed']
    ]
    with open(result_table_path, 'w') as result_table:
        result_table.write('crop,area (ha),' + 'production_observed,' +
                           ','.join(production_percentile_headers) + ',' +
                           ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            for yield_percentile_id in sorted(yield_percentile_headers):
                yield_percentile_raster_path = os.path.join(
                    output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                    (crop_name, yield_percentile_id, file_suffix))
                yield_sum = 0.0
                for _, yield_block in pygeoprocessing.iterblocks(
                        yield_percentile_raster_path):
                    yield_sum += numpy.sum(
                        yield_block[_NODATA_YIELD != yield_block])
                production_lookup[yield_percentile_id] = yield_sum
                result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for yield_percentile_id in sorted(yield_percentile_headers):
                    total_nutrient = (nutrient_factor *
                                      production_lookup[yield_percentile_id] *
                                      nutrient_table[crop_name][nutrient_id])
                    result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (nutrient_factor * production_lookup['observed'] *
                             nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write('\n,total area (both crop and non-crop)\n,%f\n' %
                           (total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(args['aggregate_polygon_path'],
                                         landcover_raster_info['projection'],
                                         target_aggregate_vector_path,
                                         layer_index=0,
                                         driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            # loop over percentiles
            for yield_percentile_id in yield_percentile_headers:
                percentile_crop_production_raster_path = os.path.join(
                    output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                    (crop_name, yield_percentile_id, file_suffix))
                LOGGER.info("Calculating zonal stats for %s  %s", crop_name,
                            yield_percentile_id)
                total_yield_lookup[
                    '%s_%s' % (crop_name, yield_percentile_id)] = (
                        pygeoprocessing.zonal_statistics(
                            (percentile_crop_production_raster_path, 1),
                            target_aggregate_vector_path,
                            str(args['aggregate_polygon_id'])))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for id_index in total_yield_lookup['%s_%s' %
                                                       (crop_name,
                                                        yield_percentile_id)]:
                        total_nutrient_table[nutrient_id][yield_percentile_id][
                            id_index] += (
                                nutrient_factor * total_yield_lookup[
                                    '%s_%s' %
                                    (crop_name,
                                     yield_percentile_id)][id_index]['sum'] *
                                nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            total_yield_lookup['%s_observed' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_observed' % crop_name]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_observed' %
                                               crop_name][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # use that result to calculate nutrient totals

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in sorted(
                    next(iter(total_nutrient_table.values())))
            ]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)
                ]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(',%s' %
                                              total_nutrient_table[nutrient_id]
                                              [model_type][id_index])
                aggregate_table.write('\n')
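Finally, a hedged example of wiring up the args dict for execute; every path here is hypothetical, and the model data directory must follow the layout described in the docstring:

args = {
    'workspace_dir': 'crop_production_workspace',
    'results_suffix': 'demo',                    # optional
    'landcover_raster_path': 'lulc.tif',
    'landcover_to_crop_table_path': 'landcover_to_crop.csv',
    'aggregate_polygon_path': 'counties.shp',    # optional; may be None
    'aggregate_polygon_id': 'county_id',
    'model_data_path': 'model_data',             # InVEST global data directory
}
execute(args)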