Example #1
def aggregate_results(base_aggregate_areas_path, target_vector_path, srs_wkt,
                      aggregations):
    """Aggregate outputs into regions of interest.

    Args:
        base_aggregate_areas_path (str): path to vector of polygon(s) to
            aggregate over. This is the original input.
        target_vector_path (str): path to write out the results. This will be a
            copy of the base vector with added fields, reprojected to the
            target WKT and saved in geopackage format.
        srs_wkt (str): a Well-Known Text representation of the target spatial
            reference. The base vector is reprojected to this spatial reference
            before aggregating the rasters over it.
        aggregations (list[tuple(str,str,str)]): list of tuples describing the
            datasets to aggregate. Each tuple has 3 items. The first is the
            path to a raster to aggregate. The second is the field name for
            this aggregated data in the output vector. The third is either
            'mean' or 'sum' indicating the aggregation to perform.

    Returns:
        None
    """
    pygeoprocessing.reproject_vector(base_aggregate_areas_path, srs_wkt,
                                     target_vector_path, driver_name='GPKG')
    aggregate_vector = gdal.OpenEx(target_vector_path, gdal.GA_Update)
    aggregate_layer = aggregate_vector.GetLayer()

    for raster_path, field_id, aggregation_op in aggregations:
        # aggregate the raster by the vector region(s)
        aggregate_stats = pygeoprocessing.zonal_statistics(
            (raster_path, 1), target_vector_path)

        # set up the field to hold the aggregate data
        aggregate_field = ogr.FieldDefn(field_id, ogr.OFTReal)
        aggregate_field.SetWidth(24)
        aggregate_field.SetPrecision(11)
        aggregate_layer.CreateField(aggregate_field)
        aggregate_layer.ResetReading()

        # save the aggregate data to the field for each feature
        for feature in aggregate_layer:
            feature_id = feature.GetFID()
            if aggregation_op == 'mean':
                pixel_count = aggregate_stats[feature_id]['count']
                try:
                    value = (aggregate_stats[feature_id]['sum'] / pixel_count)
                except ZeroDivisionError:
                    LOGGER.warning(
                        f'Polygon {feature_id} does not overlap {raster_path}')
                    value = 0.0
            elif aggregation_op == 'sum':
                value = aggregate_stats[feature_id]['sum']
            feature.SetField(field_id, float(value))
            aggregate_layer.SetFeature(feature)

    # save the aggregate vector layer and clean up references
    aggregate_layer.SyncToDisk()
    aggregate_layer = None
    gdal.Dataset.__swig_destroy__(aggregate_vector)
    aggregate_vector = None
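# --- Usage sketch (not from the original source): a minimal, hypothetical
# call to aggregate_results() above. All paths are placeholders and the
# EPSG code is an arbitrary choice for illustration.
from osgeo import osr

target_srs = osr.SpatialReference()
target_srs.ImportFromEPSG(32610)  # hypothetical UTM zone 10N target
aggregate_results(
    'watersheds.shp',           # hypothetical input polygons
    'watershed_results.gpkg',   # hypothetical output geopackage
    target_srs.ExportToWkt(),
    [('runoff.tif', 'rnf_sum', 'sum'),       # hypothetical rasters/fields
     ('recharge.tif', 'rch_mean', 'mean')])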
Example #2
    def test_non_projected_layers(self):
        """HRA: test habitat and stressor layers that are not projected."""
        import natcap.invest.hra

        args = HraRegressionTests.generate_base_args(self.workspace_dir)
        _make_criteria_csv(args['criteria_table_path'], self.workspace_dir)
        _make_aoi_vector(args['aoi_vector_path'])

        # Make projected files and write their filepaths to info csv.
        info_table_path = os.path.join(self.workspace_dir, 'info.csv')
        _make_info_csv(
            info_table_path, self.workspace_dir, projected=True,
            rel_path=False)

        # create geographic spatial reference
        wgs84_srs = osr.SpatialReference()
        wgs84_srs.ImportFromEPSG(4326)
        wgs84_wkt = wgs84_srs.ExportToWkt()
        # move the created habitat vector to a subdirectory so the
        # reprojected file can be saved where the CSV path expects it
        tmp_out = os.path.join(self.workspace_dir, 'tmp_move')
        os.mkdir(tmp_out)
        for filename in os.listdir(self.workspace_dir):
            if filename.startswith("habitat_0"):
                shutil.move(
                    os.path.join(self.workspace_dir, filename),
                    os.path.join(tmp_out, filename))
        habitat_path = os.path.join(tmp_out, 'habitat_0.shp')
        habitat_wgs84_path = os.path.join(self.workspace_dir, 'habitat_0.shp')
        # reproject habitat layer to geographic
        pygeoprocessing.reproject_vector(
            habitat_path, wgs84_wkt, habitat_wgs84_path)

        args['info_table_path'] = info_table_path

        with self.assertRaises(ValueError) as cm:
            natcap.invest.hra.execute(args)

        expected_message = "The following layer does not have a spatial"
        actual_message = str(cm.exception)
        self.assertIn(expected_message, actual_message)
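# --- Illustration (an assumption, not the actual natcap.invest.hra check):
# a sketch of the projection validation this test exercises.
from osgeo import gdal

def _layer_is_projected(vector_path):
    """Return True if the vector's first layer has a projected SRS."""
    vector = gdal.OpenEx(vector_path, gdal.OF_VECTOR)
    layer = vector.GetLayer()
    srs = layer.GetSpatialRef()
    # osr.SpatialReference.IsProjected() returns 1 for projected systems
    return srs is not None and bool(srs.IsProjected())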
Example #3
def aggregate_to_polygons(base_aggregate_vector_path,
                          target_aggregate_vector_path,
                          landcover_raster_projection, crop_to_landcover_table,
                          nutrient_table, yield_percentile_headers, output_dir,
                          file_suffix, target_aggregate_table_path):
    """Write table with aggregate results of yield and nutrient values.

    Use zonal statistics to summarize total observed and interpolated
    production and nutrient information for each polygon in
    base_aggregate_vector_path.

    Args:
        base_aggregate_vector_path (string): path to polygon vector
        target_aggregate_vector_path (string):
            path to re-projected copy of polygon vector
        landcover_raster_projection (string): a WKT projection string
        crop_to_landcover_table (dict): landcover codes keyed by crop names
        nutrient_table (dict): a lookup of nutrient values by crop in the
            form of nutrient_table[<crop>][<nutrient>].
        yield_percentile_headers (list): list of strings indicating percentiles
            at which yield was calculated.
        output_dir (string): the file path to the output workspace.
        file_suffix (string): string to append to any output filenames.
        target_aggregate_table_path (string): path to 'aggregate_results.csv'
            in the output workspace

    Returns:
        None

    """
    # reproject polygon to LULC's projection
    pygeoprocessing.reproject_vector(base_aggregate_vector_path,
                                     landcover_raster_projection,
                                     target_aggregate_vector_path,
                                     driver_name='ESRI Shapefile')

    # loop over every crop and query with pgp function
    total_yield_lookup = {}
    total_nutrient_table = collections.defaultdict(
        lambda: collections.defaultdict(
            lambda: collections.defaultdict(float)))
    for crop_name in crop_to_landcover_table:
        # convert 100g to Mg and fraction left over from refuse
        nutrient_factor = 1e4 * (
            1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
        # loop over percentiles
        for yield_percentile_id in yield_percentile_headers:
            percentile_crop_production_raster_path = os.path.join(
                output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            LOGGER.info("Calculating zonal stats for %s %s", crop_name,
                        yield_percentile_id)
            yield_key = '%s_%s' % (crop_name, yield_percentile_id)
            total_yield_lookup[yield_key] = pygeoprocessing.zonal_statistics(
                (percentile_crop_production_raster_path, 1),
                target_aggregate_vector_path)

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup[yield_key]:
                    total_nutrient_table[nutrient_id][yield_percentile_id][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup[yield_key][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # process observed
        observed_yield_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
        observed_key = '%s_observed' % crop_name
        total_yield_lookup[observed_key] = pygeoprocessing.zonal_statistics(
            (observed_yield_path, 1), target_aggregate_vector_path)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
            for id_index in total_yield_lookup[observed_key]:
                total_nutrient_table[nutrient_id]['observed'][id_index] += (
                    nutrient_factor *
                    total_yield_lookup[observed_key][id_index]['sum'] *
                    nutrient_table[crop_name][nutrient_id])

    # report everything to a table
    with open(target_aggregate_table_path, 'w') as aggregate_table:
        # write header
        aggregate_table.write('FID,')
        aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
        aggregate_table.write(','.join([
            '%s_%s' % (nutrient_id, model_type)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
            for model_type in sorted(list(total_nutrient_table.values())[0])
        ]))
        aggregate_table.write('\n')

        # iterate by polygon index
        for id_index in list(total_yield_lookup.values())[0]:
            aggregate_table.write('%s,' % id_index)
            aggregate_table.write(','.join([
                str(total_yield_lookup[yield_header][id_index]['sum'])
                for yield_header in sorted(total_yield_lookup)
            ]))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for model_type in sorted(
                        list(total_nutrient_table.values())[0]):
                    aggregate_table.write(
                        ',%s' %
                        total_nutrient_table[nutrient_id][model_type][id_index]
                    )
            aggregate_table.write('\n')
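# --- Reference sketch (an assumption, not from the source): the approximate
# shape of the pygeoprocessing.zonal_statistics() result that the function
# above indexes by feature ID. Exact keys may vary across versions.
example_stats = {
    0: {'count': 42, 'sum': 1234.5, 'min': 0.1, 'max': 99.9,
        'nodata_count': 3},
    1: {'count': 0, 'sum': 0.0, 'min': None, 'max': None, 'nodata_count': 7},
}
grand_total = sum(stats['sum'] for stats in example_stats.values())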
def execute(args):
    """Crop Production Regression Model.

    This model takes a landcover (crop cover) map and N, P, and K
    application rates and produces modeled yields and a nutrient table.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_regression_yield_tables/[cropname]_*
              A ValueError is raised if strings don't match.
        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the simulation,
            though it can contain additional crops not used in the simulation.
            The headers must be 'crop_name', 'nitrogen_rate',
            'phosphorous_rate', and 'potassium_rate', where 'crop_name' is the
            name string used to identify crops in the
            'landcover_to_crop_table_path', and rates are in units kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.
    """
    LOGGER.info(
        "Calculating total land area and warning if the landcover raster "
        "is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)

    crop_to_fertilization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)

    crop_lucodes = [
        x[_EXPECTED_LUCODE_TABLE_HEADER]
        for x in crop_to_landcover_table.values()
    ]

    unique_lucodes = numpy.array([])
    total_area = 0.0
    for _, lu_band_data in pygeoprocessing.iterblocks(
            args['landcover_raster_path']):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(
            numpy.concatenate((unique_lucodes, unique_block)))
        total_area += numpy.count_nonzero((lu_band_data != _NODATA_YIELD))

    missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes))
    if len(missing_lucodes) > 0:
        LOGGER.warning(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s" % (
                    crop_climate_bin_raster_path, crop_name,
                    args['landcover_to_crop_table_path']))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = args['workspace_dir']
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(crop_climate_bin_raster_path,
                                    crop_climate_bin_raster_info['pixel_size'],
                                    clipped_climate_bin_raster_path,
                                    'nearest',
                                    target_bb=landcover_wgs84_bounding_box)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)

        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path,
            'climate_bin',
            to_lower=True,
            numerical_cast=True,
            warn_if_missing=False)
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        yield_regression_headers = [
            x for x in next(iter(crop_regression_table.values()))
            if x != 'climate_bin'
        ]

        clipped_climate_bin_raster_path_info = (
            pygeoprocessing.get_raster_info(clipped_climate_bin_raster_path))

        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir, _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN %
                    (crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = {
                bin_id: crop_regression_table[bin_id][yield_regression_id]
                for bin_id in crop_regression_table
            }
            bin_to_regression_value[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN %
                (crop_name, yield_regression_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1), bin_to_regression_value,
                coarse_regression_parameter_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info("Interpolate %s %s parameter to landcover resolution.",
                        crop_name, yield_regression_id)
            pygeoprocessing.warp_raster(
                coarse_regression_parameter_raster_path,
                landcover_raster_info['pixel_size'],
                regression_parameter_raster_path_lookup[yield_regression_id],
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

        # The regression model uses identical mathematical equations for
        # nitrogen, phosphorous, and potassium; only the scalars differ.
        # The closure below avoids repeating the same function for three
        # nearly identical raster_calculator calls.
        def _x_yield_op_gen(fert_rate):
            """Create a raster calc op given the fertlization rate."""
            def _x_yield_op(y_max, b_x, c_x, lulc_array):
                """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC))"""
                result = numpy.empty(b_x.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = ((b_x != _NODATA_YIELD) & (c_x != _NODATA_YIELD) &
                              (lulc_array == crop_lucode))
                result[valid_mask] = y_max[valid_mask] * (
                    1 - b_x[valid_mask] *
                    numpy.exp(-c_x[valid_mask] * fert_rate) * pixel_area_ha)
                return result

            return _x_yield_op

        LOGGER.info('Calc nitrogen yield')
        nitrogen_yield_raster_path = os.path.join(
            output_dir,
            _NITROGEN_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_n'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertilization_rate_table[crop_name]['nitrogen_rate']),
            nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc phosphorous yield')
        phosphorous_yield_raster_path = os.path.join(
            output_dir,
            _PHOSPHOROUS_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_p2o5'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(crop_to_fertilization_rate_table[crop_name]
                            ['phosphorous_rate']),
            phosphorous_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc potassium yield')
        potassium_yield_raster_path = os.path.join(
            output_dir,
            _POTASSIUM_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_k2o'], 1),
             (regression_parameter_raster_path_lookup['c_k2o'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(crop_to_fertilization_rate_table[crop_name]
                            ['potassium_rate']),
            potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc the min of N, K, and P')
        crop_production_raster_path = os.path.join(
            output_dir,
            _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        def _min_op(y_n, y_p, y_k):
            """Calculate the min of the three inputs and multiply by Ymax."""
            result = numpy.empty(y_n.shape, dtype=numpy.float32)
            result[:] = _NODATA_YIELD
            valid_mask = ((y_n != _NODATA_YIELD) & (y_k != _NODATA_YIELD) &
                          (y_p != _NODATA_YIELD))
            result[valid_mask] = (numpy.min(
                [y_n[valid_mask], y_k[valid_mask], y_p[valid_mask]], axis=0))
            return result

        pygeoprocessing.raster_calculator([(nitrogen_yield_raster_path, 1),
                                           (phosphorous_yield_raster_path, 1),
                                           (potassium_yield_raster_path, 1)],
                                          _min_op, crop_production_raster_path,
                                          gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path,
            'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(observed_yield_array.shape,
                                 dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op,
            zeroed_observed_yield_raster_path, gdal.GDT_Float32,
            observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path,
            'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (observed_yield_array[lulc_mask] *
                                 pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)
    nutrient_headers = [
        nutrient_id + '_' + mode
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for mode in ['modeled', 'observed']
    ]
    with open(result_table_path, 'w') as result_table:
        result_table.write('crop,area (ha),' +
                           'production_observed,production_modeled,' +
                           ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            yield_sum = 0.0
            # rebuild this crop's production raster path; otherwise the last
            # crop's path from the loop above would leak into every row
            crop_production_raster_path = os.path.join(
                output_dir,
                _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            for _, yield_block in pygeoprocessing.iterblocks(
                    crop_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[_NODATA_YIELD != yield_block])
            production_lookup['modeled'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                total_nutrient = (nutrient_factor *
                                  production_lookup['modeled'] *
                                  nutrient_table[crop_name][nutrient_id])
                result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (nutrient_factor * production_lookup['observed'] *
                             nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write('\n,total area (both crop and non-crop)\n,%f\n' %
                           (total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(args['aggregate_polygon_path'],
                                         landcover_raster_info['projection'],
                                         target_aggregate_vector_path,
                                         layer_index=0,
                                         driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            LOGGER.info("Calculating zonal stats for %s", crop_name)
            crop_production_raster_path = os.path.join(
                output_dir,
                _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            modeled_key = '%s_modeled' % crop_name
            total_yield_lookup[modeled_key] = (
                pygeoprocessing.zonal_statistics(
                    (crop_production_raster_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup[modeled_key]:
                    total_nutrient_table[nutrient_id]['modeled'][id_index] += (
                        nutrient_factor *
                        total_yield_lookup[modeled_key][id_index]['sum'] *
                        nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_key = '%s_observed' % crop_name
            total_yield_lookup[observed_key] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup[observed_key]:
                    total_nutrient_table[nutrient_id]['observed'][id_index] += (
                        nutrient_factor *
                        total_yield_lookup[observed_key][id_index]['sum'] *
                        nutrient_table[crop_name][nutrient_id])

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in sorted(
                    next(iter(total_nutrient_table.values())))
            ]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)
                ]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')
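# --- Usage sketch (hypothetical, not from the source): a minimal args dict
# for the regression model's execute() above. Every path is a placeholder,
# not shipped sample data.
example_args = {
    'workspace_dir': 'workspace',
    'results_suffix': 'demo',
    'landcover_raster_path': 'lulc.tif',
    'landcover_to_crop_table_path': 'landcover_to_crop.csv',
    'fertilization_rate_table_path': 'fertilization_rates.csv',
    'aggregate_polygon_path': 'regions.shp',  # optional; may be None
    'aggregate_polygon_id': 'ws_id',
    'model_data_path': 'model_data',
}
execute(example_args)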
Example #5
def _build_affected_vector(base_watershed_vector_path, target_wkt,
                           damage_table_path, built_infrastructure_vector_path,
                           target_watershed_result_vector_path):
    """Construct the affected area vector.

    The ``base_watershed_vector_path`` will be intersected with the
    ``built_infrastructure_vector_path`` to get the affected built area.

    Parameters:
        base_watershed_vector_path (str): path to the base watershed vector.
        target_wkt (str): desired target projection.
        damage_table_path (None or str): path to a CSV table containing fields
            'Type' and 'Damage'. For every value of 'Type' in the
            built_infrastructure_vector there must be a corresponding entry
            in this table. If None, this field is ignored.
        built_infrastructure_vector_path (str): path to infrastructure vector
            containing at least the integer field 'Type'.
        target_watershed_result_vector_path (str): path to desired target
            watershed result vector that will have an additional field called
            'aff_bld'.

    Returns:
        None.

    """
    if damage_table_path is not None and damage_table_path != '':
        damage_type_map = utils.build_lookup_from_csv(damage_table_path,
                                                      'type',
                                                      to_lower=True,
                                                      warn_if_missing=True)
    else:
        damage_type_map = None

    if os.path.exists(target_watershed_result_vector_path):
        LOGGER.warning('%s exists, removing it before writing a new one',
                       target_watershed_result_vector_path)
        os.remove(target_watershed_result_vector_path)

    pygeoprocessing.reproject_vector(base_watershed_vector_path,
                                     target_wkt,
                                     target_watershed_result_vector_path,
                                     driver_name='GPKG')

    target_srs = osr.SpatialReference()
    target_srs.ImportFromWkt(target_wkt)

    infrastructure_rtree = rtree.index.Index()
    infrastructure_geometry_list = []
    infrastructure_vector = gdal.OpenEx(built_infrastructure_vector_path,
                                        gdal.OF_VECTOR)
    infrastructure_layer = infrastructure_vector.GetLayer()

    infrastructure_srs = infrastructure_layer.GetSpatialRef()
    infrastructure_to_target = osr.CoordinateTransformation(
        infrastructure_srs, target_srs)

    infrastructure_layer_defn = infrastructure_layer.GetLayerDefn()
    for field_name in ['type', 'Type', 'TYPE']:
        type_index = infrastructure_layer_defn.GetFieldIndex(field_name)
        if type_index != -1:
            break
    if type_index == -1:
        raise ValueError(
            "Could not find field 'Type' in %s" %
            built_infrastructure_vector_path)

    LOGGER.info("building infrastructure lookup dict")
    for infrastructure_feature in infrastructure_layer:
        infrastructure_geom = infrastructure_feature.GetGeometryRef().Clone()
        infrastructure_geom.Transform(infrastructure_to_target)
        infrastructure_geometry_list.append({
            'geom': shapely.wkb.loads(infrastructure_geom.ExportToWkb()),
        })
        if damage_type_map is not None:
            infrastructure_geometry_list[-1]['damage'] = damage_type_map[
                infrastructure_feature.GetField(type_index)]['damage']
        infrastructure_rtree.insert(
            len(infrastructure_geometry_list) - 1,
            infrastructure_geometry_list[-1]['geom'].bounds)

    infrastructure_layer = None
    infrastructure_vector = None

    watershed_vector = gdal.OpenEx(target_watershed_result_vector_path,
                                   gdal.OF_VECTOR | gdal.OF_UPDATE)
    watershed_layer = watershed_vector.GetLayer()
    watershed_layer.CreateField(ogr.FieldDefn('aff_bld', ogr.OFTReal))
    watershed_layer.SyncToDisk()

    last_time = time.time()
    for watershed_index, watershed_feature in enumerate(watershed_layer):
        current_time = time.time()
        if current_time - last_time > 5.0:
            LOGGER.info("processing watershed result %.2f%%",
                        (100.0 * (watershed_index + 1)) /
                        watershed_layer.GetFeatureCount())
            last_time = current_time
        watershed_shapely = shapely.wkb.loads(
            watershed_feature.GetGeometryRef().ExportToWkb())
        watershed_prep_geom = shapely.prepared.prep(watershed_shapely)
        total_damage = 0.0
        for infrastructure_index in infrastructure_rtree.intersection(
                watershed_shapely.bounds):
            infrastructure_geom = infrastructure_geometry_list[
                infrastructure_index]['geom']
            if damage_type_map:
                if watershed_prep_geom.intersects(infrastructure_geom):
                    total_damage += (
                        watershed_shapely.intersection(
                            infrastructure_geom).area *
                        infrastructure_geometry_list[infrastructure_index]
                        ['damage'])

        if damage_type_map:
            watershed_feature.SetField('aff_bld', total_damage)
        watershed_layer.SetFeature(watershed_feature)
    watershed_layer.SyncToDisk()
    watershed_layer = None
    watershed_vector = None
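# --- Standalone sketch (toy data, not from the source) of the pattern used
# above: an rtree bounding-box prefilter followed by exact shapely tests.
import rtree.index
import shapely.geometry
import shapely.prepared

toy_polygons = [shapely.geometry.box(0, 0, 1, 1),
                shapely.geometry.box(5, 5, 6, 6)]
toy_index = rtree.index.Index()
for toy_id, toy_geom in enumerate(toy_polygons):
    toy_index.insert(toy_id, toy_geom.bounds)

query_geom = shapely.geometry.box(0.5, 0.5, 2.0, 2.0)
prepared_query = shapely.prepared.prep(query_geom)
# coarse candidates by bounds first, exact intersection test second
hits = [toy_id for toy_id in toy_index.intersection(query_geom.bounds)
        if prepared_query.intersects(toy_polygons[toy_id])]
assert hits == [0]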
Example #6
def _build_spatial_index(base_raster_path, local_model_dir,
                         tropical_forest_edge_carbon_model_vector_path,
                         target_spatial_index_pickle_path):
    """Build a kd-tree index.

    Build a kd-tree index of the locally projected globally georeferenced
    carbon edge model parameters.

    Args:
        base_raster_path (string): path to a raster that is used to define the
            bounding box and projection of the local model.
        local_model_dir (string): path to a directory where we can write a
            shapefile of the locally projected global data model grid.
            The function will create a file called 'local_carbon_shape.shp'
            there, overwriting any existing file.
        tropical_forest_edge_carbon_model_vector_path (string): a path to an
            OGR shapefile that has the parameters for the global carbon edge
            model. Each georeferenced feature should have fields 'theta1',
            'theta2', 'theta3', and 'method'
        target_spatial_index_pickle_path (string): path to the pickle file
            to store a tuple of:
                scipy.spatial.cKDTree (georeferenced locally projected model
                    points)
                theta_model_parameters (parallel Nx3 array of theta parameters)
                method_model_parameter (parallel N array of model numbers (1..3))

    Returns:
        None

    """
    # Reproject the global model into local coordinate system
    carbon_model_reproject_path = os.path.join(local_model_dir,
                                               'local_carbon_shape.shp')
    lulc_projection_wkt = pygeoprocessing.get_raster_info(
        base_raster_path)['projection_wkt']
    pygeoprocessing.reproject_vector(
        tropical_forest_edge_carbon_model_vector_path, lulc_projection_wkt,
        carbon_model_reproject_path)

    model_vector = gdal.OpenEx(carbon_model_reproject_path)
    model_layer = model_vector.GetLayer()

    kd_points = []
    theta_model_parameters = []
    method_model_parameter = []

    # put all the polygons in the kd_tree because it's fast and simple
    for poly_feature in model_layer:
        poly_geom = poly_feature.GetGeometryRef()
        poly_centroid = poly_geom.Centroid()
        # put in row/col order since rasters are row/col indexed
        kd_points.append([poly_centroid.GetY(), poly_centroid.GetX()])

        theta_model_parameters.append([
            poly_feature.GetField(feature_id)
            for feature_id in ['theta1', 'theta2', 'theta3']
        ])
        method_model_parameter.append(poly_feature.GetField('method'))

    method_model_parameter = numpy.array(method_model_parameter,
                                         dtype=numpy.int32)
    theta_model_parameters = numpy.array(theta_model_parameters,
                                         dtype=numpy.float32)

    LOGGER.info('Building kd_tree')
    kd_tree = scipy.spatial.cKDTree(kd_points)
    LOGGER.info('Done building kd_tree with %d points', len(kd_points))

    with open(target_spatial_index_pickle_path, 'wb') as picklefile:
        pickle.dump(
            (kd_tree, theta_model_parameters, method_model_parameter),
            picklefile)
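# --- Consumption sketch (an assumption, not from the source): reading back
# the pickle written above and querying the kd-tree. The file path and the
# (y, x) query coordinate are hypothetical.
import pickle

with open('spatial_index.pickle', 'rb') as picklefile:
    kd_tree, theta_model_parameters, method_model_parameter = pickle.load(
        picklefile)
# nearest model point to a locally projected (y, x) coordinate
distance, kd_index = kd_tree.query([4500000.0, 550000.0])
theta1, theta2, theta3 = theta_model_parameters[kd_index]
method = method_model_parameter[kd_index]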
def execute(args):
    """Crop Production Percentile Model.

    This model takes a landcover (crop cover) map and produces percentile
    yields, production, observed crop yields, a nutrient table, and a
    clipped observed map.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.
    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)
    bad_crop_name_list = []
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            bad_crop_name_list.append(crop_name)
    if len(bad_crop_name_list) > 0:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" %
            (args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = args['workspace_dir']
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(crop_climate_bin_raster_path,
                                    crop_climate_bin_raster_info['pixel_size'],
                                    clipped_climate_bin_raster_path,
                                    'nearest',
                                    target_bb=landcover_wgs84_bounding_box)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path,
            'climate_bin',
            to_lower=True,
            numerical_cast=True)
        yield_percentile_headers = [
            x for x in next(iter(crop_climate_percentile_table.values()))
            if x != 'climate_bin'
        ]

        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir, _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = {
                bin_id:
                    crop_climate_percentile_table[bin_id][yield_percentile_id]
                for bin_id in crop_climate_percentile_table
            }
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1), bin_to_percentile_yield,
                coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            pygeoprocessing.warp_raster(
                coarse_yield_percentile_raster_path,
                landcover_raster_info['pixel_size'],
                interpolated_yield_percentile_raster_path,
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

            LOGGER.info("Calculate yield for %s at %s", crop_name,
                        yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))

            def _crop_production_op(lulc_array, yield_array):
                """Mask in yields that overlap with `crop_lucode`."""
                result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = lulc_array != landcover_nodata
                lulc_mask = lulc_array == crop_lucode
                result[valid_mask] = 0
                result[lulc_mask] = (yield_array[lulc_mask] * pixel_area_ha)
                return result

            pygeoprocessing.raster_calculator(
                [(args['landcover_raster_path'], 1),
                 (interpolated_yield_percentile_raster_path, 1)],
                _crop_production_op, percentile_crop_production_raster_path,
                gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop, assuming that
        # all the percentile rasters have non-zero production so it's okay to
        # use just one of the percentile rasters
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                percentile_crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))

        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path,
            'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(observed_yield_array.shape,
                                 dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op,
            zeroed_observed_yield_raster_path, gdal.GDT_Float32,
            observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path,
            'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (observed_yield_array[lulc_mask] *
                                 pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)
    production_percentile_headers = [
        'production_' +
        re.match(_YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for yield_percentile_id in sorted(yield_percentile_headers)
    ]
    nutrient_headers = [
        nutrient_id + '_' +
        re.match(_YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for yield_percentile_id in sorted(yield_percentile_headers) +
        ['yield_observed']
    ]
    with open(result_table_path, 'w') as result_table:
        result_table.write(
            'crop,area (ha),production_observed,' +
            ','.join(production_percentile_headers) + ',' +
            ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
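            # sum observed production over the raster, skipping nodata pixels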
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            for yield_percentile_id in sorted(yield_percentile_headers):
                yield_percentile_raster_path = os.path.join(
                    output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                    (crop_name, yield_percentile_id, file_suffix))
                yield_sum = 0.0
                for _, yield_block in pygeoprocessing.iterblocks(
                        yield_percentile_raster_path):
                    yield_sum += numpy.sum(
                        yield_block[_NODATA_YIELD != yield_block])
                production_lookup[yield_percentile_id] = yield_sum
                result_table.write(",%f" % yield_sum)

            # nutrient values are given per 100 g; convert to Mg and
            # exclude the fraction of the crop lost as refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for yield_percentile_id in sorted(yield_percentile_headers):
                    total_nutrient = (nutrient_factor *
                                      production_lookup[yield_percentile_id] *
                                      nutrient_table[crop_name][nutrient_id])
                    result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (nutrient_factor * production_lookup['observed'] *
                             nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

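        # count every valid (non-nodata) landcover pixel so the report also
        # includes the total landscape area, crop and non-crop alike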
        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write('\n,total area (both crop and non-crop)\n,%f\n' %
                           (total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(args['aggregate_polygon_path'],
                                         landcover_raster_info['projection'],
                                         target_aggregate_vector_path,
                                         layer_index=0,
                                         driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
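        # nested lookup: total_nutrient_table[nutrient_id][model_type][
        #   polygon_id] -> running total, where model_type is a yield
        #   percentile id or 'observed'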
        for crop_name in crop_to_landcover_table:
            # nutrient values are given per 100 g; convert to Mg and
            # exclude the fraction of the crop lost as refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            # loop over percentiles
            for yield_percentile_id in yield_percentile_headers:
                percentile_crop_production_raster_path = os.path.join(
                    output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                    (crop_name, yield_percentile_id, file_suffix))
                LOGGER.info("Calculating zonal stats for %s  %s", crop_name,
                            yield_percentile_id)
                total_yield_lookup[
                    '%s_%s' % (crop_name, yield_percentile_id)] = (
                        pygeoprocessing.zonal_statistics(
                            (percentile_crop_production_raster_path, 1),
                            target_aggregate_vector_path,
                            str(args['aggregate_polygon_id'])))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for id_index in total_yield_lookup['%s_%s' %
                                                       (crop_name,
                                                        yield_percentile_id)]:
                        total_nutrient_table[nutrient_id][yield_percentile_id][
                            id_index] += (
                                nutrient_factor * total_yield_lookup[
                                    '%s_%s' %
                                    (crop_name,
                                     yield_percentile_id)][id_index]['sum'] *
                                nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_key = '%s_observed' % crop_name
            total_yield_lookup[observed_key] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup[observed_key]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup[observed_key][
                                id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in sorted(
                    next(iter(total_nutrient_table.values())))
            ]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)
                ]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(
                            ',%s' % total_nutrient_table[
                                nutrient_id][model_type][id_index])
                aggregate_table.write('\n')