def test_transform_bounding_box(self):
        """PyGeoprocessing: test bounding box transform."""
        import pygeoprocessing

        vector_extent = [
            440446.6938076447695494, 4800590.4052893081679940,
            606196.6938076447695494, 5087540.4052893081679940
        ]
        expected_extents = [
            -123.76825632966793, 43.350664712678984, -121.63016515055192,
            45.941400531740214
        ]
        # test from UTM 10N to WGS84
        base_ref = osr.SpatialReference()
        base_ref.ImportFromEPSG(26910)

        new_ref = osr.SpatialReference()
        new_ref.ImportFromEPSG(4326)
        actual_extents = pygeoprocessing.transform_bounding_box(
            vector_extent,
            base_ref.ExportToWkt(),
            new_ref.ExportToWkt(),
            edge_samples=11)
        numpy.testing.assert_array_almost_equal(expected_extents,
                                                actual_extents)
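
A minimal standalone sketch of the same transform, assuming only that the
GDAL ``osr`` bindings and pygeoprocessing are installed; the EPSG codes and
extents mirror the test above:

from osgeo import osr
import pygeoprocessing

utm10n_srs = osr.SpatialReference()
utm10n_srs.ImportFromEPSG(26910)  # NAD83 / UTM zone 10N
wgs84_srs = osr.SpatialReference()
wgs84_srs.ImportFromEPSG(4326)  # WGS84 lat/lng

# edge_samples densifies each edge of the box before reprojecting, so edges
# that curve in the target projection don't clip the true extent.
bbox = pygeoprocessing.transform_bounding_box(
    [440446.69, 4800590.41, 606196.69, 5087540.41],
    utm10n_srs.ExportToWkt(), wgs84_srs.ExportToWkt(), edge_samples=11)
print(bbox)  # roughly [-123.77, 43.35, -121.63, 45.94]
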
def check_spatial_overlap(spatial_filepaths_list,
                          different_projections_ok=False):
    """Check that the given spatial files spatially overlap.

    Args:
        spatial_filepaths_list (list): A list of files that can be opened with
            GDAL.  Must be on the local filesystem.
        different_projections_ok (bool): Whether it's OK for the input
            spatial files to have different projections.  If ``True``, all
            projections will be converted to WGS84 before overlap is
            checked.  Defaults to ``False``.

    Returns:
        A string error message if an error is found.  ``None`` otherwise.

    """
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)
    wgs84_wkt = wgs84_srs.ExportToWkt()

    bounding_boxes = []
    checked_file_list = []
    for filepath in spatial_filepaths_list:
        try:
            info = pygeoprocessing.get_raster_info(filepath)
        except ValueError:
            info = pygeoprocessing.get_vector_info(filepath)

        if info['projection_wkt'] is None:
            return f'Spatial file {filepath} has no projection'

        if different_projections_ok:
            bounding_box = pygeoprocessing.transform_bounding_box(
                info['bounding_box'], info['projection_wkt'], wgs84_wkt)
        else:
            bounding_box = info['bounding_box']

        if all([numpy.isinf(coord) for coord in bounding_box]):
            LOGGER.warning('Skipping infinite bounding box for file %s',
                           filepath)
            continue

        bounding_boxes.append(bounding_box)
        checked_file_list.append(filepath)

    try:
        pygeoprocessing.merge_bounding_box_list(bounding_boxes, 'intersection')
    except ValueError as error:
        LOGGER.debug(error)
        formatted_lists = ' | '.join([
            a + ': ' + str(b)
            for a, b in zip(checked_file_list, bounding_boxes)
        ])
        message = f"Bounding boxes do not intersect: {formatted_lists}"
        return message
    return None
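
A hedged usage sketch of the helper above; the two file paths are
hypothetical placeholders for any GDAL-readable datasets on the local
filesystem:

# returns None when the inputs overlap, or an error string describing
# the mismatch otherwise
error_msg = check_spatial_overlap(
    ['dem.tif', 'watersheds.shp'], different_projections_ok=True)
if error_msg is not None:
    raise ValueError(error_msg)
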
def execute(args):
    """Crop Production Percentile.

    This model takes a landcover (crop cover) map and produces modeled
    yields and production, observed crop yields, a nutrient table, and a
    clipped observed yield map.  A minimal example ``args`` dict is shown
    after the function body.

    Args:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.

        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path:

            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)

            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None.

    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)
    bad_crop_name_list = []
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            bad_crop_name_list.append(crop_name)
    if bad_crop_name_list:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" %
            (args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = args['workspace_dir']
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000
    landcover_nodata = landcover_raster_info['nodata'][0]
    if landcover_nodata is None:
        LOGGER.warning("%s does not have nodata value defined; "
                       "assuming all pixel values are valid" %
                       args['landcover_raster_path'])

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection_wkt'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    # Initialize a TaskGraph
    work_token_dir = os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR,
                                  '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Single process mode.
    task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
    dependent_task_list = []

    crop_lucode = None
    observed_yield_nodata = None
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        crop_climate_bin_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(crop_climate_bin_raster_path,
                  crop_climate_bin_raster_info['pixel_size'],
                  clipped_climate_bin_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_climate_bin_raster_path],
            task_name='crop_climate_bin')
        dependent_task_list.append(crop_climate_bin_task)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path, 'climate_bin', to_lower=True)
        yield_percentile_headers = [
            x for x in list(crop_climate_percentile_table.values())[0]
            if x != 'climate_bin'
        ]

        reclassify_error_details = {
            'raster_name': f'{crop_name} Climate Bin',
            'column_name': 'climate_bin',
            'table_name': f'Climate {crop_name} Percentile Yield'
        }
        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir, _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = {
                bin_id: crop_climate_percentile_table[
                    bin_id][yield_percentile_id]
                for bin_id in crop_climate_percentile_table}
            # reclassify nodata to a valid value of 0
            # we're assuming that the crop doesn't exist where there is no data
            # this is more likely than assuming the crop does exist, esp.
            # in the context of the provided climate bins map
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            create_coarse_yield_percentile_task = task_graph.add_task(
                func=utils.reclassify_raster,
                args=((clipped_climate_bin_raster_path,
                       1), bin_to_percentile_yield,
                      coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                      _NODATA_YIELD, reclassify_error_details),
                target_path_list=[coarse_yield_percentile_raster_path],
                dependent_task_list=[crop_climate_bin_task],
                task_name='create_coarse_yield_percentile_%s_%s' %
                (crop_name, yield_percentile_id))
            dependent_task_list.append(create_coarse_yield_percentile_task)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            create_interpolated_yield_percentile_task = task_graph.add_task(
                func=pygeoprocessing.warp_raster,
                args=(coarse_yield_percentile_raster_path,
                      landcover_raster_info['pixel_size'],
                      interpolated_yield_percentile_raster_path,
                      'cubicspline'),
                kwargs={
                    'target_projection_wkt':
                    landcover_raster_info['projection_wkt'],
                    'target_bb':
                    landcover_raster_info['bounding_box']
                },
                target_path_list=[interpolated_yield_percentile_raster_path],
                dependent_task_list=[create_coarse_yield_percentile_task],
                task_name='create_interpolated_yield_percentile_%s_%s' %
                (crop_name, yield_percentile_id))
            dependent_task_list.append(
                create_interpolated_yield_percentile_task)

            LOGGER.info("Calculate yield for %s at %s", crop_name,
                        yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))

            create_percentile_production_task = task_graph.add_task(
                func=calculate_crop_production,
                args=(args['landcover_raster_path'],
                      interpolated_yield_percentile_raster_path, crop_lucode,
                      pixel_area_ha, percentile_crop_production_raster_path),
                target_path_list=[percentile_crop_production_raster_path],
                dependent_task_list=[
                    create_interpolated_yield_percentile_task
                ],
                task_name='create_percentile_production_%s_%s' %
                (crop_name, yield_percentile_id))
            dependent_task_list.append(create_percentile_production_task)

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))

        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        clip_global_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(global_observed_yield_raster_path,
                  global_observed_yield_raster_info['pixel_size'],
                  clipped_observed_yield_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_observed_yield_raster_path],
            task_name='clip_global_observed_yield_%s_' % crop_name)
        dependent_task_list.append(clip_global_observed_yield_task)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        nodata_to_zero_for_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([
                (clipped_observed_yield_raster_path, 1),
                (observed_yield_nodata, 'raw')
            ], _zero_observed_yield_op, zeroed_observed_yield_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[zeroed_observed_yield_raster_path],
            dependent_task_list=[clip_global_observed_yield_task],
            task_name='nodata_to_zero_for_observed_yield_%s_' % crop_name)
        dependent_task_list.append(nodata_to_zero_for_observed_yield_task)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        interpolate_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(zeroed_observed_yield_raster_path,
                  landcover_raster_info['pixel_size'],
                  interpolated_observed_yield_raster_path, 'cubicspline'),
            kwargs={
                'target_projection_wkt':
                landcover_raster_info['projection_wkt'],
                'target_bb': landcover_raster_info['bounding_box']
            },
            target_path_list=[interpolated_observed_yield_raster_path],
            dependent_task_list=[nodata_to_zero_for_observed_yield_task],
            task_name='interpolate_observed_yield_to_lulc_%s' % crop_name)
        dependent_task_list.append(interpolate_observed_yield_task)

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        calculate_observed_production_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(args['landcover_raster_path'], 1),
                   (interpolated_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw'), (landcover_nodata, 'raw'),
                   (crop_lucode, 'raw'), (pixel_area_ha, 'raw')
                   ], _mask_observed_yield_op, observed_production_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[observed_production_raster_path],
            dependent_task_list=[interpolate_observed_yield_task],
            task_name='calculate_observed_production_%s' % crop_name)
        dependent_task_list.append(calculate_observed_production_task)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)

    tabulate_results_task = task_graph.add_task(
        func=tabulate_results,
        args=(nutrient_table, yield_percentile_headers,
              crop_to_landcover_table, pixel_area_ha,
              args['landcover_raster_path'], landcover_nodata, output_dir,
              file_suffix, result_table_path),
        target_path_list=[result_table_path],
        dependent_task_list=dependent_task_list,
        task_name='tabulate_results')

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] not in ['', None]):
        LOGGER.info("aggregating result over query polygon")
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        aggregate_results_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        aggregate_results_task = task_graph.add_task(
            func=aggregate_to_polygons,
            args=(args['aggregate_polygon_path'], target_aggregate_vector_path,
                  landcover_raster_info['projection_wkt'],
                  crop_to_landcover_table, nutrient_table,
                  yield_percentile_headers, output_dir, file_suffix,
                  aggregate_results_table_path),
            target_path_list=[
                target_aggregate_vector_path, aggregate_results_table_path
            ],
            dependent_task_list=dependent_task_list,
            task_name='aggregate_results_to_polygons')

    task_graph.close()
    task_graph.join()
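
A minimal sketch of invoking the percentile model above; key names follow the
docstring, and every path value is a hypothetical placeholder:

args = {
    'workspace_dir': 'crop_workspace',
    'results_suffix': 'demo',  # optional
    'landcover_raster_path': 'lulc.tif',
    'landcover_to_crop_table_path': 'landcover_to_crop.csv',
    'model_data_path': 'model_data',  # InVEST global crop data directory
    'aggregate_polygon_path': None,  # optional; skipped when None
    'n_workers': -1,  # optional; -1 runs everything in this process
}
execute(args)
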
def execute(args):
    """Crop Production Regression Model.

    This model takes a landcover (crop cover) map along with nitrogen,
    phosphorous, and potassium fertilization rates and produces modeled
    yields and a nutrient table.  A minimal example ``args`` dict is shown
    after the function body.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_regression_yield_tables/[cropname]_*
              A ValueError is raised if strings don't match.
        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the simulation,
            though it can contain additional crops not used in the simulation.
            The headers must be 'crop_name', 'nitrogen_rate',
            'phosphorous_rate', and 'potassium_rate', where 'crop_name' is the
            name string used to identify crops in the
            'landcover_to_crop_table_path', and rates are in units kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.
    """
    LOGGER.info(
        "Calculating total land area and warning if the landcover raster "
        "is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)

    crop_to_fertilization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)

    crop_lucodes = [
        x[_EXPECTED_LUCODE_TABLE_HEADER]
        for x in crop_to_landcover_table.values()
    ]

    unique_lucodes = numpy.array([])
    total_area = 0.0
    for _, lu_band_data in pygeoprocessing.iterblocks(
            args['landcover_raster_path']):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(
            numpy.concatenate((unique_lucodes, unique_block)))
        total_area += numpy.count_nonzero((lu_band_data != _NODATA_YIELD))

    missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes))
    if len(missing_lucodes) > 0:
        LOGGER.warning(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s" % (
                    crop_climate_bin_raster_path, crop_name,
                    args['landcover_to_crop_table_path']))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = args['workspace_dir']
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(crop_climate_bin_raster_path,
                                    crop_climate_bin_raster_info['pixel_size'],
                                    clipped_climate_bin_raster_path,
                                    'nearest',
                                    target_bb=landcover_wgs84_bounding_box)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)

        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path,
            'climate_bin',
            to_lower=True,
            numerical_cast=True,
            warn_if_missing=False)
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        yield_regression_headers = [
            x for x in next(iter(crop_regression_table.values()))
            if x != 'climate_bin'
        ]

        clipped_climate_bin_raster_path_info = (
            pygeoprocessing.get_raster_info(clipped_climate_bin_raster_path))

        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir, _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN %
                    (crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = {
                bin_id: crop_regression_table[bin_id][yield_regression_id]
                for bin_id in crop_regression_table}
            bin_to_regression_value[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN %
                (crop_name, yield_regression_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1), bin_to_regression_value,
                coarse_regression_parameter_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info("Interpolate %s %s parameter to landcover resolution.",
                        crop_name, yield_regression_id)
            pygeoprocessing.warp_raster(
                coarse_regression_parameter_raster_path,
                landcover_raster_info['pixel_size'],
                regression_parameter_raster_path_lookup[yield_regression_id],
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

        # the regression model has identical mathematical equations for
        # nitrogen, phosphorous, and potassium; the only difference is
        # the scalars in the equation.  So make a closure below to avoid
        # repeating the same function 3 times for 3 almost identical
        # raster_calculator calls.
        def _x_yield_op_gen(fert_rate):
            """Create a raster calc op given the fertlization rate."""
            def _x_yield_op(y_max, b_x, c_x, lulc_array):
                """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC))"""
                result = numpy.empty(b_x.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = ((b_x != _NODATA_YIELD) & (c_x != _NODATA_YIELD) &
                              (lulc_array == crop_lucode))
                # pixel_area_ha scales the whole yield term, converting
                # per-hectare yield to per-pixel production
                result[valid_mask] = pixel_area_ha * y_max[valid_mask] * (
                    1 - b_x[valid_mask] *
                    numpy.exp(-c_x[valid_mask] * fert_rate))
                return result

            return _x_yield_op

        LOGGER.info('Calc nitrogen yield')
        nitrogen_yield_raster_path = os.path.join(
            output_dir,
            _NITROGEN_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_n'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(
                crop_to_fertilization_rate_table[crop_name]['nitrogen_rate']),
            nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc phosphorous yield')
        phosphorous_yield_raster_path = os.path.join(
            output_dir,
            _PHOSPHOROUS_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_nut'], 1),
             (regression_parameter_raster_path_lookup['c_p2o5'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(crop_to_fertilization_rate_table[crop_name]
                            ['phosphorous_rate']),
            phosphorous_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc potassium yield')
        potassium_yield_raster_path = os.path.join(
            output_dir,
            _POTASSIUM_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.raster_calculator(
            [(regression_parameter_raster_path_lookup['yield_ceiling'], 1),
             (regression_parameter_raster_path_lookup['b_k2o'], 1),
             (regression_parameter_raster_path_lookup['c_k2o'], 1),
             (args['landcover_raster_path'], 1)],
            _x_yield_op_gen(crop_to_fertilization_rate_table[crop_name]
                            ['potassium_rate']),
            potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD)

        LOGGER.info('Calc the min of N, K, and P')
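        # take the minimum across the three nutrient-limited yields: a
        # law-of-the-minimum assumption that the scarcest nutrient caps
        # production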
        crop_production_raster_path = os.path.join(
            output_dir,
            _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        def _min_op(y_n, y_p, y_k):
            """Take the minimum of the three nutrient-limited yields."""
            result = numpy.empty(y_n.shape, dtype=numpy.float32)
            result[:] = _NODATA_YIELD
            valid_mask = ((y_n != _NODATA_YIELD) & (y_k != _NODATA_YIELD) &
                          (y_p != _NODATA_YIELD))
            result[valid_mask] = (numpy.min(
                [y_n[valid_mask], y_k[valid_mask], y_p[valid_mask]], axis=0))
            return result

        pygeoprocessing.raster_calculator([(nitrogen_yield_raster_path, 1),
                                           (phosphorous_yield_raster_path, 1),
                                           (potassium_yield_raster_path, 1)],
                                          _min_op, crop_production_raster_path,
                                          gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path,
            'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(observed_yield_array.shape,
                                 dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op,
            zeroed_observed_yield_raster_path, gdal.GDT_Float32,
            observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path,
            'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (observed_yield_array[lulc_mask] *
                                 pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)
    nutrient_headers = [
        nutrient_id + '_' + mode
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for mode in ['modeled', 'observed']
    ]
    with open(result_table_path, 'w') as result_table:
        result_table.write('crop,area (ha),' +
                           'production_observed,production_modeled,' +
                           ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            yield_sum = 0.0
            for _, yield_block in pygeoprocessing.iterblocks(
                    crop_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[_NODATA_YIELD != yield_block])
            production_lookup['modeled'] = yield_sum
            result_table.write(",%f" % yield_sum)

            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
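            # worked arithmetic: 1 Mg = 1e6 g = 1e4 * (100 g), so the 1e4
            # factor converts production in Mg to the per-100 g units of the
            # crop_nutrient.csv contents, less the refuse fraction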
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                total_nutrient = (nutrient_factor *
                                  production_lookup['modeled'] *
                                  nutrient_table[crop_name][nutrient_id])
                result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (nutrient_factor * production_lookup['observed'] *
                             nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write('\n,total area (both crop and non-crop)\n,%f\n' %
                           (total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(args['aggregate_polygon_path'],
                                         landcover_raster_info['projection'],
                                         target_aggregate_vector_path,
                                         layer_index=0,
                                         driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
        for crop_name in crop_to_landcover_table:
            # convert 100g to Mg and fraction left over from refuse
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            LOGGER.info("Calculating zonal stats for %s", crop_name)
            crop_production_raster_path = os.path.join(
                output_dir,
                _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            total_yield_lookup['%s_modeled' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (crop_production_raster_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_modeled' % crop_name]:
                    total_nutrient_table[nutrient_id]['modeled'][id_index] += (
                        nutrient_factor *
                        total_yield_lookup['%s_modeled' %
                                           crop_name][id_index]['sum'] *
                        nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            total_yield_lookup['%s_observed' % crop_name] = (
                pygeoprocessing.zonal_statistics(
                    (observed_yield_path, 1),
                    target_aggregate_vector_path,
                    str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_observed' % crop_name]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_observed' %
                                               crop_name][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
                for model_type in sorted(
                    next(iter(total_nutrient_table.values())))
            ]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)
                ]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(',%s' %
                                              total_nutrient_table[nutrient_id]
                                              [model_type][id_index])
                aggregate_table.write('\n')
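
A minimal sketch of invoking the regression model above, under the same
caveat that every path value is a hypothetical placeholder:

args = {
    'workspace_dir': 'crop_workspace',
    'results_suffix': 'demo',  # optional
    'landcover_raster_path': 'lulc.tif',
    'landcover_to_crop_table_path': 'landcover_to_crop.csv',
    'fertilization_rate_table_path': 'fertilization_rates.csv',
    'model_data_path': 'model_data',  # InVEST global crop data directory
    'aggregate_polygon_path': None,  # optional; skipped when None
    'aggregate_polygon_id': None,  # ignored when no polygon is given
}
execute(args)
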
def execute(args):
    """Crop Production Percentile Model.

    This model takes a landcover (crop cover) map and produces modeled
    yields and production, observed crop yields, a nutrient table, and a
    clipped observed yield map.

    Parameters:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:
            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_bin_maps/[cropname]_*
              A ValueError is raised if strings don't match.
        args['aggregate_polygon_path'] (string): path to polygon shapefile
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['aggregate_polygon_id'] (string): This is the id field in
            args['aggregate_polygon_path'] to be used to index the final
            aggregate results.  If args['aggregate_polygon_path'] is not
            provided, this value is ignored.
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path
            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.
    """
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'],
        'crop_name',
        to_lower=True,
        numerical_cast=True)
    bad_crop_name_list = []
    for crop_name in crop_to_landcover_table:
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            bad_crop_name_list.append(crop_name)
    if len(bad_crop_name_list) > 0:
        raise ValueError(
            "The following crop names were provided in %s but no such crops "
            "exist for this model: %s" %
            (args['landcover_to_crop_table_path'], bad_crop_name_list))

    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = args['workspace_dir']
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None
    production_area = collections.defaultdict(float)
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        pygeoprocessing.warp_raster(crop_climate_bin_raster_path,
                                    crop_climate_bin_raster_info['pixel_size'],
                                    clipped_climate_bin_raster_path,
                                    'nearest',
                                    target_bb=landcover_wgs84_bounding_box)

        climate_percentile_yield_table_path = os.path.join(
            args['model_data_path'],
            _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
        crop_climate_percentile_table = utils.build_lookup_from_csv(
            climate_percentile_yield_table_path,
            'climate_bin',
            to_lower=True,
            numerical_cast=True)
        yield_percentile_headers = [
            x for x in next(iter(crop_climate_percentile_table.values()))
            if x != 'climate_bin'
        ]

        for yield_percentile_id in yield_percentile_headers:
            LOGGER.info("Map %s to climate bins.", yield_percentile_id)
            interpolated_yield_percentile_raster_path = os.path.join(
                output_dir, _INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            bin_to_percentile_yield = {
                bin_id: crop_climate_percentile_table[
                    bin_id][yield_percentile_id]
                for bin_id in crop_climate_percentile_table}
            bin_to_percentile_yield[
                crop_climate_bin_raster_info['nodata'][0]] = 0.0
            coarse_yield_percentile_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_PERCENTILE_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))
            pygeoprocessing.reclassify_raster(
                (clipped_climate_bin_raster_path, 1), bin_to_percentile_yield,
                coarse_yield_percentile_raster_path, gdal.GDT_Float32,
                _NODATA_YIELD)

            LOGGER.info(
                "Interpolate %s %s yield raster to landcover resolution.",
                crop_name, yield_percentile_id)
            pygeoprocessing.warp_raster(
                coarse_yield_percentile_raster_path,
                landcover_raster_info['pixel_size'],
                interpolated_yield_percentile_raster_path,
                'cubic_spline',
                target_sr_wkt=landcover_raster_info['projection'],
                target_bb=landcover_raster_info['bounding_box'])

            LOGGER.info("Calculate yield for %s at %s", crop_name,
                        yield_percentile_id)
            percentile_crop_production_raster_path = os.path.join(
                output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                (crop_name, yield_percentile_id, file_suffix))

            def _crop_production_op(lulc_array, yield_array):
                """Mask in yields that overlap with `crop_lucode`."""
                result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
                result[:] = _NODATA_YIELD
                valid_mask = lulc_array != landcover_nodata
                lulc_mask = lulc_array == crop_lucode
                result[valid_mask] = 0
                result[lulc_mask] = (yield_array[lulc_mask] * pixel_area_ha)
                return result

            pygeoprocessing.raster_calculator(
                [(args['landcover_raster_path'], 1),
                 (interpolated_yield_percentile_raster_path, 1)],
                _crop_production_op, percentile_crop_production_raster_path,
                gdal.GDT_Float32, _NODATA_YIELD)

        # calculate the non-zero production area for that crop, assuming that
        # all the percentile rasters have non-zero production so it's okay to
        # use just one of the percentile rasters
        LOGGER.info("Calculating production area.")
        for _, band_values in pygeoprocessing.iterblocks(
                percentile_crop_production_raster_path):
            production_area[crop_name] += numpy.count_nonzero(
                (band_values != _NODATA_YIELD) & (band_values > 0.0))
        production_area[crop_name] *= pixel_area_ha

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))

        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        pygeoprocessing.warp_raster(
            global_observed_yield_raster_path,
            global_observed_yield_raster_info['pixel_size'],
            clipped_observed_yield_raster_path,
            'nearest',
            target_bb=landcover_wgs84_bounding_box)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        def _zero_observed_yield_op(observed_yield_array):
            """Calculate observed 'actual' yield."""
            result = numpy.empty(observed_yield_array.shape,
                                 dtype=numpy.float32)
            result[:] = 0.0
            valid_mask = observed_yield_array != observed_yield_nodata
            result[valid_mask] = observed_yield_array[valid_mask]
            return result

        pygeoprocessing.raster_calculator(
            [(clipped_observed_yield_raster_path, 1)], _zero_observed_yield_op,
            zeroed_observed_yield_raster_path, gdal.GDT_Float32,
            observed_yield_nodata)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        pygeoprocessing.warp_raster(
            zeroed_observed_yield_raster_path,
            landcover_raster_info['pixel_size'],
            interpolated_observed_yield_raster_path,
            'cubic_spline',
            target_sr_wkt=landcover_raster_info['projection'],
            target_bb=landcover_raster_info['bounding_box'])

        def _mask_observed_yield(lulc_array, observed_yield_array):
            """Mask total observed yield to crop lulc type."""
            result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
            result[:] = observed_yield_nodata
            valid_mask = lulc_array != landcover_nodata
            lulc_mask = lulc_array == crop_lucode
            result[valid_mask] = 0
            result[lulc_mask] = (observed_yield_array[lulc_mask] *
                                 pixel_area_ha)
            return result

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        pygeoprocessing.raster_calculator(
            [(args['landcover_raster_path'], 1),
             (interpolated_observed_yield_raster_path, 1)],
            _mask_observed_yield, observed_production_raster_path,
            gdal.GDT_Float32, observed_yield_nodata)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(
        os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
        'crop', to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)
    production_percentile_headers = [
        'production_' +
        re.match(_YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for yield_percentile_id in sorted(yield_percentile_headers)
    ]
    nutrient_headers = [
        nutrient_id + '_' +
        re.match(_YIELD_PERCENTILE_FIELD_PATTERN, yield_percentile_id).group(1)
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for yield_percentile_id in sorted(yield_percentile_headers) +
        ['yield_observed']
    ]
    with open(result_table_path, 'w') as result_table:
        result_table.write('crop,area (ha),' + 'production_observed,' +
                           ','.join(production_percentile_headers) + ',' +
                           ','.join(nutrient_headers) + '\n')
        for crop_name in sorted(crop_to_landcover_table):
            result_table.write(crop_name)
            result_table.write(',%f' % production_area[crop_name])
            production_lookup = {}
            yield_sum = 0.0
            observed_production_raster_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            observed_yield_nodata = pygeoprocessing.get_raster_info(
                observed_production_raster_path)['nodata'][0]
            for _, yield_block in pygeoprocessing.iterblocks(
                    observed_production_raster_path):
                yield_sum += numpy.sum(
                    yield_block[observed_yield_nodata != yield_block])
            production_lookup['observed'] = yield_sum
            result_table.write(",%f" % yield_sum)

            for yield_percentile_id in sorted(yield_percentile_headers):
                yield_percentile_raster_path = os.path.join(
                    output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                    (crop_name, yield_percentile_id, file_suffix))
                yield_sum = 0.0
                for _, yield_block in pygeoprocessing.iterblocks(
                        yield_percentile_raster_path):
                    yield_sum += numpy.sum(
                        yield_block[_NODATA_YIELD != yield_block])
                production_lookup[yield_percentile_id] = yield_sum
                result_table.write(",%f" % yield_sum)

            # scale nutrient content per 100 g up to per Mg (1 Mg = 1e4 * 100 g)
            # and keep only the non-refuse (edible) fraction
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for yield_percentile_id in sorted(yield_percentile_headers):
                    total_nutrient = (nutrient_factor *
                                      production_lookup[yield_percentile_id] *
                                      nutrient_table[crop_name][nutrient_id])
                    result_table.write(",%f" % (total_nutrient))
                result_table.write(
                    ",%f" % (nutrient_factor * production_lookup['observed'] *
                             nutrient_table[crop_name][nutrient_id]))
            result_table.write('\n')

        total_area = 0.0
        for _, band_values in pygeoprocessing.iterblocks(
                args['landcover_raster_path']):
            total_area += numpy.count_nonzero(
                (band_values != landcover_nodata))
        result_table.write('\n,total area (both crop and non-crop)\n,%f\n' %
                           (total_area * pixel_area_ha))

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] is not None):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        pygeoprocessing.reproject_vector(args['aggregate_polygon_path'],
                                         landcover_raster_info['projection'],
                                         target_aggregate_vector_path,
                                         layer_index=0,
                                         driver_name='ESRI Shapefile')

        # loop over every crop and query with pgp function
        total_yield_lookup = {}
        total_nutrient_table = collections.defaultdict(
            lambda: collections.defaultdict(lambda: collections.defaultdict(
                float)))
        for crop_name in crop_to_landcover_table:
            # scale nutrient content per 100 g up to per Mg (1 Mg = 1e4 * 100 g)
            # and keep only the non-refuse (edible) fraction
            nutrient_factor = 1e4 * (
                1.0 - nutrient_table[crop_name]['Percentrefuse'] / 100.0)
            # loop over percentiles
            for yield_percentile_id in yield_percentile_headers:
                percentile_crop_production_raster_path = os.path.join(
                    output_dir, _PERCENTILE_CROP_PRODUCTION_FILE_PATTERN %
                    (crop_name, yield_percentile_id, file_suffix))
                LOGGER.info("Calculating zonal stats for %s  %s", crop_name,
                            yield_percentile_id)
                total_yield_lookup[
                    '%s_%s' % (crop_name, yield_percentile_id)] = (
                        pygeoprocessing.zonal_statistics(
                            (percentile_crop_production_raster_path, 1),
                            target_aggregate_vector_path,
                            str(args['aggregate_polygon_id'])))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for id_index in total_yield_lookup['%s_%s' %
                                                       (crop_name,
                                                        yield_percentile_id)]:
                        total_nutrient_table[nutrient_id][yield_percentile_id][
                            id_index] += (
                                nutrient_factor * total_yield_lookup[
                                    '%s_%s' %
                                    (crop_name,
                                     yield_percentile_id)][id_index]['sum'] *
                                nutrient_table[crop_name][nutrient_id])

            # process observed
            observed_yield_path = os.path.join(
                output_dir,
                _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
            total_yield_lookup['%s_observed' %
                               crop_name] = (pygeoprocessing.zonal_statistics(
                                   (observed_yield_path, 1),
                                   target_aggregate_vector_path,
                                   str(args['aggregate_polygon_id'])))
            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                for id_index in total_yield_lookup['%s_observed' % crop_name]:
                    total_nutrient_table[nutrient_id]['observed'][
                        id_index] += (
                            nutrient_factor *
                            total_yield_lookup['%s_observed' %
                                               crop_name][id_index]['sum'] *
                            nutrient_table[crop_name][nutrient_id])

        # report everything to a table
        aggregate_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        with open(aggregate_table_path, 'w') as aggregate_table:
            # write header
            aggregate_table.write('%s,' % args['aggregate_polygon_id'])
            aggregate_table.write(','.join(sorted(total_yield_lookup)) + ',')
            aggregate_table.write(','.join([
                '%s_%s' % (nutrient_id, model_type)
                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS for
                model_type in sorted(next(iter(total_nutrient_table.values())))
            ]))
            aggregate_table.write('\n')

            # iterate by polygon index
            for id_index in next(iter(total_yield_lookup.values())):
                aggregate_table.write('%s,' % id_index)
                aggregate_table.write(','.join([
                    str(total_yield_lookup[yield_header][id_index]['sum'])
                    for yield_header in sorted(total_yield_lookup)
                ]))

                for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
                    for model_type in sorted(
                            next(iter(total_nutrient_table.values()))):
                        aggregate_table.write(',%s' %
                                              total_nutrient_table[nutrient_id]
                                              [model_type][id_index])
                aggregate_table.write('\n')
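
The nutrient_factor above folds two conversions into one constant: the nutrient table reports content per 100 g, production is in Mg (1 Mg = 1e4 * 100 g), and only the non-refuse fraction counts. A minimal worked sketch of the arithmetic, with illustrative numbers rather than values from the model data:

percent_refuse = 10.0    # hypothetical refuse percentage for a crop
protein_per_100g = 2.5   # hypothetical nutrient content per 100 g
production_mg = 3.0      # production in Mg (metric tons)

nutrient_factor = 1e4 * (1.0 - percent_refuse / 100.0)  # 9000.0
total_protein = nutrient_factor * production_mg * protein_per_100g
print(total_protein)  # 67500.0 units of nutrient in the edible production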
Example No. 6
def do_inference_worker(model, quad_offset_queue, quad_file_path_queue,
                        inference_lock):
    """Calculate inference on data coming in on the URI_TO_PROCESS_LIST.

    Another notable global is QUAD_AVAILBLE_EVENT, an event used to wait for
    new work; it gets set when new work is received.

    Args:
        model (keras model): model used for bounding box prediction
        quad_offset_queue (queue): send to queue for quad processing
        quad_file_path_queue (queue): used for receiving quads that need
            inference.
        inference_lock (threading.Lock): used to ensure one shot of inference
            goes at a time.

    Returns:
        never
    """
    global HEALTHY
    try:
        wgs84_srs = osr.SpatialReference()
        wgs84_srs.ImportFromEPSG(4326)
        subprocess_result = None
        while True:
            QUAD_AVAILBLE_EVENT.wait(5.0)
            if not URI_TO_PROCESS_LIST:
                continue
            start_time = time.time()
            quad_uri = URI_TO_PROCESS_LIST.pop()
            QUAD_URI_TO_STATUS_MAP[quad_uri] = 'processing'
            quad_raster_path = os.path.join(WORKSPACE_DIR,
                                            os.path.basename(quad_uri))
            LOGGER.info('download ' + quad_uri + ' to ' + quad_raster_path)
            subprocess_result = subprocess.run(
                '/usr/local/gcloud-sdk/google-cloud-sdk/bin/gsutil cp '
                '"%s" %s' % (quad_uri, quad_raster_path),
                check=True,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            quad_info = pygeoprocessing.get_raster_info(quad_raster_path)
            n_cols, n_rows = quad_info['raster_size']
            quad_id = os.path.basename(os.path.splitext(quad_raster_path)[0])
            quad_slice_index = 0
            non_max_supression_box_list = []

            LOGGER.info('schedule clip of %s', quad_id)
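            # tile the quad into fixed-size windows; if a window would run
            # past the raster edge, slide it back so it stays fully inside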
            for xoff in range(0, n_cols, TRAINING_IMAGE_DIMS[0]):
                win_xsize = TRAINING_IMAGE_DIMS[0]
                if xoff + win_xsize >= n_cols:
                    xoff = n_cols - win_xsize - 1
                for yoff in range(0, n_rows, TRAINING_IMAGE_DIMS[1]):
                    win_ysize = TRAINING_IMAGE_DIMS[1]
                    if yoff + win_ysize >= n_rows:
                        yoff = n_rows - win_ysize - 1
                    quad_png_path = os.path.join(
                        WORKSPACE_DIR,
                        '%s_%d.png' % (quad_id, quad_slice_index))
                    quad_slice_index += 1
                    quad_offset_queue.put((quad_png_path, quad_raster_path,
                                           xoff, yoff, win_xsize, win_ysize))

            LOGGER.info('schedule inference of %s', quad_id)
            box_score_tuple_list = []
            with inference_lock:
                while quad_slice_index > 0:
                    quad_slice_index -= 1
                    xoff, yoff, scale, image = quad_file_path_queue.get()
                    result = model.predict_on_batch(image)
                    # correct boxes for image scale
                    boxes, scores, labels = result
                    boxes /= scale

                    # convert box to a list from a numpy array and score to a
                    # value from a single element array
                    box_score_tuple_list.extend([([
                        box[0] + xoff, box[1] + yoff, box[2] + xoff,
                        box[3] + yoff
                    ], score) for box, score in zip(boxes[0], scores[0])
                                                 if score > 0.3])

            # quad is now processed, it can be removed
            os.remove(quad_raster_path)
            while box_score_tuple_list:
                box, score = box_score_tuple_list.pop()
                shapely_box = shapely.geometry.box(*box)
                keep = True
                # this list makes a copy
                for test_box, test_score in list(box_score_tuple_list):
                    shapely_test_box = shapely.geometry.box(*test_box)
                    if shapely_test_box.intersects(shapely_box):
                        if test_score > score:
                            # an overlapping box scores higher; drop this one
                            keep = False
                            break
                if keep:
                    non_max_supression_box_list.append(box)

            #quad_png_path = '%s.png' % os.path.splitext(quad_raster_path)[0]
            # make_quad_png(
            #     quad_raster_path, quad_png_path, 0, 0, None, None)
            # render_bounding_boxes(non_max_supression_box_list, quad_png_path)
            lat_lng_bb_list = []
            for bounding_box in non_max_supression_box_list:
                local_coord_bb = []
                for offset in [0, 2]:
                    coords = list(
                        gdal.ApplyGeoTransform(quad_info['geotransform'],
                                               bounding_box[0 + offset],
                                               bounding_box[1 + offset]))
                    local_coord_bb.extend(coords)
                transformed_bb = pygeoprocessing.transform_bounding_box(
                    local_coord_bb, quad_info['projection_wkt'],
                    wgs84_srs.ExportToWkt())
                lat_lng_bb_list.append(transformed_bb)
            QUAD_URI_TO_STATUS_MAP[quad_uri] = lat_lng_bb_list
            LOGGER.info('done processing quad %s took %ss', quad_raster_path,
                        str(time.time() - start_time))
            if len(URI_TO_PROCESS_LIST) == 0:
                QUAD_AVAILBLE_EVENT.clear()
    except Exception:
        LOGGER.exception('error occurred on inference worker')
        if subprocess_result:
            LOGGER.error(subprocess_result)
        QUAD_URI_TO_STATUS_MAP[quad_uri] = 'error'
        HEALTHY = False
        raise
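
The de-duplication loop above is a greedy non-maximum suppression: a box survives only if no other remaining box both overlaps it and outscores it. A standalone sketch of the same idea (the function name and sample boxes are illustrative, not part of the worker):

import shapely.geometry

def greedy_nms(box_score_list):
    """Keep each box only if no overlapping box has a higher score."""
    kept = []
    candidates = list(box_score_list)  # copy so popping is safe
    while candidates:
        box, score = candidates.pop()
        shapely_box = shapely.geometry.box(*box)
        if not any(
                shapely.geometry.box(*other).intersects(shapely_box)
                and other_score > score
                for other, other_score in candidates):
            kept.append((box, score))
    return kept

# two overlapping detections -> only the higher-scoring one survives
print(greedy_nms([([0, 0, 10, 10], 0.9), ([5, 5, 15, 15], 0.6)]))
Example No. 7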
def execute(args):
    """Crop Production Regression.

    This model will take a landcover (crop cover?), N, P, and K map and
    produce modeled yields and a nutrient table.

    Args:
        args['workspace_dir'] (string): output directory for intermediate,
            temporary, and final files
        args['results_suffix'] (string): (optional) string to append to any
            output file names
        args['landcover_raster_path'] (string): path to landcover raster
        args['landcover_to_crop_table_path'] (string): path to a table that
            converts landcover types to crop names that has two headers:

            * lucode: integer value corresponding to a landcover code in
              `args['landcover_raster_path']`.
            * crop_name: a string that must match one of the crops in
              args['model_data_path']/climate_regression_yield_tables/[cropname]_*
              A ValueError is raised if strings don't match.

        args['fertilization_rate_table_path'] (string): path to CSV table
            that contains fertilization rates for the crops in the simulation,
            though it can contain additional crops not used in the simulation.
            The headers must be 'crop_name', 'nitrogen_rate',
            'phosphorous_rate', and 'potassium_rate', where 'crop_name' is the
            name string used to identify crops in the
            'landcover_to_crop_table_path', and rates are in units kg/Ha.
        args['aggregate_polygon_path'] (string): path to polygon vector
            that will be used to aggregate crop yields and total nutrient
            value. (optional, if value is None, then skipped)
        args['model_data_path'] (string): path to the InVEST Crop Production
            global data directory.  This model expects that the following
            directories are subdirectories of this path:

            * climate_bin_maps (contains [cropname]_climate_bin.tif files)
            * climate_percentile_yield (contains
              [cropname]_percentile_yield_table.csv files)
              
            Please see the InVEST user's guide chapter on crop production for
            details about how to download these data.

    Returns:
        None.

    """
    file_suffix = utils.make_suffix_string(args, 'results_suffix')
    output_dir = args['workspace_dir']
    utils.make_directories(
        [output_dir,
         os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])

    # Initialize a TaskGraph
    work_token_dir = os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR,
                                  '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # Single process mode.
    task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
    dependent_task_list = []

    LOGGER.info("Checking if the landcover raster is missing lucodes")
    crop_to_landcover_table = utils.build_lookup_from_csv(
        args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)

    crop_to_fertlization_rate_table = utils.build_lookup_from_csv(
        args['fertilization_rate_table_path'], 'crop_name', to_lower=True)

    crop_lucodes = [
        x[_EXPECTED_LUCODE_TABLE_HEADER]
        for x in crop_to_landcover_table.values()
    ]

    unique_lucodes = numpy.array([])
    for _, lu_band_data in pygeoprocessing.iterblocks(
        (args['landcover_raster_path'], 1)):
        unique_block = numpy.unique(lu_band_data)
        unique_lucodes = numpy.unique(
            numpy.concatenate((unique_lucodes, unique_block)))

    missing_lucodes = set(crop_lucodes).difference(set(unique_lucodes))
    if len(missing_lucodes) > 0:
        LOGGER.warning(
            "The following lucodes are in the landcover to crop table but "
            "aren't in the landcover raster: %s", missing_lucodes)

    LOGGER.info("Checking that crops correspond to known types.")
    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
        if not os.path.exists(crop_climate_bin_raster_path):
            raise ValueError(
                "Expected climate bin map called %s for crop %s "
                "specified in %s" % (
                    crop_climate_bin_raster_path, crop_name,
                    args['landcover_to_crop_table_path']))

    landcover_raster_info = pygeoprocessing.get_raster_info(
        args['landcover_raster_path'])
    pixel_area_ha = numpy.prod(
        [abs(x) for x in landcover_raster_info['pixel_size']]) / 10000.0
    landcover_nodata = landcover_raster_info['nodata'][0]
    if landcover_nodata is None:
        LOGGER.warning("%s does not have nodata value defined; "
                       "assuming all pixel values are valid" %
                       args['landcover_raster_path'])

    # Calculate lat/lng bounding box for landcover map
    wgs84srs = osr.SpatialReference()
    wgs84srs.ImportFromEPSG(4326)  # EPSG4326 is WGS84 lat/lng
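    # edge_samples=11 samples points along each edge of the bounding box so
    # the transformed bounds capture the curvature a reprojection introduces,
    # rather than relying on the four corners alone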
    landcover_wgs84_bounding_box = pygeoprocessing.transform_bounding_box(
        landcover_raster_info['bounding_box'],
        landcover_raster_info['projection_wkt'],
        wgs84srs.ExportToWkt(),
        edge_samples=11)

    crop_lucode = None
    observed_yield_nodata = None

    for crop_name in crop_to_landcover_table:
        crop_lucode = crop_to_landcover_table[crop_name][
            _EXPECTED_LUCODE_TABLE_HEADER]
        LOGGER.info("Processing crop %s", crop_name)
        crop_climate_bin_raster_path = os.path.join(
            args['model_data_path'],
            _EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)

        LOGGER.info(
            "Clipping global climate bin raster to landcover bounding box.")
        clipped_climate_bin_raster_path = os.path.join(
            output_dir,
            _CLIPPED_CLIMATE_BIN_FILE_PATTERN % (crop_name, file_suffix))
        crop_climate_bin_raster_info = pygeoprocessing.get_raster_info(
            crop_climate_bin_raster_path)
        crop_climate_bin_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(crop_climate_bin_raster_path,
                  crop_climate_bin_raster_info['pixel_size'],
                  clipped_climate_bin_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_climate_bin_raster_path],
            task_name='crop_climate_bin')
        dependent_task_list.append(crop_climate_bin_task)

        crop_regression_table_path = os.path.join(
            args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)

        crop_regression_table = utils.build_lookup_from_csv(
            crop_regression_table_path, 'climate_bin', to_lower=True)
        for bin_id in crop_regression_table:
            for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
                if crop_regression_table[bin_id][header.lower()] == '':
                    crop_regression_table[bin_id][header.lower()] = 0.0

        yield_regression_headers = [
            x for x in list(crop_regression_table.values())[0]
            if x != 'climate_bin'
        ]

        reclassify_error_details = {
            'raster_name': f'{crop_name} Climate Bin',
            'column_name': 'climate_bin',
            'table_name': f'Climate {crop_name} Regression Yield'
        }
        regression_parameter_raster_path_lookup = {}
        for yield_regression_id in yield_regression_headers:
            # there are extra headers in that table
            if yield_regression_id not in _EXPECTED_REGRESSION_TABLE_HEADERS:
                continue
            LOGGER.info("Map %s to climate bins.", yield_regression_id)
            regression_parameter_raster_path_lookup[yield_regression_id] = (
                os.path.join(
                    output_dir, _INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN %
                    (crop_name, yield_regression_id, file_suffix)))
            bin_to_regression_value = dict([
                (bin_id, crop_regression_table[bin_id][yield_regression_id])
                for bin_id in crop_regression_table
            ])
            bin_to_regression_value[crop_climate_bin_raster_info['nodata']
                                    [0]] = 0.0
            coarse_regression_parameter_raster_path = os.path.join(
                output_dir, _COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN %
                (crop_name, yield_regression_id, file_suffix))
            create_coarse_regression_parameter_task = task_graph.add_task(
                func=utils.reclassify_raster,
                args=((clipped_climate_bin_raster_path,
                       1), bin_to_regression_value,
                      coarse_regression_parameter_raster_path,
                      gdal.GDT_Float32, _NODATA_YIELD,
                      reclassify_error_details),
                target_path_list=[coarse_regression_parameter_raster_path],
                dependent_task_list=[crop_climate_bin_task],
                task_name='create_coarse_regression_parameter_%s_%s' %
                (crop_name, yield_regression_id))
            dependent_task_list.append(create_coarse_regression_parameter_task)

            LOGGER.info("Interpolate %s %s parameter to landcover resolution.",
                        crop_name, yield_regression_id)
            create_interpolated_parameter_task = task_graph.add_task(
                func=pygeoprocessing.warp_raster,
                args=(coarse_regression_parameter_raster_path,
                      landcover_raster_info['pixel_size'],
                      regression_parameter_raster_path_lookup[
                          yield_regression_id], 'cubicspline'),
                kwargs={
                    'target_projection_wkt':
                    landcover_raster_info['projection_wkt'],
                    'target_bb':
                    landcover_raster_info['bounding_box']
                },
                target_path_list=[
                    regression_parameter_raster_path_lookup[
                        yield_regression_id]
                ],
                dependent_task_list=[create_coarse_regression_parameter_task],
                task_name='create_interpolated_parameter_%s_%s' %
                (crop_name, yield_regression_id))
            dependent_task_list.append(create_interpolated_parameter_task)

        LOGGER.info('Calc nitrogen yield')
        nitrogen_yield_raster_path = os.path.join(
            output_dir,
            _NITROGEN_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        calc_nitrogen_yield_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([
                (regression_parameter_raster_path_lookup['yield_ceiling'], 1),
                (regression_parameter_raster_path_lookup['b_nut'], 1),
                (regression_parameter_raster_path_lookup['c_n'], 1),
                (args['landcover_raster_path'], 1),
                (crop_to_fertlization_rate_table[crop_name]['nitrogen_rate'],
                 'raw'), (crop_lucode, 'raw'), (pixel_area_ha, 'raw')
            ], _x_yield_op, nitrogen_yield_raster_path, gdal.GDT_Float32,
                  _NODATA_YIELD),
            target_path_list=[nitrogen_yield_raster_path],
            dependent_task_list=dependent_task_list,
            task_name='calculate_nitrogen_yield_%s' % crop_name)

        LOGGER.info('Calc phosphorous yield')
        phosphorous_yield_raster_path = os.path.join(
            output_dir,
            _PHOSPHOROUS_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        calc_phosphorous_yield_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([
                (regression_parameter_raster_path_lookup['yield_ceiling'], 1),
                (regression_parameter_raster_path_lookup['b_nut'], 1),
                (regression_parameter_raster_path_lookup['c_p2o5'], 1),
                (args['landcover_raster_path'], 1),
                (crop_to_fertlization_rate_table[crop_name]
                 ['phosphorous_rate'], 'raw'), (crop_lucode, 'raw'),
                (pixel_area_ha, 'raw')
            ], _x_yield_op, phosphorous_yield_raster_path, gdal.GDT_Float32,
                  _NODATA_YIELD),
            target_path_list=[phosphorous_yield_raster_path],
            dependent_task_list=dependent_task_list,
            task_name='calculate_phosphorous_yield_%s' % crop_name)

        LOGGER.info('Calc potassium yield')
        potassium_yield_raster_path = os.path.join(
            output_dir,
            _POTASSIUM_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        calc_potassium_yield_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([
                (regression_parameter_raster_path_lookup['yield_ceiling'], 1),
                (regression_parameter_raster_path_lookup['b_k2o'], 1),
                (regression_parameter_raster_path_lookup['c_k2o'], 1),
                (args['landcover_raster_path'], 1),
                (crop_to_fertlization_rate_table[crop_name]['potassium_rate'],
                 'raw'), (crop_lucode, 'raw'), (pixel_area_ha, 'raw')
            ], _x_yield_op, potassium_yield_raster_path, gdal.GDT_Float32,
                  _NODATA_YIELD),
            target_path_list=[potassium_yield_raster_path],
            dependent_task_list=dependent_task_list,
            task_name='calculate_potassium_yield_%s' % crop_name)

        dependent_task_list.extend(
            (calc_nitrogen_yield_task, calc_phosphorous_yield_task,
             calc_potassium_yield_task))

        LOGGER.info('Calc the min of N, K, and P')
        crop_production_raster_path = os.path.join(
            output_dir,
            _CROP_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))

        calc_min_NKP_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(nitrogen_yield_raster_path, 1),
                   (phosphorous_yield_raster_path, 1),
                   (potassium_yield_raster_path, 1)
                   ], _min_op, crop_production_raster_path, gdal.GDT_Float32,
                  _NODATA_YIELD),
            target_path_list=[crop_production_raster_path],
            dependent_task_list=dependent_task_list,
            task_name='calc_min_of_NKP')
        dependent_task_list.append(calc_min_NKP_task)

        LOGGER.info("Calculate observed yield for %s", crop_name)
        global_observed_yield_raster_path = os.path.join(
            args['model_data_path'],
            _GLOBAL_OBSERVED_YIELD_FILE_PATTERN % crop_name)
        global_observed_yield_raster_info = (
            pygeoprocessing.get_raster_info(global_observed_yield_raster_path))
        clipped_observed_yield_raster_path = os.path.join(
            output_dir,
            _CLIPPED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))
        clip_global_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(global_observed_yield_raster_path,
                  global_observed_yield_raster_info['pixel_size'],
                  clipped_observed_yield_raster_path, 'near'),
            kwargs={'target_bb': landcover_wgs84_bounding_box},
            target_path_list=[clipped_observed_yield_raster_path],
            task_name='clip_global_observed_yield_%s_' % crop_name)
        dependent_task_list.append(clip_global_observed_yield_task)

        observed_yield_nodata = (
            global_observed_yield_raster_info['nodata'][0])

        zeroed_observed_yield_raster_path = os.path.join(
            output_dir,
            _ZEROED_OBSERVED_YIELD_FILE_PATTERN % (crop_name, file_suffix))

        nodata_to_zero_for_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([
                (clipped_observed_yield_raster_path, 1),
                (observed_yield_nodata, 'raw')
            ], _zero_observed_yield_op, zeroed_observed_yield_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[zeroed_observed_yield_raster_path],
            dependent_task_list=[clip_global_observed_yield_task],
            task_name='nodata_to_zero_for_observed_yield_%s_' % crop_name)
        dependent_task_list.append(nodata_to_zero_for_observed_yield_task)

        interpolated_observed_yield_raster_path = os.path.join(
            output_dir, _INTERPOLATED_OBSERVED_YIELD_FILE_PATTERN %
            (crop_name, file_suffix))

        LOGGER.info("Interpolating observed %s raster to landcover.",
                    crop_name)
        interpolate_observed_yield_task = task_graph.add_task(
            func=pygeoprocessing.warp_raster,
            args=(zeroed_observed_yield_raster_path,
                  landcover_raster_info['pixel_size'],
                  interpolated_observed_yield_raster_path, 'cubicspline'),
            kwargs={
                'target_projection_wkt':
                landcover_raster_info['projection_wkt'],
                'target_bb': landcover_raster_info['bounding_box']
            },
            target_path_list=[interpolated_observed_yield_raster_path],
            dependent_task_list=[nodata_to_zero_for_observed_yield_task],
            task_name='interpolate_observed_yield_to_lulc_%s' % crop_name)
        dependent_task_list.append(interpolate_observed_yield_task)

        observed_production_raster_path = os.path.join(
            output_dir,
            _OBSERVED_PRODUCTION_FILE_PATTERN % (crop_name, file_suffix))
        calculate_observed_production_task = task_graph.add_task(
            func=pygeoprocessing.raster_calculator,
            args=([(args['landcover_raster_path'], 1),
                   (interpolated_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw'), (landcover_nodata, 'raw'),
                   (crop_lucode, 'raw'), (pixel_area_ha, 'raw')
                   ], _mask_observed_yield_op, observed_production_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[observed_production_raster_path],
            dependent_task_list=[interpolate_observed_yield_task],
            task_name='calculate_observed_production_%s' % crop_name)
        dependent_task_list.append(calculate_observed_production_task)

    # both 'crop_nutrient.csv' and 'crop' are known data/header values for
    # this model data.
    nutrient_table = utils.build_lookup_from_csv(os.path.join(
        args['model_data_path'], 'crop_nutrient.csv'),
                                                 'crop',
                                                 to_lower=False)

    LOGGER.info("Generating report table")
    result_table_path = os.path.join(output_dir,
                                     'result_table%s.csv' % file_suffix)
    _ = task_graph.add_task(func=tabulate_regression_results,
                            args=(nutrient_table, crop_to_landcover_table,
                                  pixel_area_ha, args['landcover_raster_path'],
                                  landcover_nodata, output_dir, file_suffix,
                                  result_table_path),
                            target_path_list=[result_table_path],
                            dependent_task_list=dependent_task_list,
                            task_name='tabulate_results')

    if ('aggregate_polygon_path' in args
            and args['aggregate_polygon_path'] not in ['', None]):
        LOGGER.info("aggregating result over query polygon")
        # reproject polygon to LULC's projection
        target_aggregate_vector_path = os.path.join(
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        aggregate_results_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
        _ = task_graph.add_task(
            func=aggregate_regression_results_to_polygons,
            args=(args['aggregate_polygon_path'], target_aggregate_vector_path,
                  landcover_raster_info['projection_wkt'],
                  crop_to_landcover_table, nutrient_table, output_dir,
                  file_suffix, aggregate_results_table_path),
            target_path_list=[
                target_aggregate_vector_path, aggregate_results_table_path
            ],
            dependent_task_list=dependent_task_list,
            task_name='aggregate_results_to_polygons')

    task_graph.close()
    task_graph.join()
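
The regression model above leans on taskgraph to express the pipeline as a dependency graph: add_task records target_path_list so finished work is skipped on re-runs, and dependent_task_list orders the rasters' creation. A minimal sketch of the pattern, with placeholder file names:

import os
import pathlib
import taskgraph

def write_text(path, text):
    pathlib.Path(path).write_text(text)

os.makedirs('taskgraph_cache_dir', exist_ok=True)
graph = taskgraph.TaskGraph('taskgraph_cache_dir', n_workers=-1)  # -1: in-process

first_task = graph.add_task(
    func=write_text,
    args=('intermediate.txt', 'step 1'),
    target_path_list=['intermediate.txt'],
    task_name='make_intermediate')

graph.add_task(
    func=write_text,
    args=('result.txt', 'step 2'),
    target_path_list=['result.txt'],
    dependent_task_list=[first_task],  # runs only after first_task finishes
    task_name='make_result')

graph.close()  # no more tasks will be added
graph.join()   # block until every task completes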
Example No. 8
def single_run_ndr(watershed_basename, watershed_fid, bucket_uri_prefix,
                   scenario_id, error_queue):
    """Run a single instance of NDR."""
    try:
        LOGGER.debug('running %s %d', watershed_basename, watershed_fid)
        # create local workspace
        ws_prefix = '%s_%d' % (watershed_basename, watershed_fid)
        local_workspace = os.path.join(WORKSPACE_DIR, ws_prefix)
        try:
            os.makedirs(local_workspace)
        except OSError:
            LOGGER.exception('unable to create %s', local_workspace)

        # extract the watershed to workspace/data
        watershed_root_path = os.path.join(
            ECOSHARD_DIR, 'watersheds_globe_HydroSHEDS_15arcseconds_'
            'blake2b_14ac9c77d2076d51b0258fd94d9378d4',
            'watersheds_globe_HydroSHEDS_15arcseconds',
            '%s.shp' % watershed_basename)
        epsg_srs = get_utm_epsg_srs(watershed_root_path, watershed_fid)
        local_watershed_path = os.path.join(local_workspace,
                                            '%s.gpkg' % ws_prefix)

        # the dem is in lat/lng and is also a big set of tiles. Make a
        # VRT which is the bounds of the lat/lng of the watershed and
        # use that as the dem path argument
        watershed_vector = gdal.OpenEx(watershed_root_path, gdal.OF_VECTOR)
        watershed_layer = watershed_vector.GetLayer()
        watershed_feature = watershed_layer.GetFeature(watershed_fid)
        watershed_geom = watershed_feature.GetGeometryRef()
        x1, x2, y1, y2 = watershed_geom.GetEnvelope()
        watershed_geom = None
        watershed_feature = None
        watershed_layer = None
        watershed_vector = None

        watershed_bounding_box = [
            min(x1, x2), min(y1, y2),
            max(x1, x2), max(y1, y2)
        ]

        vrt_options = gdal.BuildVRTOptions(outputBounds=(min(x1, x2) - 0.1,
                                                         min(y1, y2) - 0.1,
                                                         max(x1, x2) + 0.1,
                                                         max(y1, y2) + 0.1))
        dem_dir_path = os.path.join(PATH_MAP['dem_path'], 'global_dem_3s')
        dem_vrt_path = os.path.join(
            dem_dir_path,
            '%s_%s_vrt.vrt' % (watershed_basename, watershed_fid))
        gdal.BuildVRT(dem_vrt_path,
                      glob.glob(os.path.join(dem_dir_path, '*.tif')),
                      options=vrt_options)

        wgs84_sr = osr.SpatialReference()
        wgs84_sr.ImportFromEPSG(4326)
        target_bounding_box = pygeoprocessing.transform_bounding_box(
            watershed_bounding_box, wgs84_sr.ExportToWkt(),
            epsg_srs.ExportToWkt())

        reproject_geometry_to_target(watershed_root_path, watershed_fid,
                                     epsg_srs.ExportToWkt(),
                                     local_watershed_path)

        args = {
            'workspace_dir':
            local_workspace,
            'dem_path':
            dem_vrt_path,
            'lulc_path':
            PATH_MAP[scenario_id]['lulc_path'],
            'runoff_proxy_path':
            PATH_MAP['precip_path'],
            'ag_load_path':
            PATH_MAP[scenario_id]['fertilizer_path'],
            'watersheds_path':
            local_watershed_path,
            'biophysical_table_path': (PATH_MAP['biophysical_table_path']),
            'calc_n':
            True,
            'calc_p':
            False,
            'results_suffix':
            '',
            'threshold_flow_accumulation':
            (GLOBAL_NDR_ARGS['threshold_flow_accumulation']),
            'k_param':
            GLOBAL_NDR_ARGS['k_param'],
            'n_workers':
            -1,
            'target_sr_wkt':
            epsg_srs.ExportToWkt(),
            'target_pixel_size':
            TARGET_PIXEL_SIZE,
            'target_bounding_box':
            target_bounding_box
        }
        inspring.ndr.ndr.execute(args)
        zipfile_path = '%s.zip' % ws_prefix
        LOGGER.debug("zipping %s to %s", args['workspace_dir'], zipfile_path)
        zipdir(args['workspace_dir'], zipfile_path)
        zipfile_s3_uri = (
            "%s/%s/%s" %
            (bucket_uri_prefix, scenario_id, os.path.basename(zipfile_path)))
        subprocess.run(
            "/usr/local/bin/aws s3 cp %s %s" % (zipfile_path, zipfile_s3_uri),
            shell=True,
            check=True)
        shutil.rmtree(args['workspace_dir'])
        os.remove(dem_vrt_path)
        # strip off the "s3://" part of the uri prefix
        bucket_id, bucket_subdir = re.match('s3://([^/]*)/(.*)',
                                            bucket_uri_prefix).groups()
        workspace_url = ('https://%s.s3-us-west-1.amazonaws.com/'
                         '%s/%s/%s' % (bucket_id, bucket_subdir, scenario_id,
                                       os.path.basename(zipfile_path)))
        os.remove(zipfile_path)
        try:
            head_request = requests.head(workspace_url)
            if not head_request:
                raise RuntimeError(
                    "something bad happened when checking whether the "
                    "workspace url was live: %s %s" %
                    (workspace_url, str(head_request)))
        except ConnectionError:
            LOGGER.exception('connection error when checking whether the '
                             'workspace url was live')
            raise
    except Exception:
        LOGGER.exception('something bad happened when running ndr')
        error_queue.put(traceback.format_exc())
        raise
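
get_utm_epsg_srs is not shown in this snippet; a common way to build a UTM spatial reference for a lon/lat point uses the standard EPSG numbering (326xx for the northern hemisphere, 327xx for the southern). A sketch under that assumption:

from osgeo import osr

def utm_srs_for_point(lng, lat):
    """Return an osr.SpatialReference for the UTM zone containing lng/lat."""
    zone = int((lng + 180.0) // 6.0) + 1          # zones 1..60, 6 degrees wide
    epsg = (32600 if lat >= 0 else 32700) + zone  # 326xx north, 327xx south
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(epsg)
    return srs

# (-122.08, 37.39) falls in UTM zone 10 north (EPSG:32610)
print(utm_srs_for_point(-122.08, 37.39).GetAttrValue('PROJCS'))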
Example No. 9
"""Demo some clipping."""
import logging

import pygeoprocessing

logging.basicConfig(
    level=logging.DEBUG,
    format=(
        '%(asctime)s (%(relativeCreated)d) %(processName)s %(levelname)s '
        '%(name)s [%(funcName)s:%(lineno)d] %(message)s'))
LOGGER = logging.getLogger(__name__)

if __name__ == '__main__':
    raster_path = '../session2/DEM_md5_53d4998eec75d803a318fafd28c40a3e.tif'
    aoi_vector_path = './session2/aoi.gpkg'

    raster_info = pygeoprocessing.get_raster_info(raster_path)
    vector_info = pygeoprocessing.get_vector_info(aoi_vector_path)

    raster_projected_bounding_box = pygeoprocessing.transform_bounding_box(
        vector_info['bounding_box'], vector_info['projection_wkt'],
        raster_info['projection_wkt'])

    target_clipped_raster_path = 'DEM_clip.tif'
    pygeoprocessing.warp_raster(
        raster_path, raster_info['pixel_size'], target_clipped_raster_path,
        'near', target_bb=raster_projected_bounding_box)
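
Note the demo clips to the AOI's bounding box only, so pixels inside the box but outside the polygon survive. If a true polygon clip is wanted, recent pygeoprocessing releases accept a vector_mask_options dict on warp_raster; a sketch, assuming a version with that keyword:

    pygeoprocessing.warp_raster(
        raster_path, raster_info['pixel_size'], 'DEM_masked_clip.tif',
        'near', target_bb=raster_projected_bounding_box,
        vector_mask_options={'mask_vector_path': aoi_vector_path})
Example No. 10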
def postprocessing_worker(
        postprocessing_queue, country_borders_vector_path, work_database_path,
        grid_done_queue):
    """Get detected images, annotate them, and stick them in the db."""
    try:
        while True:
            payload = postprocessing_queue.get()
            if payload == 'STOP':
                postprocessing_queue.put('STOP')
                break
            grid_id, boxes, scores, image_path, xoff, yoff, quad_info = payload
            non_max_supression_box_list = []
            # convert box to a list from a numpy array and score to a value
            # from a single element array
            box_score_tuple_list = [
                (list(box), score) for box, score in zip(boxes[0], scores[0])
                if score > 0.3]
            while box_score_tuple_list:
                box, score = box_score_tuple_list.pop()
                shapely_box = shapely.geometry.box(*box)
                keep = True
                # this list makes a copy
                for test_box, test_score in list(box_score_tuple_list):
                    shapely_test_box = shapely.geometry.box(*test_box)
                    if shapely_test_box.intersects(shapely_box):
                        if test_score > score:
                            # an overlapping box scores higher; drop this one
                            keep = False
                            break
                if keep:
                    non_max_supression_box_list.append((box, score))

            if not non_max_supression_box_list:
                # no dams detected
                os.remove(image_path)
                grid_done_queue.put((grid_id, -1))
                continue

            # if non_max_supression_box_list:
            #     LOGGER.debug('found %d dams', len(non_max_supression_box_list))
            #     raw_image = read_image_bgr(image_path)
            #     for box, score in non_max_supression_box_list:
            #         detected_box = shapely.geometry.box(*box)
            #         color = (255, 102, 179)
            #         draw_box(raw_image, detected_box.bounds, color, 1)
            #         draw_caption(raw_image, detected_box.bounds, str(score))

            #     cv2.imwrite(image_path, raw_image)
            # else:
            #     # no dams detected
            #     os.remove(image_path)
            #     grid_done_queue.put((grid_id, -1))
            #     continue

            # convert detected boxes to quad pixel coords, then to lat/lng
            lng_lat_score_list = []
            for bounding_box, score in non_max_supression_box_list:
                global_bounding_box = [
                    bounding_box[0]+xoff,
                    bounding_box[1]+yoff,
                    bounding_box[2]+xoff,
                    bounding_box[3]+yoff]

                # convert to lat/lng
                geotransform = quad_info['geotransform']
                x_a, y_a = [x for x in gdal.ApplyGeoTransform(
                    geotransform, global_bounding_box[0],
                    global_bounding_box[1])]
                x_b, y_b = [x for x in gdal.ApplyGeoTransform(
                    geotransform, global_bounding_box[2],
                    global_bounding_box[3])]
                x_min, x_max = sorted([x_a, x_b])
                y_min, y_max = sorted([y_a, y_b])
                x_y_bounding_box = [
                    x_min, y_min, x_max, y_max]
                LOGGER.debug('original bounding box: %s', bounding_box)
                LOGGER.debug('xoff: %s yoff: %s', xoff, yoff)
                LOGGER.debug('global_bounding_box: %s', global_bounding_box)
                LOGGER.debug('xy bounding box: %s', x_y_bounding_box)

                lng_lat_bounding_box = \
                    pygeoprocessing.transform_bounding_box(
                        x_y_bounding_box, quad_info['projection'],
                        WGS84_WKT)
                LOGGER.debug('lng_lat_bounding_box: %s', lng_lat_bounding_box)

                # get country intersection list
                shapely_box = shapely.geometry.box(
                    *lng_lat_bounding_box)

                country_intersection_list = \
                    get_country_intersection_list(
                        shapely_box,
                        country_borders_vector_path)

                lng_lat_score_list.append((
                    lng_lat_bounding_box + [
                        float(score),
                        ','.join(country_intersection_list),
                        image_path]))

            # upload .pngs to bucket; this is old code but kept for reference
            # try:
            #     quad_uri = (
            #         'gs://natgeo-dams-data/detected_dam_data/'
            #         'annotated_imagery/%s' % os.path.basename(
            #             image_path))
            #     subprocess.run(
            #         'gsutil mv %s %s'
            #         % (image_path, quad_uri), shell=True,
            #         check=True)
            # except subprocess.CalledProcessError:
            #     LOGGER.warning(
            #         'file might already exist -- not uploading')
            #     if os.path.exists(image_path):
            #         os.remove(image_path)

            _execute_sqlite(
                """
                INSERT INTO
                detected_dams
                    (lng_min, lat_min, lng_max, lat_max, probability,
                     country_list, image_uri)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """, work_database_path,
                argument_list=lng_lat_score_list, mode='modify',
                execute='many')
            grid_done_queue.put((grid_id, -1))
            try:
                os.remove(image_path)
            except Exception:
                LOGGER.exception(
                    "couldn't remove %s after postprocessing", image_path)
    except Exception:
        LOGGER.exception('error occurred')
        raise
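
_execute_sqlite is a project helper not shown here; with the standard library alone the same batched insert can be written directly (the helper's locking and retry behavior is omitted):

import sqlite3

def insert_detected_dams(database_path, row_list):
    """Batch-insert detection rows into the detected_dams table."""
    connection = sqlite3.connect(database_path)
    try:
        with connection:  # commits on success, rolls back on error
            connection.executemany(
                'INSERT INTO detected_dams '
                '(lng_min, lat_min, lng_max, lat_max, probability, '
                'country_list, image_uri) VALUES (?, ?, ?, ?, ?, ?, ?)',
                row_list)
    finally:
        connection.close()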
Example No. 11
def process_quad(quad_uri, quad_id, dams_database_path):
    """Process quad into bounding box annotated chunks.

    Parameters:
        quad_uri (str): gs:// path to quad to download.
        quad_id (str): ID in the database so work can be updated.
        dams_database_path (str): path to the database that will be
            updated to mark the quad's processing state complete once
            the quad is processed.

    Returns:
        True when complete.

    """
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1)
    quad_raster_path = os.path.join(TRAINING_IMAGERY_DIR,
                                    os.path.basename(quad_uri))
    download_quad_task = task_graph.add_task(
        func=copy_from_gs,
        args=(quad_uri, quad_raster_path),
        target_path_list=[quad_raster_path],
        task_name='download %s' % quad_uri)
    download_quad_task.join()
    quad_info = pygeoprocessing.get_raster_info(quad_raster_path)
    n_cols, n_rows = quad_info['raster_size']

    # extract the bounding boxes
    bb_srs = osr.SpatialReference()
    bb_srs.ImportFromEPSG(4326)

    bounding_box_blob_list = _execute_sqlite('''
        SELECT bounding_box
        FROM quad_bounding_box_uri_table
        WHERE quad_id=?
        ''',
                                             dams_database_path,
                                             argument_list=[quad_id],
                                             fetch='all')
    working_dam_bb_list = []  # will be used to collapse duplicates later
    for index, (bounding_box_blob, ) in enumerate(bounding_box_blob_list):
        bounding_box = pickle.loads(bounding_box_blob)
        LOGGER.debug('%s: %s', quad_uri, bounding_box)

        local_bb = pygeoprocessing.transform_bounding_box(
            bounding_box,
            bb_srs.ExportToWkt(),
            quad_info['projection'],
            edge_samples=11)

        inv_gt = gdal.InvGeoTransform(quad_info['geotransform'])
        ul_i, ul_j = [
            int(x)
            for x in gdal.ApplyGeoTransform(inv_gt, local_bb[0], local_bb[1])
        ]
        lr_i, lr_j = [
            int(x)
            for x in gdal.ApplyGeoTransform(inv_gt, local_bb[2], local_bb[3])
        ]
        ul_i, lr_i = sorted([ul_i, lr_i])
        ul_j, lr_j = sorted([ul_j, lr_j])

        # possible the dam may lie outside of the quad, if so clip to the
        # edge of the quad
        if ul_j < 0:
            ul_j = 0
        if ul_i < 0:
            ul_i = 0
        if lr_i >= n_cols:
            lr_i = n_cols - 1
        if lr_j >= n_rows:
            lr_j = n_rows - 1

        # if < 0.5 ratio, bump up to 0.5 ratio
        bb_xsize = max(1, lr_i - ul_i)
        bb_ysize = max(1, lr_j - ul_j)
        if bb_xsize / bb_ysize < 0.5:
            delta_xsize = max(2, 0.5 * bb_ysize - bb_xsize)
            ul_i -= delta_xsize / 2
            lr_i += delta_xsize / 2
        elif bb_ysize / bb_xsize < 0.5:
            delta_ysize = max(2, 0.5 * bb_xsize - bb_ysize)
            ul_j -= delta_ysize / 2
            lr_j += delta_ysize / 2
        dam_bb = [ul_i, ul_j, lr_i, lr_j]

        # this is a sanity check
        if ul_i >= n_cols or ul_j >= n_rows or lr_i < 0 or lr_j < 0:
            raise ValueError(
                'transformed coordinates outside of raster bounds: '
                'lat/lng: %s\nlocal: %s\nraster_bb: %s\ntransformed: %s' %
                (bounding_box, local_bb, quad_info['bounding_box'], dam_bb))

        working_dam_bb_list.append(dam_bb)

    bounding_box_rtree = rtree.index.Index()
    index_to_bb_list = []
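    # greedily union overlapping dam boxes so duplicates collapse into one;
    # each merged box goes in the rtree so the tile loop below can quickly
    # find the dams a given window covers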
    while working_dam_bb_list:
        current_bb = shapely.geometry.box(*working_dam_bb_list.pop())
        for index in range(len(working_dam_bb_list) - 1, -1, -1):
            test_bb = shapely.geometry.box(*working_dam_bb_list[index])
            if current_bb.intersects(test_bb):
                current_bb = current_bb.union(test_bb)
                del working_dam_bb_list[index]
        LOGGER.debug('going to insert this: %s',
                     str((len(index_to_bb_list), current_bb.bounds)))
        bounding_box_rtree.insert(len(index_to_bb_list), current_bb.bounds)
        index_to_bb_list.append(current_bb.bounds)

    quad_slice_index = 0
    annotation_string_list = []
    for xoff in range(0, n_cols, TRAINING_IMAGE_DIMS[0]):
        win_xsize = TRAINING_IMAGE_DIMS[0]
        if xoff + win_xsize >= n_cols:
            xoff = n_cols - win_xsize - 1
        for yoff in range(0, n_rows, TRAINING_IMAGE_DIMS[1]):
            win_ysize = TRAINING_IMAGE_DIMS[1]
            if yoff + win_ysize >= n_rows:
                yoff = n_rows - win_ysize - 1

            bb_indexes = list(
                bounding_box_rtree.intersection(
                    (xoff, yoff, xoff + win_xsize, yoff + win_ysize)))

            if bb_indexes:
                LOGGER.debug('these local bbs at %d %d: %s', xoff, yoff,
                             str(bb_indexes))
                # clip out the png and name after number of bbs per image
                quad_png_path = os.path.join(
                    TRAINING_IMAGERY_DIR, '%d_%s_%d.png' %
                    (len(bb_indexes), quad_id, quad_slice_index))
                quad_slice_index += 1
                try:
                    make_quad_png(quad_raster_path, quad_png_path, xoff, yoff,
                                  win_xsize, win_ysize)
                    # transform local bbs so they're relative to the png
                    for bb_index in bb_indexes:
                        base_bb = list(index_to_bb_list[bb_index])
                        # if the centroid is out of bounds, go with the other
                        # quad that contains it
                        bb_xcentroid = base_bb[0] + (base_bb[2] -
                                                     base_bb[0]) / 2
                        bb_ycentroid = base_bb[1] + (base_bb[3] -
                                                     base_bb[1]) / 2
                        if (bb_xcentroid - xoff < 0 or
                                bb_xcentroid - xoff >= TRAINING_IMAGE_DIMS[0]
                                or bb_ycentroid - yoff < 0 or
                                bb_ycentroid - yoff >= TRAINING_IMAGE_DIMS[1]):
                            continue

                        # make sure it's not tiny
                        if base_bb[2] - base_bb[0] < 16:
                            delta = 16 - (base_bb[2] - base_bb[0])
                            base_bb[0] -= delta // 2
                            base_bb[2] += delta // 2
                        if base_bb[3] - base_bb[1] < 16:
                            delta = 16 - (base_bb[3] - base_bb[1])
                            base_bb[1] -= delta // 2
                            base_bb[3] += delta // 2

                        base_bb[0] = max(0, base_bb[0] - xoff)
                        base_bb[1] = max(0, base_bb[1] - yoff)
                        base_bb[2] = \
                            min(TRAINING_IMAGE_DIMS[0], base_bb[2]-xoff)
                        base_bb[3] = \
                            min(TRAINING_IMAGE_DIMS[1], base_bb[3]-yoff)
                        annotation_string_list.append([
                            '%s,%d,%d,%d,%d,dam' %
                            (quad_png_path, base_bb[0], base_bb[1], base_bb[2],
                             base_bb[3])
                        ])
                except Exception:
                    LOGGER.exception('skipping %s' % quad_raster_path)

    LOGGER.debug('updating annotation table with this: %s',
                 str(annotation_string_list))
    _execute_sqlite('''
        INSERT OR REPLACE INTO annotation_table
            (record)
        VALUES (?);
        ''',
                    dams_database_path,
                    argument_list=annotation_string_list,
                    execute='many',
                    mode='modify')

    _execute_sqlite('''
        UPDATE quad_processing_status
            SET processed=1
        WHERE quad_id=?
        ''',
                    dams_database_path,
                    argument_list=[quad_id],
                    mode='modify')

    task_graph.join()
    task_graph.close()
    os.remove(quad_raster_path)
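
The pixel/geographic conversions in process_quad hinge on the GDAL geotransform: ApplyGeoTransform maps pixel (col, row) to projected (x, y), and InvGeoTransform yields the inverse used to land a projected bounding box on pixel indices. A compact round trip with an illustrative north-up geotransform:

from osgeo import gdal

# origin (440720, 3751320), 30 m pixels, north-up
geotransform = (440720.0, 30.0, 0.0, 3751320.0, 0.0, -30.0)

x, y = gdal.ApplyGeoTransform(geotransform, 10, 20)  # pixel -> projected
print(x, y)  # 441020.0 3750720.0

inv_gt = gdal.InvGeoTransform(geotransform)
col, row = [int(v) for v in gdal.ApplyGeoTransform(inv_gt, x, y)]
print(col, row)  # 10 20
Example No. 12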
def fetch_quad(session, quad_database_path, mosaic_id, quad_id, cache_dir,
               to_copy_queue, global_report_queue, grid_id):
    """Fetch quad from planet DB.

    Args:
        session (Session): session object to use for authentication
        quad_database_path (str): path to quad database
        mosaic_id (str): Planet mosaic ID to search for
        quad_id (str): Planet quad ID in the given mosaic to fetch
        cache_dir (str): path to directory to write temporary files in
        to_copy_queue (Queue): a (local_quad_path, quad_uri,
            sqlite_update_variables) tuple is put here when the quad is
            downloaded and ready to copy.
        global_report_queue (Queue): used to report when the quad does not
            need to be downloaded.
        grid_id (str): unique id to report to the global report queue if the
            quad is already downloaded.

    Returns:
        None.
    """
    try:
        LOGGER.debug(f'fetching {quad_id}')
        count = _execute_sqlite('''
            SELECT count(quad_id)
            FROM quad_cache_table
            WHERE quad_id=?;
            ''',
                                quad_database_path,
                                argument_list=[quad_id],
                                fetch='one')
        LOGGER.debug(f'result of count query {count}')
        if count[0] > 0:
            LOGGER.debug('already fetched %s', quad_id)
            global_report_queue.put(grid_id)
            return

        get_quad_url = (f'https://api.planet.com/basemaps/v1/mosaics/'
                        f'{mosaic_id}/quads/{quad_id}')
        quads_json = session.get(get_quad_url, timeout=REQUEST_TIMEOUT)
        download_url = (quads_json.json())['_links']['download']
        local_quad_path = os.path.join(cache_dir, '%s.tif' % quad_id)
        quad_uri = ('gs://natgeo-dams-data/cached-planet-quads/%s' %
                    os.path.basename(local_quad_path))

        LOGGER.debug(f'download {download_url} to {local_quad_path}')
        ecoshard.download_url(download_url, local_quad_path)
        local_quad_info = pygeoprocessing.get_raster_info(local_quad_path)

        lng_lat_bb = pygeoprocessing.transform_bounding_box(
            local_quad_info['bounding_box'], local_quad_info['projection_wkt'],
            WGS84_WKT)

        sqlite_update_variables = []
        sqlite_update_variables.append(quad_id)
        sqlite_update_variables.extend(lng_lat_bb)
        sqlite_update_variables.append(  # file size in bytes
            pathlib.Path(local_quad_path).stat().st_size)
        sqlite_update_variables.append(quad_uri)

        LOGGER.debug(
            f'queueing {quad_uri} for copy with {sqlite_update_variables}')
        to_copy_queue.put((local_quad_path, quad_uri, sqlite_update_variables))

    except Exception:
        LOGGER.exception('error on quad %s' % quad_id)
        raise
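
The bounding-box step above follows a common pygeoprocessing pattern: read
the raster's native bounding box and reproject it to WGS84. A minimal
standalone sketch of that pattern (the raster path is a placeholder):

import pygeoprocessing
from osgeo import osr

wgs84_srs = osr.SpatialReference()
wgs84_srs.ImportFromEPSG(4326)

raster_info = pygeoprocessing.get_raster_info('quad.tif')  # placeholder path
lng_lat_bb = pygeoprocessing.transform_bounding_box(
    raster_info['bounding_box'], raster_info['projection_wkt'],
    wgs84_srs.ExportToWkt())
print(lng_lat_bb)  # [xmin, ymin, xmax, ymax] in degrees
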
Example #13
    base_ref.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
    target_ref.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    # Create a coordinate transformation
    transformer = osr.CreateCoordinateTransformation(base_ref, target_ref)
    back_transformer = osr.CreateCoordinateTransformation(target_ref, base_ref)

    trans_x, trans_y, _ = transformer.TransformPoint(args.lng, args.lat)
    print(f'({trans_x}, {trans_y})')
    back_lng, back_lat, _ = back_transformer.TransformPoint(trans_x, trans_y)
    print(f'({back_lat}, {back_lng})')

    world_borders_vector = gdal.OpenEx(
        'TM_WORLD_BORDERS-0.3_simplified_md5_47f2059be8d4016072aa6abe77762021.gpkg',
        gdal.OF_VECTOR)
    world_borders_layer = world_borders_vector.GetLayer()
    for country_feature in world_borders_layer:
        if country_feature.GetField(
                "NAME").lower() == args.country_name.lower():
            country_geometry = country_feature.GetGeometryRef()
            country_bb = [
                country_geometry.GetEnvelope()[i] for i in [0, 2, 1, 3]
            ]
            print(country_bb)
            transformbb = pygeoprocessing.transform_bounding_box(
                country_bb,
                osr.SRS_WKT_WGS84_LAT_LONG,
                world_eckert_iv_wkt,
                edge_samples=11)
            print(f'transformbb: {transformbb}')
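
Example 13 depends on `OAMS_TRADITIONAL_GIS_ORDER`: with GDAL 3+ the
authority axis order for EPSG:4326 is (lat, lng), and this mapping strategy
restores the traditional (x, y) = (lng, lat) order that `TransformPoint` is
called with above. A minimal round-trip check under that assumption:

from osgeo import osr

wgs84 = osr.SpatialReference()
wgs84.ImportFromEPSG(4326)
utm10n = osr.SpatialReference()
utm10n.ImportFromEPSG(26910)
for srs in (wgs84, utm10n):
    srs.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

forward = osr.CreateCoordinateTransformation(wgs84, utm10n)
backward = osr.CreateCoordinateTransformation(utm10n, wgs84)
x, y, _ = forward.TransformPoint(-122.0, 45.0)  # (lng, lat)
lng, lat, _ = backward.TransformPoint(x, y)
assert abs(lng + 122.0) < 1e-6 and abs(lat - 45.0) < 1e-6
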
Example #14
def main():
    """Entry point."""
    parser = argparse.ArgumentParser(description='People Travel Coverage')
    parser.add_argument('--population_key',
                        required=True,
                        help='population ecoshard key to simulate')
    parser.add_argument('--max_travel_time',
                        required=True,
                        type=float,
                        help='travel time in minutes')
    parser.add_argument('--pixel_size_m',
                        required=True,
                        type=float,
                        help='pixel size in meters')
    parser.add_argument('--countries',
                        type=str,
                        nargs='+',
                        help='space separated list of countries to simulate')
    args = parser.parse_args()

    population_key = args.population_key
    max_travel_time = args.max_travel_time

    for dir_path in [WORKSPACE_DIR, CHURN_DIR, ECOSHARD_DIR]:
        os.makedirs(dir_path, exist_ok=True)
    task_graph = taskgraph.TaskGraph(CHURN_DIR,
                                     multiprocessing.cpu_count() // 4, 5.0)
    ecoshard_path_map = {}

    for ecoshard_id, ecoshard_url in RASTER_ECOSHARD_URL_MAP.items():
        ecoshard_path = os.path.join(ECOSHARD_DIR,
                                     os.path.basename(ecoshard_url))
        _ = task_graph.add_task(func=ecoshard.download_url,
                                args=(ecoshard_url, ecoshard_path),
                                target_path_list=[ecoshard_path],
                                task_name=f'fetch {ecoshard_url}')
        ecoshard_path_map[ecoshard_id] = ecoshard_path
    task_graph.join()

    target_population_density_raster_path = os.path.join(
        CHURN_DIR,
        f'density_{os.path.basename(ecoshard_path_map[population_key])}')
    population_density_task = task_graph.add_task(
        func=create_population_density,
        args=(ecoshard_path_map[population_key],
              target_population_density_raster_path),
        target_path_list=[target_population_density_raster_path],
        task_name=f'create population density for {population_key}')
    population_density_task.join()
    ecoshard_path_map[population_key] = target_population_density_raster_path

    world_borders_vector = gdal.OpenEx(ecoshard_path_map['world_borders'],
                                       gdal.OF_VECTOR)
    world_borders_layer = world_borders_vector.GetLayer()

    area_fid_list = []

    world_eckert_iv_wkt = """PROJCRS["unknown",
    BASEGEOGCRS["GCS_unknown",
        DATUM["World Geodetic System 1984",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]],
            ID["EPSG",6326]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["Degree",0.0174532925199433]]],
    CONVERSION["unnamed",
        METHOD["Eckert IV"],
        PARAMETER["Longitude of natural origin",0,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["False easting",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["(E)",east,
            ORDER[1],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]],
        AXIS["(N)",north,
            ORDER[2],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]]]"""

    for country_feature in world_borders_layer:
        country_name = country_feature.GetField('NAME')
        if country_name in SKIP_THESE_COUNTRIES:
            LOGGER.debug('skipping %s', country_name)
            continue
        LOGGER.debug(country_name)
        country_geom = country_feature.GetGeometryRef()

        area_fid_list.append(
            (country_geom.GetArea(), world_eckert_iv_wkt, country_name,
             country_feature.GetFID()))

    world_borders_layer.ResetReading()

    allowed_country_set = None
    if args.countries is not None:
        allowed_country_set = set([name.lower() for name in args.countries])
    people_access_path_list = []
    normalized_people_access_path_list = []
    for country_index, (country_area, target_wkt, country_name,
                        country_fid) in enumerate(
                            sorted(area_fid_list, reverse=True)):
        # put the index on there so we can see which one is done first
        if args.countries is not None and (country_name.lower()
                                           not in allowed_country_set):
            continue
        country_workspace = os.path.join(COUNTRY_WORKSPACE_DIR,
                                         f'{country_index}_{country_name}')
        os.makedirs(country_workspace, exist_ok=True)
        base_raster_path_list = [
            ecoshard_path_map['friction_surface'],
            ecoshard_path_map[population_key],
            ecoshard_path_map['habitat_mask'],
        ]

        country_feature = world_borders_layer.GetFeature(country_fid)
        LOGGER.debug(f'country name: {country_feature.GetField("NAME")}')
        country_geometry = country_feature.GetGeometryRef()
        # swizzle the envelope so it's xmin, ymin, xmax, ymax
        country_bb = [country_geometry.GetEnvelope()[i] for i in [0, 2, 1, 3]]

        LOGGER.debug(f'lat/lng country_bb: {country_bb}')
        target_bounding_box = pygeoprocessing.transform_bounding_box(
            country_bb,
            world_borders_layer.GetSpatialRef().ExportToWkt(),
            target_wkt,
            edge_samples=11)
        # snap the bounding coordinates to the pixel grid: min edges snap
        # down, max edges snap up, so the box only ever grows
        target_bounding_box[0] -= target_bounding_box[0] % TARGET_CELL_LENGTH_M
        target_bounding_box[1] -= target_bounding_box[1] % TARGET_CELL_LENGTH_M
        target_bounding_box[2] += (
            -target_bounding_box[2]) % TARGET_CELL_LENGTH_M
        target_bounding_box[3] += (
            -target_bounding_box[3]) % TARGET_CELL_LENGTH_M
        LOGGER.debug(f'projected country_bb: {target_bounding_box}')

        sinusoidal_friction_path = os.path.join(
            country_workspace, f'{country_name}_friction.tif')
        sinusoidal_population_path = os.path.join(
            country_workspace,
            f'{country_name}_population_{population_key}.tif')
        sinusoidal_hab_path = os.path.join(
            country_workspace, f'sinusoidal_{country_name}_hab.tif')
        sinusoidal_raster_path_list = [
            sinusoidal_friction_path, sinusoidal_population_path,
            sinusoidal_hab_path
        ]

        projection_task = task_graph.add_task(
            func=pygeoprocessing.align_and_resize_raster_stack,
            args=(base_raster_path_list, sinusoidal_raster_path_list,
                  ['near'] * len(base_raster_path_list),
                  (TARGET_CELL_LENGTH_M,
                   -TARGET_CELL_LENGTH_M), target_bounding_box),
            kwargs={
                'target_projection_wkt': world_eckert_iv_wkt,
                'vector_mask_options': {
                    'mask_vector_path': ecoshard_path_map['world_borders'],
                    'mask_vector_where_filter': f'"fid"={country_fid}'
                }
            },
            target_path_list=sinusoidal_raster_path_list,
            task_name=f'project and clip rasters for {country_name}')

        people_access_path = os.path.join(
            country_workspace,
            f'people_access_{country_name}_{population_key}_{max_travel_time}m.tif'
        )
        normalized_people_access_path = os.path.join(
            country_workspace,
            f'norm_people_access_{country_name}_{max_travel_time}m.tif')

        _ = task_graph.add_task(
            func=people_access,
            args=(country_name, sinusoidal_friction_path,
                  sinusoidal_population_path, sinusoidal_hab_path,
                  max_travel_time, people_access_path,
                  normalized_people_access_path),
            target_path_list=[
                people_access_path, normalized_people_access_path
            ],
            dependent_task_list=[projection_task],
            task_name='calculating people access for %s' % country_name)
        people_access_path_list.append((people_access_path, 1))
        normalized_people_access_path_list.append(
            (normalized_people_access_path, 1))

    LOGGER.debug('create target global population layers')
    # warp population layer to target projection
    warped_pop_raster_path = os.path.join(
        WORKSPACE_DIR,
        f"warped_{os.path.basename(ecoshard_path_map[population_key])}")
    _ = task_graph.add_task(
        func=pygeoprocessing.warp_raster,
        args=(ecoshard_path_map[population_key], (TARGET_CELL_LENGTH_M,
                                                  -TARGET_CELL_LENGTH_M),
              warped_pop_raster_path, 'near'),
        kwargs={
            'target_projection_wkt': world_eckert_iv_wkt,
            'target_bb':
            [-16921202.923, -8460601.461, 16921797.077, 8461398.539],
            'working_dir': WORKSPACE_DIR
        },
        target_path_list=[warped_pop_raster_path],
        task_name=f'warp {warped_pop_raster_path}')
    task_graph.close()
    task_graph.join()

    # create access and normalized access paths
    target_people_global_access_path = os.path.join(
        WORKSPACE_DIR,
        f'global_people_access_{population_key}_{max_travel_time}m.tif')
    pygeoprocessing.new_raster_from_base(warped_pop_raster_path,
                                         target_people_global_access_path,
                                         gdal.GDT_Float32, [-1])
    target_normalized_people_global_access_path = os.path.join(
        WORKSPACE_DIR,
        f'global_normalized_people_access_{population_key}_{max_travel_time}m.tif'
    )
    pygeoprocessing.new_raster_from_base(
        warped_pop_raster_path, target_normalized_people_global_access_path,
        gdal.GDT_Float32, [-1])

    pygeoprocessing.stitch_rasters(people_access_path_list,
                                   ['near'] * len(people_access_path_list),
                                   (target_people_global_access_path, 1),
                                   overlap_algorithm='etch')
    people_global_access_raster = gdal.OpenEx(target_people_global_access_path,
                                              gdal.OF_RASTER | gdal.GA_Update)
    people_global_access_band = people_global_access_raster.GetRasterBand(1)
    people_global_access_band.ComputeStatistics(0)
    people_global_access_band = None
    pygeoprocessing.stitch_rasters(
        normalized_people_access_path_list,
        ['near'] * len(normalized_people_access_path_list),
        (target_normalized_people_global_access_path, 1),
        overlap_algorithm='etch')
    normalized_people_global_access_raster = gdal.OpenEx(
        target_normalized_people_global_access_path,
        gdal.OF_RASTER | gdal.GA_Update)
    normalized_people_global_access_band = (
        normalized_people_global_access_raster.GetRasterBand(1))
    normalized_people_global_access_band.ComputeStatistics(0)
    normalized_people_global_access_band = None
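
The grid-snapping arithmetic in the country loop above generalizes to a
small helper; a minimal sketch with a worked check (the function name is
illustrative):

def snap_bounding_box(bb, cell_length):
    """Expand [xmin, ymin, xmax, ymax] outward to the cell_length grid."""
    return [
        bb[0] - bb[0] % cell_length,     # snap min edges down
        bb[1] - bb[1] % cell_length,
        bb[2] + (-bb[2]) % cell_length,  # snap max edges up
        bb[3] + (-bb[3]) % cell_length,
    ]


# with a 100 m grid the box only ever grows outward:
assert snap_bounding_box([130, -130, 450, 20], 100) == [100, -200, 500, 100]
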
Example #15
def process_watershed(job_id, watershed_vector_path, watershed_fid, dem_path,
                      hab_path, pop_raster_path_list,
                      target_beneficiaries_path_list,
                      target_normalized_beneficiaries_path_list,
                      target_hab_normalized_beneficiaries_path_list,
                      target_stitch_work_queue_list):
    """Calculate downstream beneficiaries for this watershed.

    Args:
        job_id (str): unique ID identifying this job, can be used to
            create unique workspaces.
        watershed_vector_path (str): path to watershed vector
        watershed_fid (int): watershed FID to process
        dem_path (str): path to DEM raster
        hab_path (str): path to habitat mask raster
        pop_raster_path_list (list): list of population rasters to route
        target_beneficiaries_path_list (list): list of target downstream
            beneficiary rasters to create, parallel with
            `pop_raster_path_list`.
        target_normalized_beneficiaries_path_list (list): list of target
            normalized downstream beneficiary rasters, parallel with other
            lists.
        target_hab_normalized_beneficiaries_path_list (list): list of target
            hab normalized downstream beneficiary rasters, parallel with other
            lists.
        target_stitch_work_queue_list (list): list of work queue tuples to
            put done signals in when each beneficiary raster is done. The
            first element is for the standard target, the second for the
            normalized raster, and the third for the hab normalized raster.

    Returns:
        None.
    """
    working_dir = os.path.dirname(target_beneficiaries_path_list[0])
    os.makedirs(working_dir, exist_ok=True)
    LOGGER.debug(f'create working directory for {job_id} at {working_dir}')

    task_graph = taskgraph.TaskGraph(working_dir, -1)

    watershed_info = pygeoprocessing.get_vector_info(watershed_vector_path)
    watershed_vector = gdal.OpenEx(watershed_vector_path, gdal.OF_VECTOR)
    watershed_layer = watershed_vector.GetLayer()
    watershed_feature = watershed_layer.GetFeature(watershed_fid)
    watershed_geom = watershed_feature.GetGeometryRef()
    watershed_centroid = watershed_geom.Centroid()
    utm_code = (math.floor((watershed_centroid.GetX() + 180) / 6) % 60) + 1
    lat_code = 6 if watershed_centroid.GetY() > 0 else 7
    epsg_code = int('32%d%02d' % (lat_code, utm_code))
    epsg_sr = osr.SpatialReference()
    epsg_sr.ImportFromEPSG(epsg_code)
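    # e.g. a centroid at (lng=-122, lat=45) maps to UTM zone 10 north:
    # floor((-122 + 180) / 6) % 60 + 1 = 10 and lat > 0 -> EPSG:32610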

    watershed_envelope = watershed_geom.GetEnvelope()
    # swizzle the envelope, which is xmin/xmax/ymin/ymax by default, into
    # xmin/ymin/xmax/ymax order
    lat_lng_watershed_bb = [watershed_envelope[i] for i in [0, 2, 1, 3]]
    target_watershed_bb = pygeoprocessing.transform_bounding_box(
        lat_lng_watershed_bb, watershed_info['projection_wkt'],
        epsg_sr.ExportToWkt())

    watershed_vector = None
    watershed_layer = None
    watershed_feature = None
    watershed_geom = None
    watershed_centroid = None
    watershed_envelope = None

    target_pixel_size = (300, -300)

    warped_dem_raster_path = os.path.join(working_dir, f'{job_id}_dem.tif')
    warped_habitat_raster_path = os.path.join(working_dir, f'{job_id}_hab.tif')
    align_task = task_graph.add_task(
        func=pygeoprocessing.align_and_resize_raster_stack,
        args=([dem_path,
               hab_path], [warped_dem_raster_path, warped_habitat_raster_path],
              ['near', 'mode'], target_pixel_size, target_watershed_bb),
        kwargs={
            'target_projection_wkt': epsg_sr.ExportToWkt(),
            'vector_mask_options': {
                'mask_vector_path': watershed_vector_path,
                'mask_vector_where_filter': f'"FID"={watershed_fid}'
            },
        },
        target_path_list=[warped_dem_raster_path, warped_habitat_raster_path],
        task_name=(
            f'align and clip and warp dem/hab to {warped_dem_raster_path} '
            f'{warped_habitat_raster_path}'))

    filled_dem_raster_path = os.path.join(working_dir,
                                          f'{job_id}_filled_dem.tif')
    fill_pits_task = task_graph.add_task(
        func=pygeoprocessing.routing.fill_pits,
        args=((warped_dem_raster_path, 1), filled_dem_raster_path),
        kwargs={
            'working_dir': working_dir,
            'max_pixel_fill_count': 1000000
        },
        dependent_task_list=[align_task],
        target_path_list=[filled_dem_raster_path],
        task_name=f'fill dem pits to {filled_dem_raster_path}')

    flow_dir_mfd_raster_path = os.path.join(working_dir,
                                            f'{job_id}_flow_dir_mfd.tif')
    flow_dir_mfd_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_dir_mfd,
        args=((filled_dem_raster_path, 1), flow_dir_mfd_raster_path),
        kwargs={'working_dir': working_dir},
        dependent_task_list=[fill_pits_task],
        target_path_list=[flow_dir_mfd_raster_path],
        task_name=f'calc flow dir for {flow_dir_mfd_raster_path}')

    outlet_vector_path = os.path.join(working_dir,
                                      f'{job_id}_outlet_vector.gpkg')
    detect_outlets_task = task_graph.add_task(
        func=pygeoprocessing.routing.detect_outlets,
        args=((flow_dir_mfd_raster_path, 1), 'mfd', outlet_vector_path),
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[outlet_vector_path],
        task_name=f'detect outlets {outlet_vector_path}')

    outlet_raster_path = os.path.join(working_dir,
                                      f'{job_id}_outlet_raster.tif')
    create_outlet_raster_task = task_graph.add_task(
        func=_create_outlet_raster,
        args=(outlet_vector_path, flow_dir_mfd_raster_path,
              outlet_raster_path),
        dependent_task_list=[detect_outlets_task],
        target_path_list=[outlet_raster_path],
        task_name=f'create outlet raster {outlet_raster_path}')

    flow_accum_mfd_raster_path = os.path.join(working_dir,
                                              f'{job_id}_flow_accum.tif')
    flow_accum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=((flow_dir_mfd_raster_path, 1), flow_accum_mfd_raster_path),
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[flow_accum_mfd_raster_path],
        task_name=f'calc upstream flow area for {flow_accum_mfd_raster_path}')

    hab_upstream_area_raster_path = os.path.join(working_dir,
                                                 f'{job_id}_hab_upstream.tif')
    hab_upstream_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=((flow_dir_mfd_raster_path, 1), hab_upstream_area_raster_path),
        kwargs={'weight_raster_path_band': (warped_habitat_raster_path, 1)},
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[hab_upstream_area_raster_path],
        task_name=(
            f'calc upstream hab area for {hab_upstream_area_raster_path}'))

    for (pop_raster_path, target_beneficiaries_path,
         target_normalized_beneficiaries_path,
         target_hab_normalized_beneficiaries_path, stitch_queue_tuple) in zip(
             pop_raster_path_list, target_beneficiaries_path_list,
             target_normalized_beneficiaries_path_list,
             target_hab_normalized_beneficiaries_path_list,
             target_stitch_work_queue_list):

        LOGGER.debug(f'processing {target_beneficiaries_path} and normalized')

        aligned_pop_raster_path = os.path.join(
            working_dir, f'''{job_id}_{os.path.basename(
                os.path.splitext(pop_raster_path)[0])}.tif''')

        pop_warp_task = task_graph.add_task(
            func=_warp_and_wgs84_area_scale,
            args=(pop_raster_path, warped_dem_raster_path,
                  aligned_pop_raster_path, 'near', lat_lng_watershed_bb,
                  watershed_vector_path, watershed_fid, working_dir),
            dependent_task_list=[align_task],
            target_path_list=[aligned_pop_raster_path],
            task_name=f'align {aligned_pop_raster_path}')

        downstream_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=((flow_dir_mfd_raster_path, 1), (outlet_raster_path, 1),
                  target_beneficiaries_path),
            kwargs={'weight_raster_path_band': (aligned_pop_raster_path, 1)},
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task, flow_dir_mfd_task
            ],
            target_path_list=[target_beneficiaries_path],
            task_name=('calc downstream beneficiaries for '
                       f'{target_beneficiaries_path}'))

        # divide aligned_pop_raster_path by flow accum to get normalized then
        # route it downstream
        pop_normal_by_upstream_raster_path = '%s_norm%s' % os.path.splitext(
            aligned_pop_raster_path)

        normalize_by_dist_task = task_graph.add_task(
            func=normalize,
            args=(aligned_pop_raster_path, flow_accum_mfd_raster_path,
                  pop_normal_by_upstream_raster_path),
            dependent_task_list=[flow_accum_task, align_task],
            target_path_list=[pop_normal_by_upstream_raster_path],
            task_name=(f'normalized beneficiaries for '
                       f'{pop_normal_by_upstream_raster_path}'))

        prescaled_normalized_beneficiaries_path = (
            '%s_prescaled%s' %
            os.path.splitext(target_normalized_beneficiaries_path))
        downstream_norm_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=((flow_dir_mfd_raster_path, 1), (outlet_raster_path, 1),
                  prescaled_normalized_beneficiaries_path),
            kwargs={
                'weight_raster_path_band':
                (pop_normal_by_upstream_raster_path, 1)
            },
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task, flow_dir_mfd_task,
                normalize_by_dist_task
            ],
            target_path_list=[prescaled_normalized_beneficiaries_path],
            task_name=('calc downstream normalized beneficiaries for '
                       f'{prescaled_normalized_beneficiaries_path}'))

        task_graph.add_task(
            func=rescale_by_base,
            args=(aligned_pop_raster_path,
                  prescaled_normalized_beneficiaries_path,
                  target_normalized_beneficiaries_path),
            target_path_list=[target_normalized_beneficiaries_path],
            dependent_task_list=[downstream_norm_bene_task],
            task_name=f'rescale {target_normalized_beneficiaries_path}')

        # divide aligned_pop_raster_path by hab accum to get normalized by
        # hab then route it downstream
        pop_hab_normal_by_upstream_raster_path = (
            '%s_hab_norm%s' % os.path.splitext(aligned_pop_raster_path))

        normalize_by_dist_task = task_graph.add_task(
            func=normalize,
            args=(aligned_pop_raster_path, hab_upstream_area_raster_path,
                  pop_hab_normal_by_upstream_raster_path),
            dependent_task_list=[hab_upstream_task, align_task],
            target_path_list=[pop_hab_normal_by_upstream_raster_path],
            task_name=(f'normalized beneficiaries for '
                       f'{pop_hab_normal_by_upstream_raster_path}'))
        hab_pre_mask_normalized_beneficiaries_path = (
            '%s_pre_mask%s' %
            os.path.splitext(target_hab_normalized_beneficiaries_path))
        downstream_norm_hab_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=((flow_dir_mfd_raster_path, 1), (outlet_raster_path, 1),
                  hab_pre_mask_normalized_beneficiaries_path),
            kwargs={
                'weight_raster_path_band':
                (pop_hab_normal_by_upstream_raster_path, 1)
            },
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task, flow_dir_mfd_task,
                normalize_by_dist_task
            ],
            target_path_list=[hab_pre_mask_normalized_beneficiaries_path],
            task_name=('calc downstream normalized beneficiaries for '
                       f'{hab_pre_mask_normalized_beneficiaries_path}'))
        # mask this result to the target
        prescaled_hab_normalized_beneficiaries_path = (
            '%s_prescaled%s' %
            os.path.splitext(target_hab_normalized_beneficiaries_path))
        mask_downstream_norm_bene_task = task_graph.add_task(
            func=_mask_raster,
            args=(hab_pre_mask_normalized_beneficiaries_path,
                  warped_habitat_raster_path,
                  prescaled_hab_normalized_beneficiaries_path),
            dependent_task_list=[downstream_norm_hab_bene_task, align_task],
            target_path_list=[prescaled_hab_normalized_beneficiaries_path],
            task_name=f'mask {prescaled_hab_normalized_beneficiaries_path}')

        task_graph.add_task(
            func=rescale_by_base,
            args=(aligned_pop_raster_path,
                  prescaled_hab_normalized_beneficiaries_path,
                  target_hab_normalized_beneficiaries_path),
            target_path_list=[target_hab_normalized_beneficiaries_path],
            dependent_task_list=[mask_downstream_norm_bene_task],
            task_name=f'rescale {target_hab_normalized_beneficiaries_path}')

        task_graph.join()
        stitch_queue_tuple[0].put(
            (target_beneficiaries_path, working_dir, job_id))
        stitch_queue_tuple[1].put(
            (target_normalized_beneficiaries_path, working_dir, job_id))
        stitch_queue_tuple[2].put(
            (target_hab_normalized_beneficiaries_path, working_dir, job_id))

    task_graph.close()
    task_graph.join()
    task_graph = None
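
The `normalize` and `rescale_by_base` helpers referenced above are not shown
in this snippet. A minimal sketch of `normalize` as a nodata-aware raster
division with pygeoprocessing.raster_calculator, assuming the two input
rasters are already aligned to the same grid:

import numpy
import pygeoprocessing
from osgeo import gdal


def normalize(base_raster_path, accum_raster_path, target_raster_path):
    """Divide base by accum per pixel; write -1 where invalid."""
    nodata = -1.0
    base_nodata = pygeoprocessing.get_raster_info(
        base_raster_path)['nodata'][0]

    def _divide_op(base_array, accum_array):
        result = numpy.full(base_array.shape, nodata, dtype=numpy.float32)
        valid = accum_array > 0
        if base_nodata is not None:
            valid &= ~numpy.isclose(base_array, base_nodata)
        result[valid] = base_array[valid] / accum_array[valid]
        return result

    pygeoprocessing.raster_calculator(
        [(base_raster_path, 1), (accum_raster_path, 1)], _divide_op,
        target_raster_path, gdal.GDT_Float32, nodata)
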
def main():
    """Entry point."""
    parser = argparse.ArgumentParser(
        description=('Search for matching rasters to stitch into one big '
                     'raster.'))
    parser.add_argument('--target_projection_epsg',
                        required=True,
                        help='EPSG code of target projection')
    parser.add_argument(
        '--target_cell_size',
        required=True,
        help=('A single float indicating the desired square pixel size of '
              'the stitched raster.'))
    parser.add_argument(
        '--resample_method',
        default='near',
        help=('One of near|bilinear|cubic|cubicspline|lanczos|average|mode|'
              'max|min|med|q1|q3'))
    parser.add_argument('--target_raster_path',
                        required=True,
                        help='Path to target raster.')
    parser.add_argument('--raster_list',
                        nargs='+',
                        help='List of rasters or wildcards to stitch.')
    parser.add_argument(
        '--raster_pattern',
        nargs=2,
        help=('Recursive directory search for raster pattern such that '
              'the first argument is the directory to search and the second '
              'is the filename pattern.'))
    parser.add_argument(
        '--overlap_algorithm',
        default='replace',
        help=('can be one of etch|replace|add, default is replace'))
    parser.add_argument(
        '--_n_limit',
        type=int,
        help=('limit the number of stitches to this number, default is to '
              'stitch all found rasters'))

    parser.add_argument(
        '--area_weight_m2_to_wgs84',
        action='store_true',
        help=('if true, rescales values to be proportional to area change '
              'for wgs84 coordinates'))

    args = parser.parse_args()

    if bool(args.raster_list) == bool(args.raster_pattern):
        raise ValueError(
            'exactly one of --raster_list or --raster_pattern must be '
            'specified: \n'
            f'args.raster_list={args.raster_list}\n'
            f'args.raster_pattern={args.raster_pattern}\n')

    LOGGER.info('searching for matching files')
    if args.raster_list:
        raster_path_list = list(raster_path for raster_glob in args.raster_list
                                for raster_path in glob.glob(raster_glob))
    else:
        base_dir = args.raster_pattern[0]
        file_pattern = args.raster_pattern[1]
        LOGGER.info(f'searching {base_dir} for {file_pattern}')

        raster_path_list = list(
            itertools.islice((raster_path for walk_info in os.walk(base_dir)
                              for raster_path in glob.glob(
                                  os.path.join(walk_info[0], file_pattern))),
                             0, args._n_limit))
        LOGGER.info(f'found {len(raster_path_list)} files that matched')

    target_projection = osr.SpatialReference()
    target_projection.ImportFromEPSG(int(args.target_projection_epsg))

    if len(raster_path_list) == 0:
        raise RuntimeError(
            'no rasters were found to stitch; check --raster_list or '
            '--raster_pattern')

    LOGGER.info('calculating target bounding box')
    target_bounding_box_list = []
    raster_path_set = set()
    for raster_path in raster_path_list:
        if raster_path in raster_path_set:
            LOGGER.warning(f'{raster_path} already scheduled')
            continue
        raster_path_set.add(raster_path)
        raster_info = pygeoprocessing.get_raster_info(raster_path)
        bounding_box = raster_info['bounding_box']
        target_bounding_box = pygeoprocessing.transform_bounding_box(
            bounding_box, raster_info['projection_wkt'],
            target_projection.ExportToWkt())
        target_bounding_box_list.append(target_bounding_box)

    target_bounding_box = pygeoprocessing.merge_bounding_box_list(
        target_bounding_box_list, 'union')

    gtiff_driver = gdal.GetDriverByName('GTiff')

    n_cols = int(
        math.ceil((target_bounding_box[2] - target_bounding_box[0]) /
                  float(args.target_cell_size)))
    n_rows = int(
        math.ceil((target_bounding_box[3] - target_bounding_box[1]) /
                  float(args.target_cell_size)))

    geotransform = (target_bounding_box[0], float(args.target_cell_size), 0.0,
                    target_bounding_box[3], 0.0, -float(args.target_cell_size))
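    # GDAL geotransform layout: (x_origin, pixel_width, row_rotation,
    # y_origin, col_rotation, pixel_height); the origin is the top-left
    # corner, so y_origin is the bounding box ymax and pixel_height is
    # negative for a north-up raster.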

    target_raster = gtiff_driver.Create(
        os.path.join('.', args.target_raster_path),
        n_cols,
        n_rows,
        1,
        raster_info['datatype'],  # assumes all inputs share this datatype
        options=('TILED=YES', 'BIGTIFF=YES', 'BLOCKXSIZE=256',
                 'BLOCKYSIZE=256', 'COMPRESS=LZW', 'SPARSE_OK=TRUE'))
    target_raster.SetProjection(target_projection.ExportToWkt())
    target_raster.SetGeoTransform(geotransform)
    target_band = target_raster.GetRasterBand(1)
    target_band.SetNoDataValue(raster_info['nodata'][0])
    target_band = None
    target_raster = None

    LOGGER.info('calling stitch_rasters')
    pygeoprocessing.stitch_rasters(
        [(path, 1) for path in raster_path_list],
        [args.resample_method] * len(raster_path_list),
        (args.target_raster_path, 1),
        overlap_algorithm=args.overlap_algorithm,
        area_weight_m2_to_wgs84=args.area_weight_m2_to_wgs84)

    LOGGER.debug('build overviews...')
    ecoshard.build_overviews(args.target_raster_path)
    LOGGER.info('all done')
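
A hypothetical invocation of this stitcher, with placeholder script name and
paths and an EPSG:4326 target grid (all flags are defined by the argument
parser above):

# python stitch_rasters.py \
#     --target_projection_epsg 4326 \
#     --target_cell_size 0.002 \
#     --raster_pattern ./workspace "people_access_*.tif" \
#     --target_raster_path global_people_access_stitch.tif \
#     --overlap_algorithm etch \
#     --resample_method near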