Example #1
def check_spatial_overlap(spatial_filepaths_list,
                          different_projections_ok=False):
    """Check that the given spatial files spatially overlap.

    Args:
        spatial_filepaths_list (list): A list of files that can be opened with
            GDAL.  Must be on the local filesystem.
        different_projections_ok (bool): Whether it's OK for the input
            spatial files to have different projections.  Defaults to
            ``False``.  If ``True``, all projections will be converted to
            WGS84 before overlap is checked.

    Returns:
        A string error message if an error is found.  ``None`` otherwise.

    """
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)
    wgs84_wkt = wgs84_srs.ExportToWkt()

    bounding_boxes = []
    checked_file_list = []
    for filepath in spatial_filepaths_list:
        try:
            info = pygeoprocessing.get_raster_info(filepath)
        except ValueError:
            info = pygeoprocessing.get_vector_info(filepath)

        if info['projection_wkt'] is None:
            return f'Spatial file {filepath} has no projection'

        if different_projections_ok:
            bounding_box = pygeoprocessing.transform_bounding_box(
                info['bounding_box'], info['projection_wkt'], wgs84_wkt)
        else:
            bounding_box = info['bounding_box']

        if all([numpy.isinf(coord) for coord in bounding_box]):
            LOGGER.warning('Skipping infinite bounding box for file %s',
                           filepath)
            continue

        bounding_boxes.append(bounding_box)
        checked_file_list.append(filepath)

    try:
        pygeoprocessing.merge_bounding_box_list(bounding_boxes, 'intersection')
    except ValueError as error:
        LOGGER.debug(error)
        formatted_lists = ' | '.join([
            a + ': ' + str(b)
            for a, b in zip(checked_file_list, bounding_boxes)
        ])
        message = f"Bounding boxes do not intersect: {formatted_lists}"
        return message
    return None
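
A minimal usage sketch (not part of the original source), assuming the
module-level imports this function relies on and two placeholder files:

# Hypothetical paths; any GDAL-readable local files work here.
error = check_spatial_overlap(
    ['dem.tif', 'aoi.gpkg'], different_projections_ok=True)
if error is not None:
    raise ValueError(error)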
Example #2
    def test_clip_vector_by_vector_polygons(self):
        """WaveEnergy: testing clipping polygons from polygons."""
        from natcap.invest import wave_energy

        aoi_path = os.path.join(REGRESSION_DATA, 'aoi_proj_to_extract.shp')
        extract_path = os.path.join(SAMPLE_DATA, 'WaveData',
                                    'Global_extract.shp')

        result_path = os.path.join(self.workspace_dir, 'aoi_proj_clipped.shp')
        target_projection = pygeoprocessing.get_vector_info(
            extract_path)['projection_wkt']
        wave_energy._clip_vector_by_vector(aoi_path, extract_path, result_path,
                                           target_projection,
                                           self.workspace_dir)

        expected_path = os.path.join(REGRESSION_DATA, 'aoi_proj_clipped.shp')
        WaveEnergyRegressionTests._assert_point_vectors_equal(
            result_path, expected_path)
Example #3
def _clip_and_mask_dem(dem_path, aoi_path, target_path, working_dir):
    """Clip and mask the DEM to the AOI.

    Args:
        dem_path (string): The path to the DEM to use. Must have the same
            projection as the AOI.
        aoi_path (string): The path to the AOI to use. Must have the same
            projection as the DEM.
        target_path (string): The path on disk to where the clipped and masked
            raster will be saved. If a file exists at this location it will be
            overwritten. The raster will have a bounding box matching the
            intersection of the AOI and the DEM's bounding box and a spatial
            reference matching the AOI and the DEM.
        working_dir (string): A path to a directory on disk. A new temporary
            directory will be created within this directory for the storage of
            several working files. This temporary directory will be removed at
            the end of this function.

    Returns:
        ``None``

    """
    temp_dir = tempfile.mkdtemp(dir=working_dir,
                                prefix='clip_dem')

    LOGGER.info('Clipping the DEM to its intersection with the AOI.')
    aoi_vector_info = pygeoprocessing.get_vector_info(aoi_path)
    dem_raster_info = pygeoprocessing.get_raster_info(dem_path)
    mean_pixel_size = (
        abs(dem_raster_info['pixel_size'][0]) +
        abs(dem_raster_info['pixel_size'][1])) / 2.0
    pixel_size = (mean_pixel_size, -mean_pixel_size)

    intersection_bbox = [op(aoi_dim, dem_dim) for (aoi_dim, dem_dim, op) in
                         zip(aoi_vector_info['bounding_box'],
                             dem_raster_info['bounding_box'],
                             [max, max, min, min])]

    clipped_dem_path = os.path.join(temp_dir, 'clipped_dem.tif')
    pygeoprocessing.warp_raster(
        dem_path, pixel_size, clipped_dem_path, 'near',
        target_bb=intersection_bbox)

    LOGGER.info('Masking DEM pixels outside the AOI to nodata')
    aoi_mask_raster_path = os.path.join(temp_dir, 'aoi_mask.tif')
    pygeoprocessing.new_raster_from_base(
        clipped_dem_path, aoi_mask_raster_path, gdal.GDT_Byte,
        [_BYTE_NODATA], [0],
        raster_driver_creation_tuple=BYTE_GTIFF_CREATION_OPTIONS)
    pygeoprocessing.rasterize(aoi_path, aoi_mask_raster_path, [1], None)

    dem_nodata = dem_raster_info['nodata'][0]

    def _mask_op(dem, aoi_mask):
        valid_pixels = (~utils.array_equals_nodata(dem, dem_nodata) &
                        (aoi_mask == 1))
        masked_dem = numpy.empty(dem.shape)
        masked_dem[:] = dem_nodata
        masked_dem[valid_pixels] = dem[valid_pixels]
        return masked_dem

    pygeoprocessing.raster_calculator(
        [(clipped_dem_path, 1), (aoi_mask_raster_path, 1)],
        _mask_op, target_path, gdal.GDT_Float32, dem_nodata,
        raster_driver_creation_tuple=FLOAT_GTIFF_CREATION_OPTIONS)

    shutil.rmtree(temp_dir, ignore_errors=True)
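
A hedged invocation sketch for _clip_and_mask_dem; the paths are
placeholders, and the DEM and AOI are assumed to already share a
projection, as the docstring requires:

# Placeholder paths; 'scratch' must be an existing directory.
_clip_and_mask_dem(
    dem_path='dem.tif',
    aoi_path='aoi.gpkg',
    target_path='dem_clipped_masked.tif',
    working_dir='scratch')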
Example #4
def _calculate_args_bounding_box(args, args_spec):
    """Calculate the bounding boxes of any GIS types found in `args_dict`.

    Args:
        args (dict): a string key and any value pair dictionary.
        args_spec (dict): the model ARGS_SPEC describing args

    Returns:
        A ``(bb_intersection, bb_union)`` tuple of the lat/lng intersection
            and union bounding boxes of the GIS types referred to in
            ``args``.  If no GIS types are present, this is a
            ``(None, None)`` tuple.
    """
    def _merge_bounding_boxes(bb1, bb2, mode):
        """Merge two bounding boxes through union or intersection.

        Args:
            bb1 (list of float): bounding box of the form
                [minx, maxy, maxx, miny] or None
            bb2 (list of float): bounding box of the form
                [minx, maxy, maxx, miny] or None
            mode (string): either "union" or "intersection" indicating the
                how to combine the two bounding boxes.

        Returns:
            either the intersection or union of bb1 and bb2 depending
            on mode.  If either bb1 or bb2 is None, the other is returned.
            If both are None, None is returned.
        """
        if bb1 is None:
            return bb2
        if bb2 is None:
            return bb1

        if mode == "union":
            comparison_ops = [min, max, max, min]
        if mode == "intersection":
            comparison_ops = [max, min, min, max]

        bb_out = [op(x, y) for op, x, y in zip(comparison_ops, bb1, bb2)]
        return bb_out

    bb_intersection = None
    bb_union = None
    for key, value in args.items():
        # Using gdal.OpenEx to check if an input is spatial caused the
        # model to hang sometimes (possible race condition), so only
        # get the bounding box of inputs that are known to be spatial.
        # Also eliminate any string paths that are empty to prevent an
        # exception. By the time we've made it to this function, all paths
        # should already have been validated so the path is either valid or
        # blank.
        spatial_info = None
        if args_spec['args'][key]['type'] == 'raster' and value.strip() != '':
            spatial_info = pygeoprocessing.get_raster_info(value)
        elif (args_spec['args'][key]['type'] == 'vector'
              and value.strip() != ''):
            spatial_info = pygeoprocessing.get_vector_info(value)

        if spatial_info:
            local_bb = spatial_info['bounding_box']
            projection_wkt = spatial_info['projection_wkt']
            spatial_ref = osr.SpatialReference()
            spatial_ref.ImportFromWkt(projection_wkt)

            try:
                # means there's a GIS type with a well defined bounding box
                # create transform, and reproject local bounding box to
                # lat/lng
                lat_lng_ref = osr.SpatialReference()
                lat_lng_ref.ImportFromEPSG(4326)  # EPSG 4326 is lat/lng
                to_lat_trans = utils.create_coordinate_transformer(
                    spatial_ref, lat_lng_ref)
                for point_index in [0, 2]:
                    local_bb[point_index], local_bb[point_index + 1], _ = (
                        to_lat_trans.TransformPoint(local_bb[point_index],
                                                    local_bb[point_index + 1]))

                bb_intersection = _merge_bounding_boxes(
                    local_bb, bb_intersection, 'intersection')
                bb_union = _merge_bounding_boxes(local_bb, bb_union, 'union')
            except Exception as transform_error:
                # All kinds of exceptions from bad transforms or CSV files
                # or dbf files could get us to this point, just don't
                # bother with the local_bb at all
                LOGGER.exception('Error when transforming coordinates: %s',
                                 transform_error)
        else:
            LOGGER.debug(f'Arg {key} of type {args_spec["args"][key]["type"]} '
                         'excluded from bounding box calculation')

    return bb_intersection, bb_union
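
To make the comparison_ops pairing in _merge_bounding_boxes concrete, here
is a self-contained sketch (values invented for illustration) using the
same [minx, maxy, maxx, miny] layout documented above:

bb1 = [-10.0, 50.0, 10.0, 40.0]  # [minx, maxy, maxx, miny]
bb2 = [0.0, 45.0, 20.0, 30.0]

# Union keeps the outermost edge on each side...
union_ops = [min, max, max, min]
print([op(x, y) for op, x, y in zip(union_ops, bb1, bb2)])
# [-10.0, 50.0, 20.0, 30.0]

# ...while intersection keeps the innermost edge.
intersection_ops = [max, min, min, max]
print([op(x, y) for op, x, y in zip(intersection_ops, bb1, bb2)])
# [0.0, 45.0, 10.0, 40.0]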
Example #5
def _add_zonal_stats(runoff_retention_pickle_path,
                     runoff_retention_ret_vol_pickle_path,
                     flood_vol_pickle_path, base_watershed_result_vector_path,
                     target_watershed_result_vector_path):
    """Add watershed scale values of the given base_raster.

    Parameters:
        runoff_retention_pickle_path (str): path to runoff retention
            zonal stats pickle file.
        runoff_retention_ret_vol_pickle_path (str): path to runoff
            retention volume zonal stats pickle file.
        flood_vol_pickle_path (str): path to flood volume zonal stats
            pickle file.
        base_watershed_result_vector_path (str): path to existing vector
            to copy for the target vector.
        target_watershed_result_vector_path (str): path to target vector that
            will contain the additional fields:
                * aff_bld
                * rnf_rt_idx
                * rnf_rt_m3
                * serv_bld

    Return:
        None.

    """
    LOGGER.info("Processing zonal stats for %s",
                target_watershed_result_vector_path)

    with open(runoff_retention_pickle_path, 'rb') as runoff_retention_file:
        runoff_retention_stats = pickle.load(runoff_retention_file)
    with open(runoff_retention_ret_vol_pickle_path,
              'rb') as runoff_retention_ret_vol_file:
        runoff_retention_vol_stats = pickle.load(runoff_retention_ret_vol_file)
    with open(flood_vol_pickle_path, 'rb') as flood_vol_pickle_file:
        flood_vol_stats = pickle.load(flood_vol_pickle_file)

    base_sr_wkt = pygeoprocessing.get_vector_info(
        base_watershed_result_vector_path)['projection']
    base_watershed_vector = gdal.OpenEx(base_watershed_result_vector_path,
                                        gdal.OF_VECTOR)
    base_watershed_layer = base_watershed_vector.GetLayer()
    base_geom_type = base_watershed_layer.GetGeomType()
    base_sr = osr.SpatialReference()
    base_sr.ImportFromWkt(base_sr_wkt)

    if os.path.exists(target_watershed_result_vector_path):
        LOGGER.warn("deleting existing target result at %s",
                    target_watershed_result_vector_path)
        os.remove(target_watershed_result_vector_path)
    esri_driver = gdal.GetDriverByName('ESRI Shapefile')
    target_watershed_vector = esri_driver.Create(
        target_watershed_result_vector_path, 0, 0, 0, gdal.GDT_Unknown)
    layer_name = str(
        os.path.splitext(
            os.path.basename(target_watershed_result_vector_path))[0])
    LOGGER.debug("creating layer %s", layer_name)
    target_watershed_layer = target_watershed_vector.CreateLayer(
        str(layer_name), base_sr, base_geom_type)

    for field_name in ['aff_bld', 'rnf_rt_idx', 'rnf_rt_m3', 'serv_bld']:
        field_def = ogr.FieldDefn(field_name, ogr.OFTReal)
        field_def.SetWidth(24)
        field_def.SetPrecision(11)
        target_watershed_layer.CreateField(field_def)

    target_layer_defn = target_watershed_layer.GetLayerDefn()

    for base_feature in base_watershed_layer:
        feature_id = base_feature.GetFID()
        target_feature = ogr.Feature(target_layer_defn)
        base_geom_ref = base_feature.GetGeometryRef()
        target_feature.SetGeometry(base_geom_ref.Clone())
        base_geom_ref = None

        if feature_id in runoff_retention_stats:
            pixel_count = runoff_retention_stats[feature_id]['count']
            if pixel_count > 0:
                mean_value = (runoff_retention_stats[feature_id]['sum'] /
                              float(pixel_count))
                target_feature.SetField('rnf_rt_idx', float(mean_value))

        if feature_id in runoff_retention_vol_stats:
            target_feature.SetField(
                'rnf_rt_m3',
                float(runoff_retention_vol_stats[feature_id]['sum']))

        if feature_id in flood_vol_stats:
            pixel_count = flood_vol_stats[feature_id]['count']
            if pixel_count > 0:
                affected_build = base_feature.GetField('aff_bld')
                target_feature.SetField('aff_bld', affected_build)
                target_feature.SetField(
                    'serv_bld',
                    affected_build *
                    float(runoff_retention_vol_stats[feature_id]['sum']))

        target_watershed_layer.CreateFeature(target_feature)
    target_watershed_layer.SyncToDisk()
    target_watershed_layer = None
    target_watershed_vector = None
Example #6

def main():
    """Entry point."""
    #for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
    #    try:
    #        os.makedirs(dir_path)
    #    except OSError:
    #        pass

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_wb_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')
    raster_path = os.path.join(WORKSPACE_DIR, os.path.basename(RASTER_URL))
    download_raster_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(RASTER_URL, raster_path),
        target_path_list=[raster_path],
        task_name='download raster')

    #world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    #world_borders_layer = world_borders_vector.GetLayer()
    #wgs84_srs = osr.SpatialReference()
    #wgs84_srs.ImportFromEPSG(4326)

    # mask out everything that's not a country
    masked_raster_path = os.path.join(
        WORKSPACE_DIR, '%s_masked%s' % os.path.splitext(
            os.path.basename(raster_path)))
    # we need to define this because otherwise no nodata value is defined
    mask_nodata = -1
    mask_task = task_graph.add_task(
        func=pygeoprocessing.mask_raster,
        args=(
            (raster_path, 1), world_borders_path, masked_raster_path),
        kwargs={
            'raster_driver_creation_tuple': GTIFF_CREATION_TUPLE_OPTIONS,
            'target_mask_value': mask_nodata,
        },
        target_path_list=[masked_raster_path],
        dependent_task_list=[download_wb_task, download_raster_task],
        task_name='mask raster')

    download_raster_task.join()
    raster_info = pygeoprocessing.get_raster_info(raster_path)
    country_name = "Global"

    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    country_threshold_table_file = open(country_threshold_table_path, 'w')
    country_threshold_table_file.write('country,percentile at 90% max,pixel count\n')

    target_percentile_pickle_path = os.path.join(
        WORKSPACE_DIR, '%s.pkl' % (
            os.path.basename(os.path.splitext(raster_path)[0])))
    calculate_percentiles_task = task_graph.add_task(
        func=calculate_percentiles,
        args=(
            raster_path, PERCENTILE_LIST, target_percentile_pickle_path),
        target_path_list=[target_percentile_pickle_path],
        dependent_task_list=[mask_task],
        task_name='calculate percentiles')
    calculate_percentiles_task.join()
    with open(target_percentile_pickle_path, 'rb') as pickle_file:
        percentile_values = pickle.load(pickle_file)
    LOGGER.debug(
        "len percentile_values: %d len PERCENTILE_LIST: %d",
        len(percentile_values), len(PERCENTILE_LIST))

    cdf_array = [0.0] * len(percentile_values)

    raster_info = pygeoprocessing.get_raster_info(raster_path)
    nodata = raster_info['nodata'][0]
    valid_pixel_count = 0
    total_pixel_count = 0
    total_pixels = (
        raster_info['raster_size'][0] * raster_info['raster_size'][1])
    for _, data_block in pygeoprocessing.iterblocks(
            (raster_path, 1), largest_block=2**28):
        nodata_mask = ~numpy.isclose(data_block, nodata)
        nonzero_count = numpy.count_nonzero(nodata_mask)
        if nonzero_count == 0:
            continue
        valid_pixel_count += nonzero_count
        for index, percentile_value in enumerate(percentile_values):
            cdf_array[index] += numpy.sum((data_block[
                nodata_mask & (data_block >= percentile_value)]).astype(
                    numpy.float32))
        total_pixel_count += data_block.size
        LOGGER.debug('%.2f%% complete', (100.0*total_pixel_count)/total_pixels)
        LOGGER.debug('current cdf array: %s', cdf_array)
        # threshold is at 90% says Becky
    threshold_limit = 0.9 * cdf_array[2]

    LOGGER.debug(cdf_array)
    fig, ax = matplotlib.pyplot.subplots()
    ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
    f = scipy.interpolate.interp1d(
        cdf_array, list(reversed(PERCENTILE_LIST)))
    try:
        cdf_threshold = f(threshold_limit)
    except ValueError:
        LOGGER.exception(
            "error when passing threshold_limit: %s\ncdf_array: %s" % (
                threshold_limit, cdf_array))
        cdf_threshold = cdf_array[2]

    ax.plot([0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
    ax.plot([cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)

    ax.grid(True, linestyle='-.')
    ax.set_title(
        '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (country_name, threshold_limit, cdf_threshold, valid_pixel_count))
    ax.set_ylabel('Sum of %s up to 100-percentile' % os.path.basename(raster_path))
    ax.set_xlabel('100-percentile')
    ax.tick_params(labelcolor='r', labelsize='medium', width=3)
    matplotlib.pyplot.autoscale(enable=True, tight=True)
    matplotlib.pyplot.savefig(
        os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
    country_threshold_table_file.write(
        '%s, %f, %d\n' % (country_name, cdf_threshold, valid_pixel_count))
    country_threshold_table_file.flush()
    country_threshold_table_file.close()

    return
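    # NOTE: the early return above makes the per-country loop below
    # unreachable; it appears to have been kept for reference.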

    for world_border_feature in world_borders_layer:
        country_name = world_border_feature.GetField('nev_name')
        country_name = country_name.replace('.', '')
        LOGGER.debug(country_name)
        country_workspace = os.path.join(COUNTRY_WORKSPACES, country_name)
        try:
            os.makedirs(country_workspace)
        except OSError:
            pass

        country_vector = os.path.join(
            country_workspace, '%s.gpkg' % country_name)
        country_vector_complete_token = os.path.join(
            country_workspace, '%s.COMPLETE' % country_name)
        extract_feature(
            world_borders_path, world_border_feature.GetFID(),
            wgs84_srs.ExportToWkt(), country_vector,
            country_vector_complete_token)

        country_raster_path = os.path.join(country_workspace, '%s_%s' % (
            country_name, os.path.basename(RASTER_PATH)))

        country_vector_info = pygeoprocessing.get_vector_info(country_vector)
        pygeoprocessing.warp_raster(
            RASTER_PATH, raster_info['pixel_size'], country_raster_path,
            'near', target_bb=country_vector_info['bounding_box'],
            vector_mask_options={'mask_vector_path': country_vector},
            working_dir=country_workspace)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
        if len(percentile_values) != len(PERCENTILE_LIST):
            continue
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)

        nodata = pygeoprocessing.get_raster_info(
            country_raster_path)['nodata'][0]
        valid_pixel_count = 0
        for _, data_block in pygeoprocessing.iterblocks(
                (country_raster_path, 1)):
            nodata_mask = ~numpy.isclose(data_block, nodata)
            valid_pixel_count += numpy.count_nonzero(nodata_mask)
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum(data_block[
                    nodata_mask & (data_block >= percentile_value)])

        # threshold is at 90% says Becky
        threshold_limit = 0.9 * cdf_array[2]

        LOGGER.debug(cdf_array)
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
        f = scipy.interpolate.interp1d(
            cdf_array, list(reversed(PERCENTILE_LIST)))
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s" % (
                    threshold_limit, cdf_array))
            cdf_threshold = cdf_array[2]

        ax.plot([0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
        ax.plot([cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)

        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (country_name, threshold_limit, cdf_threshold, valid_pixel_count))
        ax.set_ylabel('Sum of %s up to 100-percentile' % os.path.basename(RASTER_PATH))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        country_threshold_table_file.write(
            '%s, %f, %d\n' % (country_name, cdf_threshold, valid_pixel_count))
        country_threshold_table_file.flush()
    country_threshold_table_file.close()
Example #7
    def _merge_local_bounding_boxes(arg, bb_intersection=None, bb_union=None):
        """Traverse nested dictionary to merge bounding boxes of GIS types.

        Args:
            arg (dict): contains string keys and values that might be paths
                to GIS files.  Values can be any other type, including
                dictionaries.
            bb_intersection (list or None): if list, has the form
                [xmin, ymin, xmax, ymax], where coordinates are in lng, lat
            bb_union (list or None): if list, has the form
                [xmin, ymin, xmax, ymax], where coordinates are in lng, lat

        Returns:
            (intersection, union) bounding box tuples of all filepaths to GIS
            data types found in the dictionary and bb_intersection and bb_union
            inputs.  None, None if no arguments were GIS data types and input
            bounding boxes are None.
        """
        def _is_spatial(arg):
            if isinstance(arg, str) and os.path.exists(arg):
                with utils.capture_gdal_logging():
                    dataset = gdal.OpenEx(arg)
                    if dataset is not None:
                        # OGR opens CSV files.  For now, we should not
                        # consider these to be vectors.
                        driver_name = dataset.GetDriver().ShortName
                        if driver_name == 'CSV':
                            return False
                        return True
            return False

        if isinstance(arg, dict):
            # if dict, grab the bb's for all the members in it
            for value in arg.values():
                bb_intersection, bb_union = _merge_local_bounding_boxes(
                    value, bb_intersection, bb_union)
        elif isinstance(arg, list):
            # if list, grab the bb's for all the members in it
            for value in arg:
                bb_intersection, bb_union = _merge_local_bounding_boxes(
                    value, bb_intersection, bb_union)
        else:
            # singular value, test if GIS type, if not, don't update bb's
            # this is an undefined bounding box that gets returned when ogr
            # opens a table only
            if _is_spatial(arg):
                with utils.capture_gdal_logging():
                    if gdal.OpenEx(arg, gdal.OF_RASTER) is not None:
                        spatial_info = pygeoprocessing.get_raster_info(arg)
                    else:
                        # If it isn't a raster, it should be a vector!
                        spatial_info = pygeoprocessing.get_vector_info(arg)

                local_bb = spatial_info['bounding_box']
                projection_wkt = spatial_info['projection']
                spatial_ref = osr.SpatialReference()
                spatial_ref.ImportFromWkt(projection_wkt)

                try:
                    # means there's a GIS type with a well defined bounding box
                    # create transform, and reproject local bounding box to
                    # lat/lng
                    lat_lng_ref = osr.SpatialReference()
                    lat_lng_ref.ImportFromEPSG(4326)  # EPSG 4326 is lat/lng
                    to_lat_trans = osr.CoordinateTransformation(
                        spatial_ref, lat_lng_ref)
                    for point_index in [0, 2]:
                        local_bb[point_index], local_bb[point_index + 1], _ = (
                            to_lat_trans.TransformPoint(
                                local_bb[point_index],
                                local_bb[point_index + 1]))

                    bb_intersection = _merge_bounding_boxes(
                        local_bb, bb_intersection, 'intersection')
                    bb_union = _merge_bounding_boxes(local_bb, bb_union,
                                                     'union')
                except Exception as transform_error:
                    # All kinds of exceptions from bad transforms or CSV files
                    # or dbf files could get us to this point, just don't
                    # bother with the local_bb at all
                    LOGGER.exception('Error when transforming coordinates: %s',
                                     transform_error)

        return bb_intersection, bb_union
Example #8
def execute(args):
    """Forest Carbon Edge Effect.

    InVEST Carbon Edge Model calculates the carbon due to edge effects in
    tropical forest pixels.

    Args:
        args['workspace_dir'] (string): a path to the directory that will write
            output and other temporary files during calculation. (required)
        args['results_suffix'] (string): a string to append to any output file
            name (optional)
        args['n_nearest_model_points'] (int): number of nearest neighbor model
            points to search for
        args['aoi_vector_path'] (string): (optional) if present, a path to a
            shapefile that will be used to aggregate carbon stock results at
            the end of the run.
        args['biophysical_table_path'] (string): a path to a CSV table that has
            at least the fields 'lucode' and 'c_above'. If
            ``args['compute_forest_edge_effects'] == True``, table must
            also contain an 'is_tropical_forest' field.  If
            ``args['pools_to_calculate'] == 'all'``, this table must contain
            the fields 'c_below', 'c_dead', and 'c_soil'.

                * ``lucode``: an integer that corresponds to landcover codes in
                  the raster ``args['lulc_raster_path']``
                * ``is_tropical_forest``: either 0 or 1 indicating whether the
                  landcover type is forest (1) or not (0).  If 1, the value
                  in ``c_above`` is ignored and instead calculated from the
                  edge regression model.
                * ``c_above``: floating point number indicating tons of above
                  ground carbon per hectare for that landcover type
                * ``{'c_below', 'c_dead', 'c_soil'}``: three other optional
                  carbon pools that will statically map landcover types to the
                  carbon densities in the table.

                Example::

                    lucode,is_tropical_forest,c_above,c_soil,c_dead,c_below
                    0,0,32.8,5,5.2,2.1
                    1,1,n/a,2.5,0.0,0.0
                    2,1,n/a,1.8,1.0,0.0
                    16,0,28.1,4.3,0.0,2.0

                Note the "n/a" in ``c_above`` are optional since that field
                is ignored when ``is_tropical_forest==1``.
        args['lulc_raster_path'] (string): path to an integer landcover code
            raster
        args['pools_to_calculate'] (string): if "all", then all carbon pools
            will be calculated.  For any other value, only above ground
            carbon pools will be calculated and only a 'c_above' header is
            expected in the biophysical table.  If "all", the model expects
            'c_above', 'c_below', 'c_dead', and 'c_soil' in the header of
            biophysical_table and will make a translated carbon map for each
            based off the landcover map.
        args['compute_forest_edge_effects'] (boolean): if True, requires
            biophysical table to have 'is_tropical_forest' forest field, and
            any landcover codes that have a 1 in this column calculate carbon
            stocks using the Chaplin-Kramer et al. method and ignore 'c_above'.
        args['tropical_forest_edge_carbon_model_vector_path'] (string):
            path to a shapefile that defines the regions for the local carbon
            edge models.  Has at least the fields 'method', 'theta1', 'theta2',
            'theta3'.  Where 'method' is an int between 1..3 describing the
            biomass regression model, and the thetas are floating point numbers
            that have different meanings depending on the 'method' parameter.
            Specifically,

                * method 1 (asymptotic model)::

                    biomass = theta1 - theta2 * exp(-theta3 * edge_dist_km)

                * method 2 (logarithmic model)::

                    # NOTE: theta3 is ignored for this method
                    biomass = theta1 + theta2 * numpy.log(edge_dist_km)

                * method 3 (linear regression)::

                    biomass = theta1 + theta2 * edge_dist_km
        args['biomass_to_carbon_conversion_factor'] (string/float): Number by
            which to multiply forest biomass to convert to carbon in the edge
            effect calculation.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None

    """
    # just check that the AOI exists since it wouldn't crash until the end of
    # the whole model run if it didn't.
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        aoi_vector = gdal.OpenEx(args['aoi_vector_path'], gdal.OF_VECTOR)
        if not aoi_vector:
            raise ValueError("Unable to open aoi at: %s" %
                             args['aoi_vector_path'])
        else:
            aoi_vector = None
            lulc_raster_bb = pygeoprocessing.get_raster_info(
                args['lulc_raster_path'])['bounding_box']
            aoi_vector_bb = pygeoprocessing.get_vector_info(
                args['aoi_vector_path'])['bounding_box']
            try:
                merged_bb = pygeoprocessing.merge_bounding_box_list(
                    [lulc_raster_bb, aoi_vector_bb], 'intersection')
                LOGGER.debug("merged bounding boxes: %s", merged_bb)
            except ValueError:
                raise ValueError(
                    "The landcover raster %s and AOI %s do not touch each "
                    "other." %
                    (args['lulc_raster_path'], args['aoi_vector_path']))

    output_dir = args['workspace_dir']
    intermediate_dir = os.path.join(args['workspace_dir'],
                                    'intermediate_outputs')
    utils.make_directories([output_dir, intermediate_dir])
    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    # Initialize a TaskGraph
    taskgraph_working_dir = os.path.join(intermediate_dir,
                                         '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # single process mode.
    task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)

    # used to keep track of files generated by this module
    output_file_registry = {
        'c_above_map':
        os.path.join(intermediate_dir,
                     'c_above_carbon_stocks%s.tif' % file_suffix),
        'carbon_map':
        os.path.join(output_dir, 'carbon_map%s.tif' % file_suffix),
        'aggregated_result_vector':
        os.path.join(output_dir,
                     'aggregated_carbon_stocks%s.shp' % file_suffix)
    }

    if args['pools_to_calculate'] == 'all':
        output_file_registry['c_below_map'] = os.path.join(
            intermediate_dir, 'c_below_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_soil_map'] = os.path.join(
            intermediate_dir, 'c_soil_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_dead_map'] = os.path.join(
            intermediate_dir, 'c_dead_carbon_stocks%s.tif' % file_suffix)

    if args['compute_forest_edge_effects']:
        output_file_registry['spatial_index_pickle'] = os.path.join(
            intermediate_dir, 'spatial_index%s.pickle' % file_suffix)
        output_file_registry['edge_distance'] = os.path.join(
            intermediate_dir, 'edge_distance%s.tif' % file_suffix)
        output_file_registry['tropical_forest_edge_carbon_map'] = os.path.join(
            intermediate_dir,
            'tropical_forest_edge_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['non_forest_mask'] = os.path.join(
            intermediate_dir, 'non_forest_mask%s.tif' % file_suffix)

    # Map non-forest landcover codes to carbon biomasses
    LOGGER.info('Calculating direct mapped carbon stocks')
    carbon_maps = []
    biophysical_table = utils.build_lookup_from_csv(
        args['biophysical_table_path'], 'lucode', to_lower=False)
    biophysical_keys = [
        x.lower() for x in list(biophysical_table.values())[0].keys()
    ]
    pool_list = [('c_above', True)]
    if args['pools_to_calculate'] == 'all':
        pool_list.extend([('c_below', False), ('c_soil', False),
                          ('c_dead', False)])
    for carbon_pool_type, ignore_tropical_type in pool_list:
        if carbon_pool_type in biophysical_keys:
            carbon_maps.append(output_file_registry[carbon_pool_type + '_map'])
            task_graph.add_task(
                func=_calculate_lulc_carbon_map,
                args=(args['lulc_raster_path'], args['biophysical_table_path'],
                      carbon_pool_type, ignore_tropical_type,
                      args['compute_forest_edge_effects'], carbon_maps[-1]),
                target_path_list=[carbon_maps[-1]],
                task_name='calculate_lulc_%s_map' % carbon_pool_type)

    if args['compute_forest_edge_effects']:
        # generate a map of pixel distance to forest edge from the landcover
        # map
        LOGGER.info('Calculating distance from forest edge')
        map_distance_task = task_graph.add_task(
            func=_map_distance_from_tropical_forest_edge,
            args=(args['lulc_raster_path'], args['biophysical_table_path'],
                  output_file_registry['edge_distance'],
                  output_file_registry['non_forest_mask']),
            target_path_list=[
                output_file_registry['edge_distance'],
                output_file_registry['non_forest_mask']
            ],
            task_name='map_distance_from_forest_edge')

        # Build spatial index for gridded global model for closest 3 points
        LOGGER.info('Building spatial index for forest edge models.')
        build_spatial_index_task = task_graph.add_task(
            func=_build_spatial_index,
            args=(args['lulc_raster_path'], intermediate_dir,
                  args['tropical_forest_edge_carbon_model_vector_path'],
                  output_file_registry['spatial_index_pickle']),
            target_path_list=[output_file_registry['spatial_index_pickle']],
            task_name='build_spatial_index')

        # calculate the carbon edge effect on forests
        LOGGER.info('Calculating forest edge carbon')
        task_graph.add_task(
            func=_calculate_tropical_forest_edge_carbon_map,
            args=(output_file_registry['edge_distance'],
                  output_file_registry['spatial_index_pickle'],
                  int(args['n_nearest_model_points']),
                  float(args['biomass_to_carbon_conversion_factor']),
                  output_file_registry['tropical_forest_edge_carbon_map']),
            target_path_list=[
                output_file_registry['tropical_forest_edge_carbon_map']
            ],
            task_name='calculate_forest_edge_carbon_map',
            dependent_task_list=[map_distance_task, build_spatial_index_task])

        # This is also a carbon stock
        carbon_maps.append(
            output_file_registry['tropical_forest_edge_carbon_map'])

    # combine maps into a single output
    LOGGER.info('combining carbon maps into single raster')

    carbon_maps_band_list = [(path, 1) for path in carbon_maps]

    # Join here since the raster calculation depends on the target datasets
    # from all the tasks above
    task_graph.join()

    combine_carbon_maps_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(carbon_maps_band_list, combine_carbon_maps,
              output_file_registry['carbon_map'], gdal.GDT_Float32,
              NODATA_VALUE),
        target_path_list=[output_file_registry['carbon_map']],
        task_name='combine_carbon_maps')

    # generate report (optional) by aoi if they exist
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        LOGGER.info('aggregating carbon map by aoi')
        task_graph.add_task(
            func=_aggregate_carbon_map,
            args=(args['aoi_vector_path'], output_file_registry['carbon_map'],
                  output_file_registry['aggregated_result_vector']),
            target_path_list=[
                output_file_registry['aggregated_result_vector']
            ],
            task_name='aggregate_carbon_map',
            dependent_task_list=[combine_carbon_maps_task])

    # close taskgraph
    task_graph.close()
    task_graph.join()
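
A hedged sketch of how execute might be invoked, mirroring the args
documented above; every path and value here is a placeholder rather than
shipped sample data:

# All paths below are hypothetical placeholders.
args = {
    'workspace_dir': 'workspace',
    'results_suffix': 'demo',
    'n_nearest_model_points': 10,
    'aoi_vector_path': 'aoi.shp',
    'biophysical_table_path': 'biophysical_table.csv',
    'lulc_raster_path': 'lulc.tif',
    'pools_to_calculate': 'all',
    'compute_forest_edge_effects': True,
    'tropical_forest_edge_carbon_model_vector_path': 'edge_models.shp',
    'biomass_to_carbon_conversion_factor': 0.47,
    'n_workers': -1,
}
execute(args)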
Example #9
def alternative_index_workflow(workspace_dir,
                               raster_input_dict,
                               aoi_path,
                               index_path,
                               polygon_input_list=None):
    """Compute the alternative index from raw inputs.

    All inputs, including the AOI, must share a coordinate reference system
    and must have roughly equivalent extents.  It is recommended that inputs
    be clipped and projected in Arc prior to running this script.

    Args:
        workspace_dir (string): path to workspace where intermediate results
            should be created/stored
        raster_input_dict (dict): a nested python dictionary containing info
            about raster-based inputs that should be combined. The keys in
            the dictionary should be the labels for each input; values in
            the dictionary should be dictionaries containing the keys 'path'
            (path to the raster input) and 'weight' (weighting value that is
            applied to the normalized values in this input relative to
            others). EACH INDEX IS INTERPRETED AS HIGH VALUE = GOOD.
        aoi_path (string): path to boundary of the study area
        index_path (string): path to location where the index should be saved
        polygon_input_list (list): list of paths to polygon inputs that should
            be included. Each of these is assigned a weight of 1.

    Side effects:
        creates or modifies a raster at the location ``index_path``

    Returns:
        None
    
    """
    # ensure that each new input shares spatial reference
    vector_info = pygeoprocessing.get_vector_info(aoi_path)
    destination_proj = osr.SpatialReference()
    destination_proj.ImportFromWkt(vector_info['projection_wkt'])
    problem_list = []
    for new_input in raster_input_dict:
        new_proj = osr.SpatialReference()
        new_proj.ImportFromWkt(
            pygeoprocessing.get_raster_info(
                raster_input_dict[new_input]['path'])['projection_wkt'])
        if (new_proj.IsSame(destination_proj) == 0):
            problem_list.append(new_input)
    if problem_list:
        raise ValueError(
            "Project these to match the AOI: {}".format(problem_list))

    intermediate_dir = os.path.join(workspace_dir, 'intermediate')
    if not os.path.exists(intermediate_dir):
        os.makedirs(intermediate_dir)

    normalized_dir = os.path.join(intermediate_dir, 'normalized')
    if not os.path.exists(normalized_dir):
        os.makedirs(normalized_dir)

    aligned_dir = os.path.join(intermediate_dir, 'aligned')
    if not os.path.exists(aligned_dir):
        os.makedirs(aligned_dir)

    # normalize all raster-based inputs within AOI
    base_raster_path_list = []
    aligned_raster_path_list = []
    for new_input in raster_input_dict:
        value_raster_path = raster_input_dict[new_input]['path']
        try:
            weight = raster_input_dict[new_input]['weight']
        except KeyError:
            weight = 1
        bn = os.path.basename(value_raster_path)
        normalized_path = os.path.join(normalized_dir, bn)
        aligned_path = os.path.join(aligned_dir, bn)
        base_raster_path_list.append(normalized_path)
        aligned_raster_path_list.append(aligned_path)
        if not os.path.exists(normalized_path):
            with tempfile.NamedTemporaryFile(
                    prefix='mask_raster',
                    delete=False,
                    suffix='.tif',
                    dir=normalized_dir) as clipped_raster_file:
                clipped_raster_path = clipped_raster_file.name
            pygeoprocessing.mask_raster((value_raster_path, 1), aoi_path,
                                        clipped_raster_path)
            normalize(clipped_raster_path, normalized_path, aoi_path, weight)
            os.remove(clipped_raster_path)

    # align and resample normalized rasters, using minimum pixel size of inputs
    pixel_size_list = []
    for new_input in raster_input_dict:
        value_raster_path = raster_input_dict[new_input]['path']
        raster_info = pygeoprocessing.get_raster_info(value_raster_path)
        pixel_size_list.append(raster_info['pixel_size'])
    target_pixel_size = min(pixel_size_list)
    min_pixel_index = pixel_size_list.index(min(pixel_size_list))

    if not all([os.path.exists(f) for f in aligned_raster_path_list]):
        pygeoprocessing.align_and_resize_raster_stack(
            base_raster_path_list,
            aligned_raster_path_list, ['near'] * len(base_raster_path_list),
            target_pixel_size,
            'intersection',
            raster_align_index=min_pixel_index)

    # rasterize polygon inputs
    template_raster_path = aligned_raster_path_list[0]
    if polygon_input_list:
        for vec_path in polygon_input_list:
            target_raster_path = os.path.join(
                aligned_dir, '{}.tif'.format(os.path.basename(vec_path)[:-4]))
            aligned_raster_path_list.append(target_raster_path)
            if not os.path.exists(target_raster_path):
                pygeoprocessing.new_raster_from_base(
                    template_raster_path,
                    target_raster_path,
                    gdal.GDT_Int16, [_TARGET_NODATA],
                    fill_value_list=[_TARGET_NODATA])
                pygeoprocessing.rasterize(vec_path,
                                          target_raster_path,
                                          burn_values=[100])

    # add together
    raster_list_sum(aligned_raster_path_list,
                    _TARGET_NODATA,
                    index_path,
                    _TARGET_NODATA,
                    nodata_remove=True)
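
A minimal, hedged invocation of alternative_index_workflow; the file names
and weights are placeholders, and all inputs are assumed to already share
the AOI's coordinate reference system as the docstring requires:

# Hypothetical inputs; all must share the AOI's projection.
raster_inputs = {
    'habitat_quality': {'path': 'habitat_quality.tif', 'weight': 2},
    'carbon': {'path': 'carbon.tif'},  # 'weight' defaults to 1
}
alternative_index_workflow(
    'workspace', raster_inputs, 'aoi.shp', 'alternative_index.tif',
    polygon_input_list=['protected_areas.shp'])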
Example #10

def _write_summary_vector(source_aoi_vector_path,
                          target_vector_path,
                          runoff_ret_stats,
                          runoff_ret_vol_stats,
                          flood_volume_stats,
                          damage_per_aoi_stats=None):
    """Write a vector with summary statistics.

    This vector will always contain three fields::

        * ``'flood_vol'``: The volume of flood (runoff), in m3, per watershed.
        * ``'rnf_rt_idx'``: Average of runoff retention values per watershed
        * ``'rnf_rt_m3'``: Sum of runoff retention volumes, in m3,
          per watershed.

    If ``damage_per_aoi_stats`` is provided, then these additional columns will
    be written to the vector::

        * ``'aff_bld'``: Potential damage to built infrastructure in currency
          units, per watershed.
        * ``'serv_blt'``: Spatial indicator of the importance of the runoff
          retention service

    Args:
        source_aoi_vector_path (str): The path to a GDAL vector that exists on
            disk.
        target_vector_path (str): The path to a vector that will be
            created.  If a file already exists at this path, it will be deleted
            before the new file is created.  This filepath must end with the
            extension ``.shp``, as the file created will be an ESRI Shapefile.
        runoff_ret_stats (dict): A dict representing summary statistics of the
            runoff raster. If provided, it must be a dictionary mapping feature
            IDs from ``source_aoi_vector_path`` to dicts with ``'count'`` and
            ``'sum'`` keys.
        runoff_ret_vol_stats (dict): A dict representing summary statistics of
            the runoff volume raster. If provided, it must be a dictionary
            mapping feature IDs from ``source_aoi_vector_path`` to dicts with
            ``'count'`` and ``'sum'`` keys.
        flood_volume_stats (dict): A dict mapping feature IDs from
            ``source_aoi_vector_path`` to dicts with a ``'sum'`` key giving
            the flood volume over the AOI.
        damage_per_aoi_stats (dict): A dict mapping feature IDs from
            ``source_aoi_vector_path`` to float values representing the total
            damage to built infrastructure in that watershed.

    Returns:
        ``None``
    """
    source_aoi_vector = gdal.OpenEx(source_aoi_vector_path, gdal.OF_VECTOR)
    source_aoi_layer = source_aoi_vector.GetLayer()
    source_geom_type = source_aoi_layer.GetGeomType()
    source_srs_wkt = pygeoprocessing.get_vector_info(
        source_aoi_vector_path)['projection_wkt']
    source_srs = osr.SpatialReference()
    source_srs.ImportFromWkt(source_srs_wkt)

    esri_driver = gdal.GetDriverByName('ESRI Shapefile')
    target_watershed_vector = esri_driver.Create(target_vector_path, 0, 0, 0,
                                                 gdal.GDT_Unknown)
    layer_name = os.path.splitext(os.path.basename(target_vector_path))[0]
    LOGGER.debug("creating layer %s", layer_name)
    target_watershed_layer = target_watershed_vector.CreateLayer(
        layer_name, source_srs, source_geom_type)

    target_fields = ['rnf_rt_idx', 'rnf_rt_m3', 'flood_vol']
    if not damage_per_aoi_stats:
        damage_per_aoi_stats = {}
    else:
        target_fields += ['aff_bld', 'serv_blt']

    for field_name in target_fields:
        field_def = ogr.FieldDefn(field_name, ogr.OFTReal)
        field_def.SetWidth(36)
        field_def.SetPrecision(11)
        target_watershed_layer.CreateField(field_def)

    target_layer_defn = target_watershed_layer.GetLayerDefn()
    for base_feature in source_aoi_layer:
        feature_id = base_feature.GetFID()
        target_feature = ogr.Feature(target_layer_defn)
        base_geom_ref = base_feature.GetGeometryRef()
        target_feature.SetGeometry(base_geom_ref.Clone())
        base_geom_ref = None

        if feature_id in runoff_ret_stats:
            pixel_count = runoff_ret_stats[feature_id]['count']
            if pixel_count > 0:
                mean_value = (runoff_ret_stats[feature_id]['sum'] /
                              float(pixel_count))
                target_feature.SetField('rnf_rt_idx', float(mean_value))

        if feature_id in runoff_ret_vol_stats:
            target_feature.SetField(
                'rnf_rt_m3', float(runoff_ret_vol_stats[feature_id]['sum']))

        if feature_id in damage_per_aoi_stats:
            pixel_count = runoff_ret_vol_stats[feature_id]['count']
            if pixel_count > 0:
                damage_sum = damage_per_aoi_stats[feature_id]
                target_feature.SetField('aff_bld', damage_sum)

                # This is the service_built equation.
                target_feature.SetField(
                    'serv_blt',
                    (damage_sum * runoff_ret_vol_stats[feature_id]['sum']))

        if feature_id in flood_volume_stats:
            target_feature.SetField(
                'flood_vol', float(flood_volume_stats[feature_id]['sum']))

        target_watershed_layer.CreateFeature(target_feature)
    target_watershed_layer.SyncToDisk()
    target_watershed_layer = None
    target_watershed_vector = None
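
A hedged sketch of the stats inputs _write_summary_vector expects; keys are
feature IDs and every value below is invented for illustration:

# Hypothetical zonal stats for a single feature (FID 0).
runoff_ret_stats = {0: {'count': 100, 'sum': 55.0}}
runoff_ret_vol_stats = {0: {'count': 100, 'sum': 1200.0}}
flood_volume_stats = {0: {'sum': 3400.0}}
_write_summary_vector(
    'watersheds.shp', 'watershed_results.shp', runoff_ret_stats,
    runoff_ret_vol_stats, flood_volume_stats)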
Example #11
def main():
    """Entry point."""
    for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
        try:
            os.makedirs(dir_path)
        except OSError:
            pass

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')

    download_task.join()

    world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    world_borders_layer = world_borders_vector.GetLayer()

    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)

    raster_info = pygeoprocessing.get_raster_info(RASTER_PATH)

    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    country_threshold_table_file = open(country_threshold_table_path, 'w')
    country_threshold_table_file.write('country,percentile at 90% max,pixel count\n')
    for world_border_feature in world_borders_layer:
        country_name = world_border_feature.GetField('NAME')
        if country_name != 'Canada':
            continue
        LOGGER.debug(country_name)
        country_workspace = os.path.join(COUNTRY_WORKSPACES, country_name)
        try:
            os.makedirs(country_workspace)
        except OSError:
            pass

        country_vector = os.path.join(
            country_workspace, '%s.gpkg' % country_name)
        country_vector_complete_token = os.path.join(
            country_workspace, '%s.COMPLETE' % country_name)
        extract_feature(
            world_borders_path, world_border_feature.GetFID(),
            wgs84_srs.ExportToWkt(), country_vector,
            country_vector_complete_token)

        country_raster_path = os.path.join(country_workspace, '%s_%s' % (
            country_name, os.path.basename(RASTER_PATH)))

        country_vector_info = pygeoprocessing.get_vector_info(country_vector)
        pygeoprocessing.warp_raster(
            RASTER_PATH, raster_info['pixel_size'], country_raster_path,
            'near', target_bb=country_vector_info['bounding_box'],
            vector_mask_options={'mask_vector_path': country_vector},
            working_dir=country_workspace)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
        if len(percentile_values) != len(PERCENTILE_LIST):
            continue
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)

        nodata = pygeoprocessing.get_raster_info(
            country_raster_path)['nodata'][0]
        pixel_count = 0
        for _, data_block in pygeoprocessing.iterblocks(
                (country_raster_path, 1)):
            nodata_mask = ~numpy.isclose(data_block, nodata)
            pixel_count += numpy.count_nonzero(nodata_mask)
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum(data_block[
                    nodata_mask & (data_block >= percentile_value)])

        # threshold is at 90% says Becky
        threshold_limit = 0.9 * cdf_array[2]

        LOGGER.debug(cdf_array)
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
        f = scipy.interpolate.interp1d(
            cdf_array, list(reversed(PERCENTILE_LIST)))
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s" % (
                    threshold_limit, cdf_array))
            cdf_threshold = cdf_array[2]

        ax.plot([0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
        ax.plot([cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)

        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (country_name, threshold_limit, cdf_threshold, pixel_count))
        ax.set_ylabel('Sum of %s up to 100-percentile' % os.path.basename(RASTER_PATH))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        country_threshold_table_file.write(
            '%s, %f, %d\n' % (country_name, cdf_threshold, pixel_count))
        country_threshold_table_file.flush()
    country_threshold_table_file.close()
Example #12
"""Demo some clipping."""
import logging

import pygeoprocessing

logging.basicConfig(
    level=logging.DEBUG,
    format=(
        '%(asctime)s (%(relativeCreated)d) %(processName)s %(levelname)s '
        '%(name)s [%(funcName)s:%(lineno)d] %(message)s'))
LOGGER = logging.getLogger(__name__)

if __name__ == '__main__':
    raster_path = '../session2/DEM_md5_53d4998eec75d803a318fafd28c40a3e.tif'
    aoi_vector_path = './session2/aoi.gpkg'

    raster_info = pygeoprocessing.get_raster_info(raster_path)
    vector_info = pygeoprocessing.get_vector_info(aoi_vector_path)

    raster_projected_bounding_box = pygeoprocessing.transform_bounding_box(
        vector_info['bounding_box'], vector_info['projection_wkt'],
        raster_info['projection_wkt'])

    target_clipped_raster_path = 'DEM_clip.tif'
    pygeoprocessing.warp_raster(
        raster_path, raster_info['pixel_size'], target_clipped_raster_path,
        'near', target_bb=raster_projected_bounding_box)
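warp_raster clips to a bounding box only; to also set pixels outside the AOI
polygon itself to nodata, the same call accepts vector_mask_options, as the
clip-and-mask examples earlier do. A sketch reusing the variables above (the
target filename is illustrative):

masked_raster_path = 'DEM_clip_masked.tif'
pygeoprocessing.warp_raster(
    raster_path, raster_info['pixel_size'], masked_raster_path,
    'near', target_bb=raster_projected_bounding_box,
    vector_mask_options={'mask_vector_path': aoi_vector_path})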
Exemple #13
0
def _mask_raster_by_vector(
        base_raster_path_band, vector_path, working_dir, target_raster_path):
    """Mask pixels outside of the vector to nodata.

    Parameters:
        base_raster_path_band (tuple): a (path, band index) tuple for the
            raster to process.
        vector_path (string): path to a single-layer vector that indicates
            the areas to preserve from the base raster.  Areas outside
            of this vector are set to nodata.
        working_dir (str): path to temporary directory.
        target_raster_path (string): path to a single band raster that will be
            created of the same dimensions and data type as
            `base_raster_path_band` where any pixels that lie outside of
            `vector_path` coverage will be set to nodata.

    Returns:
        None.

    """
    # Warp the input raster to the intersection of its bounding box with
    # the vector's bounding box.
    base_raster_info = pygeoprocessing.get_raster_info(
        base_raster_path_band[0])
    nodata = base_raster_info['nodata'][base_raster_path_band[1]-1]
    target_pixel_size = base_raster_info['pixel_size']
    vector_info = pygeoprocessing.get_vector_info(vector_path)
    target_bounding_box = pygeoprocessing.merge_bounding_box_list(
        [base_raster_info['bounding_box'],
         vector_info['bounding_box']], 'intersection')
    pygeoprocessing.warp_raster(
        base_raster_path_band[0], target_pixel_size, target_raster_path,
        'near', target_bb=target_bounding_box)

    # Create mask raster same size as the warped raster.
    tmp_dir = tempfile.mkdtemp(dir=working_dir)
    mask_raster_path = os.path.join(tmp_dir, 'mask.tif')
    pygeoprocessing.new_raster_from_base(
        target_raster_path, mask_raster_path, gdal.GDT_Byte, [0],
        fill_value_list=[0])

    # Rasterize the vector onto the mask raster
    pygeoprocessing.rasterize(vector_path, mask_raster_path, [1], None)

    # Iterate over the warped raster and mask raster in lockstep, setting
    # pixels outside the mask to nodata.
    target_raster = gdal.OpenEx(
        target_raster_path, gdal.GA_Update | gdal.OF_RASTER)
    target_band = target_raster.GetRasterBand(1)
    mask_raster = gdal.OpenEx(mask_raster_path, gdal.OF_RASTER)
    mask_band = mask_raster.GetRasterBand(1)

    for offset_dict in pygeoprocessing.iterblocks(
            (mask_raster_path, 1), offset_only=True):
        data_array = target_band.ReadAsArray(**offset_dict)
        mask_array = mask_band.ReadAsArray(**offset_dict)
        data_array[mask_array != 1] = nodata
        target_band.WriteArray(
            data_array, xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])
    target_band.FlushCache()
    target_band = None
    target_raster = None
    mask_band = None
    mask_raster = None
    try:
        shutil.rmtree(tmp_dir)
    except OSError:
        LOGGER.warn("Unable to delete temporary file %s", mask_raster_path)
Exemple #14
0
import logging
import shutil
import time

import pygeoprocessing
from natcap.invest import coastal_vulnerability as cv
from osgeo import gdal, ogr

LOGGER = logging.getLogger()
logging.basicConfig(level='INFO')

fetch_ray_vector_path = 'C:/Users/dmf/projects/invest_dev/coastal_vulnerability/bahamas/workspace_37/workspace_37/intermediate/wind_wave/fetch_rays.gpkg'
# fetch_ray_vector_path = 'C:/Users/dmf/projects/invest_dev/coastal_vulnerability/MAR/intermediate/wind_wave/fetch_rays.gpkg'
vector_info = pygeoprocessing.get_vector_info(fetch_ray_vector_path)
model_resolution = 500
file_suffix = ''
base_bathy_path = 'C:/Users/dmf/projects/invest/data/invest-sample-data/Base_Data/Marine/DEMs/global_dem'
target_bathy_path = 'bathy_utm.tif'
working_dir = 'temp_zonal_stats'
target_fetch_depth_path = 'fetch_depth_bahamas.gpkg'

start = time.time()

cv.clip_and_project_raster(base_bathy_path, vector_info['bounding_box'],
                           vector_info['projection_wkt'], model_resolution,
                           working_dir, file_suffix, target_bathy_path)

result = pygeoprocessing.zonal_statistics((target_bathy_path, 1),
                                          fetch_ray_vector_path,
                                          polygons_might_overlap=False,
                                          working_dir=working_dir)
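
zonal_statistics returns a dict keyed by feature FID whose values carry
'min', 'max', 'count', 'nodata_count', and 'sum' entries, so the mean depth
under each fetch ray follows from sum / count. A short sketch (the variable
names are illustrative):

mean_depth_by_fid = {}
for fid, stats in result.items():
    if stats['count'] > 0:
        mean_depth_by_fid[fid] = stats['sum'] / stats['count']
    else:
        mean_depth_by_fid[fid] = None  # ray fell entirely on nodata
LOGGER.info('computed mean depth for %d rays', len(mean_depth_by_fid))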
Exemple #15
0
    churn_dir = os.path.join(args.workspace_dir, 'churn')
    try:
        os.makedirs(churn_dir)
    except OSError:
        pass

    # ensure AOI and CV points are in the same projection
    aoi_raster_info = pygeoprocessing.get_raster_info(
        args.aoi_mask_raster_path)

    aoi_srs = osr.SpatialReference()
    aoi_srs.ImportFromWkt(aoi_raster_info['projection_wkt'])
    aoi_epsg = aoi_srs.GetAttrValue("PROJCS|GEOGCS|AUTHORITY", 1)

    shoreline_point_info = pygeoprocessing.get_vector_info(
        args.shoreline_point_vector_path)
    shoreline_srs = osr.SpatialReference()
    shoreline_srs.ImportFromWkt(shoreline_point_info['projection_wkt'])
    shoreline_epsg = shoreline_srs.GetAttrValue("PROJCS|GEOGCS|AUTHORITY", 1)

    habitat_vector_info = pygeoprocessing.get_vector_info(
        args.habitat_vector_path)
    habitat_vector_srs = osr.SpatialReference()
    habitat_vector_srs.ImportFromWkt(habitat_vector_info['projection_wkt'])
    habitat_vector_epsg = habitat_vector_srs.GetAttrValue(
        "PROJCS|GEOGCS|AUTHORITY", 1)

    if len(set([habitat_vector_epsg, shoreline_epsg, aoi_epsg])) > 1:
        raise ValueError(
            "AOI raster, shoreline point vector, and habitat vector do not "
            "all share the same  projection")
Exemple #16
0
def process_watershed(job_id, watershed_vector_path, watershed_fid, dem_path,
                      hab_path, pop_raster_path_list,
                      target_beneficiaries_path_list,
                      target_normalized_beneficiaries_path_list,
                      target_hab_normalized_beneficiaries_path_list,
                      target_stitch_work_queue_list):
    """Calculate downstream beneficiaries for this watershed.

    Args:
        job_id (str): unique ID identifying this job, can be used to
            create unique workspaces.
        watershed_vector_path (str): path to watershed vector
        watershed_fid (int): watershed FID to process
        dem_path (str): path to DEM raster
        hab_path (str): path to habitat mask raster
        pop_raster_path_list (list): list of population rasters to route
        target_beneficiaries_path_list (list): list of target downstream
            beneficiary rasters to create, parallel with
            `pop_raster_path_list`.
        target_normalized_beneficiaries_path_list (list): list of target
            normalized downstream beneficiary rasters, parallel with other
            lists.
        target_hab_normalized_beneficiaries_path_list (list): list of target
            hab normalized downstream beneficiary rasters, parallel with other
            lists.
        target_stitch_work_queue_list (list): list of work queue tuples to
            put done signals in when each beneficiary raster is done. The
            first element is for the standard target, the second for the
            normalized raster, and the third for the hab-normalized raster.

    Returns:
        None.
    """
    working_dir = os.path.dirname(target_beneficiaries_path_list[0])
    os.makedirs(working_dir, exist_ok=True)
    LOGGER.debug(f'create working directory for {job_id} at {working_dir}')

    task_graph = taskgraph.TaskGraph(working_dir, -1)

    watershed_info = pygeoprocessing.get_vector_info(watershed_vector_path)
    watershed_vector = gdal.OpenEx(watershed_vector_path, gdal.OF_VECTOR)
    watershed_layer = watershed_vector.GetLayer()
    watershed_feature = watershed_layer.GetFeature(watershed_fid)
    watershed_geom = watershed_feature.GetGeometryRef()
    watershed_centroid = watershed_geom.Centroid()
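    # Derive the UTM zone from the centroid longitude: zones are 6 degrees
    # wide, numbered 1-60 eastward from 180 degrees W, and the EPSG code is
    # 326xx for northern-hemisphere zones or 327xx for southern ones.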
    utm_code = (math.floor((watershed_centroid.GetX() + 180) / 6) % 60) + 1
    lat_code = 6 if watershed_centroid.GetY() > 0 else 7
    epsg_code = int('32%d%02d' % (lat_code, utm_code))
    epsg_sr = osr.SpatialReference()
    epsg_sr.ImportFromEPSG(epsg_code)

    watershed_envelope = watershed_geom.GetEnvelope()
    # reorder the envelope, which GDAL returns as (xmin, xmax, ymin, ymax),
    # into the (xmin, ymin, xmax, ymax) bounding-box convention
    lat_lng_watershed_bb = [watershed_envelope[i] for i in [0, 2, 1, 3]]
    target_watershed_bb = pygeoprocessing.transform_bounding_box(
        lat_lng_watershed_bb, watershed_info['projection_wkt'],
        epsg_sr.ExportToWkt())

    watershed_vector = None
    watershed_layer = None
    watershed_feature = None
    watershed_geom = None
    watershed_centroid = None
    watershed_envelope = None

    target_pixel_size = (300, -300)

    warped_dem_raster_path = os.path.join(working_dir, f'{job_id}_dem.tif')
    warped_habitat_raster_path = os.path.join(working_dir, f'{job_id}_hab.tif')
    align_task = task_graph.add_task(
        func=pygeoprocessing.align_and_resize_raster_stack,
        args=([dem_path,
               hab_path], [warped_dem_raster_path, warped_habitat_raster_path],
              ['near', 'mode'], target_pixel_size, target_watershed_bb),
        kwargs={
            'target_projection_wkt': epsg_sr.ExportToWkt(),
            'vector_mask_options': {
                'mask_vector_path': watershed_vector_path,
                'mask_vector_where_filter': f'"FID"={watershed_fid}'
            },
        },
        target_path_list=[warped_dem_raster_path, warped_habitat_raster_path],
        task_name=(
            f'align and clip and warp dem/hab to {warped_dem_raster_path} '
            f'{warped_habitat_raster_path}'))

    filled_dem_raster_path = os.path.join(working_dir,
                                          f'{job_id}_filled_dem.tif')
    fill_pits_task = task_graph.add_task(
        func=pygeoprocessing.routing.fill_pits,
        args=((warped_dem_raster_path, 1), filled_dem_raster_path),
        kwargs={
            'working_dir': working_dir,
            'max_pixel_fill_count': 1000000
        },
        dependent_task_list=[align_task],
        target_path_list=[filled_dem_raster_path],
        task_name=f'fill dem pits to {filled_dem_raster_path}')

    flow_dir_mfd_raster_path = os.path.join(working_dir,
                                            f'{job_id}_flow_dir_mfd.tif')
    flow_dir_mfd_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_dir_mfd,
        args=((filled_dem_raster_path, 1), flow_dir_mfd_raster_path),
        kwargs={'working_dir': working_dir},
        dependent_task_list=[fill_pits_task],
        target_path_list=[flow_dir_mfd_raster_path],
        task_name=f'calc flow dir for {flow_dir_mfd_raster_path}')

    outlet_vector_path = os.path.join(working_dir,
                                      f'{job_id}_outlet_vector.gpkg')
    detect_outlets_task = task_graph.add_task(
        func=pygeoprocessing.routing.detect_outlets,
        args=((flow_dir_mfd_raster_path, 1), 'mfd', outlet_vector_path),
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[outlet_vector_path],
        task_name=f'detect outlets {outlet_vector_path}')

    outlet_raster_path = os.path.join(working_dir,
                                      f'{job_id}_outlet_raster.tif')
    create_outlet_raster_task = task_graph.add_task(
        func=_create_outlet_raster,
        args=(outlet_vector_path, flow_dir_mfd_raster_path,
              outlet_raster_path),
        dependent_task_list=[detect_outlets_task],
        target_path_list=[outlet_raster_path],
        task_name=f'create outlet raster {outlet_raster_path}')

    flow_accum_mfd_raster_path = os.path.join(working_dir,
                                              f'{job_id}_flow_accum.tif')
    flow_accum_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=((flow_dir_mfd_raster_path, 1), flow_accum_mfd_raster_path),
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[flow_accum_mfd_raster_path],
        task_name=f'calc upstream flow area for {flow_accum_mfd_raster_path}')

    hab_upstream_area_raster_path = os.path.join(working_dir,
                                                 f'{job_id}_hab_upstream.tif')
    hab_upstream_task = task_graph.add_task(
        func=pygeoprocessing.routing.flow_accumulation_mfd,
        args=((flow_dir_mfd_raster_path, 1), hab_upstream_area_raster_path),
        kwargs={'weight_raster_path_band': (warped_habitat_raster_path, 1)},
        dependent_task_list=[flow_dir_mfd_task],
        target_path_list=[hab_upstream_area_raster_path],
        task_name=(
            f'calc upstream hab area for {hab_upstream_area_raster_path}'))

    for (pop_raster_path, target_beneficiaries_path,
         target_normalized_beneficiaries_path,
         target_hab_normalized_beneficiaries_path, stitch_queue_tuple) in zip(
             pop_raster_path_list, target_beneficiaries_path_list,
             target_normalized_beneficiaries_path_list,
             target_hab_normalized_beneficiaries_path_list,
             target_stitch_work_queue_list):

        LOGGER.debug(f'processing {target_beneficiaries_path} and normalized')

        aligned_pop_raster_path = os.path.join(
            working_dir, f'''{job_id}_{os.path.basename(
                os.path.splitext(pop_raster_path)[0])}.tif''')

        pop_warp_task = task_graph.add_task(
            func=_warp_and_wgs84_area_scale,
            args=(pop_raster_path, warped_dem_raster_path,
                  aligned_pop_raster_path, 'near', lat_lng_watershed_bb,
                  watershed_vector_path, watershed_fid, working_dir),
            dependent_task_list=[align_task],
            target_path_list=[aligned_pop_raster_path],
            task_name=f'align {aligned_pop_raster_path}')

        downstream_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=((flow_dir_mfd_raster_path, 1), (outlet_raster_path, 1),
                  target_beneficiaries_path),
            kwargs={'weight_raster_path_band': (aligned_pop_raster_path, 1)},
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task, flow_dir_mfd_task
            ],
            target_path_list=[target_beneficiaries_path],
            task_name=('calc downstream beneficiaries for '
                       f'{target_beneficiaries_path}'))

        # divide aligned_pop_raster_path by flow accum to get normalized then
        # route it downstream
        pop_normal_by_upstream_raster_path = '%s_norm%s' % os.path.splitext(
            aligned_pop_raster_path)

        normalize_by_dist_task = task_graph.add_task(
            func=normalize,
            args=(aligned_pop_raster_path, flow_accum_mfd_raster_path,
                  pop_normal_by_upstream_raster_path),
            dependent_task_list=[flow_accum_task, align_task],
            target_path_list=[pop_normal_by_upstream_raster_path],
            task_name=(f'normalized beneficiaries for '
                       f'{pop_normal_by_upstream_raster_path}'))

        prescaled_normalized_beneficiaries_path = (
            '%s_prescaled%s' %
            os.path.splitext(target_normalized_beneficiaries_path))
        downstream_norm_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=((flow_dir_mfd_raster_path, 1), (outlet_raster_path, 1),
                  prescaled_normalized_beneficiaries_path),
            kwargs={
                'weight_raster_path_band':
                (pop_normal_by_upstream_raster_path, 1)
            },
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task, flow_dir_mfd_task,
                normalize_by_dist_task
            ],
            target_path_list=[prescaled_normalized_beneficiaries_path],
            task_name=('calc downstream normalized beneficiaries for '
                       f'{prescaled_normalized_beneficiaries_path}'))

        task_graph.add_task(
            func=rescale_by_base,
            args=(aligned_pop_raster_path,
                  prescaled_normalized_beneficiaries_path,
                  target_normalized_beneficiaries_path),
            target_path_list=[target_normalized_beneficiaries_path],
            dependent_task_list=[downstream_norm_bene_task],
            task_name=f'rescale {target_normalized_beneficiaries_path}')

        # divide aligned_pop_raster_path by hab accum to get normalized by
        # hab then route it downstream
        pop_hab_normal_by_upstream_raster_path = (
            '%s_hab_norm%s' % os.path.splitext(aligned_pop_raster_path))

        normalize_by_dist_task = task_graph.add_task(
            func=normalize,
            args=(aligned_pop_raster_path, hab_upstream_area_raster_path,
                  pop_hab_normal_by_upstream_raster_path),
            dependent_task_list=[hab_upstream_task, align_task],
            target_path_list=[pop_hab_normal_by_upstream_raster_path],
            task_name=(f'normalized beneficiaries for '
                       f'{pop_hab_normal_by_upstream_raster_path}'))
        hab_pre_mask_normalized_beneficiaries_path = (
            '%s_pre_mask%s' %
            os.path.splitext(target_hab_normalized_beneficiaries_path))
        downstream_norm_hab_bene_task = task_graph.add_task(
            func=pygeoprocessing.routing.distance_to_channel_mfd,
            args=((flow_dir_mfd_raster_path, 1), (outlet_raster_path, 1),
                  hab_pre_mask_normalized_beneficiaries_path),
            kwargs={
                'weight_raster_path_band':
                (pop_hab_normal_by_upstream_raster_path, 1)
            },
            dependent_task_list=[
                pop_warp_task, create_outlet_raster_task, flow_dir_mfd_task,
                normalize_by_dist_task
            ],
            target_path_list=[hab_pre_mask_normalized_beneficiaries_path],
            task_name=('calc downstream normalized beneficiaries for '
                       f'{hab_pre_mask_normalized_beneficiaries_path}'))
        # mask this result to the target
        prescaled_hab_normalized_beneficiaries_path = (
            '%s_prescaled%s' %
            os.path.splitext(target_hab_normalized_beneficiaries_path))
        mask_downstream_norm_bene_task = task_graph.add_task(
            func=_mask_raster,
            args=(hab_pre_mask_normalized_beneficiaries_path,
                  warped_habitat_raster_path,
                  prescaled_hab_normalized_beneficiaries_path),
            dependent_task_list=[downstream_norm_hab_bene_task, align_task],
            target_path_list=[prescaled_hab_normalized_beneficiaries_path],
            task_name=f'mask {prescaled_hab_normalized_beneficiaries_path}')

        task_graph.add_task(
            func=rescale_by_base,
            args=(aligned_pop_raster_path,
                  prescaled_hab_normalized_beneficiaries_path,
                  target_hab_normalized_beneficiaries_path),
            target_path_list=[target_hab_normalized_beneficiaries_path],
            dependent_task_list=[mask_downstream_norm_bene_task],
            task_name=f'rescale {target_hab_normalized_beneficiaries_path}')

        task_graph.join()
        stitch_queue_tuple[0].put(
            (target_beneficiaries_path, working_dir, job_id))
        stitch_queue_tuple[1].put(
            (target_normalized_beneficiaries_path, working_dir, job_id))
        stitch_queue_tuple[2].put(
            (target_hab_normalized_beneficiaries_path, working_dir, job_id))

    task_graph.close()
    task_graph.join()
    task_graph = None
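
The normalize and rescale_by_base helpers referenced above are defined
elsewhere and not shown. A minimal sketch of what normalize might look like,
built on pygeoprocessing.raster_calculator; the nodata handling and float32
output are assumptions, not the authors' actual implementation:

import numpy
import pygeoprocessing
from osgeo import gdal


def normalize(base_raster_path, denom_raster_path, target_raster_path):
    """Divide base by denominator pixelwise (a guess at the helper above)."""
    base_nodata = pygeoprocessing.get_raster_info(
        base_raster_path)['nodata'][0]
    target_nodata = -1.0  # assumed output nodata value

    def _div_op(base_array, denom_array):
        result = numpy.full(base_array.shape, target_nodata, numpy.float32)
        valid = denom_array > 0
        if base_nodata is not None:
            valid &= ~numpy.isclose(base_array, base_nodata)
        result[valid] = base_array[valid] / denom_array[valid]
        return result

    pygeoprocessing.raster_calculator(
        [(base_raster_path, 1), (denom_raster_path, 1)], _div_op,
        target_raster_path, gdal.GDT_Float32, target_nodata)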