Example #1
def check_spatial_overlap(spatial_filepaths_list,
                          different_projections_ok=False):
    """Check that the given spatial files spatially overlap.

    Args:
        spatial_filepaths_list (list): A list of files that can be opened with
            GDAL.  Must be on the local filesystem.
        different_projections_ok (bool): Whether it's OK for the input
            spatial files to have different projections.  If ``True``, all
            projections will be converted to WGS84 before overlap is checked.

    Returns:
        A string error message if an error is found.  ``None`` otherwise.

    """
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)
    wgs84_wkt = wgs84_srs.ExportToWkt()

    bounding_boxes = []
    checked_file_list = []
    for filepath in spatial_filepaths_list:
        try:
            info = pygeoprocessing.get_raster_info(filepath)
        except ValueError:
            info = pygeoprocessing.get_vector_info(filepath)
        bounding_box = info['bounding_box']

        if different_projections_ok:
            bounding_box = pygeoprocessing.transform_bounding_box(
                bounding_box, info['projection'], wgs84_wkt)

        if all(numpy.isinf(coord) for coord in bounding_box):
            LOGGER.warning('Skipping infinite bounding box for file %s',
                           filepath)
            continue

        bounding_boxes.append(bounding_box)
        checked_file_list.append(filepath)

    try:
        pygeoprocessing.merge_bounding_box_list(bounding_boxes, 'intersection')
    except ValueError as error:
        LOGGER.debug(error)
        formatted_lists = ' | '.join([
            a + ': ' + str(b)
            for a, b in zip(checked_file_list, bounding_boxes)
        ])
        message = f"Bounding boxes do not intersect: {formatted_lists}"
        return message
    return None
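A minimal usage sketch for the check above (file names are hypothetical; assumes the same osr/numpy/pygeoprocessing imports and LOGGER the function relies on are in scope):

    error_msg = check_spatial_overlap(
        ['dem.tif', 'watersheds.gpkg'],  # hypothetical local files
        different_projections_ok=True)
    if error_msg is not None:
        raise ValueError(error_msg)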
def mult_by_columns(lasso_table_path,
                    data_dir,
                    workspace_dir,
                    base_convolution_raster_id,
                    target_raster_id,
                    bounding_box,
                    pixel_size,
                    target_result_path,
                    task_graph,
                    zero_nodata_symbols=None,
                    target_nodata=numpy.finfo('float32').min,
                    conversion_factor=None):
    """Calculate large regression.

    Args:
        lasso_table_path (str): path to lasso table
        data_dir (str): path to directory containing rasters in lasso
            table path
        workspace_dir (str): path to output directory, will contain
            "result.tif" after completion
        base_convolution_raster_id (str): The convolution columns in
            the lasso table have the form [base]_[mask_type]_gs[kernel_size];
            this parameter matches [base] so it can be replaced with a
            filename of the form [target_raster_id]_[mask_type]_[kernel_size].
        target_raster_id (str): the base of the target raster id used to
            replace ``base_convolution_raster_id`` in the table.
        bounding_box (list): if not ``None``, manual bounding box in the
            form of four consecutive floats: min_lng, min_lat, max_lng,
            max_lat, e.g. -180.0, -58.3, 180.0, 81.5.
        pixel_size (tuple): desired target pixel size in raster units
        target_result_path (str): path to desired output raster
        task_graph (TaskGraph): TaskGraph object that can be used for
            scheduling.
        zero_nodata_symbols (set): set of symbols whose nodata values should be
            treated as 0.
        target_nodata (float): desired target nodata value
        conversion_factor (float): if not None, this factor is multiplied by
            the final result before it is written to the target raster.

    Returns:
        None

    """
    lasso_df = pandas.read_csv(lasso_table_path, header=None)
    LOGGER.debug(f"parsing through {lasso_table_path}")
    # build a reverse polish notation (RPN) stack of the operations in the
    # order they need to be executed
    rpn_stack = []
    first_term = True
    for row_index, row in lasso_df.iterrows():
        header = row[0]
        if header == INTERCEPT_COLUMN_ID:
            # special case of the intercept, just push it
            rpn_stack.append(float(row[1]))
        else:
            # it's an expression/coefficient row
            LOGGER.debug(f'{row_index}: {row}')
            coefficient = float(row[1])
            # put on the coefficient first since it's there, we'll multiply
            # it later
            rpn_stack.append(coefficient)

            # split out all the multiplication terms
            product_list = header.split('*')
            for product in product_list:
                if product.startswith(base_convolution_raster_id):
                    LOGGER.debug(f'parsing out base and gs in {product}')
                    match = re.match(fr'{base_convolution_raster_id}(.*)',
                                     product)
                    suffix = match.group(1)
                    product = f'{target_raster_id}{suffix}'
                # for each multiplication term split out an exponent if exists
                if '^' in product:
                    rpn_stack.extend(product.split('^'))
                    # cast the exponent to an integer so it can be operated
                    # on directly
                    rpn_stack[-1] = int(rpn_stack[-1])
                    # push the ^ to exponentiate the last two operations
                    rpn_stack.append('^')
                else:
                    # otherwise it's a single value
                    rpn_stack.append(product)
                # multiply this term and the last
                rpn_stack.append('*')

        # after the first term, push a '+' so this term is summed with the
        # previous ones
        if first_term:
            first_term = False
        else:
            rpn_stack.append('+')

    LOGGER.debug(rpn_stack)
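    # Illustrative example (hypothetical table contents): a row with
    # coefficient 0.5 and header 'precip*tmean^2' appends
    #     [0.5, 'precip', '*', 'tmean', 2, '^', '*']
    # and, when it is not the first term, a trailing '+' to sum it with the
    # terms already on the stack.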

    # find the unique symbols in the expression
    raster_id_list = [
        x for x in set(rpn_stack) - set(OPERATOR_FN)
        if not isinstance(x, (int, float))
    ]

    LOGGER.debug(raster_id_list)

    # translate symbols into raster paths and get relevant raster info
    raster_id_to_info_map = {}
    missing_raster_path_list = []
    min_size = sys.float_info.max
    bounding_box_list = []
    for index, raster_id in enumerate(raster_id_list):
        raster_path = os.path.join(data_dir, f'{raster_id}.tif')
        if not os.path.exists(raster_path):
            missing_raster_path_list.append(raster_path)
            continue
        else:
            raster_info = pygeoprocessing.get_raster_info(raster_path)
            raster_id_to_info_map[raster_id] = {
                'path': raster_path,
                'nodata': raster_info['nodata'][0],
                'index': index,
            }
            min_size = min(min_size, abs(raster_info['pixel_size'][0]))
            bounding_box_list.append(raster_info['bounding_box'])

    if missing_raster_path_list:
        LOGGER.error(
            f'expected the following '
            f'{"rasters" if len(missing_raster_path_list) > 1 else "raster"} given '
            f'the entries in the table, but could not find them locally:\n' +
            "\n".join(missing_raster_path_list))
        sys.exit(-1)

    LOGGER.info(f'raster paths:\n{str(raster_id_to_info_map)}')

    if bounding_box:
        target_bounding_box = bounding_box
    else:
        target_bounding_box = pygeoprocessing.merge_bounding_box_list(
            bounding_box_list, 'intersection')

    if not pixel_size:
        pixel_size = (min_size, -min_size)

    LOGGER.info(f'target pixel size: {pixel_size}')
    LOGGER.info(f'target bounding box: {target_bounding_box}')

    LOGGER.debug('align rasters, this might take a while')
    align_dir = os.path.join(workspace_dir, 'aligned_rasters')
    try:
        os.makedirs(align_dir)
    except OSError:
        pass

    # schedule alignment of each raster and record the aligned path in
    # raster_id_to_info_map
    for raster_id in raster_id_to_info_map:
        raster_path = raster_id_to_info_map[raster_id]['path']
        raster_basename = os.path.splitext(os.path.basename(raster_path))[0]
        aligned_raster_path = os.path.join(
            align_dir,
            f'{raster_basename}_{target_bounding_box}_{pixel_size}.tif')
        raster_id_to_info_map[raster_id]['aligned_path'] = \
            aligned_raster_path
        task_graph.add_task(func=pygeoprocessing.warp_raster,
                            args=(raster_path, pixel_size, aligned_raster_path,
                                  'near'),
                            kwargs={
                                'target_bb': target_bounding_box,
                                'working_dir': workspace_dir
                            })

    LOGGER.info('construct raster calculator raster path band list')
    raster_path_band_list = []
    LOGGER.debug(raster_id_list)
    LOGGER.debug(raster_id_to_info_map)
    for index, raster_id in enumerate(raster_id_list):
        raster_path_band_list.append(
            (raster_id_to_info_map[raster_id]['aligned_path'], 1))
        raster_path_band_list.append(
            (raster_id_to_info_map[raster_id]['nodata'], 'raw'))
        if index != raster_id_to_info_map[raster_id]['index']:
            raise RuntimeError(f"indexes dont match: {index} {raster_id} "
                               f"{raster_id_to_info_map}")

    zero_nodata_indexes = {
        raster_id_to_info_map[raster_id]['index']
        for raster_id in zero_nodata_symbols
        if raster_id in raster_id_to_info_map
    }

    raster_path_band_list.append((target_nodata, 'raw'))
    raster_path_band_list.append((rpn_stack, 'raw'))
    raster_path_band_list.append((raster_id_to_info_map, 'raw'))
    raster_path_band_list.append((zero_nodata_indexes, 'raw'))
    raster_path_band_list.append((conversion_factor, 'raw'))
    LOGGER.debug(rpn_stack)

    # wait for rasters to align
    task_graph.join()

    LOGGER.debug(raster_path_band_list)
    pygeoprocessing.raster_calculator(raster_path_band_list,
                                      raster_rpn_calculator_op,
                                      target_result_path, gdal.GDT_Float32,
                                      float(target_nodata))
    LOGGER.debug('all done with mult_by_columns')
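A hedged invocation sketch for the function above (paths, raster ids, and the TaskGraph construction are hypothetical):

    task_graph = taskgraph.TaskGraph('workspace', -1)  # single-process mode
    mult_by_columns(
        'lasso_table.csv', 'data', 'workspace',
        base_convolution_raster_id='lulc_esa',
        target_raster_id='lulc_scenario',
        bounding_box=None,  # None -> derive from raster intersection
        pixel_size=None,    # None -> derive from the finest input raster
        target_result_path='workspace/result.tif',
        task_graph=task_graph,
        zero_nodata_symbols={'lulc_scenario_forest_gs5'})  # hypothetical id
    task_graph.close()
    task_graph.join()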
Example #4
def execute(args):
    """Forest Carbon Edge Effect.

    InVEST Carbon Edge Model calculates the carbon due to edge effects in
    tropical forest pixels.

    Args:
        args['workspace_dir'] (string): a path to the directory into which
            output and other temporary files will be written during
            calculation. (required)
        args['results_suffix'] (string): a string to append to any output file
            name (optional)
        args['n_nearest_model_points'] (int): number of nearest neighbor model
            points to search for
        args['aoi_vector_path'] (string): (optional) if present, a path to a
            shapefile that will be used to aggregate carbon stock results at
            the end of the run.
        args['biophysical_table_path'] (string): a path to a CSV table that has
            at least the fields 'lucode' and 'c_above'. If
            ``args['compute_forest_edge_effects'] == True``, table must
            also contain an 'is_tropical_forest' field.  If
            ``args['pools_to_calculate'] == 'all'``, this table must contain
            the fields 'c_below', 'c_dead', and 'c_soil'.

                * ``lucode``: an integer that corresponds to landcover codes in
                  the raster ``args['lulc_raster_path']``
                * ``is_tropical_forest``: either 0 or 1 indicating whether the
                  landcover type is forest (1) or not (0).  If 1, the value
                  in ``c_above`` is ignored and instead calculated from the
                  edge regression model.
                * ``c_above``: floating point number indicating tons of above
                  ground carbon per hectare for that landcover type
                * ``{'c_below', 'c_dead', 'c_soil'}``: three other optional
                  carbon pools that will statically map landcover types to the
                  carbon densities in the table.

                Example::

                    lucode,is_tropical_forest,c_above,c_soil,c_dead,c_below
                    0,0,32.8,5,5.2,2.1
                    1,1,n/a,2.5,0.0,0.0
                    2,1,n/a,1.8,1.0,0.0
                    16,0,28.1,4.3,0.0,2.0

                Note the "n/a" in ``c_above`` are optional since that field
                is ignored when ``is_tropical_forest==1``.
        args['lulc_raster_path'] (string): path to an integer landcover code
            raster
        args['pools_to_calculate'] (string): if "all" then all carbon pools
            will be calculted.  If any other value only above ground carbon
            pools will be calculated and expect only a 'c_above' header in
            the biophysical table. If "all" model expects 'c_above',
            'c_below', 'c_dead', 'c_soil' in header of biophysical_table and
            will make a translated carbon map for each based off the landcover
            map.
        args['compute_forest_edge_effects'] (boolean): if True, requires the
            biophysical table to have an 'is_tropical_forest' field; any
            landcover codes that have a 1 in this column calculate carbon
            stocks using the Chaplin-Kramer et al. method and ignore
            'c_above'.
        args['tropical_forest_edge_carbon_model_vector_path'] (string):
            path to a shapefile that defines the regions for the local carbon
            edge models.  Has at least the fields 'method', 'theta1', 'theta2',
            'theta3'.  Where 'method' is an integer from 1 to 3 describing the
            biomass regression model, and the thetas are floating point numbers
            that have different meanings depending on the 'method' parameter.
            Specifically,

                * method 1 (asymptotic model)::

                    biomass = theta1 - theta2 * exp(-theta3 * edge_dist_km)

                * method 2 (logarithmic model)::

                    # NOTE: theta3 is ignored for this method
                    biomass = theta1 + theta2 * numpy.log(edge_dist_km)

                * method 3 (linear regression)::

                    biomass = theta1 + theta2 * edge_dist_km
        args['biomass_to_carbon_conversion_factor'] (string/float): Number by
            which to multiply forest biomass to convert to carbon in the edge
            effect calculation.
        args['n_workers'] (int): (optional) The number of worker processes to
            use for processing this model.  If omitted, computation will take
            place in the current process.

    Returns:
        None

    """
    # just check that the AOI exists since it wouldn't crash until the end of
    # the whole model run if it didn't.
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        aoi_vector = gdal.OpenEx(args['aoi_vector_path'], gdal.OF_VECTOR)
        if not aoi_vector:
            raise ValueError("Unable to open aoi at: %s" %
                             args['aoi_vector_path'])
        else:
            aoi_vector = None
            lulc_raster_bb = pygeoprocessing.get_raster_info(
                args['lulc_raster_path'])['bounding_box']
            aoi_vector_bb = pygeoprocessing.get_vector_info(
                args['aoi_vector_path'])['bounding_box']
            try:
                merged_bb = pygeoprocessing.merge_bounding_box_list(
                    [lulc_raster_bb, aoi_vector_bb], 'intersection')
                LOGGER.debug("merged bounding boxes: %s", merged_bb)
            except ValueError:
                raise ValueError(
                    "The landcover raster %s and AOI %s do not touch each "
                    "other." %
                    (args['lulc_raster_path'], args['aoi_vector_path']))

    output_dir = args['workspace_dir']
    intermediate_dir = os.path.join(args['workspace_dir'],
                                    'intermediate_outputs')
    utils.make_directories([output_dir, intermediate_dir])
    file_suffix = utils.make_suffix_string(args, 'results_suffix')

    # Initialize a TaskGraph
    taskgraph_working_dir = os.path.join(intermediate_dir,
                                         '_taskgraph_working_dir')
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # single process mode.
    task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)

    # used to keep track of files generated by this module
    output_file_registry = {
        'c_above_map':
        os.path.join(intermediate_dir,
                     'c_above_carbon_stocks%s.tif' % file_suffix),
        'carbon_map':
        os.path.join(output_dir, 'carbon_map%s.tif' % file_suffix),
        'aggregated_result_vector':
        os.path.join(output_dir,
                     'aggregated_carbon_stocks%s.shp' % file_suffix)
    }

    if args['pools_to_calculate'] == 'all':
        output_file_registry['c_below_map'] = os.path.join(
            intermediate_dir, 'c_below_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_soil_map'] = os.path.join(
            intermediate_dir, 'c_soil_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['c_dead_map'] = os.path.join(
            intermediate_dir, 'c_dead_carbon_stocks%s.tif' % file_suffix)

    if args['compute_forest_edge_effects']:
        output_file_registry['spatial_index_pickle'] = os.path.join(
            intermediate_dir, 'spatial_index%s.pickle' % file_suffix)
        output_file_registry['edge_distance'] = os.path.join(
            intermediate_dir, 'edge_distance%s.tif' % file_suffix)
        output_file_registry['tropical_forest_edge_carbon_map'] = os.path.join(
            intermediate_dir,
            'tropical_forest_edge_carbon_stocks%s.tif' % file_suffix)
        output_file_registry['non_forest_mask'] = os.path.join(
            intermediate_dir, 'non_forest_mask%s.tif' % file_suffix)

    # Map non-forest landcover codes to carbon biomasses
    LOGGER.info('Calculating direct mapped carbon stocks')
    carbon_maps = []
    biophysical_table = utils.build_lookup_from_csv(
        args['biophysical_table_path'], 'lucode', to_lower=False)
    biophysical_keys = [
        x.lower() for x in list(biophysical_table.values())[0].keys()
    ]
    pool_list = [('c_above', True)]
    if args['pools_to_calculate'] == 'all':
        pool_list.extend([('c_below', False), ('c_soil', False),
                          ('c_dead', False)])
    for carbon_pool_type, ignore_tropical_type in pool_list:
        if carbon_pool_type in biophysical_keys:
            carbon_maps.append(output_file_registry[carbon_pool_type + '_map'])
            task_graph.add_task(
                func=_calculate_lulc_carbon_map,
                args=(args['lulc_raster_path'], args['biophysical_table_path'],
                      carbon_pool_type, ignore_tropical_type,
                      args['compute_forest_edge_effects'], carbon_maps[-1]),
                target_path_list=[carbon_maps[-1]],
                task_name='calculate_lulc_%s_map' % carbon_pool_type)

    if args['compute_forest_edge_effects']:
        # generate a map of pixel distance to forest edge from the landcover
        # map
        LOGGER.info('Calculating distance from forest edge')
        map_distance_task = task_graph.add_task(
            func=_map_distance_from_tropical_forest_edge,
            args=(args['lulc_raster_path'], args['biophysical_table_path'],
                  output_file_registry['edge_distance'],
                  output_file_registry['non_forest_mask']),
            target_path_list=[
                output_file_registry['edge_distance'],
                output_file_registry['non_forest_mask']
            ],
            task_name='map_distance_from_forest_edge')

        # Build spatial index for gridded global model for closest 3 points
        LOGGER.info('Building spatial index for forest edge models.')
        build_spatial_index_task = task_graph.add_task(
            func=_build_spatial_index,
            args=(args['lulc_raster_path'], intermediate_dir,
                  args['tropical_forest_edge_carbon_model_vector_path'],
                  output_file_registry['spatial_index_pickle']),
            target_path_list=[output_file_registry['spatial_index_pickle']],
            task_name='build_spatial_index')

        # calculate the carbon edge effect on forests
        LOGGER.info('Calculating forest edge carbon')
        task_graph.add_task(
            func=_calculate_tropical_forest_edge_carbon_map,
            args=(output_file_registry['edge_distance'],
                  output_file_registry['spatial_index_pickle'],
                  int(args['n_nearest_model_points']),
                  float(args['biomass_to_carbon_conversion_factor']),
                  output_file_registry['tropical_forest_edge_carbon_map']),
            target_path_list=[
                output_file_registry['tropical_forest_edge_carbon_map']
            ],
            task_name='calculate_forest_edge_carbon_map',
            dependent_task_list=[map_distance_task, build_spatial_index_task])

        # This is also a carbon stock
        carbon_maps.append(
            output_file_registry['tropical_forest_edge_carbon_map'])

    # combine maps into a single output
    LOGGER.info('combining carbon maps into a single raster')

    carbon_maps_band_list = [(path, 1) for path in carbon_maps]

    # Join here since the raster calculation depends on the target datasets
    # from all the tasks above
    task_graph.join()

    combine_carbon_maps_task = task_graph.add_task(
        func=pygeoprocessing.raster_calculator,
        args=(carbon_maps_band_list, combine_carbon_maps,
              output_file_registry['carbon_map'], gdal.GDT_Float32,
              NODATA_VALUE),
        target_path_list=[output_file_registry['carbon_map']],
        task_name='combine_carbon_maps')

    # generate report (optional) by aoi if they exist
    if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
        LOGGER.info('aggregating carbon map by aoi')
        task_graph.add_task(
            func=_aggregate_carbon_map,
            args=(args['aoi_vector_path'], output_file_registry['carbon_map'],
                  output_file_registry['aggregated_result_vector']),
            target_path_list=[
                output_file_registry['aggregated_result_vector']
            ],
            task_name='aggregate_carbon_map',
            dependent_task_list=[combine_carbon_maps_task])

    # close taskgraph
    task_graph.close()
    task_graph.join()
Example #5
def _mask_raster_by_vector(
        base_raster_path_band, vector_path, working_dir, target_raster_path):
    """Mask pixels outside of the vector to nodata.

    Args:
        base_raster_path_band (tuple): a (path, band index) tuple of the
            raster to process.
        vector_path (string): path to a single layer vector that is used to
            indicate areas to preserve from the base raster.  Areas outside
            of this vector are set to nodata.
        working_dir (str): path to temporary directory.
        target_raster_path (string): path to a single band raster that will be
            created of the same dimensions and data type as
            `base_raster_path_band` where any pixels that lie outside of
            `vector_path` coverage will be set to nodata.

    Returns:
        None.

    """
    # Warp the input raster to the intersection of its bounding box and the
    # vector's bounding box.
    base_raster_info = pygeoprocessing.get_raster_info(
        base_raster_path_band[0])
    nodata = base_raster_info['nodata'][base_raster_path_band[1]-1]
    target_pixel_size = base_raster_info['pixel_size']
    vector_info = pygeoprocessing.get_vector_info(vector_path)
    target_bounding_box = pygeoprocessing.merge_bounding_box_list(
        [base_raster_info['bounding_box'],
         vector_info['bounding_box']], 'intersection')
    pygeoprocessing.warp_raster(
        base_raster_path_band[0], target_pixel_size, target_raster_path,
        'near', target_bb=target_bounding_box)

    # Create mask raster same size as the warped raster.
    tmp_dir = tempfile.mkdtemp(dir=working_dir)
    mask_raster_path = os.path.join(tmp_dir, 'mask.tif')
    pygeoprocessing.new_raster_from_base(
        target_raster_path, mask_raster_path, gdal.GDT_Byte, [0],
        fill_value_list=[0])

    # Rasterize the vector onto the mask raster
    pygeoprocessing.rasterize(vector_path, mask_raster_path, [1], None)

    # Iterate over the warped raster and mask raster block by block to mask
    # out the original.
    target_raster = gdal.OpenEx(
        target_raster_path, gdal.GA_Update | gdal.OF_RASTER)
    target_band = target_raster.GetRasterBand(1)
    mask_raster = gdal.OpenEx(mask_raster_path, gdal.OF_RASTER)
    mask_band = mask_raster.GetRasterBand(1)

    for offset_dict in pygeoprocessing.iterblocks(
            (mask_raster_path, 1), offset_only=True):
        data_array = target_band.ReadAsArray(**offset_dict)
        mask_array = mask_band.ReadAsArray(**offset_dict)
        data_array[mask_array != 1] = nodata
        target_band.WriteArray(
            data_array, xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])
    target_band.FlushCache()
    target_band = None
    target_raster = None
    mask_band = None
    mask_raster = None
    try:
        shutil.rmtree(tmp_dir)
    except OSError:
        LOGGER.warn("Unable to delete temporary file %s", mask_raster_path)
def main():
    """Entry point."""
    parser = argparse.ArgumentParser(
        description=('Search for matching rasters to stitch into one big '
                     'raster.'))
    parser.add_argument('--target_projection_epsg',
                        required=True,
                        help='EPSG code of target projection')
    parser.add_argument(
        '--target_cell_size',
        required=True,
        help=('A single float indicating the desired square pixel size of '
              'the stitched raster.'))
    parser.add_argument(
        '--resample_method',
        default='near',
        help=('One of near|bilinear|cubic|cubicspline|lanczos|average|mode|'
              'max|min|med|q1|q3'))
    parser.add_argument('--target_raster_path',
                        required=True,
                        help='Path to target raster.')
    parser.add_argument('--raster_list',
                        nargs='+',
                        help='List of rasters or wildcards to stitch.')
    parser.add_argument(
        '--raster_pattern',
        nargs=2,
        help=('Recursive directory search for raster pattern such that '
              'the first argument is the directory to search and the second '
              'is the filename pattern.'))
    parser.add_argument(
        '--overlap_algorithm',
        default='replace',
        help=('can be one of etch|replace|add, default is replace'))
    parser.add_argument(
        '--_n_limit',
        type=int,
        help=('limit the number of stitches to this number, default is to '
              'stitch all found rasters'))

    parser.add_argument(
        '--area_weight_m2_to_wgs84',
        action='store_true',
        help=('if true, rescales values to be proportional to area change '
              'for wgs84 coordinates'))

    args = parser.parse_args()

    # exactly one of the two search options must be provided
    if bool(args.raster_list) == bool(args.raster_pattern):
        raise ValueError(
            'exactly one of --raster_list or --raster_pattern must be '
            'specified: \n'
            f'args.raster_list={args.raster_list}\n'
            f'args.raster_pattern={args.raster_pattern}\n')

    LOGGER.info('searching for matching files')
    if args.raster_list:
        raster_path_list = [
            raster_path for raster_glob in args.raster_list
            for raster_path in glob.glob(raster_glob)]
    else:
        base_dir = args.raster_pattern[0]
        file_pattern = args.raster_pattern[1]
        LOGGER.info(f'searching {base_dir} for {file_pattern}')

        raster_path_list = list(
            itertools.islice((raster_path for walk_info in os.walk(base_dir)
                              for raster_path in glob.glob(
                                  os.path.join(walk_info[0], file_pattern))),
                             0, args._n_limit))
        LOGGER.info(f'found {len(raster_path_list)} files that matched')

    target_projection = osr.SpatialReference()
    target_projection.ImportFromEPSG(int(args.target_projection_epsg))

    if len(raster_path_list) == 0:
        raise RuntimeError(
            'no rasters were found matching the given --raster_list or '
            '--raster_pattern arguments')

    LOGGER.info('calculating target bounding box')
    target_bounding_box_list = []
    raster_path_set = set()
    for raster_path in raster_path_list:
        if raster_path in raster_path_set:
            LOGGER.warning(f'{raster_path} already scheduled')
            continue
        raster_path_set.add(raster_path)
        raster_info = pygeoprocessing.get_raster_info(raster_path)
        bounding_box = raster_info['bounding_box']
        target_bounding_box = pygeoprocessing.transform_bounding_box(
            bounding_box, raster_info['projection_wkt'],
            target_projection.ExportToWkt())
        target_bounding_box_list.append(target_bounding_box)

    target_bounding_box = pygeoprocessing.merge_bounding_box_list(
        target_bounding_box_list, 'union')

    gtiff_driver = gdal.GetDriverByName('GTiff')

    n_cols = int(
        math.ceil((target_bounding_box[2] - target_bounding_box[0]) /
                  float(args.target_cell_size)))
    n_rows = int(
        math.ceil((target_bounding_box[3] - target_bounding_box[1]) /
                  float(args.target_cell_size)))

    geotransform = (target_bounding_box[0], float(args.target_cell_size), 0.0,
                    target_bounding_box[3], 0.0, -float(args.target_cell_size))

    target_raster = gtiff_driver.Create(
        os.path.join('.', args.target_raster_path),
        n_cols,
        n_rows,
        1,
        raster_info['datatype'],
        options=('TILED=YES', 'BIGTIFF=YES', 'BLOCKXSIZE=256',
                 'BLOCKYSIZE=256', 'COMPRESS=LZW', 'SPARSE_OK=TRUE'))
    target_raster.SetProjection(target_projection.ExportToWkt())
    target_raster.SetGeoTransform(geotransform)
    target_band = target_raster.GetRasterBand(1)
    target_band.SetNoDataValue(raster_info['nodata'][0])
    target_band = None
    target_raster = None

    LOGGER.info('calling stitch_rasters')
    pygeoprocessing.stitch_rasters(
        [(path, 1) for path in raster_path_list],
        [args.resample_method] * len(raster_path_list),
        (args.target_raster_path, 1),
        overlap_algorithm=args.overlap_algorithm,
        area_weight_m2_to_wgs84=args.area_weight_m2_to_wgs84)

    LOGGER.debug('build overviews...')
    ecoshard.build_overviews(args.target_raster_path)
    LOGGER.info('all done')
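A hypothetical invocation of this entry point (script name, EPSG code, cell size, and paths are all assumptions):

    python stitch_rasters.py \
        --target_projection_epsg 4326 \
        --target_cell_size 0.002777778 \
        --raster_pattern ./data "*_carbon.tif" \
        --target_raster_path stitched_carbon.tif \
        --overlap_algorithm add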