Example #1
def _compare_with_make_stack(stack_trend_file, pgp_trend_file, diff_file):
    """Compare trend of ``make_regression`` with trend from ``make_stack.py``.

    Comparison is done as a per-pixel diff on any pixel pairs where both
    pixels are not nodata.  If either pixel in a pixel stack is nodata, the
    stack is ignored and nodata is returned for that pixel value.

    The diff looks like this::

        diff_file = stack_trend_file - pgp_trend_file

    Parameters:
        stack_trend_file (string): The path to the trend raster output of
            ``make_stack.py`` (usually named ``stack_trend.tif``).  This
            file must exist on disk.
        pgp_trend_file (string): The path to the trend raster output from
            ``make_regression()``, also in this module.  This file must
            exist on disk.
        diff_file (string): The path to where the difference raster should be
            saved.

    Returns:
        ``None``"""
    stack_nodata = pygeoprocessing.get_nodata_from_uri(stack_trend_file)
    pgp_nodata = pygeoprocessing.get_nodata_from_uri(pgp_trend_file)

    def _diff(stack_trend, pgp_trend):
        """Calculate a diff between two matrices, ignoring nodata.

        Parameters:
            stack_trend (numpy.ndarray): Array of values from the stack trend
                raster.
            pgp_trend (numpy.ndarray): Array of values from the pygeoprocessing
                trend raster.

        Returns:
            ``numpy.ndarray`` of the difference between ``stack_trend`` and
            ``pgp_trend``"""
        valid_mask = ((stack_trend != stack_nodata) &
                      (pgp_trend != pgp_nodata))
        out_array = numpy.empty_like(stack_trend)
        out_array[:] = -9999
        out_array[valid_mask] = stack_trend[valid_mask] - pgp_trend[valid_mask]
        return out_array

    pygeoprocessing.vectorize_datasets(
        dataset_uri_list=[stack_trend_file, pgp_trend_file],
        dataset_pixel_op=_diff,
        dataset_out_uri=diff_file,
        datatype_out=gdal.GDT_Float32,
        nodata_out=-9999,
        pixel_size_out=32.,
        bounding_box_mode='intersection',
        vectorize_op=False,
        datasets_are_pre_aligned=False)
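The nodata-masking pattern inside `_diff` can be tried on its own with plain numpy arrays; a minimal sketch with arbitrary values (not part of the original module):

import numpy

stack_nodata = -9999
pgp_nodata = -9999

stack = numpy.array([[1.0, 2.0], [-9999.0, 4.0]])
pgp = numpy.array([[0.5, -9999.0], [3.0, 4.0]])

# only pixels where both inputs are valid receive a difference; the rest
# stay at the -9999 nodata value
valid_mask = (stack != stack_nodata) & (pgp != pgp_nodata)
out = numpy.empty_like(stack)
out[:] = -9999
out[valid_mask] = stack[valid_mask] - pgp[valid_mask]
print(out)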
Example #2
def main():
    src_ds = 'landuse_cur_200m.tif'
    dest_ds = 'gaussian.tif'

    nodata = pygeoprocessing.get_nodata_from_uri(src_ds)

    pygeoprocessing.gaussian_filter_dataset_uri(
        src_ds, 4, dest_ds, nodata)
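pygeoprocessing's Gaussian filter wraps a standard Gaussian convolution; the effect of the second argument (the sigma, in pixels) can be previewed on an in-memory array with scipy. This is a rough sketch of the smoothing idea, not the library's internal implementation:

import numpy
from scipy.ndimage import gaussian_filter

# a single sharp spike in a small array
array = numpy.zeros((9, 9), dtype=numpy.float32)
array[4, 4] = 100.0

# sigma=4 pixels, matching the second argument in the example above
smoothed = gaussian_filter(array, sigma=4)
print(smoothed.max())  # the spike's energy is spread over its neighbours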
Example #3
def _map_distance_from_forest_edge(lulc_uri, biophysical_table_uri,
                                   edge_distance_uri):
    """Generates a raster of forest edge distances where each pixel is the
    distance to the edge of the forest in meters.

    Parameters:
        lulc_uri (string): path to the landcover raster that contains integer
            landcover codes
        biophysical_table_uri (string): a path to a csv table that indexes
            landcover codes to forest type, contains at least the fields
            'lucode' (landcover integer code) and 'is_forest' (0 or 1 depending
            on landcover code type)
        edge_distance_uri (string): path to output raster where each pixel
            contains the euclidean pixel distance to the nearest forest edge
            on all non-nodata values of lulc_uri

    Returns:
        None"""

    # Build a list of forest lucodes
    biophysical_table = pygeoprocessing.get_lookup_from_table(
        biophysical_table_uri, 'lucode')
    forest_codes = [
        lucode for (lucode, ludata) in biophysical_table.iteritems()
        if int(ludata['is_forest']) == 1
    ]

    # Make a raster where 1 is non-forest landcover types and 0 is forest
    forest_mask_nodata = 255
    lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_uri)

    def mask_non_forest_op(lulc_array):
        """converts forest lulc codes to 1"""
        non_forest_mask = ~numpy.in1d(lulc_array.flatten(),
                                      forest_codes).reshape(lulc_array.shape)
        nodata_mask = lulc_array == lulc_nodata
        return numpy.where(nodata_mask, forest_mask_nodata, non_forest_mask)

    non_forest_mask_uri = pygeoprocessing.temporary_filename()
    out_pixel_size = pygeoprocessing.get_cell_size_from_uri(lulc_uri)
    pygeoprocessing.vectorize_datasets([lulc_uri],
                                       mask_non_forest_op,
                                       non_forest_mask_uri,
                                       gdal.GDT_Byte,
                                       forest_mask_nodata,
                                       out_pixel_size,
                                       "intersection",
                                       vectorize_op=False)

    # Do the distance transform on non-forest pixels
    pygeoprocessing.distance_transform_edt(non_forest_mask_uri,
                                           edge_distance_uri)

    # good practice to delete temporary files when we're done with them
    os.remove(non_forest_mask_uri)
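The `numpy.in1d` flatten/reshape trick used in `mask_non_forest_op` is easier to see in isolation; a small sketch with made-up landcover codes:

import numpy

forest_codes = [1, 2, 3]              # hypothetical lucodes with is_forest == 1
lulc = numpy.array([[1, 7], [3, 9]])

# True where the pixel is NOT a forest code, preserving the 2D shape
non_forest = ~numpy.in1d(lulc.flatten(), forest_codes).reshape(lulc.shape)
print(non_forest)
# [[False  True]
#  [False  True]]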
Example #4
def main():
    system = platform.platform()
    logfile_uri = 'md5_check_%s.log' % system
    logfile = open(logfile_uri, 'w')
    _write = lambda x: logfile.write(x + '\n')

    for base_raster in ['landuse_cur_200m.tif', 'gaussian.tif']:
        if base_raster == 'gaussian.tif':
            src_ds = 'landuse_cur_200m.tif'
            dest_ds = 'gaussian.tif'
            nodata = pygeoprocessing.get_nodata_from_uri(src_ds)
            pygeoprocessing.gaussian_filter_dataset_uri(
                src_ds, 4, dest_ds, nodata)

        _write(base_raster + '\n')
        base_nodata = pygeoprocessing.get_nodata_from_uri(base_raster)
        base_pixel_size = pygeoprocessing.get_cell_size_from_uri(base_raster)

        _write('System: %s' % system)
        _write('Python %s' % platform.python_version())
        _write('GDAL version: %s' % gdal.__version__)
        _write('numpy version: %s' % numpy.__version__)
        _write('scipy version: %s' % scipy.__version__)
        _write('base MD5sum: %s' % md5sum(base_raster))

        for gdal_type, gdal_type_label in GDAL_DTYPES.iteritems():
            if gdal_type_label in ['GDT_Unknown', 'GDT_TypeCount']:
                continue

            print gdal_type_label

            # convert the raster (via vectorize_datasets) to a new dtype
            new_uri = '%s.tif' % gdal_type_label
            pygeoprocessing.vectorize_datasets([base_raster], lambda x: x,
                new_uri, gdal_type, base_nodata, base_pixel_size, 'intersection')

            _write("%-15s: %s" % (gdal_type_label, md5sum(new_uri)))
        _write('\n')
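The `md5sum` helper called above is not shown in this listing; a minimal implementation that hashes a file in chunks could look like this (the name and signature are assumed from the calls above):

import hashlib

def md5sum(file_path, chunk_size=2 ** 20):
    """Return the hex MD5 digest of the file at ``file_path``."""
    digest = hashlib.md5()
    with open(file_path, 'rb') as opened_file:
        while True:
            chunk = opened_file.read(chunk_size)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()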
Example #5
    def test_raster_nodata_notset(self):
        """When nodata=None, a nodata value should not be set."""
        from pygeoprocessing.testing import create_raster_on_disk
        from pygeoprocessing.testing.sampledata import SRS_WILLAMETTE
        pixels = [numpy.array([[0]])]
        nodata = None
        reference = SRS_WILLAMETTE
        filename = pygeoprocessing.temporary_filename()
        create_raster_on_disk(
            pixels, reference.origin, reference.projection, nodata,
            reference.pixel_size(30), datatype='auto', filename=filename)

        set_nodata_value = pygeoprocessing.get_nodata_from_uri(filename)
        self.assertEqual(set_nodata_value, None)
    def test_get_nodata(self):
        """PGP.geoprocessing: Test nodata values get set and read."""
        pixel_matrix = numpy.ones((5, 5), numpy.int16)
        reference = sampledata.SRS_COLOMBIA
        for nodata in [5, 10, -5, 9999]:
            pygeoprocessing.testing.create_raster_on_disk(
                [pixel_matrix],
                reference.origin,
                reference.projection,
                nodata,
                reference.pixel_size(30),
                filename=self.raster_filename)

            raster_nodata = pygeoprocessing.get_nodata_from_uri(
                self.raster_filename)
            self.assertEqual(raster_nodata, nodata)
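Both tests rely on `get_nodata_from_uri` to read the nodata value back from disk; a rough GDAL-only equivalent (a sketch, not the pygeoprocessing implementation) looks like this:

from osgeo import gdal

def read_nodata(raster_path, band_index=1):
    """Return the nodata value of the given band, or None if not set."""
    raster = gdal.Open(raster_path)
    band = raster.GetRasterBand(band_index)
    nodata = band.GetNoDataValue()
    band = None   # dereference the band before closing the dataset
    raster = None
    return nodata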
def _sort_to_disk(dataset_uri, score_weight=1.0):
    """Return an iterable of non-nodata pixels in sorted order.

    Parameters:
        dataset_uri (string): a path to a floating point GDAL dataset
        score_weight (float): a number to multiply all values by, which can be
            used to reverse the order of the iteration if negative.

    Returns:
        an iterable that produces (value * score_weight, flat_index) in
        increasing sorted order by value * score_weight
    """
    def _read_score_index_from_disk(
            score_file_path, index_file_path):
        """Generator to yield a float/int value from the given filenames.

        Reads a block of data at a time so the files are not kept open
        between yields.

        score_file_path (string): a path to a file that has 32 bit floats
            packed consecutively
        index_file_path (string): a path to a file that has 32 bit ints
            packed consecutively

        Yields:
            next (score, index) tuple in the given score and index files.
        """
        try:
            score_buffer = ''
            index_buffer = ''
            file_offset = 0
            buffer_offset = 0  # initialize to 0 to trigger the first load

            # round the buffer size down to a multiple of 4 so reads align
            # with the packed 4-byte records
            read_buffer_size = int(math.sqrt(_BLOCK_SIZE))
            read_buffer_size = read_buffer_size - read_buffer_size % 4

            while True:
                if buffer_offset == len(score_buffer):
                    score_file = open(score_file_path, 'rb')
                    index_file = open(index_file_path, 'rb')
                    score_file.seek(file_offset)
                    index_file.seek(file_offset)

                    score_buffer = score_file.read(read_buffer_size)
                    index_buffer = index_file.read(read_buffer_size)
                    score_file.close()
                    index_file.close()

                    file_offset += read_buffer_size
                    buffer_offset = 0
                packed_score = score_buffer[buffer_offset:buffer_offset+4]
                packed_index = index_buffer[buffer_offset:buffer_offset+4]
                buffer_offset += 4
                if not packed_score:
                    break
                yield (struct.unpack('f', packed_score)[0],
                       struct.unpack('i', packed_index)[0])
        finally:
            # deletes the files when generator goes out of scope or ends
            os.remove(score_file_path)
            os.remove(index_file_path)

    def _sort_cache_to_iterator(
            index_cache, score_cache):
        """Flushe the current cache to a heap and return it.

        Parameters:
            index_cache (1d numpy.array): contains flat indexes to the
                score pixels `score_cache`
            score_cache (1d numpy.array): contains score pixels

        Returns:
            Iterable to visit scores/indexes in increasing score order.
        """
        # sort the whole bunch to disk
        score_file = tempfile.NamedTemporaryFile(delete=False)
        index_file = tempfile.NamedTemporaryFile(delete=False)

        sort_index = score_cache.argsort()
        score_cache = score_cache[sort_index]
        index_cache = index_cache[sort_index]
        for index in xrange(0, score_cache.size, _LARGEST_STRUCT_PACK):
            score_block = score_cache[index:index+_LARGEST_STRUCT_PACK]
            index_block = index_cache[index:index+_LARGEST_STRUCT_PACK]
            score_file.write(
                struct.pack('%sf' % score_block.size, *score_block))
            index_file.write(
                struct.pack('%si' % index_block.size, *index_block))

        score_file_path = score_file.name
        index_file_path = index_file.name
        score_file.close()
        index_file.close()

        return _read_score_index_from_disk(score_file_path, index_file_path)

    nodata = pygeoprocessing.get_nodata_from_uri(dataset_uri)
    nodata *= score_weight  # scale the nodata so they can be filtered out

    # This will be a list of file iterators we'll pass to heap.merge
    iters = []

    _, n_cols = pygeoprocessing.get_row_col_from_uri(dataset_uri)

    for scores_data, scores_block in pygeoprocessing.iterblocks(
            dataset_uri, largest_block=_BLOCK_SIZE):
        # flatten and scale the results
        scores_block = scores_block.flatten() * score_weight

        col_coords, row_coords = numpy.meshgrid(
            xrange(scores_data['xoff'], scores_data['xoff'] +
                   scores_data['win_xsize']),
            xrange(scores_data['yoff'], scores_data['yoff'] +
                   scores_data['win_ysize']))

        flat_indexes = (col_coords + row_coords * n_cols).flatten()

        sort_index = scores_block.argsort()
        sorted_scores = scores_block[sort_index]
        sorted_indexes = flat_indexes[sort_index]

        # search for nodata values so we can splice them out
        left_index = numpy.searchsorted(sorted_scores, nodata, side='left')
        right_index = numpy.searchsorted(
            sorted_scores, nodata, side='right')

        # remove nodata values
        score_cache = numpy.concatenate(
            (sorted_scores[0:left_index], sorted_scores[right_index::]))
        index_cache = numpy.concatenate(
            (sorted_indexes[0:left_index], sorted_indexes[right_index::]))

        iters.append(_sort_cache_to_iterator(index_cache, score_cache))

    return heapq.merge(*iters)
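The disk spill in `_sort_cache_to_iterator` and the read-back in `_read_score_index_from_disk` are just `struct` round trips of 32-bit floats and ints; a self-contained sketch of that round trip with made-up values:

import struct

scores = [0.5, 1.25, 3.0]
indexes = [10, 42, 7]

# pack the whole cache: '3f' = three 32-bit floats, '3i' = three 32-bit ints
packed_scores = struct.pack('%sf' % len(scores), *scores)
packed_indexes = struct.pack('%si' % len(indexes), *indexes)

# read back one (score, index) pair at a time, 4 bytes per value
for offset in range(0, len(packed_scores), 4):
    score = struct.unpack('f', packed_scores[offset:offset + 4])[0]
    index = struct.unpack('i', packed_indexes[offset:offset + 4])[0]
    print(score, index)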
def _convert_landscape(
        base_lulc_uri, replacement_lucode, area_to_convert,
        focal_landcover_codes, convertible_type_list, score_weight, n_steps,
        smooth_distance_from_edge_uri, output_landscape_raster_uri,
        stats_uri):
    """Expand replacement lucodes in relation to the focal lucodes.

    If `score_weight` is positive, conversion marches away from the focal
    types; if `score_weight` is negative, conversion marches toward the
    focal types.

    Parameters:
        base_lulc_uri (string): path to the landcover raster used as the base
            map onto which agriculture pixels will be converted
        replacement_lucode (int): agriculture landcover code type found in the
            raster at `base_lulc_uri`
        area_to_convert (float): area (Ha) to convert to agriculture
        focal_landcover_codes (list of int): landcover codes that are used to
            calculate proximity
        convertible_type_list (list of int): landcover codes that are allowable
            to be converted to agriculture
        score_weight (float): this value is used to multiply the distance from
            the focal landcover types when prioritizing which pixels in
            `convertible_type_list` are to be converted.  If negative,
            conversion occurs toward the focal types, if positive occurs away
            from the focal types.
        n_steps (int): number of steps to convert the landscape.  On each step
            the distance transform will be applied on the
            current value of the `focal_landcover_codes` pixels in
            `output_landscape_raster_uri`.  On the first step the distance
            is calculated from `base_lulc_uri`.
        smooth_distance_from_edge_uri (string): an intermediate output showing
            the pixel distance from the edge of the base landcover types
        output_landscape_raster_uri (string): an output raster that will
            contain the final fragmented forest layer.
        stats_uri (string): a path to an output csv that records the number
            type, and area of pixels converted in `output_landscape_raster_uri`

    Returns:
        None.
    """
    tmp_file_registry = {
        'non_base_mask': pygeoprocessing.temporary_filename(),
        'base_mask': pygeoprocessing.temporary_filename(),
        'gaussian_kernel': pygeoprocessing.temporary_filename(),
        'distance_from_base_mask_edge': pygeoprocessing.temporary_filename(),
        'distance_from_non_base_mask_edge':
            pygeoprocessing.temporary_filename(),
        'convertible_distances': pygeoprocessing.temporary_filename(),
        'smooth_distance_from_edge': pygeoprocessing.temporary_filename(),
        'distance_from_edge': pygeoprocessing.temporary_filename(),
    }
    # a sigma of 1.0 gives nice visual results to smooth pixel level artifacts
    # since a pixel is the 1.0 unit
    _make_gaussian_kernel_uri(1.0, tmp_file_registry['gaussian_kernel'])

    # create the output raster first as a copy of the base landcover so it can
    # be looped on for each step
    lulc_nodata = pygeoprocessing.get_nodata_from_uri(base_lulc_uri)
    pixel_size_out = pygeoprocessing.get_cell_size_from_uri(base_lulc_uri)
    mask_nodata = 2
    pygeoprocessing.vectorize_datasets(
        [base_lulc_uri], lambda x: x, output_landscape_raster_uri,
        gdal.GDT_Int32, lulc_nodata, pixel_size_out, "intersection",
        vectorize_op=False, datasets_are_pre_aligned=True)

    # convert everything furthest from edge for each of n_steps
    pixel_area_ha = (
            pygeoprocessing.get_cell_size_from_uri(base_lulc_uri)**2 / 10000.0)
    max_pixels_to_convert = int(math.ceil(area_to_convert / pixel_area_ha))
    convertible_type_nodata = -1
    pixels_left_to_convert = max_pixels_to_convert
    pixels_to_convert = max_pixels_to_convert / n_steps
    stats_cache = collections.defaultdict(int)

    # pylint complains when these are defined inside the loop
    invert_mask = None
    distance_nodata = None

    for step_index in xrange(n_steps):
        LOGGER.info('step %d of %d', step_index+1, n_steps)
        pixels_left_to_convert -= pixels_to_convert

        # Often the last step would overstep the number of pixels left to
        # convert; this check converts exactly the remaining amount
        if pixels_left_to_convert < 0:
            pixels_to_convert += pixels_left_to_convert

        # create distance transforms for inside and outside the base lulc codes
        LOGGER.info('create distance transform for current landcover')
        for invert_mask, mask_id, distance_id in [
            (False, 'non_base_mask', 'distance_from_non_base_mask_edge'),
            (True, 'base_mask', 'distance_from_base_mask_edge')]:

            def _mask_base_op(lulc_array):
                """Create a mask of valid non-base pixels only."""
                base_mask = numpy.in1d(
                    lulc_array.flatten(), focal_landcover_codes).reshape(
                    lulc_array.shape)
                if invert_mask:
                    base_mask = ~base_mask
                return numpy.where(
                    lulc_array == lulc_nodata, mask_nodata, base_mask)
            pygeoprocessing.vectorize_datasets(
                [output_landscape_raster_uri], _mask_base_op,
                tmp_file_registry[mask_id], gdal.GDT_Byte,
                mask_nodata, pixel_size_out, "intersection",
                vectorize_op=False, datasets_are_pre_aligned=True)

            # create distance transform for the current mask
            pygeoprocessing.distance_transform_edt(
                tmp_file_registry[mask_id], tmp_file_registry[distance_id])

        # combine inner and outer distance transforms into one
        distance_nodata = pygeoprocessing.get_nodata_from_uri(
            tmp_file_registry['distance_from_base_mask_edge'])

        def _combine_masks(base_distance_array, non_base_distance_array):
            """create a mask of valid non-base pixels only."""
            result = non_base_distance_array
            valid_base_mask = base_distance_array > 0.0
            result[valid_base_mask] = base_distance_array[valid_base_mask]
            return result
        pygeoprocessing.vectorize_datasets(
            [tmp_file_registry['distance_from_base_mask_edge'],
             tmp_file_registry['distance_from_non_base_mask_edge']],
            _combine_masks, tmp_file_registry['distance_from_edge'],
            gdal.GDT_Float32, distance_nodata, pixel_size_out, "intersection",
            vectorize_op=False, datasets_are_pre_aligned=True)

        # smooth the distance transform to avoid scanline artifacts
        pygeoprocessing.convolve_2d_uri(
            tmp_file_registry['distance_from_edge'],
            tmp_file_registry['gaussian_kernel'],
            smooth_distance_from_edge_uri)

        # turn inside and outside masks into a single mask
        def _mask_to_convertible_codes(distance_from_base_edge, lulc):
            """Mask out the distance transform to a set of lucodes."""
            convertible_mask = numpy.in1d(
                lulc.flatten(), convertible_type_list).reshape(lulc.shape)
            return numpy.where(
                convertible_mask, distance_from_base_edge,
                convertible_type_nodata)
        pygeoprocessing.vectorize_datasets(
            [smooth_distance_from_edge_uri, output_landscape_raster_uri],
            _mask_to_convertible_codes,
            tmp_file_registry['convertible_distances'], gdal.GDT_Float32,
            convertible_type_nodata, pixel_size_out, "intersection",
            vectorize_op=False, datasets_are_pre_aligned=True)

        LOGGER.info(
            'convert %d pixels to lucode %d', pixels_to_convert,
            replacement_lucode)
        _convert_by_score(
            tmp_file_registry['convertible_distances'], pixels_to_convert,
            output_landscape_raster_uri, replacement_lucode, stats_cache,
            score_weight)

    _log_stats(stats_cache, pixel_area_ha, stats_uri)
    for filename in tmp_file_registry.values():
        os.remove(filename)
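The area-to-pixel arithmetic at the top of `_convert_landscape` determines how many pixels each step converts; a worked sketch with assumed numbers (30 m pixels, 1000 ha target, 5 steps):

import math

pixel_size = 30.0                               # metres, assumed
area_to_convert = 1000.0                        # hectares, assumed
n_steps = 5

pixel_area_ha = pixel_size ** 2 / 10000.0       # 0.09 ha per pixel
max_pixels_to_convert = int(math.ceil(area_to_convert / pixel_area_ha))
pixels_per_step = max_pixels_to_convert // n_steps   # integer division, as above

print(pixel_area_ha, max_pixels_to_convert, pixels_per_step)
# 0.09 11112 2222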
def execute(args):
    """This function invokes the seasonal water yield model given
        URI inputs of files. It may write log, warning, or error messages to
        stdout.
    """

    alpha_m = float(fractions.Fraction(args['alpha_m']))
    beta_i = float(fractions.Fraction(args['beta_i']))
    gamma = float(fractions.Fraction(args['gamma']))

    try:
        file_suffix = args['results_suffix']
        if file_suffix != "" and not file_suffix.startswith('_'):
            file_suffix = '_' + file_suffix
    except KeyError:
        file_suffix = ''

    pygeoprocessing.geoprocessing.create_directories([args['workspace_dir']])

    qfi_uri = os.path.join(args['workspace_dir'], 'qf%s.tif' % file_suffix)
    cn_uri = os.path.join(args['workspace_dir'], 'cn%s.tif' % file_suffix)

    lulc_uri_aligned = pygeoprocessing.temporary_filename()
    dem_uri_aligned = pygeoprocessing.temporary_filename()

    pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri(
        args['lulc_uri'])

    LOGGER.info('Aligning and clipping dataset list')
    input_align_list = [args['lulc_uri'], args['dem_uri']]
    output_align_list = [lulc_uri_aligned, dem_uri_aligned]

    if not args['user_defined_recharge']:
        precip_uri_list = []
        et0_uri_list = []

        et0_dir_list = [
            os.path.join(args['et0_dir'], f) for f in os.listdir(args['et0_dir'])]
        precip_dir_list = [
            os.path.join(args['precip_dir'], f) for f in os.listdir(
                args['precip_dir'])]

        qf_monthly_uri_list = []
        for m_index in range(1, N_MONTHS + 1):
            qf_monthly_uri_list.append(
                os.path.join(
                    args['workspace_dir'], 'qf_%d%s.tif' %
                    (m_index, file_suffix)))

        for month_index in range(1, N_MONTHS + 1):
            month_file_match = re.compile(r'.*[^\d]%d\.[^.]+$' % month_index)

            for data_type, dir_list, uri_list in [
                    ('et0', et0_dir_list, et0_uri_list),
                    ('Precip', precip_dir_list, precip_uri_list)]:

                file_list = [x for x in dir_list if month_file_match.match(x)]
                if len(file_list) == 0:
                    raise ValueError(
                        "No %s found for month %d" % (data_type, month_index))
                if len(file_list) > 1:
                    raise ValueError(
                        "Ambiguous set of files found for month %d: %s" %
                        (month_index, file_list))
                uri_list.append(file_list[0])

        soil_group_uri_aligned = pygeoprocessing.temporary_filename()

        #pre align all the datasets
        precip_uri_aligned_list = [
            pygeoprocessing.geoprocessing.temporary_filename() for _ in
            range(len(precip_uri_list))]
        et0_uri_aligned_list = [
            pygeoprocessing.geoprocessing.temporary_filename() for _ in
            range(len(precip_uri_list))]
        input_align_list = (
            precip_uri_list + [args['soil_group_uri']] + et0_uri_list +
            input_align_list)
        output_align_list = (
            precip_uri_aligned_list + [soil_group_uri_aligned] +
            et0_uri_aligned_list + output_align_list)

    interpolate_list = ['nearest'] * len(input_align_list)
    align_index = 0
    if args['user_defined_recharge']:
        input_align_list.append(args['recharge_uri'])
        recharge_aligned_uri = (
            pygeoprocessing.geoprocessing.temporary_filename())
        output_align_list.append(recharge_aligned_uri)
        interpolate_list.append('nearest')
        align_index = len(interpolate_list) - 1

    pygeoprocessing.geoprocessing.align_dataset_list(
        input_align_list, output_align_list,
        interpolate_list,
        pixel_size, 'intersection', align_index, aoi_uri=args['aoi_uri'],
        assert_datasets_projected=True)

    flow_dir_uri = os.path.join(
        args['workspace_dir'], 'flow_dir%s.tif' % file_suffix)
    LOGGER.info('calc flow direction')
    pygeoprocessing.routing.flow_direction_d_inf(dem_uri_aligned, flow_dir_uri)

    flow_accum_uri = os.path.join(
        args['workspace_dir'], 'flow_accum%s.tif' % file_suffix)
    LOGGER.info('calc flow accumulation')
    pygeoprocessing.routing.flow_accumulation(
        flow_dir_uri, dem_uri_aligned, flow_accum_uri)
    stream_uri = os.path.join(
        args['workspace_dir'], 'stream%s.tif' % file_suffix)
    threshold_flow_accumulation = 1000
    pygeoprocessing.routing.stream_threshold(
        flow_accum_uri, threshold_flow_accumulation, stream_uri)

    LOGGER.info('calculating flow weights')
    outflow_weights_uri = os.path.join(
        args['workspace_dir'], 'outflow_weights%s.tif' % file_suffix)
    outflow_direction_uri = os.path.join(
        args['workspace_dir'], 'outflow_direction%s.tif' % file_suffix)
    seasonal_water_yield_core.calculate_flow_weights(
        flow_dir_uri, outflow_weights_uri, outflow_direction_uri)


    si_uri = os.path.join(args['workspace_dir'], 'si%s.tif' % file_suffix)

    biophysical_table = pygeoprocessing.geoprocessing.get_lookup_from_table(
        args['biophysical_table_uri'], 'lucode')

    kc_lookup = dict([
        (lucode, biophysical_table[lucode]['kc']) for lucode in
        biophysical_table])

    recharge_avail_uri = os.path.join(
        args['workspace_dir'], 'recharge_avail%s.tif' % file_suffix)
    r_sum_avail_uri = os.path.join(
        args['workspace_dir'], 'r_sum_avail%s.tif' % file_suffix)
    vri_uri = os.path.join(args['workspace_dir'], 'vri%s.tif' % file_suffix)
    aet_uri = os.path.join(args['workspace_dir'], 'aet%s.tif' % file_suffix)

    r_sum_avail_pour_uri = os.path.join(
        args['workspace_dir'], 'r_sum_avail_pour%s.tif' % file_suffix)
    sf_uri = os.path.join(
        args['workspace_dir'], 'sf%s.tif' % file_suffix)
    sf_down_uri = os.path.join(
        args['workspace_dir'], 'sf_down%s.tif' % file_suffix)
    qb_out_uri = os.path.join(
        args['workspace_dir'], 'qb%s.txt' % file_suffix)

    LOGGER.info('classifying kc')
    kc_uri = os.path.join(args['workspace_dir'], 'kc%s.tif' % file_suffix)
    pygeoprocessing.geoprocessing.reclassify_dataset_uri(
        lulc_uri_aligned, kc_lookup, kc_uri, gdal.GDT_Float32, -1)

    LOGGER.info('calculate slow flow')
    if not args['user_defined_recharge']:
        LOGGER.info('loading number of monthly events')
        rain_events_lookup = (
            pygeoprocessing.geoprocessing.get_lookup_from_table(
                args['rain_events_table_uri'], 'month'))
        n_events = dict([
            (month, rain_events_lookup[month]['events'])
            for month in rain_events_lookup])

        LOGGER.info('calculating curve number')
        soil_nodata = pygeoprocessing.get_nodata_from_uri(
            args['soil_group_uri'])
        map_soil_type_to_header = {
            1: 'cn_a',
            2: 'cn_b',
            3: 'cn_c',
            4: 'cn_d',
        }
        cn_nodata = -1
        lulc_to_soil = {}
        lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_uri_aligned)
        for soil_id, soil_column in map_soil_type_to_header.iteritems():
            lulc_to_soil[soil_id] = {
                'lulc_values': [],
                'cn_values': []
            }
            for lucode in sorted(biophysical_table.keys() + [lulc_nodata]):
                try:
                    lulc_to_soil[soil_id]['cn_values'].append(
                        biophysical_table[lucode][soil_column])
                    lulc_to_soil[soil_id]['lulc_values'].append(lucode)
                except KeyError:
                    if lucode == lulc_nodata:
                        lulc_to_soil[soil_id]['lulc_values'].append(lucode)
                        lulc_to_soil[soil_id]['cn_values'].append(cn_nodata)
                    else:
                        raise
            lulc_to_soil[soil_id]['lulc_values'] = (
                numpy.array(lulc_to_soil[soil_id]['lulc_values'],
                        dtype=numpy.int32))
            lulc_to_soil[soil_id]['cn_values'] = (
                numpy.array(lulc_to_soil[soil_id]['cn_values'],
                        dtype=numpy.float32))

        def cn_op(lulc_array, soil_group_array):
            """map lulc code and soil to a curve number"""
            cn_result = numpy.empty(lulc_array.shape)
            cn_result[:] = cn_nodata
            for soil_group_id in numpy.unique(soil_group_array):
                if soil_group_id == soil_nodata:
                    continue
                current_soil_mask = (soil_group_array == soil_group_id)
                index = numpy.digitize(
                    lulc_array.ravel(),
                    lulc_to_soil[soil_group_id]['lulc_values'], right=True)
                cn_values = (
                    lulc_to_soil[soil_group_id]['cn_values'][index]).reshape(
                        lulc_array.shape)
                cn_result[current_soil_mask] = cn_values[current_soil_mask]
            return cn_result

        cn_nodata = -1
        pygeoprocessing.vectorize_datasets(
            [lulc_uri_aligned, soil_group_uri_aligned], cn_op, cn_uri,
            gdal.GDT_Float32, cn_nodata, pixel_size, 'intersection',
            vectorize_op=False, datasets_are_pre_aligned=True)

        LOGGER.info('calculate quick flow')
        calculate_quick_flow(
            precip_uri_aligned_list,
            lulc_uri_aligned, cn_uri, n_events, stream_uri, qfi_uri,
            qf_monthly_uri_list, si_uri)

        recharge_uri = os.path.join(
            args['workspace_dir'], 'recharge%s.tif' % file_suffix)
        seasonal_water_yield_core.calculate_recharge(
            precip_uri_aligned_list, et0_uri_aligned_list, qf_monthly_uri_list,
            flow_dir_uri, outflow_weights_uri, outflow_direction_uri,
            dem_uri_aligned, lulc_uri_aligned, kc_lookup, alpha_m, beta_i,
            gamma, stream_uri, recharge_uri, recharge_avail_uri,
            r_sum_avail_uri, aet_uri, kc_uri)
    else:
        recharge_uri = recharge_aligned_uri
        recharge_nodata = pygeoprocessing.geoprocessing.get_nodata_from_uri(
            recharge_uri)
        def calc_recharge_avail(recharge_array):
            recharge_threshold = recharge_array * gamma
            recharge_threshold[recharge_threshold < 0] = 0.0
            return numpy.where(
                recharge_array != recharge_nodata,
                recharge_threshold, recharge_nodata)

        #calc recharge avail
        pygeoprocessing.geoprocessing.vectorize_datasets(
            [recharge_aligned_uri], calc_recharge_avail, recharge_avail_uri,
            gdal.GDT_Float32, recharge_nodata, pixel_size, 'intersection',
            vectorize_op=False, datasets_are_pre_aligned=True)
        #calc r_sum_avail with flux accumulation
        loss_uri = pygeoprocessing.geoprocessing.temporary_filename()
        zero_absorption_source_uri = (
            pygeoprocessing.geoprocessing.temporary_filename())
        pygeoprocessing.make_constant_raster_from_base_uri(
            dem_uri_aligned, 0.0, zero_absorption_source_uri)

        pygeoprocessing.routing.route_flux(
            flow_dir_uri, dem_uri_aligned, recharge_avail_uri,
            zero_absorption_source_uri, loss_uri, r_sum_avail_uri, 'flux_only',
            include_source=False)

    # calculate Qb as the mean of recharge_avail over the aoi
    qb_results = pygeoprocessing.geoprocessing.aggregate_raster_values_uri(
        recharge_avail_uri, args['aoi_uri'])

    qb_result = qb_results.total[9999] / qb_results.n_pixels[9999]
    #9999 is the value used to index fields if no shapefile ID is provided
    qb_file = open(qb_out_uri, 'w')
    qb_file.write("%f\n" % qb_result)
    qb_file.close()
    LOGGER.info("Qb = %f", qb_result)

    pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri(
        recharge_uri)
    ri_nodata = pygeoprocessing.geoprocessing.get_nodata_from_uri(recharge_uri)

    def vri_op(ri_array):
        """calc vri index"""
        return numpy.where(
            ri_array != ri_nodata, ri_array / qb_result, ri_nodata)

    pygeoprocessing.geoprocessing.vectorize_datasets(
        [recharge_uri], vri_op, vri_uri,
        gdal.GDT_Float32, ri_nodata, pixel_size, 'intersection',
        vectorize_op=False, datasets_are_pre_aligned=True)

    LOGGER.info('calculating r_sum_avail_pour')
    seasonal_water_yield_core.calculate_r_sum_avail_pour(
        r_sum_avail_uri, outflow_weights_uri, outflow_direction_uri,
        r_sum_avail_pour_uri)

    LOGGER.info('calculating slow flow')
    print dem_uri_aligned, recharge_avail_uri, r_sum_avail_uri,\
        r_sum_avail_pour_uri, outflow_direction_uri, outflow_weights_uri,\
        stream_uri, sf_uri, sf_down_uri

    seasonal_water_yield_core.route_sf(
        dem_uri_aligned, recharge_avail_uri, r_sum_avail_uri,
        r_sum_avail_pour_uri, outflow_direction_uri, outflow_weights_uri,
        stream_uri, sf_uri, sf_down_uri)

    LOGGER.info('  (\\w/)  SWY Complete!')
    LOGGER.info('  (..  \\ ')
    LOGGER.info(' _/  )  \\______')
    LOGGER.info('(oo /\'\\        )`,')
    LOGGER.info(' `--\' (v  __( / ||')
    LOGGER.info('       |||  ||| ||')
    LOGGER.info('      //_| //_|')
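Collecting the keys that `execute` actually reads above, a hypothetical `args` dictionary for a run without user-defined recharge might look like the following; every path and value is a placeholder:

args = {
    'workspace_dir': 'swy_workspace',
    'results_suffix': 'demo',
    'alpha_m': '1/12',                  # parsed with fractions.Fraction
    'beta_i': '1.0',
    'gamma': '1.0',
    'lulc_uri': 'lulc.tif',
    'dem_uri': 'dem.tif',
    'aoi_uri': 'watershed.shp',
    'biophysical_table_uri': 'biophysical.csv',
    'rain_events_table_uri': 'rain_events.csv',
    'et0_dir': 'et0_rasters',           # one raster per month
    'precip_dir': 'precip_rasters',     # one raster per month
    'soil_group_uri': 'soil_groups.tif',
    'user_defined_recharge': False,     # True would require 'recharge_uri'
}
# execute(args)  # would run the full workflow shown above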
Example #10
def _calc_cost_of_per_hectare_inputs(vars_dict, crop, lulc_raster):
    '''
    CostPerHectareInputTotal_crop = Mask_raster * CostPerHectare_input *
        ha_per_cell
    '''

    # Determine the crop lucode based on its name
    crop_lucode = None
    for lucode, luname in vars_dict['crop_lookup_dict'].iteritems():
        if luname == crop:
            crop_lucode = lucode
            continue

    lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_raster.uri)
    economics_table_crop = vars_dict['economics_table_dict'][crop]
    datatype_out = gdal.GDT_Float32
    nodata_out = NODATA_FLOAT
    pixel_size_out = pygeoprocessing.get_cell_size_from_uri(lulc_raster.uri)
    ha_per_m2 = 0.0001
    cell_area_ha = pixel_size_out**2 * ha_per_m2

    # The scalar cost is identical for all crop pixels of the current class,
    # and is based on the presence or absence of columns in the user-provided
    # economics table.  We only need to calculate this once.
    cost_scalar = 0.0
    for key in ['cost_labor_per_ha', 'cost_machine_per_ha', 'cost_seed_per_ha', 'cost_irrigation_per_ha']:
        try:
            cost_scalar += (economics_table_crop[key] * cell_area_ha)
        except KeyError:
            LOGGER.warning('Key missing from economics table: %s', key)

    def _calculate_cost(lulc_matrix):
        """
        Calculate the total cost on a single pixel.

        <pseudocode>
            If lulc_pixel is nodata:
                return nodata
            else:
                if lulc_pixel is of our crop type:
                    return the cost of this crop (in cost_scalar, above)
                else:
                    return 0.0
        </pseudocode>
        """
        return np.where(lulc_matrix == lulc_nodata, nodata_out,
                        np.where(lulc_matrix == crop_lucode, cost_scalar, 0.0))

    new_raster_uri = pygeoprocessing.geoprocessing.temporary_filename()
    pygeoprocessing.vectorize_datasets(
        [lulc_raster.uri],
        _calculate_cost,
        new_raster_uri,
        datatype_out,
        nodata_out,
        pixel_size_out,
        bounding_box_mode='intersection',
        vectorize_op=False,
        datasets_are_pre_aligned=True
    )

    return Raster.from_file(new_raster_uri, 'GTiff')
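A quick worked sketch of the per-pixel cost scalar computed in the loop above, with assumed table values (a missing column is skipped with a warning, as in the original):

pixel_size = 100.0                          # metres, assumed
ha_per_m2 = 0.0001
cell_area_ha = pixel_size ** 2 * ha_per_m2  # 1.0 ha per cell

economics_table_crop = {                    # illustrative values only
    'cost_labor_per_ha': 200.0,
    'cost_machine_per_ha': 50.0,
    'cost_seed_per_ha': 30.0,
    # 'cost_irrigation_per_ha' intentionally absent
}

cost_scalar = 0.0
for key in ['cost_labor_per_ha', 'cost_machine_per_ha',
            'cost_seed_per_ha', 'cost_irrigation_per_ha']:
    try:
        cost_scalar += economics_table_crop[key] * cell_area_ha
    except KeyError:
        pass                                # the model logs a warning here

print(cost_scalar)                          # 280.0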
Example #11
def execute(args):
    """Main entry point for GLOBIO model.

        The model operates in two modes.  Mode (a) generates a landcover map
            based on a base landcover map and information about crop yields,
            infrastructure, and more.  Mode (b) assumes the globio landcover
            map is generated.  These modes are used below to describe input
            parameters.

        args['workspace_dir'] - (string) output directory for intermediate,
            temporary, and final files
        args['predefined_globio'] - (boolean) if True then "mode (b)" else
            "mode (a)"
        args['results_suffix'] - (optional) (string) string to append to any
            output files
        args['lulc_uri'] - (string) used in "mode (a)" path to a base landcover
            map with integer codes
        args['lulc_to_globio_table_uri'] - (string) used in "mode (a)" path to
            table that translates the land-cover args['lulc_uri'] to
            intermediate GLOBIO classes, from which they will be further
            differentiated using the additional data in the model.

                'lucode': Land use and land cover class code of the dataset
                    used. LULC codes match the 'values' column in the LULC
                    raster of mode (b) and must be numeric and unique.
                'globio_lucode': The LULC code corresponding to the GLOBIO class
                    to which it should be converted, using intermediate codes
                    described in the example below.

        args['infrastructure_dir'] - (string) used in "mode (a)" a path to a
            folder containing maps of any forms of infrastructure to
            consider in the calculation of MSAI. These data may be in either
            raster or vector format.
        args['pasture_uri'] - (string) used in "mode (a)" path to pasture raster
        args['potential_vegetation_uri'] - (string) used in "mode (a)" path to
            potential vegetation raster
        args['intensification_uri'] - (string) used in "mode (a)" a path to
            intensification raster
        args['pasture_threshold'] - (float) used in "mode (a)"
        args['intensification_threshold'] - (float) used in "mode (a)"
        args['primary_threshold'] - (float) used in "mode (a)"
        args['msa_parameters_uri'] - (string) path to MSA classification
            parameters
        args['aoi_uri'] - (string) (optional) if it exists then final MSA raster
            is summarized by AOI
        args['globio_lulc_uri'] - (string) used in "mode (b)" path to predefined
            globio raster.
    """

    msa_parameter_table = load_msa_parameter_table(args['msa_parameters_uri'])

    # append a _ to the suffix if it's not empty and doesn't already have one
    try:
        file_suffix = args['results_suffix']
        if file_suffix != "" and not file_suffix.startswith('_'):
            file_suffix = '_' + file_suffix
    except KeyError:
        file_suffix = ''

    #create working directories
    output_dir = os.path.join(args['workspace_dir'], 'output')
    intermediate_dir = os.path.join(args['workspace_dir'], 'intermediate')
    tmp_dir = os.path.join(args['workspace_dir'], 'tmp')

    pygeoprocessing.geoprocessing.create_directories(
        [output_dir, intermediate_dir, tmp_dir])

    #the cell size should be based on the landcover map
    if not args['predefined_globio']:
        out_pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri(
            args['lulc_uri'])
        globio_lulc_uri = _calculate_globio_lulc_map(args, file_suffix,
                                                     intermediate_dir, tmp_dir,
                                                     out_pixel_size)
    else:
        out_pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri(
            args['globio_lulc_uri'])
        LOGGER.info('no need to calculate GLOBIO LULC because it is passed in')
        globio_lulc_uri = args['globio_lulc_uri']

    globio_nodata = pygeoprocessing.get_nodata_from_uri(globio_lulc_uri)

    #load the infrastructure layers from disk
    infrastructure_filenames = []
    infrastructure_nodata_list = []
    for root_directory, _, filename_list in os.walk(
            args['infrastructure_dir']):

        for filename in filename_list:
            if filename.lower().endswith(".tif"):
                infrastructure_filenames.append(
                    os.path.join(root_directory, filename))
                infrastructure_nodata_list.append(
                    pygeoprocessing.geoprocessing.get_nodata_from_uri(
                        infrastructure_filenames[-1]))
            if filename.lower().endswith(".shp"):
                infrastructure_tmp_raster = (os.path.join(
                    tmp_dir, os.path.basename(filename.lower() + ".tif")))
                pygeoprocessing.geoprocessing.new_raster_from_base_uri(
                    globio_lulc_uri,
                    infrastructure_tmp_raster,
                    'GTiff',
                    -1.0,
                    gdal.GDT_Int32,
                    fill_value=0)
                pygeoprocessing.geoprocessing.rasterize_layer_uri(
                    infrastructure_tmp_raster,
                    os.path.join(root_directory, filename),
                    burn_values=[1],
                    option_list=["ALL_TOUCHED=TRUE"])
                infrastructure_filenames.append(infrastructure_tmp_raster)
                infrastructure_nodata_list.append(
                    pygeoprocessing.geoprocessing.get_nodata_from_uri(
                        infrastructure_filenames[-1]))

    if len(infrastructure_filenames) == 0:
        raise ValueError(
            "infrastructure directory didn't have any GeoTIFFS or "
            "Shapefiles at %s", args['infrastructure_dir'])

    infrastructure_nodata = -1
    infrastructure_uri = os.path.join(
        intermediate_dir, 'combined_infrastructure%s.tif' % file_suffix)

    def _collapse_infrastructure_op(*infrastructure_array_list):
        """Combines all input infrastructure into a single map where if any
            pixel on the stack is 1 gets passed through, any nodata pixel
            masks out all of them"""
        nodata_mask = (
            infrastructure_array_list[0] == infrastructure_nodata_list[0])
        infrastructure_result = infrastructure_array_list[0] > 0
        for index in range(1, len(infrastructure_array_list)):
            current_nodata = (infrastructure_array_list[index] ==
                              infrastructure_nodata_list[index])

            infrastructure_result = (infrastructure_result | (
                (infrastructure_array_list[index] > 0) & ~current_nodata))

            nodata_mask = (nodata_mask & current_nodata)

        return numpy.where(nodata_mask, infrastructure_nodata,
                           infrastructure_result)

    LOGGER.info('collapse infrastructure into one raster')
    pygeoprocessing.geoprocessing.vectorize_datasets(
        infrastructure_filenames,
        _collapse_infrastructure_op,
        infrastructure_uri,
        gdal.GDT_Byte,
        infrastructure_nodata,
        out_pixel_size,
        "intersection",
        dataset_to_align_index=0,
        assert_datasets_projected=False,
        vectorize_op=False)

    #calc_msa_f
    primary_veg_mask_uri = os.path.join(tmp_dir,
                                        'primary_veg_mask%s.tif' % file_suffix)
    primary_veg_mask_nodata = -1

    def _primary_veg_mask_op(lulc_array):
        """masking out natural areas"""
        nodata_mask = lulc_array == globio_nodata
        result = (lulc_array == 1)
        return numpy.where(nodata_mask, primary_veg_mask_nodata, result)

    LOGGER.info("create mask of primary veg areas")
    pygeoprocessing.geoprocessing.vectorize_datasets(
        [globio_lulc_uri],
        _primary_veg_mask_op,
        primary_veg_mask_uri,
        gdal.GDT_Int32,
        primary_veg_mask_nodata,
        out_pixel_size,
        "intersection",
        dataset_to_align_index=0,
        assert_datasets_projected=False,
        vectorize_op=False)

    LOGGER.info('gaussian filter primary veg')
    sigma = 9.0
    gaussian_kernel_uri = os.path.join(tmp_dir,
                                       'gaussian_kernel%s.tif' % file_suffix)
    make_gaussian_kernel_uri(sigma, gaussian_kernel_uri)
    smoothed_primary_veg_mask_uri = os.path.join(
        tmp_dir, 'smoothed_primary_veg_mask%s.tif' % file_suffix)
    pygeoprocessing.geoprocessing.convolve_2d_uri(
        primary_veg_mask_uri, gaussian_kernel_uri,
        smoothed_primary_veg_mask_uri)

    primary_veg_smooth_uri = os.path.join(
        intermediate_dir, 'primary_veg_smooth%s.tif' % file_suffix)

    def _primary_veg_smooth_op(primary_veg_mask_array,
                               smoothed_primary_veg_mask):
        """mask out ffqi only where there's an ffqi"""
        return numpy.where(primary_veg_mask_array != primary_veg_mask_nodata,
                           primary_veg_mask_array * smoothed_primary_veg_mask,
                           primary_veg_mask_nodata)

    LOGGER.info('calculate primary_veg_smooth')
    pygeoprocessing.geoprocessing.vectorize_datasets(
        [primary_veg_mask_uri, smoothed_primary_veg_mask_uri],
        _primary_veg_smooth_op,
        primary_veg_smooth_uri,
        gdal.GDT_Float32,
        primary_veg_mask_nodata,
        out_pixel_size,
        "intersection",
        dataset_to_align_index=0,
        assert_datasets_projected=False,
        vectorize_op=False)

    msa_nodata = -1

    msa_f_table = msa_parameter_table['msa_f']
    msa_f_values = sorted(msa_f_table)

    def _msa_f_op(primary_veg_smooth):
        """calcualte msa fragmentation"""
        nodata_mask = primary_veg_mask_nodata == primary_veg_smooth

        msa_f = numpy.empty(primary_veg_smooth.shape)

        for value in reversed(msa_f_values):
            #special case if it's a > or < value
            if value == '>':
                msa_f[primary_veg_smooth > msa_f_table['>'][0]] = (
                    msa_f_table['>'][1])
            elif value == '<':
                continue
            else:
                msa_f[primary_veg_smooth <= value] = msa_f_table[value]

        if '<' in msa_f_table:
            msa_f[primary_veg_smooth < msa_f_table['<'][0]] = (
                msa_f_table['<'][1])

        msa_f[nodata_mask] = msa_nodata

        return msa_f

    LOGGER.info('calculate msa_f')
    msa_f_uri = os.path.join(output_dir, 'msa_f%s.tif' % file_suffix)
    pygeoprocessing.geoprocessing.vectorize_datasets(
        [primary_veg_smooth_uri],
        _msa_f_op,
        msa_f_uri,
        gdal.GDT_Float32,
        msa_nodata,
        out_pixel_size,
        "intersection",
        dataset_to_align_index=0,
        assert_datasets_projected=False,
        vectorize_op=False)

    #calc_msa_i
    msa_f_values = sorted(msa_f_table)
    msa_i_other_table = msa_parameter_table['msa_i_other']
    msa_i_primary_table = msa_parameter_table['msa_i_primary']
    msa_i_other_values = sorted(msa_i_other_table)
    msa_i_primary_values = sorted(msa_i_primary_table)

    def _msa_i_op(lulc_array, distance_to_infrastructure):
        """calculate msa infrastructure"""

        distance_to_infrastructure *= out_pixel_size  #convert to meters
        msa_i_primary = numpy.empty(lulc_array.shape)
        msa_i_other = numpy.empty(lulc_array.shape)

        for value in reversed(msa_i_primary_values):
            #special case if it's a > or < value
            if value == '>':
                msa_i_primary[distance_to_infrastructure >
                              msa_i_primary_table['>'][0]] = (
                                  msa_i_primary_table['>'][1])
            elif value == '<':
                continue
            else:
                msa_i_primary[distance_to_infrastructure <= value] = (
                    msa_i_primary_table[value])

        if '<' in msa_i_primary_table:
            msa_i_primary[distance_to_infrastructure < msa_i_primary_table['<']
                          [0]] = (msa_i_primary_table['<'][1])

        for value in reversed(msa_i_other_values):
            #special case if it's a > or < value
            if value == '>':
                msa_i_other[distance_to_infrastructure > msa_i_other_table['>']
                            [0]] = (msa_i_other_table['>'][1])
            elif value == '<':
                continue
            else:
                msa_i_other[distance_to_infrastructure <= value] = (
                    msa_i_other_table[value])

        if '<' in msa_i_other_table:
            msa_i_other[distance_to_infrastructure < msa_i_other_table['<']
                        [0]] = (msa_i_other_table['<'][1])

        msa_i = numpy.where((lulc_array >= 1) & (lulc_array <= 5),
                            msa_i_primary, 1.0)
        msa_i = numpy.where((lulc_array >= 6) & (lulc_array <= 12),
                            msa_i_other, msa_i)
        return msa_i

    LOGGER.info('calculate msa_i')
    distance_to_infrastructure_uri = os.path.join(
        intermediate_dir, 'distance_to_infrastructure%s.tif' % file_suffix)
    pygeoprocessing.geoprocessing.distance_transform_edt(
        infrastructure_uri, distance_to_infrastructure_uri)
    msa_i_uri = os.path.join(output_dir, 'msa_i%s.tif' % file_suffix)
    pygeoprocessing.geoprocessing.vectorize_datasets(
        [globio_lulc_uri, distance_to_infrastructure_uri],
        _msa_i_op,
        msa_i_uri,
        gdal.GDT_Float32,
        msa_nodata,
        out_pixel_size,
        "intersection",
        dataset_to_align_index=0,
        assert_datasets_projected=False,
        vectorize_op=False)

    #calc_msa_lu
    msa_lu_uri = os.path.join(output_dir, 'msa_lu%s.tif' % file_suffix)
    LOGGER.info('calculate msa_lu')
    pygeoprocessing.geoprocessing.reclassify_dataset_uri(
        globio_lulc_uri,
        msa_parameter_table['msa_lu'],
        msa_lu_uri,
        gdal.GDT_Float32,
        globio_nodata,
        exception_flag='values_required')

    LOGGER.info('calculate msa')
    msa_uri = os.path.join(output_dir, 'msa%s.tif' % file_suffix)

    def _msa_op(msa_f, msa_lu, msa_i):
        """Calculate the MSA which is the product of the sub msas"""
        return numpy.where(msa_f != globio_nodata, msa_f * msa_lu * msa_i,
                           globio_nodata)

    pygeoprocessing.geoprocessing.vectorize_datasets(
        [msa_f_uri, msa_lu_uri, msa_i_uri],
        _msa_op,
        msa_uri,
        gdal.GDT_Float32,
        msa_nodata,
        out_pixel_size,
        "intersection",
        dataset_to_align_index=0,
        assert_datasets_projected=False,
        vectorize_op=False)

    if 'aoi_uri' in args:
        #copy the aoi to an output shapefile
        original_datasource = ogr.Open(args['aoi_uri'])
        summary_aoi_uri = os.path.join(output_dir,
                                       'aoi_summary%s.shp' % file_suffix)
        #If there is already an existing shapefile with the same name and path,
        # delete it
        if os.path.isfile(summary_aoi_uri):
            os.remove(summary_aoi_uri)
        #Copy the input shapefile into the designated output folder
        esri_driver = ogr.GetDriverByName('ESRI Shapefile')
        datasource_copy = esri_driver.CopyDataSource(original_datasource,
                                                     summary_aoi_uri)
        layer = datasource_copy.GetLayer()
        msa_summary_field_def = ogr.FieldDefn('msa_mean', ogr.OFTReal)
        layer.CreateField(msa_summary_field_def)

        #make an identifying id per polygon that can be used for aggregation
        layer_defn = layer.GetLayerDefn()
        while True:
            #last 8 characters because shapefile fields are limited to 8 chars
            poly_id_field = str(uuid.uuid4())[-8:]
            if layer_defn.GetFieldIndex(poly_id_field) == -1:
                break
        layer_id_field = ogr.FieldDefn(poly_id_field, ogr.OFTInteger)
        layer.CreateField(layer_id_field)
        for poly_index, poly_feat in enumerate(layer):
            poly_feat.SetField(poly_id_field, poly_index)
            layer.SetFeature(poly_feat)
        layer.SyncToDisk()

        #aggregate by ID
        msa_summary = pygeoprocessing.aggregate_raster_values_uri(
            msa_uri, summary_aoi_uri, shapefile_field=poly_id_field)

        #add new column to output file
        for feature_id in xrange(layer.GetFeatureCount()):
            feature = layer.GetFeature(feature_id)
            key_value = feature.GetFieldAsInteger(poly_id_field)
            feature.SetField('msa_mean',
                             float(msa_summary.pixel_mean[key_value]))
            layer.SetFeature(feature)

        # don't need a random poly id anymore
        layer.DeleteField(layer_defn.GetFieldIndex(poly_id_field))
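Based on the parameter docstring above, a hypothetical "mode (b)" invocation (predefined GLOBIO landcover) could be wired up like this; all paths are placeholders:

args = {
    'workspace_dir': 'globio_workspace',
    'predefined_globio': True,              # mode (b): use an existing GLOBIO LULC
    'results_suffix': '',
    'globio_lulc_uri': 'globio_lulc.tif',
    'msa_parameters_uri': 'msa_parameters.csv',
    'infrastructure_dir': 'infrastructure', # GeoTIFFs and/or shapefiles
    'aoi_uri': 'aoi.shp',                   # optional; summarizes MSA by polygon
}
# execute(args)  # mode (a) would instead require 'lulc_uri',
#                # 'lulc_to_globio_table_uri', 'pasture_uri', and the thresholds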
Example #12
    aoi_uri=yosemite_vector,
)

# Next we need to calculate the slope layer.
LOGGER.info('Calculating slope')
slope_raster = os.path.join(OUTPUT_DIR, 'slope.tif')
pygeoprocessing.calculate_slope(
    dem_dataset_uri=joined_dem,
    slope_uri=slope_raster)

# OK!  Now we add it all together with a call to vectorize_datasets
LOGGER.info('Finding high-elevation, steep grasslands')
lulc = '/data/landcover.tif'

# segfault if I do this: gdal.Open(lulc).GetRasterBand(1).GetNoDataValue()
lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc)
dem_nodata = pygeoprocessing.get_nodata_from_uri(joined_dem)
slope_nodata = pygeoprocessing.get_nodata_from_uri(slope_raster)

out_nodata = -1
def _find_grasslands(lulc_blk, dem_blk, slope_blk):
    # All blocks will be the same dimensions

    # Create a mask of invalid pixels due to nodata values
    valid_mask = ((lulc_blk != lulc_nodata) &
                  (dem_blk != dem_nodata) &
                  (slope_blk!= slope_nodata))

    # grasslands are lulc code 10
    matching_grasslands = ((lulc_blk[valid_mask] == 10) &
                           (slope_blk[valid_mask] >= 45) &
def _sort_to_disk(dataset_uri, score_weight=1.0, cache_element_size=2**25):
    """Sorts the non-nodata pixels in the dataset on disk and returns
    an iterable in sorted order.

    Parameters:
        dataset_uri (string): a path to a floating point GDAL dataset
        score_weight (float): a number to multiply all values by, which can be
            used to reverse the order of the iteration if negative.
        cache_element_size (int): approximate number of single elements to hold
            in memory before flushing to disk.  Due to the internal blocksize
            of the input raster, it is possible this cache could go over
            this value by that size before the cache is flushed.

    Returns:
        an iterable that produces (value * score_weight, flat_index) in
        increasing sorted order by value * score_weight"""
    def _read_score_index_from_disk(score_file_name,
                                    index_file_name,
                                    buffer_size=4 * 10000):
        """Generator to yield a float/int value from the given filenames.
        Reads `buffer_size` bytes at a time so the files are not kept
        open between yields."""

        score_buffer = ''
        index_buffer = ''
        file_offset = 0
        buffer_offset = 1  # initialize to 1 to trigger the first load

        while True:
            if buffer_offset > len(score_buffer):
                score_file = open(score_file_name, 'rb')
                index_file = open(index_file_name, 'rb')
                score_file.seek(file_offset)
                index_file.seek(file_offset)

                score_buffer = score_file.read(buffer_size)
                index_buffer = index_file.read(buffer_size)
                score_file.close()
                index_file.close()

                file_offset += buffer_size
                buffer_offset = 0
            packed_score = score_buffer[buffer_offset:buffer_offset + 4]
            packed_index = index_buffer[buffer_offset:buffer_offset + 4]
            buffer_offset += 4
            if not packed_score:
                break
            yield (struct.unpack('f', packed_score)[0],
                   struct.unpack('i', packed_index)[0])

    def _sort_cache_to_iterator(index_cache, score_cache):
        """Flushes the current cache to temporary files on disk and returns
        an iterator over the flushed values.

        Parameters:
            index_cache (1d numpy.array): contains flat indexes to the
                score pixels `score_cache`
            score_cache (1d numpy.array): contains score pixels

        Returns:
            Iterable to visit scores/indexes in increasing score order."""

        # sort the whole cache in memory before dumping it to disk
        sort_index = score_cache.argsort()
        score_cache = score_cache[sort_index]
        index_cache = index_cache[sort_index]

        #Dump all the scores and indexes to disk
        score_file = tempfile.NamedTemporaryFile(delete=False)
        score_file.write(struct.pack('%sf' % score_cache.size, *score_cache))
        index_file = tempfile.NamedTemporaryFile(delete=False)
        index_file.write(struct.pack('%si' % index_cache.size, *index_cache))

        #Get the filename and register a command to delete it after the
        #interpreter exits
        score_file_name = score_file.name
        score_file.close()
        index_file_name = index_file.name
        index_file.close()

        def _remove_file(path):
            """Remove a file, swallowing OSError so this can safely be
            registered with atexit."""
            try:
                os.remove(path)
            except OSError:
                # The file may already have been removed elsewhere; that is
                # fine, so ignore the error.
                pass

        atexit.register(_remove_file, score_file_name)
        atexit.register(_remove_file, index_file_name)
        return _read_score_index_from_disk(score_file_name, index_file_name)

    nodata = pygeoprocessing.get_nodata_from_uri(dataset_uri)
    nodata *= score_weight  # scale the nodata value so it can be filtered out

    # This will be a list of file iterators we'll pass to heapq.merge
    iters = []

    _, n_cols = pygeoprocessing.get_row_col_from_uri(dataset_uri)

    index_cache = numpy.empty((0, ), dtype=numpy.int32)
    score_cache = numpy.empty((0, ), dtype=numpy.float32)
    for scores_data, scores_block in pygeoprocessing.iterblocks(dataset_uri):
        # flatten and scale the results
        scores_block = scores_block.flatten() * score_weight

        col_coords, row_coords = numpy.meshgrid(
            xrange(scores_data['xoff'],
                   scores_data['xoff'] + scores_data['win_xsize']),
            xrange(scores_data['yoff'],
                   scores_data['yoff'] + scores_data['win_ysize']))

        flat_indexes = (col_coords + row_coords * n_cols).flatten()

        sort_index = scores_block.argsort()
        sorted_scores = scores_block[sort_index]
        sorted_indexes = flat_indexes[sort_index]

        # find where the nodata values are so we can splice them out
        left_index = numpy.searchsorted(sorted_scores, nodata, side='left')
        right_index = numpy.searchsorted(sorted_scores, nodata, side='right')

        # splice out the nodata values and append the rest to the cache
        score_cache = numpy.concatenate(
            (score_cache, sorted_scores[0:left_index],
             sorted_scores[right_index::]))
        index_cache = numpy.concatenate(
            (index_cache, sorted_indexes[0:left_index],
             sorted_indexes[right_index::]))

        # check if we need to flush the cache
        if index_cache.size >= cache_element_size:
            iters.append(_sort_cache_to_iterator(index_cache, score_cache))
            index_cache = numpy.empty((0, ), dtype=numpy.int32)
            score_cache = numpy.empty((0, ), dtype=numpy.float32)

    iters.append(_sort_cache_to_iterator(index_cache, score_cache))
    return heapq.merge(*iters)
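# A minimal usage sketch (not part of the original example): visit the largest
# raw pixel values of a hypothetical raster first by passing a negative
# score_weight, then recover (row, col) coordinates from each flat index.
# The filename 'priority.tif' and the 100-pixel cutoff are assumptions.
priority_raster = 'priority.tif'
_, n_cols = pygeoprocessing.get_row_col_from_uri(priority_raster)
for rank, (scaled_value, flat_index) in enumerate(
        _sort_to_disk(priority_raster, score_weight=-1.0)):
    if rank >= 100:
        break
    row, col = flat_index // n_cols, flat_index % n_cols
    print rank, -scaled_value, row, col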
Beispiel #14
0
def execute(args):
    """InVEST Carbon Edge Model calculates the carbon due to edge effects in
    forest pixels.

    Parameters:
        args['workspace_dir'] (string): a path to the directory into which
            output and other temporary files will be written during
            calculation. (required)
        args['results_suffix'] (string): a string to append to any output file
            name (optional)
        args['n_nearest_model_points'] (int): number of nearest neighbor model
            points to search for
        args['aoi_uri'] (string): (optional) if present, a path to a
            shapefile that will be used to aggregate carbon stock results at
            the end of the run.
        args['biophysical_table_uri'] (string): a path to a CSV table that
            contains at least the columns 'lucode', 'is_forest', and
            'c_above':
                'lucode': an integer that corresponds to landcover codes in
                    the raster args['lulc_uri']
                'is_forest': either 0 or 1 indicating whether the landcover
                    type is forest (1) or not (0).  If 1, the value in c_above
                    is ignored and instead calculated from the edge regression
                    model.
                'c_above': floating point number indicating tons of carbon per
                    hectare for that landcover type

                Example:
                    lucode, is_forest, c_above
                    0,0,32.8
                    1,1,n/a
                    2,1,n/a
                    16,0,28.1

                    Note that the "n/a" values are acceptable because the
                    c_above field is ignored when is_forest==1.

        args['lulc_uri'] (string): path to an integer landcover code raster
        args['forest_edge_carbon_model_shape_uri'] (string): path to a
            shapefile that defines the regions for the local carbon edge
            models.  It has at least the fields 'method', 'theta1', 'theta2',
            and 'theta3', where 'method' is an integer from 1 to 3 selecting
            the biomass regression model and the thetas are floating point
            numbers whose meanings depend on the 'method' value.
            Specifically,

                method 1 asymptotic model:
                    biomass = theta1 - theta2 * exp(-theta3 * edge_dist_km)
                method 2 logarithmic model:
                    biomass = theta1 + theta2 * numpy.log(edge_dist_km)
                     (theta3 is ignored for this method)
                method 3 linear regression:
                    biomass = theta1 + theta2 * edge_dist_km

        args['biomass_to_carbon_conversion_factor'] (string/float): Number by
            which to multiply forest biomass to convert to carbon in the edge
            effect calculation.

    Returns:
        None"""

    output_dir = args['workspace_dir']
    intermediate_dir = os.path.join(args['workspace_dir'],
                                    'intermediate_outputs')
    pygeoprocessing.create_directories([output_dir, intermediate_dir])
    try:
        file_suffix = args['results_suffix']
        if file_suffix != "" and not file_suffix.startswith('_'):
            file_suffix = '_' + file_suffix
    except KeyError:
        file_suffix = ''

    # used to keep track of files generated by this module
    output_file_registry = {
        'non_forest_carbon_stocks':
        os.path.join(intermediate_dir,
                     'non_forest_carbon_stocks%s.tif' % file_suffix),
        'edge_distance':
        os.path.join(intermediate_dir, 'edge_distance%s.tif' % file_suffix),
        'forest_edge_carbon_map':
        os.path.join(intermediate_dir,
                     'forest_edge_carbon_stocks%s.tif' % file_suffix),
        'carbon_map':
        os.path.join(output_dir, 'carbon_map%s.tif' % file_suffix),
        'aoi_datasource':
        os.path.join(output_dir, 'aggregated_carbon_stocks.shp')
    }

    # Map non-forest landcover codes to carbon biomasses
    LOGGER.info('calculating non-forest carbon')
    _calculate_lulc_carbon_map(
        args['lulc_uri'], args['biophysical_table_uri'],
        output_file_registry['non_forest_carbon_stocks'])

    # generate a map of pixel distance to forest edge from the landcover map
    LOGGER.info('calculating distance from forest edge')
    _map_distance_from_forest_edge(args['lulc_uri'],
                                   args['biophysical_table_uri'],
                                   output_file_registry['edge_distance'])

    # Build spatial index for gridded global model for closest 3 points
    LOGGER.info('Building spatial index for forest edge models.')
    kd_tree, theta_model_parameters, method_model_parameter = (
        _build_spatial_index(args['lulc_uri'], intermediate_dir,
                             args['forest_edge_carbon_model_shape_uri']))

    # calculate the edge carbon effect on forests
    LOGGER.info('calculating forest edge carbon')
    _calculate_forest_edge_carbon_map(
        output_file_registry['edge_distance'], kd_tree, theta_model_parameters,
        method_model_parameter, int(args['n_nearest_model_points']),
        float(args['biomass_to_carbon_conversion_factor']),
        output_file_registry['forest_edge_carbon_map'])

    # combine maps into output
    LOGGER.info('combining forest and non forest carbon into single raster')
    cell_size_in_meters = pygeoprocessing.get_cell_size_from_uri(
        args['lulc_uri'])
    carbon_edge_nodata = pygeoprocessing.get_nodata_from_uri(
        output_file_registry['forest_edge_carbon_map'])

    def combine_carbon_maps(non_forest_carbon, forest_carbon):
        """Combine the forest and non-forest carbon maps, using the forest
        edge carbon value wherever it is not nodata."""
        return numpy.where(forest_carbon == carbon_edge_nodata,
                           non_forest_carbon, forest_carbon)

    pygeoprocessing.vectorize_datasets(
        [output_file_registry['non_forest_carbon_stocks'],
         output_file_registry['forest_edge_carbon_map']],
        combine_carbon_maps,
        output_file_registry['carbon_map'],
        gdal.GDT_Float32,
        carbon_edge_nodata,
        cell_size_in_meters,
        'intersection',
        vectorize_op=False,
        datasets_are_pre_aligned=True)

    # optionally aggregate the carbon map by AOI polygons if one was provided
    if 'aoi_uri' in args:
        LOGGER.info('aggregating carbon map by aoi')
        _aggregate_carbon_map(args['aoi_uri'],
                              output_file_registry['carbon_map'],
                              output_file_registry['aoi_datasource'])
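
# A minimal sketch of how execute() might be invoked, following the docstring
# above.  Every path and parameter value below is an assumption made for
# illustration; they are not files or defaults shipped with the model.
if __name__ == '__main__':
    example_args = {
        'workspace_dir': 'carbon_edge_workspace',
        'results_suffix': 'demo',
        'n_nearest_model_points': 3,
        'aoi_uri': 'watershed_aoi.shp',  # optional aggregation polygons
        'biophysical_table_uri': 'biophysical_table.csv',
        'lulc_uri': 'landcover.tif',
        'forest_edge_carbon_model_shape_uri': 'edge_carbon_models.shp',
        'biomass_to_carbon_conversion_factor': 0.47,
    }
    execute(example_args)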