Example #1
File: regression.py  Project: whigg/iceflow
def _compare_with_make_stack(stack_trend_file, pgp_trend_file, diff_file):
    """Compare trend of ``make_regression`` with trend from ``make_stack.py``.

    Comparison is done as a per-pixel diff on any pixel pairs where both
    pixels are not nodata.  If either pixel in a pixel stack is nodata, the
    stack is ignored and nodata is returned for that pixel value.

    The diff looks like this::

        diff_file = stack_trend_file - pgp_trend_file

    Parameters:
        stack_trend_file (string): The path to the trend raster output of
            ``make_stack.py`` (usually named ``stack_trend.tif``).  This
            file must exist on disk.
        pgp_trend_file (string): The path to the trend raster output from
            ``make_regression()``, also in this module.  This file must
            exist on disk.
        diff_file (string): The path to where the difference raster should be
            saved.

    Returns:
        ``None``"""
    stack_nodata = pygeoprocessing.get_nodata_from_uri(stack_trend_file)
    pgp_nodata = pygeoprocessing.get_nodata_from_uri(pgp_trend_file)

    def _diff(stack_trend, pgp_trend):
        """Calculate a diff between two matrices, ignoring nodata.

        Parameters:
            stack_trend (numpy.ndarray): Array of values from the stack trend
                raster.
            pgp_trend (numpy.ndarray): Array of values from the pygeoprocessing
                trend raster.

        Returns:
            ``numpy.ndarray`` of the difference between ``stack_trend`` and
            ``pgp_trend``"""
        valid_mask = ((stack_trend != stack_nodata) &
                      (pgp_trend != pgp_nodata))
        out_array = numpy.empty_like(stack_trend)
        out_array[:] = -9999
        out_array[valid_mask] = stack_trend[valid_mask] - pgp_trend[valid_mask]
        return out_array

    pygeoprocessing.vectorize_datasets(
        dataset_uri_list=[stack_trend_file, pgp_trend_file],
        dataset_pixel_op=_diff,
        dataset_out_uri=diff_file,
        datatype_out=gdal.GDT_Float32,
        nodata_out=-9999,
        pixel_size_out=32.,
        bounding_box_mode='intersection',
        vectorize_op=False,
        datasets_are_pre_aligned=False)
Example #2
def _map_distance_from_forest_edge(lulc_uri, biophysical_table_uri,
                                   edge_distance_uri):
    """Generates a raster of forest edge distances where each pixel is the
    distance to the edge of the forest in meters.

    Parameters:
        lulc_uri (string): path to the landcover raster that contains integer
            landcover codes
        biophysical_table_uri (string): a path to a csv table that indexes
            landcover codes to forest type, contains at least the fields
            'lucode' (landcover integer code) and 'is_forest' (0 or 1 depending
            on landcover code type)
        edge_distance_uri (string): path to output raster where each pixel
            contains the euclidean pixel distance to the nearest forest edge
            for all non-nodata values of lulc_uri

    Returns:
        None"""

    # Build a list of forest lucodes
    biophysical_table = pygeoprocessing.get_lookup_from_table(
        biophysical_table_uri, 'lucode')
    forest_codes = [
        lucode for (lucode, ludata) in biophysical_table.iteritems()
        if int(ludata['is_forest']) == 1
    ]

    # Make a raster where 1 is non-forest landcover types and 0 is forest
    forest_mask_nodata = 255
    lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_uri)

    def mask_non_forest_op(lulc_array):
        """converts forest lulc codes to 1"""
        non_forest_mask = ~numpy.in1d(lulc_array.flatten(),
                                      forest_codes).reshape(lulc_array.shape)
        nodata_mask = lulc_array == lulc_nodata
        return numpy.where(nodata_mask, forest_mask_nodata, non_forest_mask)

    non_forest_mask_uri = pygeoprocessing.temporary_filename()
    out_pixel_size = pygeoprocessing.get_cell_size_from_uri(lulc_uri)
    pygeoprocessing.vectorize_datasets([lulc_uri],
                                       mask_non_forest_op,
                                       non_forest_mask_uri,
                                       gdal.GDT_Byte,
                                       forest_mask_nodata,
                                       out_pixel_size,
                                       "intersection",
                                       vectorize_op=False)

    # Do the distance transform on non-forest pixels
    pygeoprocessing.distance_transform_edt(non_forest_mask_uri,
                                           edge_distance_uri)

    # good practice to delete temporary files when we're done with them
    os.remove(non_forest_mask_uri)
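
For reference, a minimal sketch of the biophysical table this function expects; only the 'lucode' and 'is_forest' fields are required, and the filename and landcover codes below are hypothetical:

import csv

with open('biophysical_table.csv', 'w') as table_file:
    writer = csv.writer(table_file)
    writer.writerow(['lucode', 'is_forest'])
    writer.writerow([1, 1])  # hypothetical forest code
    writer.writerow([2, 0])  # hypothetical non-forest code

_map_distance_from_forest_edge(
    'landcover.tif', 'biophysical_table.csv', 'edge_distance.tif')
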
Example #3
    def test_vect_datasets_identity_aoi(self):
        """PGP.geoprocessing: vectorize_datasets f(x)=x with AOI."""
        pixel_matrix = numpy.ones((5, 5), numpy.int16)
        reference = sampledata.SRS_COLOMBIA
        nodata = -1
        pygeoprocessing.testing.create_raster_on_disk(
            [pixel_matrix],
            reference.origin,
            reference.projection,
            nodata,
            reference.pixel_size(30),
            filename=self.raster_filename)

        polygons = [
            Polygon([
                (reference.origin[0] + reference.pixel_size(30)[0] * 0,
                 reference.origin[1] + reference.pixel_size(30)[1] * 0),
                (reference.origin[0] + reference.pixel_size(30)[0] * 5,
                 reference.origin[1] + reference.pixel_size(30)[1] * 0),
                (reference.origin[0] + reference.pixel_size(30)[0] * 5,
                 reference.origin[1] + reference.pixel_size(30)[1] * 5),
                (reference.origin[0] + reference.pixel_size(30)[0] * 0,
                 reference.origin[1] + reference.pixel_size(30)[1] * 5),
                (reference.origin[0] + reference.pixel_size(30)[0] * 0,
                 reference.origin[1] + reference.pixel_size(30)[1] * 0),
            ]),
        ]
        pygeoprocessing.testing.create_vector_on_disk(
            polygons, reference.projection, filename=self.aoi_filename)

        out_filename = pygeoprocessing.temporary_filename()
        pygeoprocessing.vectorize_datasets([self.raster_filename],
                                           lambda x: x,
                                           out_filename,
                                           gdal.GDT_Int32,
                                           nodata,
                                           30,
                                           'intersection',
                                           aoi_uri=self.aoi_filename)

        pygeoprocessing.testing.assert_rasters_equal(self.raster_filename,
                                                     out_filename,
                                                     rel_tol=1e-9)
Example #4
    def test_vect_datasets_identity(self):
        """PGP.geoprocessing: vectorize_datasets f(x)=x."""
        pixel_matrix = numpy.ones((5, 5), numpy.int16)
        reference = sampledata.SRS_COLOMBIA
        nodata = -1
        pygeoprocessing.testing.create_raster_on_disk(
            [pixel_matrix],
            reference.origin,
            reference.projection,
            nodata,
            reference.pixel_size(30),
            filename=self.raster_filename)

        out_filename = pygeoprocessing.temporary_filename()
        pygeoprocessing.vectorize_datasets([self.raster_filename], lambda x: x,
                                           out_filename, gdal.GDT_Int32,
                                           nodata, 30, 'intersection')

        pygeoprocessing.testing.assert_rasters_equal(self.raster_filename,
                                                     out_filename,
                                                     rel_tol=1e-9)
Example #5
    def test_vect_datasets_(self):
        """PGP.geoprocessing: vect..._datasets expected error for non-list."""
        pixel_matrix = numpy.ones((5, 5), numpy.int16)
        reference = sampledata.SRS_COLOMBIA
        nodata = -1
        pygeoprocessing.testing.create_raster_on_disk(
            [pixel_matrix],
            reference.origin,
            reference.projection,
            nodata,
            reference.pixel_size(30),
            filename=self.raster_filename)

        out_filename = pygeoprocessing.temporary_filename()
        with self.assertRaises(ValueError):
            # intentionally passing a filename rather than a list of files
            # to get an expected exception
            pygeoprocessing.vectorize_datasets(self.raster_filename,
                                               lambda x: x, out_filename,
                                               gdal.GDT_Int32, nodata, 30,
                                               'intersection')
Example #6
def main():
    system = platform.platform()
    logfile_uri = 'md5_check_%s.log' % system
    logfile = open(logfile_uri, 'w')
    def _write(line):
        logfile.write(line + '\n')

    for base_raster in ['landuse_cur_200m.tif', 'gaussian.tif']:
        if base_raster == 'gaussian.tif':
            src_ds = 'landuse_cur_200m.tif'
            dest_ds = 'gaussian.tif'
            nodata = pygeoprocessing.get_nodata_from_uri(src_ds)
            pygeoprocessing.gaussian_filter_dataset_uri(
                src_ds, 4, dest_ds, nodata)

        _write(base_raster + '\n')
        base_nodata = pygeoprocessing.get_nodata_from_uri(base_raster)
        base_pixel_size = pygeoprocessing.get_cell_size_from_uri(base_raster)

        _write('System: %s' % system)
        _write('Python %s' % platform.python_version())
        _write('GDAL version: %s' % gdal.__version__)
        _write('numpy version: %s' % numpy.__version__)
        _write('scipy version: %s' % scipy.__version__)
        _write('base MD5sum: %s' % md5sum(base_raster))

        for gdal_type, gdal_type_label in GDAL_DTYPES.iteritems():
            if gdal_type_label in ['GDT_Unknown', 'GDT_TypeCount']:
                continue

            print gdal_type_label

            # convert the raster (via vectorize_datasets) to a new dtype
            new_uri = '%s.tif' % gdal_type_label
            pygeoprocessing.vectorize_datasets([base_raster], lambda x: x,
                new_uri, gdal_type, base_nodata, base_pixel_size, 'intersection')

            _write("%-15s: %s" % (gdal_type_label, md5sum(new_uri)))
        _write('\n')
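
The md5sum helper used above is not defined in this snippet. A minimal sketch, assuming it simply hashes the raw bytes of the raster file:

import hashlib

def md5sum(uri):
    """Return the hex MD5 digest of the file at uri."""
    digest = hashlib.md5()
    with open(uri, 'rb') as raster_file:
        # read in fixed-size blocks so large rasters don't load into memory
        for block in iter(lambda: raster_file.read(65536), b''):
            digest.update(block)
    return digest.hexdigest()
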
Example #7
def _convert_landscape(
        base_lulc_uri, replacement_lucode, area_to_convert,
        focal_landcover_codes, convertible_type_list, score_weight, n_steps,
        smooth_distance_from_edge_uri, output_landscape_raster_uri,
        stats_uri):
    """Expand replacement lucodes in relation to the focal lucodes.

    If the sign on `score_weight` is positive, expansion marches away from
    the focal types; if `score_weight` is negative, conversion marches
    toward the focal types.

    Parameters:
        base_lulc_uri (string): path to the landcover raster that will be
            used as the base landcover map for conversion to agriculture
        replacement_lucode (int): agriculture landcover code type found in the
            raster at `base_lulc_uri`
        area_to_convert (float): area (Ha) to convert to agriculture
        focal_landcover_codes (list of int): landcover codes that are used to
            calculate proximity
        convertible_type_list (list of int): landcover codes that are allowable
            to be converted to agriculture
        score_weight (float): this value is used to multiply the distance from
            the focal landcover types when prioritizing which pixels in
            `convertible_type_list` are to be converted.  If negative,
            conversion occurs toward the focal types, if positive occurs away
            from the focal types.
        n_steps (int): number of steps to convert the landscape.  On each step
            the distance transform will be applied on the
            current value of the `focal_landcover_codes` pixels in
            `output_landscape_raster_uri`.  On the first step the distance
            is calculated from `base_lulc_uri`.
        smooth_distance_from_edge_uri (string): an intermediate output showing
            the pixel distance from the edge of the base landcover types
        output_landscape_raster_uri (string): an output raster that will
            contain the final fragmented forest layer.
        stats_uri (string): a path to an output csv that records the number
            type, and area of pixels converted in `output_landscape_raster_uri`

    Returns:
        None.
    """
    tmp_file_registry = {
        'non_base_mask': pygeoprocessing.temporary_filename(),
        'base_mask': pygeoprocessing.temporary_filename(),
        'gaussian_kernel': pygeoprocessing.temporary_filename(),
        'distance_from_base_mask_edge': pygeoprocessing.temporary_filename(),
        'distance_from_non_base_mask_edge':
            pygeoprocessing.temporary_filename(),
        'convertible_distances': pygeoprocessing.temporary_filename(),
        'smooth_distance_from_edge': pygeoprocessing.temporary_filename(),
        'distance_from_edge': pygeoprocessing.temporary_filename(),
    }
    # a sigma of 1.0 gives nice visual results to smooth pixel level artifacts
    # since a pixel is the 1.0 unit
    _make_gaussian_kernel_uri(1.0, tmp_file_registry['gaussian_kernel'])

    # create the output raster first as a copy of the base landcover so it can
    # be looped on for each step
    lulc_nodata = pygeoprocessing.get_nodata_from_uri(base_lulc_uri)
    pixel_size_out = pygeoprocessing.get_cell_size_from_uri(base_lulc_uri)
    mask_nodata = 2
    pygeoprocessing.vectorize_datasets(
        [base_lulc_uri], lambda x: x, output_landscape_raster_uri,
        gdal.GDT_Int32, lulc_nodata, pixel_size_out, "intersection",
        vectorize_op=False, datasets_are_pre_aligned=True)

    # convert everything furthest from edge for each of n_steps
    pixel_area_ha = (
        pygeoprocessing.get_cell_size_from_uri(base_lulc_uri)**2 / 10000.0)
    max_pixels_to_convert = int(math.ceil(area_to_convert / pixel_area_ha))
    convertible_type_nodata = -1
    pixels_left_to_convert = max_pixels_to_convert
    pixels_to_convert = max_pixels_to_convert / n_steps
    stats_cache = collections.defaultdict(int)

    # pylint complains when these are defined inside the loop
    invert_mask = None
    distance_nodata = None

    for step_index in xrange(n_steps):
        LOGGER.info('step %d of %d', step_index+1, n_steps)
        pixels_left_to_convert -= pixels_to_convert

        # Often the last step will overshoot the number of pixels to
        # convert; this check converts only the exact amount remaining.
        if pixels_left_to_convert < 0:
            pixels_to_convert += pixels_left_to_convert

        # create distance transforms for inside and outside the base lulc codes
        LOGGER.info('create distance transform for current landcover')
        for invert_mask, mask_id, distance_id in [
            (False, 'non_base_mask', 'distance_from_non_base_mask_edge'),
            (True, 'base_mask', 'distance_from_base_mask_edge')]:

            def _mask_base_op(lulc_array):
                """Create a mask of valid non-base pixels only."""
                base_mask = numpy.in1d(
                    lulc_array.flatten(), focal_landcover_codes).reshape(
                    lulc_array.shape)
                if invert_mask:
                    base_mask = ~base_mask
                return numpy.where(
                    lulc_array == lulc_nodata, mask_nodata, base_mask)
            pygeoprocessing.vectorize_datasets(
                [output_landscape_raster_uri], _mask_base_op,
                tmp_file_registry[mask_id], gdal.GDT_Byte,
                mask_nodata, pixel_size_out, "intersection",
                vectorize_op=False, datasets_are_pre_aligned=True)

            # create distance transform for the current mask
            pygeoprocessing.distance_transform_edt(
                tmp_file_registry[mask_id], tmp_file_registry[distance_id])

        # combine inner and outer distance transforms into one
        distance_nodata = pygeoprocessing.get_nodata_from_uri(
            tmp_file_registry['distance_from_base_mask_edge'])

        def _combine_masks(base_distance_array, non_base_distance_array):
            """create a mask of valid non-base pixels only."""
            result = non_base_distance_array
            valid_base_mask = base_distance_array > 0.0
            result[valid_base_mask] = base_distance_array[valid_base_mask]
            return result
        pygeoprocessing.vectorize_datasets(
            [tmp_file_registry['distance_from_base_mask_edge'],
             tmp_file_registry['distance_from_non_base_mask_edge']],
            _combine_masks, tmp_file_registry['distance_from_edge'],
            gdal.GDT_Float32, distance_nodata, pixel_size_out, "intersection",
            vectorize_op=False, datasets_are_pre_aligned=True)

        # smooth the distance transform to avoid scanline artifacts
        pygeoprocessing.convolve_2d_uri(
            tmp_file_registry['distance_from_edge'],
            tmp_file_registry['gaussian_kernel'],
            smooth_distance_from_edge_uri)

        # turn inside and outside masks into a single mask
        def _mask_to_convertible_codes(distance_from_base_edge, lulc):
            """Mask out the distance transform to a set of lucodes."""
            convertible_mask = numpy.in1d(
                lulc.flatten(), convertible_type_list).reshape(lulc.shape)
            return numpy.where(
                convertible_mask, distance_from_base_edge,
                convertible_type_nodata)
        pygeoprocessing.vectorize_datasets(
            [smooth_distance_from_edge_uri, output_landscape_raster_uri],
            _mask_to_convertible_codes,
            tmp_file_registry['convertible_distances'], gdal.GDT_Float32,
            convertible_type_nodata, pixel_size_out, "intersection",
            vectorize_op=False, datasets_are_pre_aligned=True)

        LOGGER.info(
            'convert %d pixels to lucode %d', pixels_to_convert,
            replacement_lucode)
        _convert_by_score(
            tmp_file_registry['convertible_distances'], pixels_to_convert,
            output_landscape_raster_uri, replacement_lucode, stats_cache,
            score_weight)

    _log_stats(stats_cache, pixel_area_ha, stats_uri)
    for filename in tmp_file_registry.values():
        os.remove(filename)
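
The _make_gaussian_kernel_uri helper called above is likewise not shown. A minimal sketch, assuming convolve_2d_uri only needs the kernel to be a small, normalized, single-band GTiff (the project's own helper may differ):

import numpy
from osgeo import gdal

def _make_gaussian_kernel_uri(sigma, kernel_uri):
    """Write a normalized 2D gaussian kernel raster to kernel_uri."""
    radius = int(numpy.ceil(sigma * 3))  # truncate the kernel at 3 sigma
    size = radius * 2 + 1
    rows, cols = numpy.mgrid[-radius:radius + 1, -radius:radius + 1]
    kernel = numpy.exp(-(rows ** 2 + cols ** 2) / (2.0 * sigma ** 2))
    kernel /= kernel.sum()  # normalize so convolution preserves magnitude
    driver = gdal.GetDriverByName('GTiff')
    dataset = driver.Create(kernel_uri, size, size, 1, gdal.GDT_Float32)
    dataset.GetRasterBand(1).WriteArray(kernel)
    dataset.FlushCache()
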
Example #8
def execute(args):
    """This function invokes the seasonal water yield model given
        URI inputs of files. It may write log, warning, or error messages to
        stdout.
    """

    alpha_m = float(fractions.Fraction(args['alpha_m']))
    beta_i = float(fractions.Fraction(args['beta_i']))
    gamma = float(fractions.Fraction(args['gamma']))

    try:
        file_suffix = args['results_suffix']
        if file_suffix != "" and not file_suffix.startswith('_'):
            file_suffix = '_' + file_suffix
    except KeyError:
        file_suffix = ''

    pygeoprocessing.geoprocessing.create_directories([args['workspace_dir']])

    qfi_uri = os.path.join(args['workspace_dir'], 'qf%s.tif' % file_suffix)
    cn_uri = os.path.join(args['workspace_dir'], 'cn%s.tif' % file_suffix)

    lulc_uri_aligned = pygeoprocessing.temporary_filename()
    dem_uri_aligned = pygeoprocessing.temporary_filename()

    pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri(
        args['lulc_uri'])

    LOGGER.info('Aligning and clipping dataset list')
    input_align_list = [args['lulc_uri'], args['dem_uri']]
    output_align_list = [lulc_uri_aligned, dem_uri_aligned]

    if not args['user_defined_recharge']:
        precip_uri_list = []
        et0_uri_list = []

        et0_dir_list = [
            os.path.join(args['et0_dir'], f) for f in os.listdir(args['et0_dir'])]
        precip_dir_list = [
            os.path.join(args['precip_dir'], f) for f in os.listdir(
                args['precip_dir'])]

        qf_monthly_uri_list = []
        for m_index in range(1, N_MONTHS + 1):
            qf_monthly_uri_list.append(
                os.path.join(
                    args['workspace_dir'], 'qf_%d%s.tif' %
                    (m_index, file_suffix)))

        for month_index in range(1, N_MONTHS + 1):
            month_file_match = re.compile(r'.*[^\d]%d\.[^.]+$' % month_index)

            for data_type, dir_list, uri_list in [
                    ('et0', et0_dir_list, et0_uri_list),
                    ('Precip', precip_dir_list, precip_uri_list)]:

                file_list = [x for x in dir_list if month_file_match.match(x)]
                if len(file_list) == 0:
                    raise ValueError(
                        "No %s found for month %d" % (data_type, month_index))
                if len(file_list) > 1:
                    raise ValueError(
                        "Ambiguous set of files found for month %d: %s" %
                        (month_index, file_list))
                uri_list.append(file_list[0])

        soil_group_uri_aligned = pygeoprocessing.temporary_filename()

        # pre-align all the datasets
        precip_uri_aligned_list = [
            pygeoprocessing.geoprocessing.temporary_filename() for _ in
            range(len(precip_uri_list))]
        et0_uri_aligned_list = [
            pygeoprocessing.geoprocessing.temporary_filename() for _ in
            range(len(precip_uri_list))]
        input_align_list = (
            precip_uri_list + [args['soil_group_uri']] + et0_uri_list +
            input_align_list)
        output_align_list = (
            precip_uri_aligned_list + [soil_group_uri_aligned] +
            et0_uri_aligned_list + output_align_list)

    interpolate_list = ['nearest'] * len(input_align_list)
    align_index = 0
    if args['user_defined_recharge']:
        input_align_list.append(args['recharge_uri'])
        recharge_aligned_uri = (
            pygeoprocessing.geoprocessing.temporary_filename())
        output_align_list.append(recharge_aligned_uri)
        interpolate_list.append('nearest')
        align_index = len(interpolate_list) - 1

    pygeoprocessing.geoprocessing.align_dataset_list(
        input_align_list, output_align_list,
        interpolate_list,
        pixel_size, 'intersection', align_index, aoi_uri=args['aoi_uri'],
        assert_datasets_projected=True)

    flow_dir_uri = os.path.join(
        args['workspace_dir'], 'flow_dir%s.tif' % file_suffix)
    LOGGER.info('calc flow direction')
    pygeoprocessing.routing.flow_direction_d_inf(dem_uri_aligned, flow_dir_uri)

    flow_accum_uri = os.path.join(
        args['workspace_dir'], 'flow_accum%s.tif' % file_suffix)
    LOGGER.info('calc flow accumulation')
    pygeoprocessing.routing.flow_accumulation(
        flow_dir_uri, dem_uri_aligned, flow_accum_uri)
    stream_uri = os.path.join(
        args['workspace_dir'], 'stream%s.tif' % file_suffix)
    threshold_flow_accumulation = 1000
    pygeoprocessing.routing.stream_threshold(
        flow_accum_uri, threshold_flow_accumulation, stream_uri)

    LOGGER.info('calculating flow weights')
    outflow_weights_uri = os.path.join(
        args['workspace_dir'], 'outflow_weights%s.tif' % file_suffix)
    outflow_direction_uri = os.path.join(
        args['workspace_dir'], 'outflow_direction%s.tif' % file_suffix)
    seasonal_water_yield_core.calculate_flow_weights(
        flow_dir_uri, outflow_weights_uri, outflow_direction_uri)


    si_uri = os.path.join(args['workspace_dir'], 'si%s.tif' % file_suffix)

    biophysical_table = pygeoprocessing.geoprocessing.get_lookup_from_table(
        args['biophysical_table_uri'], 'lucode')

    kc_lookup = dict([
        (lucode, biophysical_table[lucode]['kc']) for lucode in
        biophysical_table])

    recharge_avail_uri = os.path.join(
        args['workspace_dir'], 'recharge_avail%s.tif' % file_suffix)
    r_sum_avail_uri = os.path.join(
        args['workspace_dir'], 'r_sum_avail%s.tif' % file_suffix)
    vri_uri = os.path.join(args['workspace_dir'], 'vri%s.tif' % file_suffix)
    aet_uri = os.path.join(args['workspace_dir'], 'aet%s.tif' % file_suffix)

    r_sum_avail_pour_uri = os.path.join(
        args['workspace_dir'], 'r_sum_avail_pour%s.tif' % file_suffix)
    sf_uri = os.path.join(
        args['workspace_dir'], 'sf%s.tif' % file_suffix)
    sf_down_uri = os.path.join(
        args['workspace_dir'], 'sf_down%s.tif' % file_suffix)
    qb_out_uri = os.path.join(
        args['workspace_dir'], 'qb%s.txt' % file_suffix)

    LOGGER.info('classifying kc')
    kc_uri = os.path.join(args['workspace_dir'], 'kc%s.tif' % file_suffix)
    pygeoprocessing.geoprocessing.reclassify_dataset_uri(
        lulc_uri_aligned, kc_lookup, kc_uri, gdal.GDT_Float32, -1)

    LOGGER.info('calculate slow flow')
    if not args['user_defined_recharge']:
        LOGGER.info('loading number of monthly events')
        rain_events_lookup = (
            pygeoprocessing.geoprocessing.get_lookup_from_table(
                args['rain_events_table_uri'], 'month'))
        n_events = dict([
            (month, rain_events_lookup[month]['events'])
            for month in rain_events_lookup])

        LOGGER.info('calculating curve number')
        soil_nodata = pygeoprocessing.get_nodata_from_uri(
            args['soil_group_uri'])
        map_soil_type_to_header = {
            1: 'cn_a',
            2: 'cn_b',
            3: 'cn_c',
            4: 'cn_d',
        }
        cn_nodata = -1
        lulc_to_soil = {}
        lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_uri_aligned)
        for soil_id, soil_column in map_soil_type_to_header.iteritems():
            lulc_to_soil[soil_id] = {
                'lulc_values': [],
                'cn_values': []
            }
            for lucode in sorted(biophysical_table.keys() + [lulc_nodata]):
                try:
                    lulc_to_soil[soil_id]['cn_values'].append(
                        biophysical_table[lucode][soil_column])
                    lulc_to_soil[soil_id]['lulc_values'].append(lucode)
                except KeyError:
                    if lucode == lulc_nodata:
                        lulc_to_soil[soil_id]['lulc_values'].append(lucode)
                        lulc_to_soil[soil_id]['cn_values'].append(cn_nodata)
                    else:
                        raise
            lulc_to_soil[soil_id]['lulc_values'] = (
                numpy.array(lulc_to_soil[soil_id]['lulc_values'],
                        dtype=numpy.int32))
            lulc_to_soil[soil_id]['cn_values'] = (
                numpy.array(lulc_to_soil[soil_id]['cn_values'],
                        dtype=numpy.float32))

        def cn_op(lulc_array, soil_group_array):
            """map lulc code and soil to a curve number"""
            cn_result = numpy.empty(lulc_array.shape)
            cn_result[:] = cn_nodata
            for soil_group_id in numpy.unique(soil_group_array):
                if soil_group_id == soil_nodata:
                    continue
                current_soil_mask = (soil_group_array == soil_group_id)
                index = numpy.digitize(
                    lulc_array.ravel(),
                    lulc_to_soil[soil_group_id]['lulc_values'], right=True)
                cn_values = (
                    lulc_to_soil[soil_group_id]['cn_values'][index]).reshape(
                        lulc_array.shape)
                cn_result[current_soil_mask] = cn_values[current_soil_mask]
            return cn_result

        cn_nodata = -1
        pygeoprocessing.vectorize_datasets(
            [lulc_uri_aligned, soil_group_uri_aligned], cn_op, cn_uri,
            gdal.GDT_Float32, cn_nodata, pixel_size, 'intersection',
            vectorize_op=False, datasets_are_pre_aligned=True)

        LOGGER.info('calculate quick flow')
        calculate_quick_flow(
            precip_uri_aligned_list,
            lulc_uri_aligned, cn_uri, n_events, stream_uri, qfi_uri,
            qf_monthly_uri_list, si_uri)

        recharge_uri = os.path.join(
            args['workspace_dir'], 'recharge%s.tif' % file_suffix)
        seasonal_water_yield_core.calculate_recharge(
            precip_uri_aligned_list, et0_uri_aligned_list, qf_monthly_uri_list,
            flow_dir_uri, outflow_weights_uri, outflow_direction_uri,
            dem_uri_aligned, lulc_uri_aligned, kc_lookup, alpha_m, beta_i,
            gamma, stream_uri, recharge_uri, recharge_avail_uri,
            r_sum_avail_uri, aet_uri, kc_uri)
    else:
        recharge_uri = recharge_aligned_uri
        recharge_nodata = pygeoprocessing.geoprocessing.get_nodata_from_uri(
            recharge_uri)
        def calc_recharge_avail(recharge_array):
            recharge_threshold = recharge_array * gamma
            recharge_threshold[recharge_threshold < 0] = 0.0
            return numpy.where(
                recharge_array != recharge_nodata,
                recharge_threshold, recharge_nodata)

        #calc recharge avail
        pygeoprocessing.geoprocessing.vectorize_datasets(
            [recharge_aligned_uri], calc_recharge_avail, recharge_avail_uri,
            gdal.GDT_Float32, recharge_nodata, pixel_size, 'intersection',
            vectorize_op=False, datasets_are_pre_aligned=True)
        #calc r_sum_avail with flux accumulation
        loss_uri = pygeoprocessing.geoprocessing.temporary_filename()
        zero_absorption_source_uri = (
            pygeoprocessing.geoprocessing.temporary_filename())
        pygeoprocessing.make_constant_raster_from_base_uri(
            dem_uri_aligned, 0.0, zero_absorption_source_uri)

        pygeoprocessing.routing.route_flux(
            flow_dir_uri, dem_uri_aligned, recharge_avail_uri,
            zero_absorption_source_uri, loss_uri, r_sum_avail_uri, 'flux_only',
            include_source=False)

    # calculate Qb as the sum of recharge_avail over the aoi
    qb_results = pygeoprocessing.geoprocessing.aggregate_raster_values_uri(
        recharge_avail_uri, args['aoi_uri'])

    # 9999 is the key used to index results when no shapefile ID is provided
    qb_result = qb_results.total[9999] / qb_results.n_pixels[9999]
    with open(qb_out_uri, 'w') as qb_file:
        qb_file.write("%f\n" % qb_result)
    LOGGER.info("Qb = %f", qb_result)

    pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri(
        recharge_uri)
    ri_nodata = pygeoprocessing.geoprocessing.get_nodata_from_uri(recharge_uri)

    def vri_op(ri_array):
        """calc vri index"""
        return numpy.where(
            ri_array != ri_nodata, ri_array / qb_result, ri_nodata)

    pygeoprocessing.geoprocessing.vectorize_datasets(
        [recharge_uri], vri_op, vri_uri,
        gdal.GDT_Float32, ri_nodata, pixel_size, 'intersection',
        vectorize_op=False, datasets_are_pre_aligned=True)

    LOGGER.info('calculating r_sum_avail_pour')
    seasonal_water_yield_core.calculate_r_sum_avail_pour(
        r_sum_avail_uri, outflow_weights_uri, outflow_direction_uri,
        r_sum_avail_pour_uri)

    LOGGER.info('calculating slow flow')
    print dem_uri_aligned, recharge_avail_uri, r_sum_avail_uri,\
        r_sum_avail_pour_uri, outflow_direction_uri, outflow_weights_uri,\
        stream_uri, sf_uri, sf_down_uri

    seasonal_water_yield_core.route_sf(
        dem_uri_aligned, recharge_avail_uri, r_sum_avail_uri,
        r_sum_avail_pour_uri, outflow_direction_uri, outflow_weights_uri,
        stream_uri, sf_uri, sf_down_uri)

    LOGGER.info('  (\\w/)  SWY Complete!')
    LOGGER.info('  (..  \\ ')
    LOGGER.info(' _/  )  \\______')
    LOGGER.info('(oo /\'\\        )`,')
    LOGGER.info(' `--\' (v  __( / ||')
    LOGGER.info('       |||  ||| ||')
    LOGGER.info('      //_| //_|')
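
The docstring above does not enumerate the keys of args. Judging from the lookups in the function body, a hypothetical invocation might look like the following; every path and value here is illustrative only:

args = {
    'workspace_dir': 'swy_workspace',
    'results_suffix': '',
    'alpha_m': '1/12',  # parsed with fractions.Fraction above
    'beta_i': '1',
    'gamma': '1',
    'lulc_uri': 'landcover.tif',
    'dem_uri': 'dem.tif',
    'aoi_uri': 'watershed.shp',
    'biophysical_table_uri': 'biophysical.csv',
    'rain_events_table_uri': 'rain_events.csv',
    'soil_group_uri': 'soil_groups.tif',
    'et0_dir': 'et0_rasters',
    'precip_dir': 'precip_rasters',
    'user_defined_recharge': False,  # if True, 'recharge_uri' is required
}
execute(args)
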
Example #9
def _calc_cost_of_per_hectare_inputs(vars_dict, crop, lulc_raster):
    '''
    CostPerHectareInputTotal_crop = Mask_raster * CostPerHectare_input *
        ha_per_cell
    '''

    # Determine the crop lucode based on its name
    crop_lucode = None
    for lucode, luname in vars_dict['crop_lookup_dict'].iteritems():
        if luname == crop:
            crop_lucode = lucode
            continue

    lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_raster.uri)
    economics_table_crop = vars_dict['economics_table_dict'][crop]
    datatype_out = gdal.GDT_Float32
    nodata_out = NODATA_FLOAT
    pixel_size_out = pygeoprocessing.get_cell_size_from_uri(lulc_raster.uri)
    ha_per_m2 = 0.0001
    cell_area_ha = pixel_size_out**2 * ha_per_m2

    # The scalar cost is identical for all crop pixels of the current class,
    # and is based on the presence or absence of columns in the user-provided
    # economics table.  We only need to calculate this once.
    cost_scalar = 0.0
    for key in ['cost_labor_per_ha', 'cost_machine_per_ha',
                'cost_seed_per_ha', 'cost_irrigation_per_ha']:
        try:
            cost_scalar += (economics_table_crop[key] * cell_area_ha)
        except KeyError:
            LOGGER.warning('Key missing from economics table: %s', key)

    def _calculate_cost(lulc_matrix):
        """
        Calculate the total cost on a single pixel.

        <pseudocode>
            If lulc_pixel is nodata:
                return nodata
            else:
                if lulc_pixel is of our crop type:
                    return the cost of this crop (in cost_scalar, above)
                else:
                    return 0.0
        </pseudocode>
        """
        return np.where(lulc_matrix == lulc_nodata, nodata_out,
                        np.where(lulc_matrix == crop_lucode, cost_scalar, 0.0))

    new_raster_uri = pygeoprocessing.geoprocessing.temporary_filename()
    pygeoprocessing.vectorize_datasets(
        [lulc_raster.uri],
        _calculate_cost,
        new_raster_uri,
        datatype_out,
        nodata_out,
        pixel_size_out,
        bounding_box_mode='intersection',
        vectorize_op=False,
        datasets_are_pre_aligned=True
    )

    return Raster.from_file(new_raster_uri, 'GTiff')
Example #10
# The snippet begins mid-function; this def line and the out_matrix
# allocation are reconstructed from the vectorize_datasets call below.
def _dem_values_under_evergreen_forest(lulc_block, dem_block):
    out_matrix = numpy.empty(dem_block.shape, dtype=numpy.int16)
    out_matrix[:] = -1
    matching_landcover_mask = lulc_block == 1
    out_matrix[matching_landcover_mask] = dem_block[matching_landcover_mask]
    return out_matrix


out_path = '/shared/mean_elevation_exercise/matching_pixels.tif'
out_dir = os.path.dirname(out_path)
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

dem_path = '/shared/grasslands_demo/joined_dem.tif'
pygeoprocessing.vectorize_datasets(
    dataset_uri_list=['/data/landcover.tif', dem_path],
    dataset_pixel_op=_dem_values_under_evergreen_forest,
    dataset_out_uri=out_path,
    datatype_out=gdal.GDT_Int16,
    nodata_out=-1,
    pixel_size_out=pygeoprocessing.get_cell_size_from_uri(dem_path),
    bounding_box_mode='intersection')

stats = pygeoprocessing.aggregate_raster_values_uri(out_path,
                                                    '/data/yosemite.shp')

print stats.pixel_mean[9999]

################

aligned_lulc = os.path.join(out_dir, 'aligned_lulc.tif')
aligned_dem = os.path.join(out_dir, 'aligned_dem.tif')
# The original example is truncated after its first (misspelled) argument;
# this completion mirrors the positional align_dataset_list call in
# Example #8 and is an assumption, not the source's own code.
pygeoprocessing.align_dataset_list(
    ['/data/landcover.tif', dem_path],
    [aligned_lulc, aligned_dem],
    ['nearest', 'nearest'],
    pygeoprocessing.get_cell_size_from_uri(dem_path),
    'intersection',
    0)
Example #11
import numpy
def _merge_dems(north_block, south_block):
    valid_mask = (north_block != -1) | (south_block != -1)
    out_matrix = numpy.empty(north_block.shape)
    out_matrix[:] = -1
    out_matrix[valid_mask] = numpy.maximum(north_block[valid_mask],
                                           south_block[valid_mask])
    return out_matrix

LOGGER.info('Merging DEMs')
pygeoprocessing.vectorize_datasets(
    dataset_uri_list=[north_dem, south_dem],
    dataset_pixel_op=_merge_dems,
    dataset_out_uri=joined_dem,
    datatype_out=gdal.GDT_Int16,
    nodata_out=-1.0,
    # We could calculate projected units by hand, but this is more convenient.
    pixel_size_out=30.0,
    bounding_box_mode='union',
    vectorize_op=False,
    aoi_uri=yosemite_vector,
)

# Next we need to calculate the slope layer.
LOGGER.info('Calculating slope')
slope_raster = os.path.join(OUTPUT_DIR, 'slope.tif')
pygeoprocessing.calculate_slope(
    dem_dataset_uri=joined_dem,
    slope_uri=slope_raster)

# OK!  Now we add it all together with a call to vectorize_datasets
LOGGER.info('Finding high-elevation, steep grasslands')
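
The example is cut off here; a sketch of how that final vectorize_datasets call might look, with the grassland lucode and the elevation and slope thresholds below purely hypothetical:

def _high_steep_grasslands(lulc_block, dem_block, slope_block):
    """Flag grassland pixels that are both high-elevation and steep."""
    matching = ((lulc_block == 10) &   # hypothetical grassland lucode
                (dem_block > 2000) &   # hypothetical elevation cutoff (m)
                (slope_block > 20.0))  # hypothetical slope cutoff (%)
    return matching.astype(numpy.uint8)

grasslands_raster = os.path.join(OUTPUT_DIR, 'steep_grasslands.tif')
pygeoprocessing.vectorize_datasets(
    dataset_uri_list=['/data/landcover.tif', joined_dem, slope_raster],
    dataset_pixel_op=_high_steep_grasslands,
    dataset_out_uri=grasslands_raster,
    datatype_out=gdal.GDT_Byte,
    nodata_out=255,
    pixel_size_out=30.0,
    bounding_box_mode='intersection',
    vectorize_op=False)
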
Example #12
File: regression.py  Project: whigg/iceflow
def make_regression(worldview_folder, out_filename, deg=1, weights=None):
    """Calculate a regression between worldview DEMs within a folder.

    Note:
        Any pixel stacks that contain 1 or more nodata values will be
        excluded from the regression calculations.

        Additionally, this function assumes that all worldview rasters
        have a nodata value of ``0``.

    Parameters:
        worldview_folder (string): The path to a folder on disk containing
            GeoTiffs representing elevation data.  Any files with a ``.tif``
            extension will be analyzed within this folder.  There is no
            upper limit to the number of files that can be analyzed.
        out_filename (string): The path on disk to where the regression raster
            should be stored.  If the file already exists on disk, it will be
            overwritten.
        deg=1 (int): The order of the regression.  Passed directly to
            ``numpy.polyfit`` via the ``deg`` parameter.  1 represents
            linear regression.
        weights=None (``numpy.ndarray`` or ``None``): If None, the inputs will
            be unweighted in the regression.  If an ``ndarray``, this array
            must be a 1D array with the same length as the number of files in
            ``worldview_folder``.

    Raises:
        ``ValueError``: When the length of ``weights`` does not equal
            the number of geotiffs found in ``worldview_folder``.

    Returns:
        ``None``"""
    if not os.path.exists(worldview_folder):
        raise IOError('Folder %s not found' % worldview_folder)

    rasters = sorted(glob.glob(worldview_folder + '/*.tif'),
                     key=lambda x: int(os.path.basename(x).split('_')[0]))
    LOGGER.info('Using rasters %s', rasters)

    timesteps = [_date_from_filename(r) for r in rasters]
    timesteps = numpy.array([(d - timesteps[0]).days for d in timesteps])
    LOGGER.info('Timesteps: %s', timesteps)

    if weights is not None and len(weights) != len(timesteps):
        raise ValueError(('Weights length (%s) does not match timesteps '
                          'length (%s)') % (len(weights), len(timesteps)))

    def _regression(*blocks):
        """Compute linear regression from a stack of DEM matrices.

        Note:
            Any pixel stacks that contain 1 or more values of 0 will have an
            output value of ``0``.

        Parameters:
            blocks (list): A list of 2D ``numpy.ndarray`` instances with pixel
                values from the stack of rasters passed to
                ``vectorize_datasets`` call.  There is no upper limit to the
                number of timesteps that can be calculated.

        Returns:
            ``numpy.ndarray``, in 2 dimensions.  This will contain the ``m``
            parameter from the fitted line.
        """
        stacked_array = numpy.dstack(blocks)
        new_shape = (stacked_array.shape[0] * stacked_array.shape[1],
                     len(timesteps))
        reshaped = numpy.swapaxes(numpy.reshape(stacked_array, new_shape), 0,
                                  1)
        regression = numpy.polyfit(timesteps, reshaped, deg=deg, w=weights)[0]
        out_block = regression.reshape(blocks[0].shape)
        # Mask out any pixel stacks where there's a nodata value in the stack.
        # Out block is multiplied by 365.25 to convert m/day to m/year trend.
        return numpy.where(
            numpy.min(stacked_array, axis=2) == 0, 0, out_block * 365.25)

    raster_cell_sizes = [
        pygeoprocessing.get_cell_size_from_uri(r) for r in rasters
    ]
    min_cell_size = min(raster_cell_sizes)
    if not len(set(raster_cell_sizes)) == 1:
        warnings.warn(('Cell sizes of input rasters do not all match. '
                       'Using min pixelsize of %s. Mismatched values: %s') %
                      (min_cell_size, set(raster_cell_sizes)))

    pygeoprocessing.vectorize_datasets(dataset_uri_list=rasters,
                                       dataset_pixel_op=_regression,
                                       dataset_out_uri=out_filename,
                                       datatype_out=gdal.GDT_Float32,
                                       nodata_out=0,
                                       pixel_size_out=min_cell_size,
                                       bounding_box_mode='intersection',
                                       vectorize_op=False,
                                       datasets_are_pre_aligned=False)
Example #13
def execute(args):
    """InVEST Carbon Edge Model calculates the carbon due to edge effects in
    forest pixels.

    Parameters:
        args['workspace_dir'] (string): a uri to the directory that will write
            output and other temporary files during calculation. (required)
        args['results_suffix'] (string): a string to append to any output file
            name (optional)
        args['n_nearest_model_points'] (int): number of nearest neighbor model
            points to search for
        args['aoi_uri'] (string): (optional) if present, a path to a
            shapefile that will be used to aggregate carbon stock results at
            the end of the run.
        args['biophysical_table_uri'] (string): a path to a CSV table that has
            at least a header for an 'lucode', 'is_forest', and 'c_above'.
                'lucode': an integer that corresponds to landcover codes in
                    the raster args['lulc_uri']
                'is_forest': either 0 or 1 indicating whether the landcover
                    type is forest (1) or not (0).  If 1, the value in c_above
                    is ignored and instead calculated from the edge regression
                    model.
                'c_above': floating point number indicating tons of carbon per
                    hectare for that landcover type

                Example:
                    lucode, is_forest, c_above
                    0,0,32.8
                    1,1,n/a
                    2,1,n/a
                    16,0,28.1

                    Note the "n/a" are optional since that field is ignored
                    when is_forest==1.

        args['lulc_uri'] (string): path to an integer landcover code raster
        args['forest_edge_carbon_model_shape_uri'] (string): path to a
            shapefile that defines the regions for the local carbon edge
            models.  Has at least the fields 'method', 'theta1', 'theta2',
            'theta3'.  Where 'method' is an int between 1..3 describing the
            biomass regression model, and the thetas are floating point numbers
            that have different meanings depending on the 'method' parameter.
            Specifically,

                method 1 asymptotic model:
                    biomass = theta1 - theta2 * exp(-theta3 * edge_dist_km)
                method 2 logarithmic model:
                    biomass = theta1 + theta2 * numpy.log(edge_dist_km)
                     (theta3 is ignored for this method)
                method 3 linear regression:
                    biomass = theta1 + theta2 * edge_dist_km

        args['biomass_to_carbon_conversion_factor'] (string/float): Number by
            which to multiply forest biomass to convert to carbon in the edge
            effect calculation.

    Returns:
        None"""

    output_dir = args['workspace_dir']
    intermediate_dir = os.path.join(args['workspace_dir'],
                                    'intermediate_outputs')
    pygeoprocessing.create_directories([output_dir, intermediate_dir])
    try:
        file_suffix = args['results_suffix']
        if file_suffix != "" and not file_suffix.startswith('_'):
            file_suffix = '_' + file_suffix
    except KeyError:
        file_suffix = ''

    # used to keep track of files generated by this module
    output_file_registry = {
        'non_forest_carbon_stocks':
        os.path.join(intermediate_dir,
                     'non_forest_carbon_stocks%s.tif' % file_suffix),
        'edge_distance':
        os.path.join(intermediate_dir, 'edge_distance%s.tif' % file_suffix),
        'forest_edge_carbon_map':
        os.path.join(intermediate_dir,
                     'forest_edge_carbon_stocks%s.tif' % file_suffix),
        'carbon_map':
        os.path.join(output_dir, 'carbon_map%s.tif' % file_suffix),
        'aoi_datasource':
        os.path.join(output_dir, 'aggregated_carbon_stocks.shp')
    }

    # Map non-forest landcover codes to carbon biomasses
    LOGGER.info('calculating non-forest carbon')
    _calculate_lulc_carbon_map(
        args['lulc_uri'], args['biophysical_table_uri'],
        output_file_registry['non_forest_carbon_stocks'])

    # generate a map of pixel distance to forest edge from the landcover map
    LOGGER.info('calculating distance from forest edge')
    _map_distance_from_forest_edge(args['lulc_uri'],
                                   args['biophysical_table_uri'],
                                   output_file_registry['edge_distance'])

    # Build spatial index for gridded global model for closest 3 points
    LOGGER.info('Building spatial index for forest edge models.')
    kd_tree, theta_model_parameters, method_model_parameter = (
        _build_spatial_index(args['lulc_uri'], intermediate_dir,
                             args['forest_edge_carbon_model_shape_uri']))

    # calculate the edge carbon effect on forests
    LOGGER.info('calculating forest edge carbon')
    _calculate_forest_edge_carbon_map(
        output_file_registry['edge_distance'], kd_tree, theta_model_parameters,
        method_model_parameter, int(args['n_nearest_model_points']),
        float(args['biomass_to_carbon_conversion_factor']),
        output_file_registry['forest_edge_carbon_map'])

    # combine maps into output
    LOGGER.info('combining forest and non forest carbon into single raster')
    cell_size_in_meters = pygeoprocessing.get_cell_size_from_uri(
        args['lulc_uri'])
    carbon_edge_nodata = pygeoprocessing.get_nodata_from_uri(
        output_file_registry['forest_edge_carbon_map'])

    def combine_carbon_maps(non_forest_carbon, forest_carbon):
        """This combines the forest and non forest maps into one"""
        return numpy.where(forest_carbon == carbon_edge_nodata,
                           non_forest_carbon, forest_carbon)

    pygeoprocessing.vectorize_datasets(
        [output_file_registry['non_forest_carbon_stocks'],
         output_file_registry['forest_edge_carbon_map']],
        combine_carbon_maps, output_file_registry['carbon_map'],
        gdal.GDT_Float32, carbon_edge_nodata, cell_size_in_meters,
        'intersection', vectorize_op=False, datasets_are_pre_aligned=True)

    # generate report (optional) by aoi if they exist
    if 'aoi_uri' in args:
        LOGGER.info('aggregating carbon map by aoi')
        _aggregate_carbon_map(args['aoi_uri'],
                              output_file_registry['carbon_map'],
                              output_file_registry['aoi_datasource'])
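
For reference, the three biomass regression methods described in the docstring translate directly into numpy. A minimal sketch (the dispatch helper below is illustrative; the model's own implementation lives in _calculate_forest_edge_carbon_map and is not shown here):

import numpy

def edge_biomass(method, theta1, theta2, theta3, edge_dist_km):
    """Evaluate one of the three edge regression models described above."""
    if method == 1:  # asymptotic model
        return theta1 - theta2 * numpy.exp(-theta3 * edge_dist_km)
    elif method == 2:  # logarithmic model (theta3 is ignored)
        return theta1 + theta2 * numpy.log(edge_dist_km)
    elif method == 3:  # linear regression
        return theta1 + theta2 * edge_dist_km
    raise ValueError('Unknown regression method: %s' % method)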