def _compare_with_make_stack(stack_trend_file, pgp_trend_file, diff_file):
    """Write a per-pixel difference raster between two trend rasters.

    The output is computed as::

        diff_file = stack_trend_file - pgp_trend_file

    A pixel is only differenced when neither input holds its own nodata
    value at that location; every other pixel is written as ``-9999``.
    The inputs are not assumed to be aligned, the bounding box is the
    intersection of both rasters and the output pixel size is fixed at 32.

    Parameters:
        stack_trend_file (string): Path to the trend raster produced by
            ``make_stack.py`` (usually ``stack_trend.tif``).  Must exist.
        pgp_trend_file (string): Path to the trend raster produced by
            ``make_regression()``.  Must exist.
        diff_file (string): Path where the difference raster is written.

    Returns:
        ``None``
    """
    diff_nodata = -9999
    stack_nodata = pygeoprocessing.get_nodata_from_uri(stack_trend_file)
    pgp_nodata = pygeoprocessing.get_nodata_from_uri(pgp_trend_file)

    def _diff(stack_trend, pgp_trend):
        """Subtract ``pgp_trend`` from ``stack_trend`` where both are valid.

        Parameters:
            stack_trend (numpy.ndarray): Block of the stack trend raster.
            pgp_trend (numpy.ndarray): Block of the pygeoprocessing trend
                raster.

        Returns:
            ``numpy.ndarray`` with the dtype of ``stack_trend`` holding the
            difference, or ``-9999`` where either input is nodata.
        """
        stack_is_valid = stack_trend != stack_nodata
        pgp_is_valid = pgp_trend != pgp_nodata
        both_valid = stack_is_valid & pgp_is_valid

        diff_block = numpy.empty_like(stack_trend)
        diff_block[...] = diff_nodata
        diff_block[both_valid] = (
            stack_trend[both_valid] - pgp_trend[both_valid])
        return diff_block

    pygeoprocessing.vectorize_datasets(
        dataset_uri_list=[stack_trend_file, pgp_trend_file],
        dataset_pixel_op=_diff,
        dataset_out_uri=diff_file,
        datatype_out=gdal.GDT_Float32,
        nodata_out=diff_nodata,
        pixel_size_out=32.,
        bounding_box_mode='intersection',
        vectorize_op=False,
        datasets_are_pre_aligned=False)
def _map_distance_from_forest_edge(lulc_uri, biophysical_table_uri,
                                   edge_distance_uri):
    """Write a raster of distances to the forest edge.

    A temporary byte mask is built from the landcover raster (1 for
    non-forest, 0 for forest, 255 for landcover nodata) and passed to
    ``pygeoprocessing.distance_transform_edt``, whose result is written to
    ``edge_distance_uri``.  The temporary mask is deleted afterwards.

    Parameters:
        lulc_uri (string): path to the landcover raster that contains
            integer landcover codes
        biophysical_table_uri (string): path to a csv table keyed on
            'lucode' that has at least an 'is_forest' column (0 or 1)
        edge_distance_uri (string): path of the output distance raster

    Returns:
        None
    """
    forest_mask_nodata = 255

    # Collect every landcover code flagged as forest in the table.
    biophysical_table = pygeoprocessing.get_lookup_from_table(
        biophysical_table_uri, 'lucode')
    forest_codes = []
    for lucode, ludata in biophysical_table.items():
        if int(ludata['is_forest']) == 1:
            forest_codes.append(lucode)

    lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_uri)

    def mask_non_forest_op(lulc_array):
        """Return 1 for non-forest, 0 for forest and 255 for nodata."""
        is_forest = numpy.in1d(lulc_array.ravel(), forest_codes)
        non_forest = (~is_forest).reshape(lulc_array.shape)
        is_nodata = lulc_array == lulc_nodata
        return numpy.where(is_nodata, forest_mask_nodata, non_forest)

    non_forest_mask_uri = pygeoprocessing.temporary_filename()
    out_pixel_size = pygeoprocessing.get_cell_size_from_uri(lulc_uri)
    pygeoprocessing.vectorize_datasets(
        [lulc_uri], mask_non_forest_op, non_forest_mask_uri,
        gdal.GDT_Byte, forest_mask_nodata, out_pixel_size, "intersection",
        vectorize_op=False)

    # The distance transform runs on the non-forest mask.
    pygeoprocessing.distance_transform_edt(
        non_forest_mask_uri, edge_distance_uri)

    # The mask is only an intermediate product; remove it once consumed.
    os.remove(non_forest_mask_uri)
def test_vect_datasets_identity_aoi(self):
    """PGP.geoprocessing: vectorize_datasets f(x)=x with AOI.

    Builds a 5x5 raster of ones, an AOI polygon spanning offsets 0..5 from
    the raster origin in both axes, runs the identity pixel operation
    through ``vectorize_datasets`` with that AOI and checks that the output
    raster equals the input raster.
    """
    reference = sampledata.SRS_COLOMBIA
    nodata = -1
    pygeoprocessing.testing.create_raster_on_disk(
        [numpy.ones((5, 5), numpy.int16)], reference.origin,
        reference.projection, nodata, reference.pixel_size(30),
        filename=self.raster_filename)

    # Closed ring expressed as (x, y) pixel offsets from the raster origin.
    corner_offsets = [(0, 0), (5, 0), (5, 5), (0, 5), (0, 0)]
    ring = []
    for x_offset, y_offset in corner_offsets:
        ring.append((
            reference.origin[0] + reference.pixel_size(30)[0] * x_offset,
            reference.origin[1] + reference.pixel_size(30)[1] * y_offset))
    pygeoprocessing.testing.create_vector_on_disk(
        [Polygon(ring)], reference.projection, filename=self.aoi_filename)

    def _identity(block):
        return block

    out_filename = pygeoprocessing.temporary_filename()
    pygeoprocessing.vectorize_datasets(
        [self.raster_filename], _identity, out_filename, gdal.GDT_Int32,
        nodata, 30, 'intersection', aoi_uri=self.aoi_filename)

    pygeoprocessing.testing.assert_rasters_equal(
        self.raster_filename, out_filename, rel_tol=1e-9)
def test_vect_datasets_identity(self):
    """PGP.geoprocessing: vectorize_datasets f(x)=x.

    Writes a 5x5 raster of ones, runs the identity pixel operation through
    ``vectorize_datasets`` and checks that the output raster equals the
    input raster.
    """
    reference = sampledata.SRS_COLOMBIA
    nodata = -1
    pygeoprocessing.testing.create_raster_on_disk(
        [numpy.ones((5, 5), numpy.int16)], reference.origin,
        reference.projection, nodata, reference.pixel_size(30),
        filename=self.raster_filename)

    def _identity(block):
        return block

    out_filename = pygeoprocessing.temporary_filename()
    pygeoprocessing.vectorize_datasets(
        [self.raster_filename], _identity, out_filename, gdal.GDT_Int32,
        nodata, 30, 'intersection')

    pygeoprocessing.testing.assert_rasters_equal(
        self.raster_filename, out_filename, rel_tol=1e-9)
def test_vect_datasets_(self):
    """PGP.geoprocessing: vect..._datasets expected error for non-list.

    Passes a single filename where ``vectorize_datasets`` expects a list
    of filenames and asserts that a ``ValueError`` is raised.
    """
    reference = sampledata.SRS_COLOMBIA
    nodata = -1
    pygeoprocessing.testing.create_raster_on_disk(
        [numpy.ones((5, 5), numpy.int16)], reference.origin,
        reference.projection, nodata, reference.pixel_size(30),
        filename=self.raster_filename)

    def _identity(block):
        return block

    out_filename = pygeoprocessing.temporary_filename()
    with self.assertRaises(ValueError):
        # The first argument is deliberately a bare filename, not a list
        # of filenames; that misuse is what should trigger the exception.
        pygeoprocessing.vectorize_datasets(
            self.raster_filename, _identity, out_filename, gdal.GDT_Int32,
            nodata, 30, 'intersection')
def main():
    """Log MD5 checksums of rasters written through ``vectorize_datasets``.

    For each base raster (``landuse_cur_200m.tif`` and ``gaussian.tif``,
    the latter generated here by filtering the former), the identity pixel
    operation is run once per entry of ``GDAL_DTYPES`` (skipping
    ``GDT_Unknown`` and ``GDT_TypeCount``), writing ``<label>.tif`` to the
    current directory.  Platform and library versions, the MD5 of the base
    raster and the MD5 of every converted raster are written to
    ``md5_check_<platform>.log``.

    The log file is managed by a ``with`` block so that it is flushed and
    closed even if one of the raster operations raises.

    Returns:
        None
    """
    system = platform.platform()
    logfile_uri = 'md5_check_%s.log' % system

    def _identity(block):
        """Pixel operation that returns its input unchanged."""
        return block

    with open(logfile_uri, 'w') as logfile:

        def _write(text):
            """Write ``text`` to the log followed by a newline."""
            logfile.write(text + '\n')

        for base_raster in ['landuse_cur_200m.tif', 'gaussian.tif']:
            if base_raster == 'gaussian.tif':
                # The gaussian raster is derived from the landuse raster
                # right before it is needed.
                src_ds = 'landuse_cur_200m.tif'
                dest_ds = 'gaussian.tif'
                nodata = pygeoprocessing.get_nodata_from_uri(src_ds)
                pygeoprocessing.gaussian_filter_dataset_uri(
                    src_ds, 4, dest_ds, nodata)

            _write(base_raster + '\n')
            base_nodata = pygeoprocessing.get_nodata_from_uri(base_raster)
            base_pixel_size = pygeoprocessing.get_cell_size_from_uri(
                base_raster)

            _write('System: %s' % system)
            _write('Python %s' % platform.python_version())
            _write('GDAL version: %s' % gdal.__version__)
            _write('numpy version: %s' % numpy.__version__)
            _write('scipy version: %s' % scipy.__version__)
            _write('base MD5sum: %s' % md5sum(base_raster))

            for gdal_type, gdal_type_label in GDAL_DTYPES.items():
                if gdal_type_label in ['GDT_Unknown', 'GDT_TypeCount']:
                    continue
                print(gdal_type_label)

                # Convert the raster (via vectorize_datasets) to a new
                # dtype and record the checksum of the result.
                new_uri = '%s.tif' % gdal_type_label
                pygeoprocessing.vectorize_datasets(
                    [base_raster], _identity, new_uri, gdal_type,
                    base_nodata, base_pixel_size, 'intersection')
                _write("%-15s: %s" % (gdal_type_label, md5sum(new_uri)))

            # Blank separator after each base raster's section.
            _write('\n')
def _convert_landscape(
        base_lulc_uri, replacement_lucode, area_to_convert,
        focal_landcover_codes, convertible_type_list, score_weight, n_steps,
        smooth_distance_from_edge_uri, output_landscape_raster_uri,
        stats_uri):
    """Expand replacement lucodes in relation to the focal lucodes.

    The base landcover is copied to `output_landscape_raster_uri` and then
    modified in place over `n_steps` iterations.  On every iteration a
    distance raster relative to the current `focal_landcover_codes` pixels
    is built, smoothed, masked to the convertible landcover codes, and
    handed to `_convert_by_score` together with the number of pixels to
    convert on that step.

    If the sign on `score_weight` is positive, conversion marches away
    from the focal types, while if `score_weight` is negative conversion
    marches toward the focal types.

    Parameters:
        base_lulc_uri (string): path to the landcover raster that is used
            as the base landcover map
        replacement_lucode (int): landcover code that converted pixels
            are set to
        area_to_convert (float): area (Ha) to convert
        focal_landcover_codes (list of int): landcover codes that are used
            to calculate proximity
        convertible_type_list (list of int): landcover codes that are
            allowed to be converted
        score_weight (float): passed through to `_convert_by_score`.  If
            negative, conversion occurs toward the focal types, if
            positive it occurs away from the focal types.
        n_steps (int): number of steps to convert the landscape.  On each
            step the distance transform is recalculated from the current
            state of `output_landscape_raster_uri`; on the first step that
            state is a copy of `base_lulc_uri`.
        smooth_distance_from_edge_uri (string): path of an intermediate
            output holding the smoothed distance raster of the last step
        output_landscape_raster_uri (string): path of the output raster
            that contains the converted landscape
        stats_uri (string): path to an output csv written by `_log_stats`
            from the per-step conversion statistics

    Returns:
        None.
    """
    # NOTE(review): the 'smooth_distance_from_edge' entry is created and
    # later removed but never read in this function; the smoothed raster
    # is written to `smooth_distance_from_edge_uri` instead.
    tmp_file_registry = {
        'non_base_mask': pygeoprocessing.temporary_filename(),
        'base_mask': pygeoprocessing.temporary_filename(),
        'gaussian_kernel': pygeoprocessing.temporary_filename(),
        'distance_from_base_mask_edge': pygeoprocessing.temporary_filename(),
        'distance_from_non_base_mask_edge':
            pygeoprocessing.temporary_filename(),
        'convertible_distances': pygeoprocessing.temporary_filename(),
        'smooth_distance_from_edge': pygeoprocessing.temporary_filename(),
        'distance_from_edge': pygeoprocessing.temporary_filename(),
    }
    # a sigma of 1.0 gives nice visual results to smooth pixel level artifacts
    # since a pixel is the 1.0 unit
    _make_gaussian_kernel_uri(1.0, tmp_file_registry['gaussian_kernel'])

    # create the output raster first as a copy of the base landcover so it can
    # be looped on for each step
    lulc_nodata = pygeoprocessing.get_nodata_from_uri(base_lulc_uri)
    pixel_size_out = pygeoprocessing.get_cell_size_from_uri(base_lulc_uri)
    mask_nodata = 2
    pygeoprocessing.vectorize_datasets(
        [base_lulc_uri], lambda x: x, output_landscape_raster_uri,
        gdal.GDT_Int32, lulc_nodata, pixel_size_out, "intersection",
        vectorize_op=False, datasets_are_pre_aligned=True)

    # convert everything furthest from edge for each of n_steps
    # (dividing by 10000.0 assumes the cell size is in meters -- TODO
    # confirm against the expected projection of the input raster)
    pixel_area_ha = (
        pygeoprocessing.get_cell_size_from_uri(base_lulc_uri)**2 / 10000.0)
    max_pixels_to_convert = int(math.ceil(area_to_convert / pixel_area_ha))
    convertible_type_nodata = -1
    pixels_left_to_convert = max_pixels_to_convert
    # NOTE(review): with an int `n_steps` this is integer division under
    # Python 2, so the per-step count is floored and the steps together may
    # convert fewer than `max_pixels_to_convert` pixels -- confirm intended.
    pixels_to_convert = max_pixels_to_convert / n_steps
    stats_cache = collections.defaultdict(int)

    # pylint complains when these are defined inside the loop
    invert_mask = None
    distance_nodata = None

    for step_index in xrange(n_steps):
        LOGGER.info('step %d of %d', step_index+1, n_steps)
        pixels_left_to_convert -= pixels_to_convert

        # Often the last segment of the steps will overstep the number of
        # pixels to convert, this check converts the exact amount
        if pixels_left_to_convert < 0:
            pixels_to_convert += pixels_left_to_convert

        # create distance transforms for inside and outside the base lulc codes
        LOGGER.info('create distance transform for current landcover')
        for invert_mask, mask_id, distance_id in [
                (False, 'non_base_mask', 'distance_from_non_base_mask_edge'),
                (True, 'base_mask', 'distance_from_base_mask_edge')]:

            def _mask_base_op(lulc_array):
                """Flag focal-code pixels, or their complement when inverted.

                Reads `invert_mask` from the enclosing loop; the closure is
                consumed by the `vectorize_datasets` call within the same
                iteration.  Landcover nodata pixels map to `mask_nodata`.
                """
                base_mask = numpy.in1d(
                    lulc_array.flatten(), focal_landcover_codes).reshape(
                        lulc_array.shape)
                if invert_mask:
                    base_mask = ~base_mask
                return numpy.where(
                    lulc_array == lulc_nodata, mask_nodata, base_mask)
            pygeoprocessing.vectorize_datasets(
                [output_landscape_raster_uri], _mask_base_op,
                tmp_file_registry[mask_id], gdal.GDT_Byte,
                mask_nodata, pixel_size_out, "intersection",
                vectorize_op=False, datasets_are_pre_aligned=True)

            # create distance transform for the current mask
            pygeoprocessing.distance_transform_edt(
                tmp_file_registry[mask_id], tmp_file_registry[distance_id])

        # combine inner and outer distance transforms into one
        distance_nodata = pygeoprocessing.get_nodata_from_uri(
            tmp_file_registry['distance_from_base_mask_edge'])

        def _combine_masks(base_distance_array, non_base_distance_array):
            """Overlay positive base distances onto the non-base distances.

            The result aliases `non_base_distance_array`, which is modified
            in place.
            """
            result = non_base_distance_array
            valid_base_mask = base_distance_array > 0.0
            result[valid_base_mask] = base_distance_array[valid_base_mask]
            return result
        pygeoprocessing.vectorize_datasets(
            [tmp_file_registry['distance_from_base_mask_edge'],
             tmp_file_registry['distance_from_non_base_mask_edge']],
            _combine_masks, tmp_file_registry['distance_from_edge'],
            gdal.GDT_Float32, distance_nodata, pixel_size_out, "intersection",
            vectorize_op=False, datasets_are_pre_aligned=True)

        # smooth the distance transform to avoid scanline artifacts
        pygeoprocessing.convolve_2d_uri(
            tmp_file_registry['distance_from_edge'],
            tmp_file_registry['gaussian_kernel'],
            smooth_distance_from_edge_uri)

        # keep the smoothed distances only on convertible landcover codes
        def _mask_to_convertible_codes(distance_from_base_edge, lulc):
            """Mask out the distance transform to a set of lucodes.

            Pixels whose landcover is not in `convertible_type_list` are
            set to `convertible_type_nodata`.
            """
            convertible_mask = numpy.in1d(
                lulc.flatten(), convertible_type_list).reshape(lulc.shape)
            return numpy.where(
                convertible_mask, distance_from_base_edge,
                convertible_type_nodata)
        pygeoprocessing.vectorize_datasets(
            [smooth_distance_from_edge_uri, output_landscape_raster_uri],
            _mask_to_convertible_codes,
            tmp_file_registry['convertible_distances'], gdal.GDT_Float32,
            convertible_type_nodata, pixel_size_out, "intersection",
            vectorize_op=False, datasets_are_pre_aligned=True)

        LOGGER.info(
            'convert %d pixels to lucode %d', pixels_to_convert,
            replacement_lucode)
        _convert_by_score(
            tmp_file_registry['convertible_distances'], pixels_to_convert,
            output_landscape_raster_uri, replacement_lucode, stats_cache,
            score_weight)

    _log_stats(stats_cache, pixel_area_ha, stats_uri)
    # remove every temporary raster created at the top of the function
    for filename in tmp_file_registry.values():
        os.remove(filename)
def execute(args):
    """Run the seasonal water yield model from URI inputs.

    Outputs are written into ``args['workspace_dir']``; progress is
    reported through ``LOGGER``.

    Keys read from ``args`` by this function:
        args['workspace_dir'] (string): output directory; created if needed
        args['results_suffix'] (string): optional suffix appended to output
            file names; a leading underscore is added when missing
        args['alpha_m'], args['beta_i'], args['gamma']: values parsed with
            ``fractions.Fraction`` and converted to float
        args['lulc_uri'] (string): landcover raster; its cell size is used
            as the alignment pixel size
        args['dem_uri'] (string): DEM raster
        args['aoi_uri'] (string): vector used to clip during alignment and
            to aggregate recharge into Qb
        args['biophysical_table_uri'] (string): table keyed on 'lucode'
            with a 'kc' column and, when recharge is modelled, the columns
            'cn_a', 'cn_b', 'cn_c' and 'cn_d'
        args['user_defined_recharge'] (bool): if true, recharge is read
            from ``args['recharge_uri']`` instead of being modelled
        args['recharge_uri'] (string): recharge raster; only read when
            ``args['user_defined_recharge']`` is true
        args['et0_dir'], args['precip_dir'] (string): directories with one
            file per month whose name ends in ``<month>.<ext>``; only read
            when recharge is modelled
        args['soil_group_uri'] (string): soil group raster with values
            1..4; only read when recharge is modelled
        args['rain_events_table_uri'] (string): table keyed on 'month' with
            an 'events' column; only read when recharge is modelled

    Raises:
        ValueError: if a month has no matching, or more than one matching,
            file in ``args['et0_dir']`` or ``args['precip_dir']``.

    Returns:
        None
    """
    alpha_m = float(fractions.Fraction(args['alpha_m']))
    beta_i = float(fractions.Fraction(args['beta_i']))
    gamma = float(fractions.Fraction(args['gamma']))

    # the suffix is optional; normalise it to start with an underscore
    try:
        file_suffix = args['results_suffix']
        if file_suffix != "" and not file_suffix.startswith('_'):
            file_suffix = '_' + file_suffix
    except KeyError:
        file_suffix = ''

    pygeoprocessing.geoprocessing.create_directories([args['workspace_dir']])

    qfi_uri = os.path.join(args['workspace_dir'], 'qf%s.tif' % file_suffix)
    cn_uri = os.path.join(args['workspace_dir'], 'cn%s.tif' % file_suffix)

    lulc_uri_aligned = pygeoprocessing.temporary_filename()
    dem_uri_aligned = pygeoprocessing.temporary_filename()

    pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri(
        args['lulc_uri'])

    LOGGER.info('Aligning and clipping dataset list')
    input_align_list = [args['lulc_uri'], args['dem_uri']]
    output_align_list = [lulc_uri_aligned, dem_uri_aligned]

    if not args['user_defined_recharge']:
        precip_uri_list = []
        et0_uri_list = []

        et0_dir_list = [
            os.path.join(args['et0_dir'], f) for f in os.listdir(
                args['et0_dir'])]
        precip_dir_list = [
            os.path.join(args['precip_dir'], f) for f in os.listdir(
                args['precip_dir'])]

        qf_monthly_uri_list = []
        for m_index in range(1, N_MONTHS + 1):
            qf_monthly_uri_list.append(
                os.path.join(
                    args['workspace_dir'], 'qf_%d%s.tif' %
                    (m_index, file_suffix)))

        # find exactly one et0 and one precip file per month; the pattern
        # requires a non-digit before the month number so that e.g. month 1
        # does not match a file ending in 11
        for month_index in range(1, N_MONTHS + 1):
            month_file_match = re.compile(r'.*[^\d]%d\.[^.]+$' % month_index)

            for data_type, dir_list, uri_list in [
                    ('et0', et0_dir_list, et0_uri_list),
                    ('Precip', precip_dir_list, precip_uri_list)]:

                file_list = [x for x in dir_list if month_file_match.match(x)]
                if len(file_list) == 0:
                    raise ValueError(
                        "No %s found for month %d" % (data_type, month_index))
                if len(file_list) > 1:
                    raise ValueError(
                        "Ambiguous set of files found for month %d: %s" %
                        (month_index, file_list))
                uri_list.append(file_list[0])

        soil_group_uri_aligned = pygeoprocessing.temporary_filename()

        # pre align all the datasets
        precip_uri_aligned_list = [
            pygeoprocessing.geoprocessing.temporary_filename() for _ in
            range(len(precip_uri_list))]
        # sized from precip_uri_list; the loop above appends one entry per
        # month to both the et0 and the precip list, so the lengths agree
        et0_uri_aligned_list = [
            pygeoprocessing.geoprocessing.temporary_filename() for _ in
            range(len(precip_uri_list))]

        input_align_list = (
            precip_uri_list + [args['soil_group_uri']] + et0_uri_list +
            input_align_list)
        output_align_list = (
            precip_uri_aligned_list + [soil_group_uri_aligned] +
            et0_uri_aligned_list + output_align_list)

    interpolate_list = ['nearest'] * len(input_align_list)
    align_index = 0
    if args['user_defined_recharge']:
        # the user supplied recharge raster is appended last and becomes
        # the dataset whose index is passed to align_dataset_list
        input_align_list.append(args['recharge_uri'])
        recharge_aligned_uri = (
            pygeoprocessing.geoprocessing.temporary_filename())
        output_align_list.append(recharge_aligned_uri)
        interpolate_list.append('nearest')
        align_index = len(interpolate_list) - 1

    pygeoprocessing.geoprocessing.align_dataset_list(
        input_align_list, output_align_list, interpolate_list, pixel_size,
        'intersection', align_index, aoi_uri=args['aoi_uri'],
        assert_datasets_projected=True)

    flow_dir_uri = os.path.join(
        args['workspace_dir'], 'flow_dir%s.tif' % file_suffix)
    LOGGER.info('calc flow direction')
    pygeoprocessing.routing.flow_direction_d_inf(dem_uri_aligned, flow_dir_uri)

    flow_accum_uri = os.path.join(
        args['workspace_dir'], 'flow_accum%s.tif' % file_suffix)
    LOGGER.info('calc flow accumulation')
    pygeoprocessing.routing.flow_accumulation(
        flow_dir_uri, dem_uri_aligned, flow_accum_uri)
    stream_uri = os.path.join(
        args['workspace_dir'], 'stream%s.tif' % file_suffix)
    # NOTE(review): the stream threshold is hard-coded rather than read
    # from args
    threshold_flow_accumulation = 1000
    pygeoprocessing.routing.stream_threshold(
        flow_accum_uri, threshold_flow_accumulation, stream_uri)

    LOGGER.info('calculating flow weights')
    outflow_weights_uri = os.path.join(
        args['workspace_dir'], 'outflow_weights%s.tif' % file_suffix)
    outflow_direction_uri = os.path.join(
        args['workspace_dir'], 'outflow_direction%s.tif' % file_suffix)
    seasonal_water_yield_core.calculate_flow_weights(
        flow_dir_uri, outflow_weights_uri, outflow_direction_uri)

    si_uri = os.path.join(args['workspace_dir'], 'si%s.tif' % file_suffix)

    biophysical_table = pygeoprocessing.geoprocessing.get_lookup_from_table(
        args['biophysical_table_uri'], 'lucode')

    kc_lookup = dict([
        (lucode, biophysical_table[lucode]['kc']) for lucode in
        biophysical_table])

    recharge_avail_uri = os.path.join(
        args['workspace_dir'], 'recharge_avail%s.tif' % file_suffix)
    r_sum_avail_uri = os.path.join(
        args['workspace_dir'], 'r_sum_avail%s.tif' % file_suffix)
    vri_uri = os.path.join(args['workspace_dir'], 'vri%s.tif' % file_suffix)
    aet_uri = os.path.join(args['workspace_dir'], 'aet%s.tif' % file_suffix)

    r_sum_avail_pour_uri = os.path.join(
        args['workspace_dir'], 'r_sum_avail_pour%s.tif' % file_suffix)
    sf_uri = os.path.join(
        args['workspace_dir'], 'sf%s.tif' % file_suffix)
    sf_down_uri = os.path.join(
        args['workspace_dir'], 'sf_down%s.tif' % file_suffix)
    qb_out_uri = os.path.join(
        args['workspace_dir'], 'qb%s.txt' % file_suffix)

    LOGGER.info('classifying kc')
    kc_uri = os.path.join(args['workspace_dir'], 'kc%s.tif' % file_suffix)
    pygeoprocessing.geoprocessing.reclassify_dataset_uri(
        lulc_uri_aligned, kc_lookup, kc_uri, gdal.GDT_Float32, -1)

    LOGGER.info('calculate slow flow')
    if not args['user_defined_recharge']:
        LOGGER.info('loading number of monthly events')
        rain_events_lookup = (
            pygeoprocessing.geoprocessing.get_lookup_from_table(
                args['rain_events_table_uri'], 'month'))
        n_events = dict([
            (month, rain_events_lookup[month]['events'])
            for month in rain_events_lookup])

        LOGGER.info('calculating curve number')
        soil_nodata = pygeoprocessing.get_nodata_from_uri(
            args['soil_group_uri'])
        map_soil_type_to_header = {
            1: 'cn_a',
            2: 'cn_b',
            3: 'cn_c',
            4: 'cn_d',
        }
        cn_nodata = -1
        lulc_to_soil = {}
        lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_uri_aligned)
        # for every soil group build two parallel arrays: the sorted
        # landcover codes (plus the landcover nodata value) and the curve
        # number each of them maps to for that soil group
        for soil_id, soil_column in map_soil_type_to_header.iteritems():
            lulc_to_soil[soil_id] = {
                'lulc_values': [],
                'cn_values': []
            }
            for lucode in sorted(biophysical_table.keys() + [lulc_nodata]):
                try:
                    lulc_to_soil[soil_id]['cn_values'].append(
                        biophysical_table[lucode][soil_column])
                    lulc_to_soil[soil_id]['lulc_values'].append(lucode)
                except KeyError:
                    # only the nodata code may be absent from the table; it
                    # maps to the curve number nodata value
                    if lucode == lulc_nodata:
                        lulc_to_soil[soil_id]['lulc_values'].append(lucode)
                        lulc_to_soil[soil_id]['cn_values'].append(cn_nodata)
                    else:
                        raise
            lulc_to_soil[soil_id]['lulc_values'] = (
                numpy.array(lulc_to_soil[soil_id]['lulc_values'],
                            dtype=numpy.int32))
            lulc_to_soil[soil_id]['cn_values'] = (
                numpy.array(lulc_to_soil[soil_id]['cn_values'],
                            dtype=numpy.float32))

        def cn_op(lulc_array, soil_group_array):
            """Map landcover code and soil group to a curve number.

            Pixels whose soil group equals the soil nodata value keep
            ``cn_nodata``.
            """
            cn_result = numpy.empty(lulc_array.shape)
            cn_result[:] = cn_nodata
            for soil_group_id in numpy.unique(soil_group_array):
                if soil_group_id == soil_nodata:
                    continue
                current_soil_mask = (soil_group_array == soil_group_id)
                # digitize against the sorted landcover codes yields, per
                # pixel, an index into the parallel 'cn_values' array
                index = numpy.digitize(
                    lulc_array.ravel(),
                    lulc_to_soil[soil_group_id]['lulc_values'], right=True)
                cn_values = (
                    lulc_to_soil[soil_group_id]['cn_values'][index]).reshape(
                        lulc_array.shape)
                cn_result[current_soil_mask] = cn_values[current_soil_mask]
            return cn_result

        # NOTE(review): repeats the assignment made before the loop above
        cn_nodata = -1
        pygeoprocessing.vectorize_datasets(
            [lulc_uri_aligned, soil_group_uri_aligned], cn_op, cn_uri,
            gdal.GDT_Float32, cn_nodata, pixel_size, 'intersection',
            vectorize_op=False, datasets_are_pre_aligned=True)

        LOGGER.info('calculate quick flow')
        calculate_quick_flow(
            precip_uri_aligned_list,
            lulc_uri_aligned, cn_uri, n_events, stream_uri, qfi_uri,
            qf_monthly_uri_list, si_uri)

        recharge_uri = os.path.join(
            args['workspace_dir'], 'recharge%s.tif' % file_suffix)
        seasonal_water_yield_core.calculate_recharge(
            precip_uri_aligned_list, et0_uri_aligned_list,
            qf_monthly_uri_list, flow_dir_uri, outflow_weights_uri,
            outflow_direction_uri, dem_uri_aligned, lulc_uri_aligned,
            kc_lookup, alpha_m, beta_i, gamma, stream_uri, recharge_uri,
            recharge_avail_uri, r_sum_avail_uri, aet_uri, kc_uri)
    else:
        recharge_uri = recharge_aligned_uri
        recharge_nodata = pygeoprocessing.geoprocessing.get_nodata_from_uri(
            recharge_uri)

        def calc_recharge_avail(recharge_array):
            """Scale recharge by gamma and clamp negative values to zero.

            Nodata pixels of the input keep the recharge nodata value.
            """
            recharge_threshold = recharge_array * gamma
            recharge_threshold[recharge_threshold < 0] = 0.0
            return numpy.where(
                recharge_array != recharge_nodata, recharge_threshold,
                recharge_nodata)

        # calc recharge avail
        pygeoprocessing.geoprocessing.vectorize_datasets(
            [recharge_aligned_uri], calc_recharge_avail, recharge_avail_uri,
            gdal.GDT_Float32, recharge_nodata, pixel_size, 'intersection',
            vectorize_op=False, datasets_are_pre_aligned=True)

        # calc r_sum_avail with flux accumulation, using a constant zero
        # raster as the absorption input
        loss_uri = pygeoprocessing.geoprocessing.temporary_filename()
        zero_absorption_source_uri = (
            pygeoprocessing.geoprocessing.temporary_filename())
        pygeoprocessing.make_constant_raster_from_base_uri(
            dem_uri_aligned, 0.0, zero_absorption_source_uri)

        pygeoprocessing.routing.route_flux(
            flow_dir_uri, dem_uri_aligned, recharge_avail_uri,
            zero_absorption_source_uri, loss_uri, r_sum_avail_uri, 'flux_only',
            include_source=False)

    # calculate Qb as the mean of recharge_avail over the aoi (total
    # divided by pixel count)
    qb_results = pygeoprocessing.geoprocessing.aggregate_raster_values_uri(
        recharge_avail_uri, args['aoi_uri'])
    qb_result = qb_results.total[9999] / qb_results.n_pixels[9999]
    # 9999 is the value used to index fields if no shapefile ID is provided
    qb_file = open(qb_out_uri, 'w')
    qb_file.write("%f\n" % qb_result)
    qb_file.close()
    LOGGER.info("Qb = %f", qb_result)

    pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri(
        recharge_uri)
    ri_nodata = pygeoprocessing.geoprocessing.get_nodata_from_uri(recharge_uri)

    def vri_op(ri_array):
        """Divide recharge by Qb, leaving nodata pixels as nodata."""
        return numpy.where(
            ri_array != ri_nodata, ri_array / qb_result, ri_nodata)
    pygeoprocessing.geoprocessing.vectorize_datasets(
        [recharge_uri], vri_op, vri_uri,
        gdal.GDT_Float32, ri_nodata, pixel_size, 'intersection',
        vectorize_op=False, datasets_are_pre_aligned=True)

    LOGGER.info('calculating r_sum_avail_pour')
    seasonal_water_yield_core.calculate_r_sum_avail_pour(
        r_sum_avail_uri, outflow_weights_uri, outflow_direction_uri,
        r_sum_avail_pour_uri)

    LOGGER.info('calculating slow flow')
    # NOTE(review): this print looks like leftover debugging output --
    # confirm whether it is still wanted
    print dem_uri_aligned, recharge_avail_uri, r_sum_avail_uri,\
        r_sum_avail_pour_uri, outflow_direction_uri, outflow_weights_uri,\
        stream_uri, sf_uri, sf_down_uri

    seasonal_water_yield_core.route_sf(
        dem_uri_aligned, recharge_avail_uri, r_sum_avail_uri,
        r_sum_avail_pour_uri, outflow_direction_uri, outflow_weights_uri,
        stream_uri, sf_uri, sf_down_uri)

    LOGGER.info(' (\\w/) SWY Complete!')
    LOGGER.info(' (.. \\ ')
    LOGGER.info(' _/ ) \\______')
    LOGGER.info('(oo /\'\\ )`,')
    LOGGER.info(' `--\' (v __( / ||')
    LOGGER.info(' ||| ||| ||')
    LOGGER.info(' //_| //_|')
def _calc_cost_of_per_hectare_inputs(vars_dict, crop, lulc_raster):
    """Build a raster of per-cell input costs for one crop.

    CostPerHectareInputTotal_crop =
        Mask_raster * CostPerHectare_input * ha_per_cell

    The landcover code of ``crop`` is looked up by name in
    ``vars_dict['crop_lookup_dict']``.  Every per-hectare cost column that
    is present for the crop in ``vars_dict['economics_table_dict']`` is
    scaled by the cell area in hectares and summed; a missing column is
    logged as a warning and skipped.  The output holds that sum on pixels
    of the crop, 0.0 on other valid pixels and ``NODATA_FLOAT`` on
    landcover nodata pixels.

    Parameters:
        vars_dict (dict): holds 'crop_lookup_dict' (lucode -> crop name)
            and 'economics_table_dict' (crop name -> economics row)
        crop (string): name of the crop to cost
        lulc_raster: object exposing the landcover raster path as ``.uri``

    Returns:
        ``Raster`` loaded from the temporary cost raster.
    """
    cost_columns = [
        'cost_labor_per_ha', 'cost_machine_per_ha', 'cost_seed_per_ha',
        'cost_irrigation_per_ha']
    ha_per_m2 = 0.0001

    # Resolve the crop name to its landcover code; when several codes share
    # the name the last one iterated is kept, and None is kept when there
    # is no match.
    crop_lucode = None
    for lucode, luname in vars_dict['crop_lookup_dict'].items():
        if luname == crop:
            crop_lucode = lucode

    lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_raster.uri)
    economics_table_crop = vars_dict['economics_table_dict'][crop]
    datatype_out = gdal.GDT_Float32
    nodata_out = NODATA_FLOAT
    pixel_size_out = pygeoprocessing.get_cell_size_from_uri(lulc_raster.uri)
    cell_area_ha = pixel_size_out**2 * ha_per_m2

    # The cost is the same for every pixel of this crop, so it is computed
    # once here rather than per pixel.
    cost_scalar = 0.0
    for key in cost_columns:
        try:
            cost_per_ha = economics_table_crop[key]
        except KeyError:
            LOGGER.warning('Key missing from economics table: %s', key)
        else:
            cost_scalar += (cost_per_ha * cell_area_ha)

    def _calculate_cost(lulc_matrix):
        """Return nodata on nodata pixels, the crop cost on crop pixels
        and 0.0 everywhere else."""
        crop_cost = np.where(lulc_matrix == crop_lucode, cost_scalar, 0.0)
        return np.where(lulc_matrix == lulc_nodata, nodata_out, crop_cost)

    new_raster_uri = pygeoprocessing.geoprocessing.temporary_filename()
    pygeoprocessing.vectorize_datasets(
        [lulc_raster.uri],
        _calculate_cost,
        new_raster_uri,
        datatype_out,
        nodata_out,
        pixel_size_out,
        bounding_box_mode='intersection',
        vectorize_op=False,
        datasets_are_pre_aligned=True
    )

    return Raster.from_file(new_raster_uri, 'GTiff')
out_matrix[:] = -1 matching_landcover_mask = lulc_block == 1 out_matrix[matching_landcover_mask] = dem_block[matching_landcover_mask] return out_matrix out_path = '/shared/mean_elevation_exercise/matching_pixels.tif' out_dir = os.path.dirname(out_path) if not os.path.exists(out_dir): os.makedirs(out_dir) dem_path = '/shared/grasslands_demo/joined_dem.tif' pygeoprocessing.vectorize_datasets( dataset_uri_list=['/data/landcover.tif', dem_path], dataset_pixel_op=_dem_values_under_evergreen_forest, dataset_out_uri=out_path, datatype_out=gdal.GDT_Int16, nodata_out=-1, pixel_size_out=pygeoprocessing.get_cell_size_from_uri(dem_path), bounding_box_mode='intersection') stats = pygeoprocessing.aggregate_raster_values_uri(out_path, '/data/yosemite.shp') print stats.pixel_mean[9999] ################ aligned_lulc = os.path.join(out_dir, 'aligned_lulc.tif') aligned_dem = os.path.join(out_dir, 'aligned_dem.tif') pygeoprocessing.align_dataset_list( datset_uri_list=['/data/landcover.tif', dem_path],
import numpy


def _merge_dems(north_block, south_block):
    """Merge two DEM blocks, preferring the larger value per pixel.

    A pixel is filled wherever at least one input differs from the nodata
    value ``-1``; it receives the element-wise maximum of the two inputs.
    Because the maximum is taken against a ``-1`` nodata value, this relies
    on valid elevations being greater than ``-1``.  All remaining pixels
    are ``-1``.

    Returns:
        ``numpy.ndarray`` of floats with the shape of ``north_block``.
    """
    north_has_data = north_block != -1
    south_has_data = south_block != -1
    any_has_data = north_has_data | south_has_data

    merged = numpy.empty(north_block.shape)
    merged.fill(-1)
    merged[any_has_data] = numpy.maximum(
        north_block[any_has_data], south_block[any_has_data])
    return merged


LOGGER.info('Merging DEMs')
pygeoprocessing.vectorize_datasets(
    dataset_uri_list=[north_dem, south_dem],
    dataset_pixel_op=_merge_dems,
    dataset_out_uri=joined_dem,
    datatype_out=gdal.GDT_Int16,
    nodata_out=-1.0,
    # We could calculate projected units by hand, but this is more
    # convenient.
    pixel_size_out=30.0,
    bounding_box_mode='union',
    vectorize_op=False,
    aoi_uri=yosemite_vector,
)

# With the DEMs merged, derive the slope layer from the joined DEM.
LOGGER.info('Calculating slope')
slope_raster = os.path.join(OUTPUT_DIR, 'slope.tif')
pygeoprocessing.calculate_slope(
    dem_dataset_uri=joined_dem,
    slope_uri=slope_raster)

# Everything is combined in a final call to vectorize_datasets.
LOGGER.info('Finding high-elevation, steep grasslands')
def make_regression(worldview_folder, out_filename, deg=1, weights=None):
    """Calculate a regression between worldview DEMs within a folder.

    The rasters are ordered by the integer prefix of their file name (the
    text before the first ``_``).  Timesteps are the number of days between
    each raster's date, as returned by ``_date_from_filename``, and the
    date of the first raster.  The fit is done per pixel stack with
    ``numpy.polyfit`` and the leading coefficient (the slope when
    ``deg=1``) is multiplied by 365.25 to express it per year.

    Note:
        Any pixel stacks that contain 1 or more nodata values will be
        excluded from the regression calculations and written as ``0``.
        This function assumes that all worldview rasters have a nodata
        value of ``0``.

    Parameters:
        worldview_folder (string): The path to a folder on disk containing
            GeoTiffs representing elevation data.  Any files with a
            ``.tif`` extension will be analyzed within this folder.
        out_filename (string): The path on disk to where the regression
            raster should be stored.
        deg=1 (int): The order of the regression.  Passed directly to
            ``numpy.polyfit`` via the ``deg`` parameter.  1 represents
            linear regression.
        weights=None (``numpy.ndarray`` or ``None``): If None, the inputs
            will be unweighted in the regression.  Otherwise a 1D sequence
            with one weight per raster in ``worldview_folder``; passed to
            ``numpy.polyfit`` via ``w``.

    Raises:
        ``IOError``: When ``worldview_folder`` does not exist.
        ``ValueError``: When no ``.tif`` files are found in
            ``worldview_folder``, or when the length of ``weights`` does
            not equal the number of geotiffs found.

    Returns:
        ``None``
    """
    if not os.path.exists(worldview_folder):
        raise IOError('Folder %s not found' % worldview_folder)

    rasters = sorted(glob.glob(worldview_folder + '/*.tif'),
                     key=lambda x: int(os.path.basename(x).split('_')[0]))
    if not rasters:
        # Fail early with a clear message instead of an opaque error from
        # min() on an empty list further down.
        raise ValueError('No .tif rasters found in %s' % worldview_folder)

    LOGGER.info('Using rasters %s', rasters)
    raster_dates = [_date_from_filename(r) for r in rasters]
    timesteps = numpy.array(
        [(d - raster_dates[0]).days for d in raster_dates])
    LOGGER.info('Timesteps: %s', timesteps)

    # ``weights`` is compared against None explicitly: evaluating the truth
    # value of a multi-element ndarray raises ValueError.
    if weights is not None and len(weights) != len(timesteps):
        raise ValueError(('Weights length (%s) does not match timesteps '
                          'length (%s)') % (len(weights), len(timesteps)))

    def _regression(*blocks):
        """Compute the per-pixel regression from a stack of DEM matrices.

        Note:
            Any pixel stacks that contain 1 or more values of 0 will have
            an output value of ``0``.

        Parameters:
            blocks (list): 2D ``numpy.ndarray`` instances, one per raster
                passed to the ``vectorize_datasets`` call.

        Returns:
            2D ``numpy.ndarray`` holding the leading polynomial
            coefficient of each pixel stack multiplied by 365.25.
        """
        stacked_array = numpy.dstack(blocks)
        n_pixels = stacked_array.shape[0] * stacked_array.shape[1]
        # polyfit expects one column per pixel and one row per timestep.
        reshaped = numpy.swapaxes(
            numpy.reshape(stacked_array, (n_pixels, len(timesteps))), 0, 1)

        regression = numpy.polyfit(timesteps, reshaped, deg=deg,
                                   w=weights)[0]
        out_block = regression.reshape(blocks[0].shape)

        # Mask out any pixel stack holding a nodata (0) value.  Testing for
        # any zero, rather than a minimum of zero, also catches stacks that
        # contain negative values.  The trend is multiplied by 365.25 to
        # convert a per-day into a per-year value.
        has_nodata = numpy.any(stacked_array == 0, axis=2)
        return numpy.where(has_nodata, 0, out_block * 365.25)

    raster_cell_sizes = [pygeoprocessing.get_cell_size_from_uri(r)
                         for r in rasters]
    min_cell_size = min(raster_cell_sizes)
    if len(set(raster_cell_sizes)) != 1:
        warnings.warn(('Cell sizes of input rasters do not all match. '
                       'Using min pixelsize of %s. Mismatched values: %s')
                      % (min_cell_size, set(raster_cell_sizes)))

    pygeoprocessing.vectorize_datasets(
        dataset_uri_list=rasters,
        dataset_pixel_op=_regression,
        dataset_out_uri=out_filename,
        datatype_out=gdal.GDT_Float32,
        nodata_out=0,
        pixel_size_out=min_cell_size,
        bounding_box_mode='intersection',
        vectorize_op=False,
        datasets_are_pre_aligned=False)
def execute(args):
    """InVEST Carbon Edge Model calculates the carbon due to edge effects in
    forest pixels.

    Parameters:
        args['workspace_dir'] (string): a uri to the directory that will write
            output and other temporary files during calculation. (required)
        args['results_suffix'] (string): a string to append to any output file
            name (optional).  A leading ``'_'`` is added if it is missing.
        args['n_nearest_model_points'] (int): number of nearest neighbor model
            points to search for
        args['aoi_uri'] (string): (optional) if present and non-empty, a path
            to a shapefile that will be used to aggregate carbon stock
            results at the end of the run.
        args['biophysical_table_uri'] (string): a path to a CSV table that has
            at least a header for an 'lucode', 'is_forest', and 'c_above'.

                'lucode': an integer that corresponds to landcover codes in
                    the raster args['lulc_uri']
                'is_forest': either 0 or 1 indicating whether the landcover
                    type is forest (1) or not (0).  If 1, the value in c_above
                    is ignored and instead calculated from the edge regression
                    model.
                'c_above': floating point number indicating tons of carbon per
                    hectare for that landcover type

                Example:
                    lucode, is_forest, c_above
                    0,0,32.8
                    1,1,n/a
                    2,1,n/a
                    16,0,28.1

                    Note the "n/a" are optional since that field is ignored
                    when is_forest==1.
        args['lulc_uri'] (string): path to a integer landcover code raster
        args['forest_edge_carbon_model_shape_uri'] (string): path to a
            shapefile that defines the regions for the local carbon edge
            models.  Has at least the fields 'method', 'theta1', 'theta2',
            'theta3'.  Where 'method' is an int between 1..3 describing the
            biomass regression model, and the thetas are floating point
            numbers that have different meanings depending on the 'method'
            parameter.  Specifically,

                method 1 asymptotic model:
                    biomass = theta1 - theta2 * exp(-theta3 * edge_dist_km)
                method 2 logarithmic model:
                    biomass = theta1 + theta2 * numpy.log(edge_dist_km)
                    (theta3 is ignored for this method)
                method 3 linear regression:
                    biomass = theta1 + theta2 * edge_dist_km
        args['biomass_to_carbon_conversion_factor'] (string/float): Number by
            which to multiply forest biomass to convert to carbon in the edge
            effect calculation.

    Raises:
        ``KeyError``: When a required key is missing from ``args``.

    Returns:
        None"""
    output_dir = args['workspace_dir']
    intermediate_dir = os.path.join(output_dir, 'intermediate_outputs')
    pygeoprocessing.create_directories([output_dir, intermediate_dir])

    # The suffix is optional; a missing, empty, or None value means no
    # suffix.  Otherwise make sure it is separated from the base name by '_'.
    file_suffix = args.get('results_suffix') or ''
    if file_suffix and not file_suffix.startswith('_'):
        file_suffix = '_' + file_suffix

    # used to keep track of files generated by this module
    output_file_registry = {
        'non_forest_carbon_stocks': os.path.join(
            intermediate_dir,
            'non_forest_carbon_stocks%s.tif' % file_suffix),
        'edge_distance': os.path.join(
            intermediate_dir, 'edge_distance%s.tif' % file_suffix),
        'forest_edge_carbon_map': os.path.join(
            intermediate_dir,
            'forest_edge_carbon_stocks%s.tif' % file_suffix),
        'carbon_map': os.path.join(
            output_dir, 'carbon_map%s.tif' % file_suffix),
        # The suffix is applied here too so that every output of a run
        # carries it, as documented for args['results_suffix'].
        'aoi_datasource': os.path.join(
            output_dir, 'aggregated_carbon_stocks%s.shp' % file_suffix),
    }

    # Map non-forest landcover codes to carbon biomasses
    LOGGER.info('calculating non-forest carbon')
    _calculate_lulc_carbon_map(
        args['lulc_uri'], args['biophysical_table_uri'],
        output_file_registry['non_forest_carbon_stocks'])

    # generate a map of pixel distance to forest edge from the landcover map
    LOGGER.info('calculating distance from forest edge')
    _map_distance_from_forest_edge(
        args['lulc_uri'], args['biophysical_table_uri'],
        output_file_registry['edge_distance'])

    # Build spatial index over the forest edge carbon model shapes; the
    # number of nearest points actually queried is
    # args['n_nearest_model_points'], passed in below.
    LOGGER.info('Building spatial index for forest edge models.')
    kd_tree, theta_model_parameters, method_model_parameter = (
        _build_spatial_index(
            args['lulc_uri'], intermediate_dir,
            args['forest_edge_carbon_model_shape_uri']))

    # calculate the edge carbon effect on forests
    LOGGER.info('calculating forest edge carbon')
    _calculate_forest_edge_carbon_map(
        output_file_registry['edge_distance'],
        kd_tree, theta_model_parameters, method_model_parameter,
        int(args['n_nearest_model_points']),
        float(args['biomass_to_carbon_conversion_factor']),
        output_file_registry['forest_edge_carbon_map'])

    # combine maps into output
    LOGGER.info('combining forest and non forest carbon into single raster')
    cell_size_in_meters = pygeoprocessing.get_cell_size_from_uri(
        args['lulc_uri'])
    carbon_edge_nodata = pygeoprocessing.get_nodata_from_uri(
        output_file_registry['forest_edge_carbon_map'])

    def combine_carbon_maps(non_forest_carbon, forest_carbon):
        """This combines the forest and non forest maps into one.

        Wherever the forest edge carbon map is nodata the non-forest
        carbon value is used; everywhere else the forest value wins.
        """
        return numpy.where(
            forest_carbon == carbon_edge_nodata,
            non_forest_carbon, forest_carbon)

    pygeoprocessing.vectorize_datasets(
        [output_file_registry['non_forest_carbon_stocks'],
         output_file_registry['forest_edge_carbon_map']],
        combine_carbon_maps, output_file_registry['carbon_map'],
        gdal.GDT_Float32, carbon_edge_nodata, cell_size_in_meters,
        'intersection', vectorize_op=False, datasets_are_pre_aligned=True)

    # generate report (optional) by aoi if they exist; an empty or None
    # 'aoi_uri' is treated the same as an absent one.
    aoi_uri = args.get('aoi_uri')
    if aoi_uri:
        LOGGER.info('aggregating carbon map by aoi')
        _aggregate_carbon_map(
            aoi_uri, output_file_registry['carbon_map'],
            output_file_registry['aoi_datasource'])