def _compare_with_make_stack(stack_trend_file, pgp_trend_file, diff_file):
    """Compare trend of ``make_regression`` with trend from ``make_stack.py``.

    Comparison is done as a per-pixel diff on any pixel pairs where both
    pixels are not nodata.  If either pixel in a pixel stack is nodata, the
    stack is ignored and nodata is returned for that pixel value.

    The diff looks like this::

        diff_file = stack_trend_file - pgp_trend_file

    Parameters:
        stack_trend_file (string): The path to the trend raster output of
            ``make_stack.py`` (usually named ``stack_trend.tif``).  This
            file must exist on disk.
        pgp_trend_file (string): The path to the trend raster output from
            ``make_regression()``, also in this module.  This file must
            exist on disk.
        diff_file (string): The path to where the difference raster should
            be saved.

    Returns:
        ``None``"""
    stack_nodata = pygeoprocessing.get_nodata_from_uri(stack_trend_file)
    pgp_nodata = pygeoprocessing.get_nodata_from_uri(pgp_trend_file)

    def _diff(stack_trend, pgp_trend):
        """Calculate a diff between two matrices, ignoring nodata.

        Parameters:
            stack_trend (numpy.ndarray): Array of values from the stack
                trend raster.
            pgp_trend (numpy.ndarray): Array of values from the
                pygeoprocessing trend raster.

        Returns:
            ``numpy.ndarray`` of the difference between ``stack_trend`` and
            ``pgp_trend``"""
        valid_mask = ((stack_trend != stack_nodata) &
                      (pgp_trend != pgp_nodata))
        out_array = numpy.empty_like(stack_trend)
        out_array[:] = -9999
        out_array[valid_mask] = (
            stack_trend[valid_mask] - pgp_trend[valid_mask])
        return out_array

    pygeoprocessing.vectorize_datasets(
        dataset_uri_list=[stack_trend_file, pgp_trend_file],
        dataset_pixel_op=_diff,
        dataset_out_uri=diff_file,
        datatype_out=gdal.GDT_Float32,
        nodata_out=-9999,
        pixel_size_out=32.,
        bounding_box_mode='intersection',
        vectorize_op=False,
        datasets_are_pre_aligned=False)
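
# A minimal usage sketch for _compare_with_make_stack.  The file names here
# are hypothetical; the two trend rasters are assumed to already exist on
# disk with matching extents and projections, and diff.tif receives
# stack_trend - pgp_trend with -9999 wherever either input is nodata.
def _example_compare():
    _compare_with_make_stack(
        'stack_trend.tif',       # output of make_stack.py (hypothetical)
        'regression_trend.tif',  # output of make_regression() (hypothetical)
        'diff.tif')              # per-pixel difference written here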
def main():
    src_ds = 'landuse_cur_200m.tif'
    dest_ds = 'gaussian.tif'
    nodata = pygeoprocessing.get_nodata_from_uri(src_ds)
    pygeoprocessing.gaussian_filter_dataset_uri(
        src_ds, 4, dest_ds, nodata)
def _map_distance_from_forest_edge(lulc_uri, biophysical_table_uri,
                                   edge_distance_uri):
    """Generate a raster of forest edge distances where each pixel is the
    distance to the edge of the forest in meters.

    Parameters:
        lulc_uri (string): path to the landcover raster that contains
            integer landcover codes
        biophysical_table_uri (string): a path to a csv table that indexes
            landcover codes to forest type, contains at least the fields
            'lucode' (landcover integer code) and 'is_forest' (0 or 1
            depending on landcover code type)
        edge_distance_uri (string): path to output raster where each pixel
            contains the Euclidean pixel distance to the nearest forest edge
            on all non-nodata values of lulc_uri

    Returns:
        None"""
    # Build a list of forest lucodes
    biophysical_table = pygeoprocessing.get_lookup_from_table(
        biophysical_table_uri, 'lucode')
    forest_codes = [
        lucode for (lucode, ludata) in biophysical_table.iteritems()
        if int(ludata['is_forest']) == 1]

    # Make a raster where 1 is non-forest landcover types and 0 is forest
    forest_mask_nodata = 255
    lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_uri)

    def mask_non_forest_op(lulc_array):
        """Map forest lulc codes to 0, everything else to 1, pass nodata."""
        non_forest_mask = ~numpy.in1d(
            lulc_array.flatten(), forest_codes).reshape(lulc_array.shape)
        nodata_mask = lulc_array == lulc_nodata
        return numpy.where(nodata_mask, forest_mask_nodata, non_forest_mask)

    non_forest_mask_uri = pygeoprocessing.temporary_filename()
    out_pixel_size = pygeoprocessing.get_cell_size_from_uri(lulc_uri)
    pygeoprocessing.vectorize_datasets(
        [lulc_uri], mask_non_forest_op, non_forest_mask_uri, gdal.GDT_Byte,
        forest_mask_nodata, out_pixel_size, "intersection",
        vectorize_op=False)

    # Do the distance transform on non-forest pixels
    pygeoprocessing.distance_transform_edt(
        non_forest_mask_uri, edge_distance_uri)

    # good practice to delete temporary files when we're done with them
    os.remove(non_forest_mask_uri)
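
# Illustrative sketch (not part of the model) of how the numpy.in1d mask in
# mask_non_forest_op behaves on a toy landcover block.  Codes 1 and 2 stand
# in for forest classes and 255 for nodata; these values are assumptions for
# the example only.
import numpy

toy_lulc = numpy.array([[1, 2, 3], [4, 1, 255]])
toy_forest_codes = [1, 2]
toy_nodata = 255
toy_non_forest = ~numpy.in1d(toy_lulc.flatten(), toy_forest_codes).reshape(
    toy_lulc.shape)
toy_masked = numpy.where(toy_lulc == toy_nodata, toy_nodata, toy_non_forest)
# toy_masked is [[0, 0, 1], [1, 0, 255]]: forest -> 0, non-forest -> 1,
# nodata passes through as 255.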
def main():
    system = platform.platform()
    logfile_uri = 'md5_check_%s.log' % system
    logfile = open(logfile_uri, 'w')
    _write = lambda x: logfile.write(x + '\n')

    for base_raster in ['landuse_cur_200m.tif', 'gaussian.tif']:
        if base_raster == 'gaussian.tif':
            src_ds = 'landuse_cur_200m.tif'
            dest_ds = 'gaussian.tif'
            nodata = pygeoprocessing.get_nodata_from_uri(src_ds)
            pygeoprocessing.gaussian_filter_dataset_uri(
                src_ds, 4, dest_ds, nodata)

        _write(base_raster + '\n')
        base_nodata = pygeoprocessing.get_nodata_from_uri(base_raster)
        base_pixel_size = pygeoprocessing.get_cell_size_from_uri(base_raster)
        _write('System: %s' % system)
        _write('Python %s' % platform.python_version())
        _write('GDAL version: %s' % gdal.__version__)
        _write('numpy version: %s' % numpy.__version__)
        _write('scipy version: %s' % scipy.__version__)
        _write('base MD5sum: %s' % md5sum(base_raster))

        for gdal_type, gdal_type_label in GDAL_DTYPES.iteritems():
            if gdal_type_label in ['GDT_Unknown', 'GDT_TypeCount']:
                continue
            print gdal_type_label

            # convert the raster (via vectorize_datasets) to a new dtype
            new_uri = '%s.tif' % gdal_type_label
            pygeoprocessing.vectorize_datasets(
                [base_raster], lambda x: x, new_uri, gdal_type, base_nodata,
                base_pixel_size, 'intersection')
            _write("%-15s: %s" % (gdal_type_label, md5sum(new_uri)))
        _write('\n')
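
# md5sum() is referenced above but not defined in this snippet.  A minimal
# sketch of such a helper, assuming it simply hashes the raw file contents
# with hashlib; the real helper may differ (for example, it could hash only
# raster band data rather than the whole file).
import hashlib

def md5sum(path, chunk_size=2**20):
    """Return the hex MD5 digest of the file at `path`."""
    digest = hashlib.md5()
    with open(path, 'rb') as open_file:
        for chunk in iter(lambda: open_file.read(chunk_size), ''):
            digest.update(chunk)
    return digest.hexdigest()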
def test_raster_nodata_notset(self):
    """When nodata=None, a nodata value should not be set."""
    from pygeoprocessing.testing import create_raster_on_disk
    from pygeoprocessing.testing.sampledata import SRS_WILLAMETTE

    pixels = [numpy.array([[0]])]
    nodata = None
    reference = SRS_WILLAMETTE
    filename = pygeoprocessing.temporary_filename()
    create_raster_on_disk(
        pixels, reference.origin, reference.projection, nodata,
        reference.pixel_size(30), datatype='auto', filename=filename)

    set_nodata_value = pygeoprocessing.get_nodata_from_uri(filename)
    self.assertEqual(set_nodata_value, None)
def test_get_nodata(self):
    """PGP.geoprocessing: Test nodata values get set and read."""
    pixel_matrix = numpy.ones((5, 5), numpy.int16)
    reference = sampledata.SRS_COLOMBIA

    for nodata in [5, 10, -5, 9999]:
        pygeoprocessing.testing.create_raster_on_disk(
            [pixel_matrix], reference.origin, reference.projection, nodata,
            reference.pixel_size(30), filename=self.raster_filename)

        raster_nodata = pygeoprocessing.get_nodata_from_uri(
            self.raster_filename)
        self.assertEqual(raster_nodata, nodata)
def _sort_to_disk(dataset_uri, score_weight=1.0):
    """Return an iterable of non-nodata pixels in sorted order.

    Parameters:
        dataset_uri (string): a path to a floating point GDAL dataset
        score_weight (float): a number to multiply all values by, which can
            be used to reverse the order of the iteration if negative.

    Returns:
        an iterable that produces (value * score_weight, flat_index) in
        decreasing sorted order by value * score_weight
    """
    def _read_score_index_from_disk(
            score_file_path, index_file_path):
        """Generator to yield a float/int value from the given filenames.

        Reads a buffer of `read_buffer_size` at a time to avoid keeping the
        files open between generations.

        Parameters:
            score_file_path (string): a path to a file that has 32 bit
                floats packed consecutively
            index_file_path (string): a path to a file that has 32 bit ints
                packed consecutively

        Yields:
            next (score, index) tuple in the given score and index files.
        """
        try:
            score_buffer = ''
            index_buffer = ''
            file_offset = 0
            buffer_offset = 0  # initialize to 0 to trigger the first load

            # ensure buffer size that is not a perfect multiple of 4
            read_buffer_size = int(math.sqrt(_BLOCK_SIZE))
            read_buffer_size = read_buffer_size - read_buffer_size % 4

            while True:
                if buffer_offset == len(score_buffer):
                    score_file = open(score_file_path, 'rb')
                    index_file = open(index_file_path, 'rb')
                    score_file.seek(file_offset)
                    index_file.seek(file_offset)

                    score_buffer = score_file.read(read_buffer_size)
                    index_buffer = index_file.read(read_buffer_size)
                    score_file.close()
                    index_file.close()

                    file_offset += read_buffer_size
                    buffer_offset = 0
                packed_score = score_buffer[buffer_offset:buffer_offset+4]
                packed_index = index_buffer[buffer_offset:buffer_offset+4]
                buffer_offset += 4
                if not packed_score:
                    break
                yield (struct.unpack('f', packed_score)[0],
                       struct.unpack('i', packed_index)[0])
        finally:
            # deletes the files when generator goes out of scope or ends
            os.remove(score_file_path)
            os.remove(index_file_path)

    def _sort_cache_to_iterator(
            index_cache, score_cache):
        """Flush the current cache to disk and return an iterator over it.

        Parameters:
            index_cache (1d numpy.array): contains flat indexes to the score
                pixels `score_cache`
            score_cache (1d numpy.array): contains score pixels

        Returns:
            Iterable to visit scores/indexes in increasing score order.
        """
        # sort the whole bunch to disk
        score_file = tempfile.NamedTemporaryFile(delete=False)
        index_file = tempfile.NamedTemporaryFile(delete=False)

        sort_index = score_cache.argsort()
        score_cache = score_cache[sort_index]
        index_cache = index_cache[sort_index]
        for index in xrange(0, score_cache.size, _LARGEST_STRUCT_PACK):
            score_block = score_cache[index:index+_LARGEST_STRUCT_PACK]
            index_block = index_cache[index:index+_LARGEST_STRUCT_PACK]
            score_file.write(
                struct.pack('%sf' % score_block.size, *score_block))
            index_file.write(
                struct.pack('%si' % index_block.size, *index_block))

        score_file_path = score_file.name
        index_file_path = index_file.name
        score_file.close()
        index_file.close()

        return _read_score_index_from_disk(score_file_path, index_file_path)

    nodata = pygeoprocessing.get_nodata_from_uri(dataset_uri)
    nodata *= score_weight  # scale the nodata so they can be filtered out

    # This will be a list of file iterators we'll pass to heap.merge
    iters = []

    _, n_cols = pygeoprocessing.get_row_col_from_uri(dataset_uri)

    for scores_data, scores_block in pygeoprocessing.iterblocks(
            dataset_uri, largest_block=_BLOCK_SIZE):
        # flatten and scale the results
        scores_block = scores_block.flatten() * score_weight

        col_coords, row_coords = numpy.meshgrid(
            xrange(scores_data['xoff'],
                   scores_data['xoff'] + scores_data['win_xsize']),
            xrange(scores_data['yoff'],
                   scores_data['yoff'] + scores_data['win_ysize']))

        # flat index = col + row * n_cols (a worked example of this
        # arithmetic appears just after this function)
        flat_indexes = (col_coords + row_coords * n_cols).flatten()

        sort_index = scores_block.argsort()
        sorted_scores = scores_block[sort_index]
        sorted_indexes = flat_indexes[sort_index]

        # search for nodata values so we can splice them out
        left_index = numpy.searchsorted(sorted_scores, nodata, side='left')
        right_index = numpy.searchsorted(
            sorted_scores, nodata, side='right')

        # remove nodata values
        score_cache = numpy.concatenate(
            (sorted_scores[0:left_index], sorted_scores[right_index::]))
        index_cache = numpy.concatenate(
            (sorted_indexes[0:left_index], sorted_indexes[right_index::]))

        iters.append(_sort_cache_to_iterator(index_cache, score_cache))

    return heapq.merge(*iters)
def _convert_landscape(
        base_lulc_uri, replacement_lucode, area_to_convert,
        focal_landcover_codes, convertible_type_list, score_weight, n_steps,
        smooth_distance_from_edge_uri, output_landscape_raster_uri,
        stats_uri):
    """Expand replacement lucodes in relation to the focal lucodes.

    If the sign on `score_weight` is positive, expansion marches away from
    the focal types, while if `score_weight` is negative conversion marches
    toward the focal types.

    Parameters:
        base_lulc_uri (string): path to landcover raster that will be used
            as the base landcover map to agriculture pixels
        replacement_lucode (int): agriculture landcover code type found in
            the raster at `base_lulc_uri`
        area_to_convert (float): area (Ha) to convert to agriculture
        focal_landcover_codes (list of int): landcover codes that are used
            to calculate proximity
        convertible_type_list (list of int): landcover codes that are
            allowable to be converted to agriculture
        score_weight (float): this value is used to multiply the distance
            from the focal landcover types when prioritizing which pixels in
            `convertible_type_list` are to be converted.  If negative,
            conversion occurs toward the focal types; if positive, away from
            the focal types.
        n_steps (int): number of steps to convert the landscape.  On each
            step the distance transform will be applied on the current value
            of the `focal_landcover_codes` pixels in
            `output_landscape_raster_uri`.  On the first step the distance
            is calculated from `base_lulc_uri`.
        smooth_distance_from_edge_uri (string): an intermediate output
            showing the pixel distance from the edge of the base landcover
            types
        output_landscape_raster_uri (string): an output raster that will
            contain the final fragmented forest layer.
        stats_uri (string): a path to an output csv that records the number,
            type, and area of pixels converted in
            `output_landscape_raster_uri`

    Returns:
        None.
    """
    tmp_file_registry = {
        'non_base_mask': pygeoprocessing.temporary_filename(),
        'base_mask': pygeoprocessing.temporary_filename(),
        'gaussian_kernel': pygeoprocessing.temporary_filename(),
        'distance_from_base_mask_edge': pygeoprocessing.temporary_filename(),
        'distance_from_non_base_mask_edge':
            pygeoprocessing.temporary_filename(),
        'convertible_distances': pygeoprocessing.temporary_filename(),
        'smooth_distance_from_edge': pygeoprocessing.temporary_filename(),
        'distance_from_edge': pygeoprocessing.temporary_filename(),
    }
    # a sigma of 1.0 gives nice visual results to smooth pixel level
    # artifacts since a pixel is the 1.0 unit
    _make_gaussian_kernel_uri(1.0, tmp_file_registry['gaussian_kernel'])

    # create the output raster first as a copy of the base landcover so it
    # can be looped on for each step
    lulc_nodata = pygeoprocessing.get_nodata_from_uri(base_lulc_uri)
    pixel_size_out = pygeoprocessing.get_cell_size_from_uri(base_lulc_uri)
    mask_nodata = 2
    pygeoprocessing.vectorize_datasets(
        [base_lulc_uri], lambda x: x, output_landscape_raster_uri,
        gdal.GDT_Int32, lulc_nodata, pixel_size_out, "intersection",
        vectorize_op=False, datasets_are_pre_aligned=True)

    # convert everything furthest from edge for each of n_steps
    # (a worked area-to-pixel example appears just after this function)
    pixel_area_ha = (
        pygeoprocessing.get_cell_size_from_uri(base_lulc_uri)**2 / 10000.0)
    max_pixels_to_convert = int(math.ceil(area_to_convert / pixel_area_ha))
    convertible_type_nodata = -1
    pixels_left_to_convert = max_pixels_to_convert
    pixels_to_convert = max_pixels_to_convert / n_steps
    stats_cache = collections.defaultdict(int)

    # pylint complains when these are defined inside the loop
    invert_mask = None
    distance_nodata = None

    for step_index in xrange(n_steps):
        LOGGER.info('step %d of %d', step_index+1, n_steps)
        pixels_left_to_convert -= pixels_to_convert

        # Often the last segment of the steps will overstep the number of
        # pixels to convert; this check converts the exact amount
        if pixels_left_to_convert < 0:
            pixels_to_convert += pixels_left_to_convert

        # create distance transforms for inside and outside the base lulc
        # codes
        LOGGER.info('create distance transform for current landcover')
        for invert_mask, mask_id, distance_id in [
                (False, 'non_base_mask',
                 'distance_from_non_base_mask_edge'),
                (True, 'base_mask', 'distance_from_base_mask_edge')]:

            def _mask_base_op(lulc_array):
                """Create a mask of valid non-base pixels only."""
                base_mask = numpy.in1d(
                    lulc_array.flatten(), focal_landcover_codes).reshape(
                        lulc_array.shape)
                if invert_mask:
                    base_mask = ~base_mask
                return numpy.where(
                    lulc_array == lulc_nodata, mask_nodata, base_mask)

            pygeoprocessing.vectorize_datasets(
                [output_landscape_raster_uri], _mask_base_op,
                tmp_file_registry[mask_id], gdal.GDT_Byte, mask_nodata,
                pixel_size_out, "intersection", vectorize_op=False,
                datasets_are_pre_aligned=True)

            # create distance transform for the current mask
            pygeoprocessing.distance_transform_edt(
                tmp_file_registry[mask_id], tmp_file_registry[distance_id])

        # combine inner and outer distance transforms into one
        distance_nodata = pygeoprocessing.get_nodata_from_uri(
            tmp_file_registry['distance_from_base_mask_edge'])

        def _combine_masks(base_distance_array, non_base_distance_array):
            """Combine the inner and outer distance transforms."""
            result = non_base_distance_array
            valid_base_mask = base_distance_array > 0.0
            result[valid_base_mask] = base_distance_array[valid_base_mask]
            return result

        pygeoprocessing.vectorize_datasets(
            [tmp_file_registry['distance_from_base_mask_edge'],
             tmp_file_registry['distance_from_non_base_mask_edge']],
            _combine_masks, tmp_file_registry['distance_from_edge'],
            gdal.GDT_Float32, distance_nodata, pixel_size_out,
            "intersection", vectorize_op=False,
            datasets_are_pre_aligned=True)

        # smooth the distance transform to avoid scanline artifacts
        pygeoprocessing.convolve_2d_uri(
            tmp_file_registry['distance_from_edge'],
            tmp_file_registry['gaussian_kernel'],
            smooth_distance_from_edge_uri)

        # turn inside and outside masks into a single mask
        def _mask_to_convertible_codes(distance_from_base_edge, lulc):
            """Mask out the distance transform to a set of lucodes."""
            convertible_mask = numpy.in1d(
                lulc.flatten(), convertible_type_list).reshape(lulc.shape)
            return numpy.where(
                convertible_mask, distance_from_base_edge,
                convertible_type_nodata)

        pygeoprocessing.vectorize_datasets(
            [smooth_distance_from_edge_uri, output_landscape_raster_uri],
            _mask_to_convertible_codes,
            tmp_file_registry['convertible_distances'], gdal.GDT_Float32,
            convertible_type_nodata, pixel_size_out, "intersection",
            vectorize_op=False, datasets_are_pre_aligned=True)

        LOGGER.info(
            'convert %d pixels to lucode %d', pixels_to_convert,
            replacement_lucode)
        _convert_by_score(
            tmp_file_registry['convertible_distances'], pixels_to_convert,
            output_landscape_raster_uri, replacement_lucode, stats_cache,
            score_weight)

    _log_stats(stats_cache, pixel_area_ha, stats_uri)
    for filename in tmp_file_registry.values():
        os.remove(filename)
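
# Worked sketch of the area-to-pixel conversion above, assuming a
# hypothetical 30 m landcover grid: each pixel covers 30 * 30 = 900 m^2 =
# 0.09 Ha, so converting 1,000 Ha requires ceil(1000 / 0.09) = 11112 pixels,
# spread over the n_steps conversion passes.
import math

example_cell_size_m = 30.0  # hypothetical pixel size
example_pixel_area_ha = example_cell_size_m**2 / 10000.0  # 0.09 Ha
example_max_pixels = int(math.ceil(1000.0 / example_pixel_area_ha))  # 11112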
def execute(args): """This function invokes the seasonal water yield model given URI inputs of files. It may write log, warning, or error messages to stdout. """ alpha_m = float(fractions.Fraction(args['alpha_m'])) beta_i = float(fractions.Fraction(args['beta_i'])) gamma = float(fractions.Fraction(args['gamma'])) try: file_suffix = args['results_suffix'] if file_suffix != "" and not file_suffix.startswith('_'): file_suffix = '_' + file_suffix except KeyError: file_suffix = '' pygeoprocessing.geoprocessing.create_directories([args['workspace_dir']]) qfi_uri = os.path.join(args['workspace_dir'], 'qf%s.tif' % file_suffix) cn_uri = os.path.join(args['workspace_dir'], 'cn%s.tif' % file_suffix) lulc_uri_aligned = pygeoprocessing.temporary_filename() dem_uri_aligned = pygeoprocessing.temporary_filename() pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri( args['lulc_uri']) LOGGER.info('Aligning and clipping dataset list') input_align_list = [args['lulc_uri'], args['dem_uri']] output_align_list = [lulc_uri_aligned, dem_uri_aligned] if not args['user_defined_recharge']: precip_uri_list = [] et0_uri_list = [] et0_dir_list = [ os.path.join(args['et0_dir'], f) for f in os.listdir(args['et0_dir'])] precip_dir_list = [ os.path.join(args['precip_dir'], f) for f in os.listdir( args['precip_dir'])] qf_monthly_uri_list = [] for m_index in range(1, N_MONTHS + 1): qf_monthly_uri_list.append( os.path.join( args['workspace_dir'], 'qf_%d%s.tif' % (m_index, file_suffix))) for month_index in range(1, N_MONTHS + 1): month_file_match = re.compile(r'.*[^\d]%d\.[^.]+$' % month_index) for data_type, dir_list, uri_list in [ ('et0', et0_dir_list, et0_uri_list), ('Precip', precip_dir_list, precip_uri_list)]: file_list = [x for x in dir_list if month_file_match.match(x)] if len(file_list) == 0: raise ValueError( "No %s found for month %d" % (data_type, month_index)) if len(file_list) > 1: raise ValueError( "Ambiguous set of files found for month %d: %s" % (month_index, file_list)) uri_list.append(file_list[0]) soil_group_uri_aligned = pygeoprocessing.temporary_filename() #pre align all the datasets precip_uri_aligned_list = [ pygeoprocessing.geoprocessing.temporary_filename() for _ in range(len(precip_uri_list))] et0_uri_aligned_list = [ pygeoprocessing.geoprocessing.temporary_filename() for _ in range(len(precip_uri_list))] input_align_list = ( precip_uri_list + [args['soil_group_uri']] + et0_uri_list + input_align_list) output_align_list = ( precip_uri_aligned_list + [soil_group_uri_aligned] + et0_uri_aligned_list + output_align_list) interpolate_list = ['nearest'] * len(input_align_list) align_index = 0 if args['user_defined_recharge']: input_align_list.append(args['recharge_uri']) recharge_aligned_uri = ( pygeoprocessing.geoprocessing.temporary_filename()) output_align_list.append(recharge_aligned_uri) interpolate_list.append('nearest') align_index = len(interpolate_list) - 1 pygeoprocessing.geoprocessing.align_dataset_list( input_align_list, output_align_list, interpolate_list, pixel_size, 'intersection', align_index, aoi_uri=args['aoi_uri'], assert_datasets_projected=True) flow_dir_uri = os.path.join( args['workspace_dir'], 'flow_dir%s.tif' % file_suffix) LOGGER.info('calc flow direction') pygeoprocessing.routing.flow_direction_d_inf(dem_uri_aligned, flow_dir_uri) flow_accum_uri = os.path.join( args['workspace_dir'], 'flow_accum%s.tif' % file_suffix) LOGGER.info('calc flow accumulation') pygeoprocessing.routing.flow_accumulation( flow_dir_uri, dem_uri_aligned, flow_accum_uri) stream_uri = os.path.join( 
args['workspace_dir'], 'stream%s.tif' % file_suffix) threshold_flow_accumulation = 1000 pygeoprocessing.routing.stream_threshold( flow_accum_uri, threshold_flow_accumulation, stream_uri) LOGGER.info('calculating flow weights') outflow_weights_uri = os.path.join( args['workspace_dir'], 'outflow_weights%s.tif' % file_suffix) outflow_direction_uri = os.path.join( args['workspace_dir'], 'outflow_direction%s.tif' % file_suffix) seasonal_water_yield_core.calculate_flow_weights( flow_dir_uri, outflow_weights_uri, outflow_direction_uri) si_uri = os.path.join(args['workspace_dir'], 'si%s.tif' % file_suffix) biophysical_table = pygeoprocessing.geoprocessing.get_lookup_from_table( args['biophysical_table_uri'], 'lucode') kc_lookup = dict([ (lucode, biophysical_table[lucode]['kc']) for lucode in biophysical_table]) recharge_avail_uri = os.path.join( args['workspace_dir'], 'recharge_avail%s.tif' % file_suffix) r_sum_avail_uri = os.path.join( args['workspace_dir'], 'r_sum_avail%s.tif' % file_suffix) vri_uri = os.path.join(args['workspace_dir'], 'vri%s.tif' % file_suffix) aet_uri = os.path.join(args['workspace_dir'], 'aet%s.tif' % file_suffix) r_sum_avail_pour_uri = os.path.join( args['workspace_dir'], 'r_sum_avail_pour%s.tif' % file_suffix) sf_uri = os.path.join( args['workspace_dir'], 'sf%s.tif' % file_suffix) sf_down_uri = os.path.join( args['workspace_dir'], 'sf_down%s.tif' % file_suffix) qb_out_uri = os.path.join( args['workspace_dir'], 'qb%s.txt' % file_suffix) LOGGER.info('classifying kc') kc_uri = os.path.join(args['workspace_dir'], 'kc%s.tif' % file_suffix) pygeoprocessing.geoprocessing.reclassify_dataset_uri( lulc_uri_aligned, kc_lookup, kc_uri, gdal.GDT_Float32, -1) LOGGER.info('calculate slow flow') if not args['user_defined_recharge']: LOGGER.info('loading number of monthly events') rain_events_lookup = ( pygeoprocessing.geoprocessing.get_lookup_from_table( args['rain_events_table_uri'], 'month')) n_events = dict([ (month, rain_events_lookup[month]['events']) for month in rain_events_lookup]) LOGGER.info('calculating curve number') soil_nodata = pygeoprocessing.get_nodata_from_uri( args['soil_group_uri']) map_soil_type_to_header = { 1: 'cn_a', 2: 'cn_b', 3: 'cn_c', 4: 'cn_d', } cn_nodata = -1 lulc_to_soil = {} lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_uri_aligned) for soil_id, soil_column in map_soil_type_to_header.iteritems(): lulc_to_soil[soil_id] = { 'lulc_values': [], 'cn_values': [] } for lucode in sorted(biophysical_table.keys() + [lulc_nodata]): try: lulc_to_soil[soil_id]['cn_values'].append( biophysical_table[lucode][soil_column]) lulc_to_soil[soil_id]['lulc_values'].append(lucode) except KeyError: if lucode == lulc_nodata: lulc_to_soil[soil_id]['lulc_values'].append(lucode) lulc_to_soil[soil_id]['cn_values'].append(cn_nodata) else: raise lulc_to_soil[soil_id]['lulc_values'] = ( numpy.array(lulc_to_soil[soil_id]['lulc_values'], dtype=numpy.int32)) lulc_to_soil[soil_id]['cn_values'] = ( numpy.array(lulc_to_soil[soil_id]['cn_values'], dtype=numpy.float32)) def cn_op(lulc_array, soil_group_array): """map lulc code and soil to a curve number""" cn_result = numpy.empty(lulc_array.shape) cn_result[:] = cn_nodata for soil_group_id in numpy.unique(soil_group_array): if soil_group_id == soil_nodata: continue current_soil_mask = (soil_group_array == soil_group_id) index = numpy.digitize( lulc_array.ravel(), lulc_to_soil[soil_group_id]['lulc_values'], right=True) cn_values = ( lulc_to_soil[soil_group_id]['cn_values'][index]).reshape( lulc_array.shape) cn_result[current_soil_mask] = 
cn_values[current_soil_mask] return cn_result cn_nodata = -1 pygeoprocessing.vectorize_datasets( [lulc_uri_aligned, soil_group_uri_aligned], cn_op, cn_uri, gdal.GDT_Float32, cn_nodata, pixel_size, 'intersection', vectorize_op=False, datasets_are_pre_aligned=True) LOGGER.info('calculate quick flow') calculate_quick_flow( precip_uri_aligned_list, lulc_uri_aligned, cn_uri, n_events, stream_uri, qfi_uri, qf_monthly_uri_list, si_uri) recharge_uri = os.path.join( args['workspace_dir'], 'recharge%s.tif' % file_suffix) seasonal_water_yield_core.calculate_recharge( precip_uri_aligned_list, et0_uri_aligned_list, qf_monthly_uri_list, flow_dir_uri, outflow_weights_uri, outflow_direction_uri, dem_uri_aligned, lulc_uri_aligned, kc_lookup, alpha_m, beta_i, gamma, stream_uri, recharge_uri, recharge_avail_uri, r_sum_avail_uri, aet_uri, kc_uri) else: recharge_uri = recharge_aligned_uri recharge_nodata = pygeoprocessing.geoprocessing.get_nodata_from_uri( recharge_uri) def calc_recharge_avail(recharge_array): recharge_threshold = recharge_array * gamma recharge_threshold[recharge_threshold < 0] = 0.0 return numpy.where( recharge_array != recharge_nodata, recharge_threshold, recharge_nodata) #calc recharge avail pygeoprocessing.geoprocessing.vectorize_datasets( [recharge_aligned_uri], calc_recharge_avail, recharge_avail_uri, gdal.GDT_Float32, recharge_nodata, pixel_size, 'intersection', vectorize_op=False, datasets_are_pre_aligned=True) #calc r_sum_avail with flux accumulation loss_uri = pygeoprocessing.geoprocessing.temporary_filename() zero_absorption_source_uri = ( pygeoprocessing.geoprocessing.temporary_filename()) pygeoprocessing.make_constant_raster_from_base_uri( dem_uri_aligned, 0.0, zero_absorption_source_uri) pygeoprocessing.routing.route_flux( flow_dir_uri, dem_uri_aligned, recharge_avail_uri, zero_absorption_source_uri, loss_uri, r_sum_avail_uri, 'flux_only', include_source=False) #calcualte Qb as the sum of recharge_avail over the aoi qb_results = pygeoprocessing.geoprocessing.aggregate_raster_values_uri( recharge_avail_uri, args['aoi_uri']) qb_result = qb_results.total[9999] / qb_results.n_pixels[9999] #9999 is the value used to index fields if no shapefile ID is provided qb_file = open(qb_out_uri, 'w') qb_file.write("%f\n" % qb_result) qb_file.close() LOGGER.info("Qb = %f", qb_result) pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri( recharge_uri) ri_nodata = pygeoprocessing.geoprocessing.get_nodata_from_uri(recharge_uri) def vri_op(ri_array): """calc vri index""" return numpy.where( ri_array != ri_nodata, ri_array / qb_result, ri_nodata) pygeoprocessing.geoprocessing.vectorize_datasets( [recharge_uri], vri_op, vri_uri, gdal.GDT_Float32, ri_nodata, pixel_size, 'intersection', vectorize_op=False, datasets_are_pre_aligned=True) LOGGER.info('calculating r_sum_avail_pour') seasonal_water_yield_core.calculate_r_sum_avail_pour( r_sum_avail_uri, outflow_weights_uri, outflow_direction_uri, r_sum_avail_pour_uri) LOGGER.info('calculating slow flow') print dem_uri_aligned, recharge_avail_uri, r_sum_avail_uri,\ r_sum_avail_pour_uri, outflow_direction_uri, outflow_weights_uri,\ stream_uri, sf_uri, sf_down_uri seasonal_water_yield_core.route_sf( dem_uri_aligned, recharge_avail_uri, r_sum_avail_uri, r_sum_avail_pour_uri, outflow_direction_uri, outflow_weights_uri, stream_uri, sf_uri, sf_down_uri) LOGGER.info(' (\\w/) SWY Complete!') LOGGER.info(' (.. 
\\ ') LOGGER.info(' _/ ) \\______') LOGGER.info('(oo /\'\\ )`,') LOGGER.info(' `--\' (v __( / ||') LOGGER.info(' ||| ||| ||') LOGGER.info(' //_| //_|')
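
# Illustrative sketch (toy values only) of the numpy.digitize lookup used in
# cn_op above: given the sorted landcover codes and their curve numbers for
# one soil group, digitize(..., right=True) returns, for each lulc pixel,
# the index of its code in the sorted table, which then indexes the CN
# array.
import numpy

toy_lulc_values = numpy.array([1, 2, 5], dtype=numpy.int32)  # sorted codes
toy_cn_values = numpy.array([60.0, 75.0, 90.0], dtype=numpy.float32)
toy_lulc_block = numpy.array([[5, 1], [2, 2]])
toy_index = numpy.digitize(
    toy_lulc_block.ravel(), toy_lulc_values, right=True)
toy_cn_block = toy_cn_values[toy_index].reshape(toy_lulc_block.shape)
# toy_cn_block is [[90., 60.], [75., 75.]]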
def _calc_cost_of_per_hectare_inputs(vars_dict, crop, lulc_raster):
    '''
    CostPerHectareInputTotal_crop = Mask_raster * CostPerHectare_input *
        ha_per_cell
    '''
    # Determine the crop lucode based on its name
    crop_lucode = None
    for lucode, luname in vars_dict['crop_lookup_dict'].iteritems():
        if luname == crop:
            crop_lucode = lucode
            break

    lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc_raster.uri)
    economics_table_crop = vars_dict['economics_table_dict'][crop]
    datatype_out = gdal.GDT_Float32
    nodata_out = NODATA_FLOAT
    pixel_size_out = pygeoprocessing.get_cell_size_from_uri(lulc_raster.uri)

    ha_per_m2 = 0.0001
    cell_area_ha = pixel_size_out**2 * ha_per_m2

    # The scalar cost is identical for all crop pixels of the current class,
    # and is based on the presence or absence of columns in the
    # user-provided economics table.  We only need to calculate this once.
    # (A toy example of this accumulation appears just after this function.)
    cost_scalar = 0.0
    for key in ['cost_labor_per_ha', 'cost_machine_per_ha',
                'cost_seed_per_ha', 'cost_irrigation_per_ha']:
        try:
            cost_scalar += (economics_table_crop[key] * cell_area_ha)
        except KeyError:
            LOGGER.warning('Key missing from economics table: %s', key)

    def _calculate_cost(lulc_matrix):
        """
        Calculate the total cost on a single pixel.

        <pseudocode>
        If lulc_pixel is nodata:
            return nodata
        else:
            if lulc_pixel is of our crop type:
                return the cost of this crop (in cost_scalar, above)
            else:
                return 0.0
        </pseudocode>
        """
        return np.where(
            lulc_matrix == lulc_nodata, nodata_out,
            np.where(lulc_matrix == crop_lucode, cost_scalar, 0.0))

    new_raster_uri = pygeoprocessing.geoprocessing.temporary_filename()
    pygeoprocessing.vectorize_datasets(
        [lulc_raster.uri], _calculate_cost, new_raster_uri, datatype_out,
        nodata_out, pixel_size_out, bounding_box_mode='intersection',
        vectorize_op=False, datasets_are_pre_aligned=True)

    return Raster.from_file(new_raster_uri, 'GTiff')
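
# Illustrative sketch of the cost_scalar accumulation above with a
# hypothetical, partially-filled economics table: missing cost columns are
# skipped (the real function logs a warning) rather than raising.
toy_economics = {'cost_labor_per_ha': 1000.0, 'cost_seed_per_ha': 250.0}
toy_cell_area_ha = 0.09  # e.g. a hypothetical 30 m pixel
toy_cost_scalar = 0.0
for toy_key in ['cost_labor_per_ha', 'cost_machine_per_ha',
                'cost_seed_per_ha', 'cost_irrigation_per_ha']:
    try:
        toy_cost_scalar += toy_economics[toy_key] * toy_cell_area_ha
    except KeyError:
        pass  # the model logs a warning here
# toy_cost_scalar == (1000.0 + 250.0) * 0.09 == 112.5 per crop pixel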
def execute(args): """Main entry point for GLOBIO model. The model operates in two modes. Mode (a) generates a landcover map based on a base landcover map and information about crop yields, infrastructure, and more. Mode (b) assumes the globio landcover map is generated. These modes are used below to describe input parameters. args['workspace_dir'] - (string) output directory for intermediate, temporary, and final files args['predefined_globio'] - (boolean) if True then "mode (b)" else "mode (a)" args['results_suffix'] - (optional) (string) string to append to any output files args['lulc_uri'] - (string) used in "mode (a)" path to a base landcover map with integer codes args['lulc_to_globio_table_uri'] - (string) used in "mode (a)" path to table that translates the land-cover args['lulc_uri'] to intermediate GLOBIO classes, from which they will be further differentiated using the additional data in the model. 'lucode': Land use and land cover class code of the dataset used. LULC codes match the 'values' column in the LULC raster of mode (b) and must be numeric and unique. 'globio_lucode': The LULC code corresponding to the GLOBIO class to which it should be converted, using intermediate codes described in the example below. args['infrastructure_dir'] - (string) used in "mode (a)" a path to a folder containing maps of any forms of infrastructure to consider in the calculation of MSAI. These data may be in either raster or vector format. args['pasture_uri'] - (string) used in "mode (a)" path to pasture raster args['potential_vegetation_uri'] - (string) used in "mode (a)" path to potential vegetation raster args['intensification_uri'] - (string) used in "mode (a)" a path to intensification raster args['pasture_threshold'] - (float) used in "mode (a)" args['intensification_threshold'] - (float) used in "mode (a)" args['primary_threshold'] - (float) used in "mode (a)" args['msa_parameters_uri'] - (string) path to MSA classification parameters args['aoi_uri'] - (string) (optional) if it exists then final MSA raster is summarized by AOI args['globio_lulc_uri'] - (string) used in "mode (b)" path to predefined globio raster. 
""" msa_parameter_table = load_msa_parameter_table(args['msa_parameters_uri']) #append a _ to the suffix if it's not empty and doens't already have one try: file_suffix = args['results_suffix'] if file_suffix != "" and not file_suffix.startswith('_'): file_suffix = '_' + file_suffix except KeyError: file_suffix = '' #create working directories output_dir = os.path.join(args['workspace_dir'], 'output') intermediate_dir = os.path.join(args['workspace_dir'], 'intermediate') tmp_dir = os.path.join(args['workspace_dir'], 'tmp') pygeoprocessing.geoprocessing.create_directories( [output_dir, intermediate_dir, tmp_dir]) #the cell size should be based on the landcover map if not args['predefined_globio']: out_pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri( args['lulc_uri']) globio_lulc_uri = _calculate_globio_lulc_map(args, file_suffix, intermediate_dir, tmp_dir, out_pixel_size) else: out_pixel_size = pygeoprocessing.geoprocessing.get_cell_size_from_uri( args['globio_lulc_uri']) LOGGER.info('no need to calcualte GLOBIO LULC because it is passed in') globio_lulc_uri = args['globio_lulc_uri'] globio_nodata = pygeoprocessing.get_nodata_from_uri(globio_lulc_uri) #load the infrastructure layers from disk infrastructure_filenames = [] infrastructure_nodata_list = [] for root_directory, _, filename_list in os.walk( args['infrastructure_dir']): for filename in filename_list: if filename.lower().endswith(".tif"): infrastructure_filenames.append( os.path.join(root_directory, filename)) infrastructure_nodata_list.append( pygeoprocessing.geoprocessing.get_nodata_from_uri( infrastructure_filenames[-1])) if filename.lower().endswith(".shp"): infrastructure_tmp_raster = (os.path.join( tmp_dir, os.path.basename(filename.lower() + ".tif"))) pygeoprocessing.geoprocessing.new_raster_from_base_uri( globio_lulc_uri, infrastructure_tmp_raster, 'GTiff', -1.0, gdal.GDT_Int32, fill_value=0) pygeoprocessing.geoprocessing.rasterize_layer_uri( infrastructure_tmp_raster, os.path.join(root_directory, filename), burn_values=[1], option_list=["ALL_TOUCHED=TRUE"]) infrastructure_filenames.append(infrastructure_tmp_raster) infrastructure_nodata_list.append( pygeoprocessing.geoprocessing.get_nodata_from_uri( infrastructure_filenames[-1])) if len(infrastructure_filenames) == 0: raise ValueError( "infrastructure directory didn't have any GeoTIFFS or " "Shapefiles at %s", args['infrastructure_dir']) infrastructure_nodata = -1 infrastructure_uri = os.path.join( intermediate_dir, 'combined_infrastructure%s.tif' % file_suffix) def _collapse_infrastructure_op(*infrastructure_array_list): """Combines all input infrastructure into a single map where if any pixel on the stack is 1 gets passed through, any nodata pixel masks out all of them""" nodata_mask = ( infrastructure_array_list[0] == infrastructure_nodata_list[0]) infrastructure_result = infrastructure_array_list[0] > 0 for index in range(1, len(infrastructure_array_list)): current_nodata = (infrastructure_array_list[index] == infrastructure_nodata_list[index]) infrastructure_result = (infrastructure_result | ( (infrastructure_array_list[index] > 0) & ~current_nodata)) nodata_mask = (nodata_mask & current_nodata) return numpy.where(nodata_mask, infrastructure_nodata, infrastructure_result) LOGGER.info('collapse infrastructure into one raster') pygeoprocessing.geoprocessing.vectorize_datasets( infrastructure_filenames, _collapse_infrastructure_op, infrastructure_uri, gdal.GDT_Byte, infrastructure_nodata, out_pixel_size, "intersection", dataset_to_align_index=0, 
assert_datasets_projected=False, vectorize_op=False) #calc_msa_f primary_veg_mask_uri = os.path.join(tmp_dir, 'primary_veg_mask%s.tif' % file_suffix) primary_veg_mask_nodata = -1 def _primary_veg_mask_op(lulc_array): """masking out natural areas""" nodata_mask = lulc_array == globio_nodata result = (lulc_array == 1) return numpy.where(nodata_mask, primary_veg_mask_nodata, result) LOGGER.info("create mask of primary veg areas") pygeoprocessing.geoprocessing.vectorize_datasets( [globio_lulc_uri], _primary_veg_mask_op, primary_veg_mask_uri, gdal.GDT_Int32, primary_veg_mask_nodata, out_pixel_size, "intersection", dataset_to_align_index=0, assert_datasets_projected=False, vectorize_op=False) LOGGER.info('gaussian filter primary veg') sigma = 9.0 gaussian_kernel_uri = os.path.join(tmp_dir, 'gaussian_kernel%s.tif' % file_suffix) make_gaussian_kernel_uri(sigma, gaussian_kernel_uri) smoothed_primary_veg_mask_uri = os.path.join( tmp_dir, 'smoothed_primary_veg_mask%s.tif' % file_suffix) pygeoprocessing.geoprocessing.convolve_2d_uri( primary_veg_mask_uri, gaussian_kernel_uri, smoothed_primary_veg_mask_uri) primary_veg_smooth_uri = os.path.join( intermediate_dir, 'primary_veg_smooth%s.tif' % file_suffix) def _primary_veg_smooth_op(primary_veg_mask_array, smoothed_primary_veg_mask): """mask out ffqi only where there's an ffqi""" return numpy.where(primary_veg_mask_array != primary_veg_mask_nodata, primary_veg_mask_array * smoothed_primary_veg_mask, primary_veg_mask_nodata) LOGGER.info('calculate primary_veg_smooth') pygeoprocessing.geoprocessing.vectorize_datasets( [primary_veg_mask_uri, smoothed_primary_veg_mask_uri], _primary_veg_smooth_op, primary_veg_smooth_uri, gdal.GDT_Float32, primary_veg_mask_nodata, out_pixel_size, "intersection", dataset_to_align_index=0, assert_datasets_projected=False, vectorize_op=False) msa_nodata = -1 msa_f_table = msa_parameter_table['msa_f'] msa_f_values = sorted(msa_f_table) def _msa_f_op(primary_veg_smooth): """calcualte msa fragmentation""" nodata_mask = primary_veg_mask_nodata == primary_veg_smooth msa_f = numpy.empty(primary_veg_smooth.shape) for value in reversed(msa_f_values): #special case if it's a > or < value if value == '>': msa_f[primary_veg_smooth > msa_f_table['>'][0]] = ( msa_f_table['>'][1]) elif value == '<': continue else: msa_f[primary_veg_smooth <= value] = msa_f_table[value] if '<' in msa_f_table: msa_f[primary_veg_smooth < msa_f_table['<'][0]] = ( msa_f_table['<'][1]) msa_f[nodata_mask] = msa_nodata return msa_f LOGGER.info('calculate msa_f') msa_f_uri = os.path.join(output_dir, 'msa_f%s.tif' % file_suffix) pygeoprocessing.geoprocessing.vectorize_datasets( [primary_veg_smooth_uri], _msa_f_op, msa_f_uri, gdal.GDT_Float32, msa_nodata, out_pixel_size, "intersection", dataset_to_align_index=0, assert_datasets_projected=False, vectorize_op=False) #calc_msa_i msa_f_values = sorted(msa_f_table) msa_i_other_table = msa_parameter_table['msa_i_other'] msa_i_primary_table = msa_parameter_table['msa_i_primary'] msa_i_other_values = sorted(msa_i_other_table) msa_i_primary_values = sorted(msa_i_primary_table) def _msa_i_op(lulc_array, distance_to_infrastructure): """calculate msa infrastructure""" distance_to_infrastructure *= out_pixel_size #convert to meters msa_i_primary = numpy.empty(lulc_array.shape) msa_i_other = numpy.empty(lulc_array.shape) for value in reversed(msa_i_primary_values): #special case if it's a > or < value if value == '>': msa_i_primary[distance_to_infrastructure > msa_i_primary_table['>'][0]] = ( msa_i_primary_table['>'][1]) elif value 
== '<': continue else: msa_i_primary[distance_to_infrastructure <= value] = ( msa_i_primary_table[value]) if '<' in msa_i_primary_table: msa_i_primary[distance_to_infrastructure < msa_i_primary_table['<'] [0]] = (msa_i_primary_table['<'][1]) for value in reversed(msa_i_other_values): #special case if it's a > or < value if value == '>': msa_i_other[distance_to_infrastructure > msa_i_other_table['>'] [0]] = (msa_i_other_table['>'][1]) elif value == '<': continue else: msa_i_other[distance_to_infrastructure <= value] = ( msa_i_other_table[value]) if '<' in msa_i_other_table: msa_i_other[distance_to_infrastructure < msa_i_other_table['<'] [0]] = (msa_i_other_table['<'][1]) msa_i = numpy.where((lulc_array >= 1) & (lulc_array <= 5), msa_i_primary, 1.0) msa_i = numpy.where((lulc_array >= 6) & (lulc_array <= 12), msa_i_other, msa_i) return msa_i LOGGER.info('calculate msa_i') distance_to_infrastructure_uri = os.path.join( intermediate_dir, 'distance_to_infrastructure%s.tif' % file_suffix) pygeoprocessing.geoprocessing.distance_transform_edt( infrastructure_uri, distance_to_infrastructure_uri) msa_i_uri = os.path.join(output_dir, 'msa_i%s.tif' % file_suffix) pygeoprocessing.geoprocessing.vectorize_datasets( [globio_lulc_uri, distance_to_infrastructure_uri], _msa_i_op, msa_i_uri, gdal.GDT_Float32, msa_nodata, out_pixel_size, "intersection", dataset_to_align_index=0, assert_datasets_projected=False, vectorize_op=False) #calc_msa_lu msa_lu_uri = os.path.join(output_dir, 'msa_lu%s.tif' % file_suffix) LOGGER.info('calculate msa_lu') pygeoprocessing.geoprocessing.reclassify_dataset_uri( globio_lulc_uri, msa_parameter_table['msa_lu'], msa_lu_uri, gdal.GDT_Float32, globio_nodata, exception_flag='values_required') LOGGER.info('calculate msa') msa_uri = os.path.join(output_dir, 'msa%s.tif' % file_suffix) def _msa_op(msa_f, msa_lu, msa_i): """Calculate the MSA which is the product of the sub msas""" return numpy.where(msa_f != globio_nodata, msa_f * msa_lu * msa_i, globio_nodata) pygeoprocessing.geoprocessing.vectorize_datasets( [msa_f_uri, msa_lu_uri, msa_i_uri], _msa_op, msa_uri, gdal.GDT_Float32, msa_nodata, out_pixel_size, "intersection", dataset_to_align_index=0, assert_datasets_projected=False, vectorize_op=False) if 'aoi_uri' in args: #copy the aoi to an output shapefile original_datasource = ogr.Open(args['aoi_uri']) summary_aoi_uri = os.path.join(output_dir, 'aoi_summary%s.shp' % file_suffix) #If there is already an existing shapefile with the same name and path, # delete it if os.path.isfile(summary_aoi_uri): os.remove(summary_aoi_uri) #Copy the input shapefile into the designated output folder esri_driver = ogr.GetDriverByName('ESRI Shapefile') datasource_copy = esri_driver.CopyDataSource(original_datasource, summary_aoi_uri) layer = datasource_copy.GetLayer() msa_summary_field_def = ogr.FieldDefn('msa_mean', ogr.OFTReal) layer.CreateField(msa_summary_field_def) #make an identifying id per polygon that can be used for aggregation layer_defn = layer.GetLayerDefn() while True: #last 8 characters because shapefile fields are limited to 8 chars poly_id_field = str(uuid.uuid4())[-8:] if layer_defn.GetFieldIndex(poly_id_field) == -1: break layer_id_field = ogr.FieldDefn(poly_id_field, ogr.OFTInteger) layer.CreateField(layer_id_field) for poly_index, poly_feat in enumerate(layer): poly_feat.SetField(poly_id_field, poly_index) layer.SetFeature(poly_feat) layer.SyncToDisk() #aggregate by ID msa_summary = pygeoprocessing.aggregate_raster_values_uri( msa_uri, summary_aoi_uri, shapefile_field=poly_id_field) 
#add new column to output file for feature_id in xrange(layer.GetFeatureCount()): feature = layer.GetFeature(feature_id) key_value = feature.GetFieldAsInteger(poly_id_field) feature.SetField('msa_mean', float(msa_summary.pixel_mean[key_value])) layer.SetFeature(feature) # don't need a random poly id anymore layer.DeleteField(layer_defn.GetFieldIndex(poly_id_field))
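
# Illustrative sketch (toy table) of the '<'/'>' threshold handling used in
# _msa_f_op and _msa_i_op above.  The table layout is inferred from how the
# code indexes it: numeric keys map an upper bound to an MSA value, while
# '<' and '>' hold (threshold, value) pairs for the open ends; the numbers
# below are made up.  Note that Python 2 sorts the mixed numeric/string keys
# with numbers first, so reversed() visits '>' and '<' before the numeric
# thresholds, as the model relies on.
import numpy

toy_msa_table = {'<': (0.1, 0.3), 0.5: 0.6, 0.9: 0.8, '>': (0.9, 1.0)}
toy_ffqi = numpy.array([0.05, 0.3, 0.7, 0.95])
toy_msa = numpy.empty(toy_ffqi.shape)
for toy_value in reversed(sorted(toy_msa_table)):
    if toy_value == '>':
        toy_msa[toy_ffqi > toy_msa_table['>'][0]] = toy_msa_table['>'][1]
    elif toy_value == '<':
        continue
    else:
        toy_msa[toy_ffqi <= toy_value] = toy_msa_table[toy_value]
if '<' in toy_msa_table:
    toy_msa[toy_ffqi < toy_msa_table['<'][0]] = toy_msa_table['<'][1]
# toy_msa is [0.3, 0.6, 0.8, 1.0]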
    aoi_uri=yosemite_vector,
)

# Next we need to calculate the slope layer.
LOGGER.info('Calculating slope')
slope_raster = os.path.join(OUTPUT_DIR, 'slope.tif')
pygeoprocessing.calculate_slope(
    dem_dataset_uri=joined_dem, slope_uri=slope_raster)

# OK! Now we add it all together with a call to vectorize_datasets
LOGGER.info('Finding high-elevation, steep grasslands')
lulc = '/data/landcover.tif'

# segfault if I do this: gdal.Open(lulc).GetRasterBand(1).GetNoDataValue()
lulc_nodata = pygeoprocessing.get_nodata_from_uri(lulc)
dem_nodata = pygeoprocessing.get_nodata_from_uri(joined_dem)
slope_nodata = pygeoprocessing.get_nodata_from_uri(slope_raster)
out_nodata = -1


def _find_grasslands(lulc_blk, dem_blk, slope_blk):
    # All blocks will be the same dimensions
    # Create a mask of invalid pixels due to nodata values
    valid_mask = ((lulc_blk != lulc_nodata) &
                  (dem_blk != dem_nodata) &
                  (slope_blk != slope_nodata))

    # grasslands are lulc code 10
    matching_grasslands = ((lulc_blk[valid_mask] == 10) &
                           (slope_blk[valid_mask] >= 45) &
def _sort_to_disk(dataset_uri, score_weight=1.0, cache_element_size=2**25):
    """Sort the non-nodata pixels in the dataset on disk and return an
    iterable in sorted order.

    Parameters:
        dataset_uri (string): a path to a floating point GDAL dataset
        score_weight (float): a number to multiply all values by, which can
            be used to reverse the order of the iteration if negative.
        cache_element_size (int): approximate number of single elements to
            hold in memory before flushing to disk.  Due to the internal
            blocksize of the input raster, it is possible this cache could
            go over this value by that size before the cache is flushed.

    Returns:
        an iterable that produces (value * score_weight, flat_index) in
        decreasing sorted order by value * score_weight"""

    def _read_score_index_from_disk(score_file_name, index_file_name,
                                    buffer_size=4 * 10000):
        """Generator to yield a float/int value from the given filenames.

        Reads a buffer of `buffer_size` at a time to avoid keeping the files
        open between generations."""
        score_buffer = ''
        index_buffer = ''
        file_offset = 0
        buffer_offset = 1  # initialize to 1 to trigger the first load
        while True:
            if buffer_offset > len(score_buffer):
                score_file = open(score_file_name, 'rb')
                index_file = open(index_file_name, 'rb')
                score_file.seek(file_offset)
                index_file.seek(file_offset)

                score_buffer = score_file.read(buffer_size)
                index_buffer = index_file.read(buffer_size)
                score_file.close()
                index_file.close()

                file_offset += buffer_size
                buffer_offset = 0
            packed_score = score_buffer[buffer_offset:buffer_offset + 4]
            packed_index = index_buffer[buffer_offset:buffer_offset + 4]
            buffer_offset += 4
            if not packed_score:
                break
            yield (struct.unpack('f', packed_score)[0],
                   struct.unpack('i', packed_index)[0])

    def _sort_cache_to_iterator(index_cache, score_cache):
        """Flush the current cache to disk and return an iterator over it.

        Parameters:
            index_cache (1d numpy.array): contains flat indexes to the score
                pixels `score_cache`
            score_cache (1d numpy.array): contains score pixels

        Returns:
            Iterable to visit scores/indexes in increasing score order."""
        # sort the whole bunch to disk
        sort_index = score_cache.argsort()
        score_cache = score_cache[sort_index]
        index_cache = index_cache[sort_index]

        # Dump all the scores and indexes to disk
        # (a pack/unpack round-trip sketch appears just after this function)
        score_file = tempfile.NamedTemporaryFile(delete=False)
        score_file.write(struct.pack('%sf' % score_cache.size, *score_cache))
        index_file = tempfile.NamedTemporaryFile(delete=False)
        index_file.write(struct.pack('%si' % index_cache.size, *index_cache))

        # Get the filename and register a command to delete it after the
        # interpreter exits
        score_file_name = score_file.name
        score_file.close()
        index_file_name = index_file.name
        index_file.close()

        def _remove_file(path):
            """Remove a file, swallowing exceptions so it can be registered
            with atexit."""
            try:
                os.remove(path)
            except OSError:
                # This happens if the file didn't exist, which is okay
                # because maybe we deleted it in a method
                pass
        atexit.register(_remove_file, score_file_name)
        atexit.register(_remove_file, index_file_name)
        return _read_score_index_from_disk(score_file_name, index_file_name)

    nodata = pygeoprocessing.get_nodata_from_uri(dataset_uri)
    nodata *= score_weight  # scale the nodata so they can be filtered out

    # This will be a list of file iterators we'll pass to heap.merge
    iters = []

    _, n_cols = pygeoprocessing.get_row_col_from_uri(dataset_uri)

    # scores are 32-bit floats, flat indexes are 32-bit ints
    score_cache = numpy.empty((0,), dtype=numpy.float32)
    index_cache = numpy.empty((0,), dtype=numpy.int32)
    for scores_data, scores_block in pygeoprocessing.iterblocks(dataset_uri):
        # flatten and scale the results
        scores_block = scores_block.flatten() * score_weight

        col_coords, row_coords = numpy.meshgrid(
            xrange(scores_data['xoff'],
                   scores_data['xoff'] + scores_data['win_xsize']),
            xrange(scores_data['yoff'],
                   scores_data['yoff'] + scores_data['win_ysize']))

        flat_indexes = (col_coords + row_coords * n_cols).flatten()

        sort_index = scores_block.argsort()
        sorted_scores = scores_block[sort_index]
        sorted_indexes = flat_indexes[sort_index]

        # search for nodata values so we can splice them out
        left_index = numpy.searchsorted(sorted_scores, nodata, side='left')
        right_index = numpy.searchsorted(sorted_scores, nodata, side='right')

        # remove nodata values and accumulate into the cache
        score_cache = numpy.concatenate(
            (score_cache, sorted_scores[0:left_index],
             sorted_scores[right_index::]))
        index_cache = numpy.concatenate(
            (index_cache, sorted_indexes[0:left_index],
             sorted_indexes[right_index::]))

        # check if we need to flush the cache
        if index_cache.size >= cache_element_size:
            iters.append(_sort_cache_to_iterator(index_cache, score_cache))
            score_cache = numpy.empty((0,), dtype=numpy.float32)
            index_cache = numpy.empty((0,), dtype=numpy.int32)

    iters.append(_sort_cache_to_iterator(index_cache, score_cache))
    return heapq.merge(*iters)
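
# Round-trip sketch of the struct packing used by the cache flush above:
# scores are stored as consecutive 32-bit floats (and indexes as consecutive
# 32-bit ints), and the reader generator pulls them back 4 bytes at a time.
# The values here are arbitrary and chosen to be exactly representable.
import struct

example_scores = [3.5, -1.25, 7.0]
example_packed = struct.pack('%sf' % len(example_scores), *example_scores)
example_unpacked = [
    struct.unpack('f', example_packed[offset:offset + 4])[0]
    for offset in range(0, len(example_packed), 4)]
# example_unpacked == [3.5, -1.25, 7.0]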
def execute(args): """InVEST Carbon Edge Model calculates the carbon due to edge effects in forest pixels. Parameters: args['workspace_dir'] (string): a uri to the directory that will write output and other temporary files during calculation. (required) args['results_suffix'] (string): a string to append to any output file name (optional) args['n_nearest_model_points'] (int): number of nearest neighbor model points to search for args['aoi_uri'] (string): (optional) if present, a path to a shapefile that will be used to aggregate carbon stock results at the end of the run. args['biophysical_table_uri'] (string): a path to a CSV table that has at least a header for an 'lucode', 'is_forest', and 'c_above'. 'lucode': an integer that corresponds to landcover codes in the raster args['lulc_uri'] 'is_forest': either 0 or 1 indicating whether the landcover type is forest (1) or not (0). If 1, the value in c_above is ignored and instead calculated from the edge regression model. 'c_above': floating point number indicating tons of carbon per hectare for that landcover type Example: lucode, is_forest, c_above 0,0,32.8 1,1,n/a 2,1,n/a 16,0,28.1 Note the "n/a" are optional since that field is ignored when is_forest==1. args['lulc_uri'] (string): path to a integer landcover code raster args['forest_edge_carbon_model_shape_uri'] (string): path to a shapefile that defines the regions for the local carbon edge models. Has at least the fields 'method', 'theta1', 'theta2', 'theta3'. Where 'method' is an int between 1..3 describing the biomass regression model, and the thetas are floating point numbers that have different meanings depending on the 'method' parameter. Specifically, method 1 asymptotic model: biomass = theta1 - theta2 * exp(-theta3 * edge_dist_km) method 2 logarithmic model: biomass = theta1 + theta2 * numpy.log(edge_dist_km) (theta3 is ignored for this method) method 3 linear regression: biomass = theta1 + theta2 * edge_dist_km args['biomass_to_carbon_conversion_factor'] (string/float): Number by which to multiply forest biomass to convert to carbon in the edge effect calculation. 
Returns: None""" output_dir = args['workspace_dir'] intermediate_dir = os.path.join(args['workspace_dir'], 'intermediate_outputs') pygeoprocessing.create_directories([output_dir, intermediate_dir]) try: file_suffix = args['results_suffix'] if file_suffix != "" and not file_suffix.startswith('_'): file_suffix = '_' + file_suffix except KeyError: file_suffix = '' # used to keep track of files generated by this module output_file_registry = { 'non_forest_carbon_stocks': os.path.join(intermediate_dir, 'non_forest_carbon_stocks%s.tif' % file_suffix), 'edge_distance': os.path.join(intermediate_dir, 'edge_distance%s.tif' % file_suffix), 'forest_edge_carbon_map': os.path.join(intermediate_dir, 'forest_edge_carbon_stocks%s.tif' % file_suffix), 'carbon_map': os.path.join(output_dir, 'carbon_map%s.tif' % file_suffix), 'aoi_datasource': os.path.join(output_dir, 'aggregated_carbon_stocks.shp') } # Map non-forest landcover codes to carbon biomasses LOGGER.info('calculating non-forest carbon') _calculate_lulc_carbon_map( args['lulc_uri'], args['biophysical_table_uri'], output_file_registry['non_forest_carbon_stocks']) # generate a map of pixel distance to forest edge from the landcover map LOGGER.info('calculating distance from forest edge') _map_distance_from_forest_edge(args['lulc_uri'], args['biophysical_table_uri'], output_file_registry['edge_distance']) # Build spatial index for gridded global model for closest 3 points LOGGER.info('Building spatial index for forest edge models.') kd_tree, theta_model_parameters, method_model_parameter = ( _build_spatial_index(args['lulc_uri'], intermediate_dir, args['forest_edge_carbon_model_shape_uri'])) # calculate the edge carbon effect on forests LOGGER.info('calculating forest edge carbon') _calculate_forest_edge_carbon_map( output_file_registry['edge_distance'], kd_tree, theta_model_parameters, method_model_parameter, int(args['n_nearest_model_points']), float(args['biomass_to_carbon_conversion_factor']), output_file_registry['forest_edge_carbon_map']) # combine maps into output LOGGER.info('combining forest and non forest carbon into single raster') cell_size_in_meters = pygeoprocessing.get_cell_size_from_uri( args['lulc_uri']) carbon_edge_nodata = pygeoprocessing.get_nodata_from_uri( output_file_registry['forest_edge_carbon_map']) def combine_carbon_maps(non_forest_carbon, forest_carbon): """This combines the forest and non forest maps into one""" return numpy.where(forest_carbon == carbon_edge_nodata, non_forest_carbon, forest_carbon) pygeoprocessing.vectorize_datasets([ output_file_registry['non_forest_carbon_stocks'], output_file_registry['forest_edge_carbon_map'] ], combine_carbon_maps, output_file_registry['carbon_map'], gdal.GDT_Float32, carbon_edge_nodata, cell_size_in_meters, 'intersection', vectorize_op=False, datasets_are_pre_aligned=True) # generate report (optional) by aoi if they exist if 'aoi_uri' in args: LOGGER.info('aggregating carbon map by aoi') _aggregate_carbon_map(args['aoi_uri'], output_file_registry['carbon_map'], output_file_registry['aoi_datasource'])