def burn_dem(
        dem_raster_path, streams_raster_path, target_burned_dem_path,
        burn_depth=10):
    """Burn streams into dem."""
    dem_raster_info = pygeoprocessing.get_raster_info(dem_raster_path)
    dem_nodata = dem_raster_info['nodata'][0]
    pygeoprocessing.new_raster_from_base(
        dem_raster_path, target_burned_dem_path,
        dem_raster_info['datatype'], [dem_nodata])
    burned_dem_raster = gdal.OpenEx(
        target_burned_dem_path, gdal.OF_RASTER | gdal.OF_UPDATE)
    burned_dem_band = burned_dem_raster.GetRasterBand(1)
    stream_raster = gdal.OpenEx(streams_raster_path, gdal.OF_RASTER)
    stream_band = stream_raster.GetRasterBand(1)
    for offset_dict, dem_block in pygeoprocessing.iterblocks(
            (dem_raster_path, 1)):
        stream_block = stream_band.ReadAsArray(**offset_dict)
        stream_mask = (
            (stream_block == 1) & ~numpy.isclose(dem_block, dem_nodata))
        filled_block = numpy.copy(dem_block)
        filled_block[stream_mask] = filled_block[stream_mask] - burn_depth
        burned_dem_band.WriteArray(
            filled_block, xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])
    stream_band = None
    stream_raster = None
    burned_dem_band = None
    burned_dem_raster = None
def _create_outlet_raster(
        outlet_vector_path, base_raster_path, target_outlet_raster_path):
    """Create a raster that has 1s where outlet exists and 0 everywhere else.

    Args:
        outlet_vector_path (str): path to input vector that has 'i', 'j'
            fields indicating which pixels are outlets
        base_raster_path (str): path to base raster used to create outlet
            raster shape/projection.
        target_outlet_raster_path (str): created by this call, contains 0s
            except where pixels intersect with an outlet.

    Returns:
        None.
    """
    pygeoprocessing.new_raster_from_base(
        base_raster_path, target_outlet_raster_path, gdal.GDT_Byte, [0])

    outlet_raster = gdal.OpenEx(
        target_outlet_raster_path, gdal.OF_RASTER | gdal.GA_Update)
    outlet_band = outlet_raster.GetRasterBand(1)

    outlet_vector = gdal.OpenEx(outlet_vector_path, gdal.OF_VECTOR)
    outlet_layer = outlet_vector.GetLayer()

    one_array = numpy.ones((1, 1), dtype=numpy.int8)
    for outlet_feature in outlet_layer:
        outlet_band.WriteArray(
            one_array,
            outlet_feature.GetField('i'),
            outlet_feature.GetField('j'))
    outlet_band = None
    outlet_raster = None
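# Hedged usage sketch for _create_outlet_raster(); the file names below are
# hypothetical placeholders, and the outlet vector is assumed to carry
# integer 'i' (column offset) and 'j' (row offset) fields that identify the
# outlet pixels on the base raster's grid.
_create_outlet_raster(
    'outlets.gpkg',        # point features with 'i'/'j' pixel indices
    'flow_dir_mfd.tif',    # raster that defines shape/projection
    'outlet_mask.tif')     # output: 1 at outlet pixels, 0 elsewhere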
def make_buffered_point_raster_mask(
        shore_sample_point_vector_path, template_raster_path, workspace_dir,
        habitat_id, protective_distance, target_buffer_raster_path):
    """Buffer shore points by the protective distance and rasterize a mask."""
    gpkg_driver = ogr.GetDriverByName('GPKG')
    buffer_habitat_path = os.path.join(
        workspace_dir, '%s_buffer.gpkg' % habitat_id)
    buffer_habitat_vector = gpkg_driver.CreateDataSource(buffer_habitat_path)
    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)
    buffer_habitat_layer = buffer_habitat_vector.CreateLayer(
        habitat_id, wgs84_srs, ogr.wkbPolygon)
    buffer_habitat_layer_defn = buffer_habitat_layer.GetLayerDefn()

    shore_sample_point_vector = gdal.OpenEx(
        shore_sample_point_vector_path, gdal.OF_VECTOR)
    shore_sample_point_layer = shore_sample_point_vector.GetLayer()
    buffer_habitat_layer.StartTransaction()
    for point_index, point_feature in enumerate(shore_sample_point_layer):
        if point_index % 1000 == 0:
            LOGGER.debug(
                'point buffering is %.2f%% complete',
                point_index /
                shore_sample_point_layer.GetFeatureCount() * 100.0)
        # for each point, convert to local UTM to buffer out a given
        # distance then back to wgs84
        point_geom = point_feature.GetGeometryRef()
        utm_srs = calculate_utm_srs(point_geom.GetX(), point_geom.GetY())
        wgs84_to_utm_transform = osr.CoordinateTransformation(
            wgs84_srs, utm_srs)
        utm_to_wgs84_transform = osr.CoordinateTransformation(
            utm_srs, wgs84_srs)
        point_geom.Transform(wgs84_to_utm_transform)
        buffer_poly_geom = point_geom.Buffer(protective_distance)
        buffer_poly_geom.Transform(utm_to_wgs84_transform)

        buffer_point_feature = ogr.Feature(buffer_habitat_layer_defn)
        buffer_point_feature.SetGeometry(buffer_poly_geom)
        buffer_habitat_layer.CreateFeature(buffer_point_feature)
        buffer_point_feature = None
        point_feature = None
        buffer_poly_geom = None
        point_geom = None

    # at this point every shore point has been buffered to the effective
    # habitat distance and the habitat service has been saved with it
    buffer_habitat_layer.CommitTransaction()
    buffer_habitat_layer = None
    buffer_habitat_vector = None
    pygeoprocessing.new_raster_from_base(
        template_raster_path, target_buffer_raster_path, gdal.GDT_Float32,
        [0], raster_driver_creation_tuple=('GTIFF', (
            'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=LZW',
            'BLOCKXSIZE=256', 'BLOCKYSIZE=256', 'SPARSE_OK=TRUE')))
    pygeoprocessing.rasterize(
        buffer_habitat_path, target_buffer_raster_path, burn_values=[1])
def make_mask(base_raster_path, mask_vector_path, target_raster_path):
    """Mask vector onto target using base as the reference."""
    pygeoprocessing.new_raster_from_base(
        base_raster_path, target_raster_path, gdal.GDT_Byte, [0],
        fill_value_list=[0])
    pygeoprocessing.rasterize(
        mask_vector_path, target_raster_path, burn_values=[1])
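# Hedged usage sketch for make_mask(); the paths are hypothetical. The
# result is a byte raster with 1 under the vector features and 0 elsewhere,
# on the same grid as the base raster.
make_mask(
    'aligned_dem.tif',          # reference grid (extent/pixel size/projection)
    'watershed_boundary.gpkg',  # polygons to burn in as 1s
    'watershed_mask.tif')       # output mask raster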
def rasterize_carbon_zones(
        base_raster_path, carbon_vector_path, rasterized_zones_path):
    """Rasterize carbon zones; expects a 'CODE' attribute in the vector."""
    pygeoprocessing.new_raster_from_base(
        base_raster_path, rasterized_zones_path, gdal.GDT_Int32, [-1])
    pygeoprocessing.rasterize(
        carbon_vector_path, rasterized_zones_path,
        option_list=['ATTRIBUTE=CODE'])
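# Hedged usage sketch for rasterize_carbon_zones(); the zone vector is
# expected to carry an integer 'CODE' attribute, and the paths here are
# hypothetical.
rasterize_carbon_zones(
    'aligned_lulc.tif',       # template for extent and pixel size
    'carbon_zones.gpkg',      # polygons with a 'CODE' field
    'carbon_zone_codes.tif')  # output: int32 zone codes, -1 nodata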
def model_predict(
        model, lulc_raster_path, forest_mask_raster_path,
        aligned_predictor_list, predicted_biomass_raster_path):
    """Predict biomass given predictors."""
    pygeoprocessing.new_raster_from_base(
        lulc_raster_path, predicted_biomass_raster_path, gdal.GDT_Float32,
        [-1])
    predicted_biomass_raster = gdal.OpenEx(
        predicted_biomass_raster_path, gdal.OF_RASTER | gdal.GA_Update)
    predicted_biomass_band = predicted_biomass_raster.GetRasterBand(1)

    predictor_band_nodata_list = []
    raster_list = []
    # simple lookup to map predictor band/nodata to a list
    for predictor_path, nodata in aligned_predictor_list:
        predictor_raster = gdal.OpenEx(predictor_path, gdal.OF_RASTER)
        raster_list.append(predictor_raster)
        predictor_band = predictor_raster.GetRasterBand(1)
        if nodata is None:
            nodata = predictor_band.GetNoDataValue()
        predictor_band_nodata_list.append((predictor_band, nodata))

    forest_raster = gdal.OpenEx(forest_mask_raster_path, gdal.OF_RASTER)
    forest_band = forest_raster.GetRasterBand(1)

    for offset_dict in pygeoprocessing.iterblocks(
            (lulc_raster_path, 1), offset_only=True):
        forest_array = forest_band.ReadAsArray(**offset_dict)
        valid_mask = (forest_array == 1)
        x_vector = None
        array_list = []
        for band, nodata in predictor_band_nodata_list:
            array = band.ReadAsArray(**offset_dict)
            if nodata is None:
                nodata = band.GetNoDataValue()
            if nodata is not None:
                valid_mask &= array != nodata
            array_list.append(array)
        if not numpy.any(valid_mask):
            continue
        for array in array_list:
            if x_vector is None:
                x_vector = array[valid_mask].astype(numpy.float32)
                x_vector = numpy.reshape(x_vector, (-1, x_vector.size))
            else:
                valid_array = array[valid_mask].astype(numpy.float32)
                valid_array = numpy.reshape(
                    valid_array, (-1, valid_array.size))
                x_vector = numpy.append(x_vector, valid_array, axis=0)
        y_vector = model(torch.from_numpy(x_vector.T))
        result = numpy.full(forest_array.shape, -1)
        result[valid_mask] = (y_vector.detach().numpy()).flatten()
        predicted_biomass_band.WriteArray(
            result, xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])
    predicted_biomass_band = None
    predicted_biomass_raster = None
def rasterize_streams(
        base_raster_path, stream_vector_path, target_streams_raster_path):
    """Rasterize streams."""
    pygeoprocessing.new_raster_from_base(
        base_raster_path, target_streams_raster_path, gdal.GDT_Byte,
        [2], fill_value_list=[2])
    LOGGER.debug(stream_vector_path)
    pygeoprocessing.rasterize(
        stream_vector_path, target_streams_raster_path, burn_values=[1])
def _calculate_bar_factor(
        dem_path, factor_path, flow_accumulation_path, flow_direction_path,
        zero_absorption_source_path, loss_path, accumulation_path,
        out_bar_path):
    """Route user defined source across DEM.

    Used for calculating S and W bar in the SDR operation.

    Parameters:
        dem_path (string): path to DEM raster
        factor_path (string): path to arbitrary factor raster
        flow_accumulation_path (string): path to flow accumulation raster
        flow_direction_path (string): path to flow direction path (in
            radians)
        zero_absorption_source_path (string): path to a raster that is all
            0s and same size as `dem_path`.  Temporary file.
        loss_path (string): path to a raster that can save the loss raster
            from routing.  Temporary file.
        accumulation_path (string): path to a raster that can be used to
            save the accumulation of the factor.  Temporary file.
        out_bar_path (string): path to output raster that is the result of
            the factor accumulation raster divided by the flow accumulation
            raster.

    Returns:
        None.
    """
    pygeoprocessing.new_raster_from_base(
        dem_path, zero_absorption_source_path, gdal.GDT_Float32,
        [_TARGET_NODATA], fill_value_list=[0.0])

    flow_accumulation_nodata = pygeoprocessing.get_raster_info(
        flow_accumulation_path)['nodata'][0]

    natcap.invest.pygeoprocessing_0_3_3.routing.route_flux(
        flow_direction_path, dem_path, factor_path,
        zero_absorption_source_path, loss_path, accumulation_path,
        'flux_only')

    def bar_op(base_accumulation, flow_accumulation):
        """Aggregate accumulation from base divided by the flow accum."""
        result = numpy.empty(base_accumulation.shape)
        valid_mask = (
            (base_accumulation != _TARGET_NODATA) &
            (flow_accumulation != flow_accumulation_nodata))
        result[:] = _TARGET_NODATA
        result[valid_mask] = (
            base_accumulation[valid_mask] / flow_accumulation[valid_mask])
        return result

    pygeoprocessing.raster_calculator(
        [(accumulation_path, 1), (flow_accumulation_path, 1)], bar_op,
        out_bar_path, gdal.GDT_Float32, _TARGET_NODATA)
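# Minimal numpy illustration of the bar_op masking and division above,
# assuming a hypothetical nodata value of -1 for both inputs.
import numpy

nodata = -1.0
factor_accum = numpy.array([10.0, 20.0, nodata])
flow_accum = numpy.array([5.0, nodata, 4.0])
bar = numpy.full(factor_accum.shape, nodata)
valid = (factor_accum != nodata) & (flow_accum != nodata)
bar[valid] = factor_accum[valid] / flow_accum[valid]
# bar -> array([ 2., -1., -1.]); only pixels valid in both inputs get a value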
def test_regression_with_undefined_nodata(self):
    """SDR base regression test with undefined nodata values.

    Execute SDR with sample data with all rasters having undefined nodata
    values.
    """
    from natcap.invest.sdr import sdr

    # use predefined directory so test can clean up files during teardown
    args = SDRTests.generate_base_args(self.workspace_dir)
    # args_copy = args.copy()
    # args_copy['workspace_dir'] = 'sdr_test_workspace'

    # set all input rasters to have undefined nodata values
    tmp_dir = os.path.join(args['workspace_dir'], 'nodata_raster_dir')
    os.makedirs(tmp_dir)
    for path_key in ['erodibility_path', 'erosivity_path', 'lulc_path']:
        target_path = os.path.join(
            tmp_dir, os.path.basename(args[path_key]))
        datatype = pygeoprocessing.get_raster_info(
            args[path_key])['datatype']
        pygeoprocessing.new_raster_from_base(
            args[path_key], target_path, datatype, [None])

        base_raster = gdal.OpenEx(args[path_key], gdal.OF_RASTER)
        base_band = base_raster.GetRasterBand(1)
        base_array = base_band.ReadAsArray()
        base_band = None
        base_raster = None

        target_raster = gdal.OpenEx(
            target_path, gdal.OF_RASTER | gdal.GA_Update)
        target_band = target_raster.GetRasterBand(1)
        target_band.WriteArray(base_array)
        target_band = None
        target_raster = None
        args[path_key] = target_path

    sdr.execute(args)
    expected_results = {
        'sed_retent': 443994.1875,
        'sed_export': 0.87300693989,
        'usle_tot': 14.25030517578,
    }
    vector_path = os.path.join(
        args['workspace_dir'], 'watershed_results_sdr.shp')
    # assert the base SDR run produced the expected watershed results
    assert_expected_results_in_vector(expected_results, vector_path)
def new_raster_from_base(
        base_raster, target_base_id, target_dir, target_datatype,
        target_nodata):
    """Create a new raster from base given the base id.

    This function is to make the function signature look different for
    each run.

    Parameters:
        base_raster, target_base_id, target_dir, target_datatype,
        target_nodata are the same as pygeoprocessing.new_raster_from_base.

    Returns:
        None.
    """
    target_raster_path = os.path.join(target_dir, '%s.tif' % target_base_id)
    LOGGER.debug('making new raster %s', target_raster_path)
    pygeoprocessing.new_raster_from_base(
        base_raster, target_raster_path, target_datatype, [target_nodata])
    LOGGER.debug('done making new raster %s', target_raster_path)
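# Hedged usage sketch for the wrapper above; the arguments mirror
# pygeoprocessing.new_raster_from_base and the paths/values are hypothetical.
new_raster_from_base(
    'base_dem.tif',    # raster whose grid and projection are copied
    'scratch_mask',    # becomes <target_dir>/scratch_mask.tif
    '/tmp/workspace',  # target_dir
    gdal.GDT_Byte,     # target_datatype
    255)               # target_nodata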
def _greedy_select_pixels_to_area(
        base_value_raster_path, workspace_dir, area_ha_to_step_report_list):
    """Greedily select pixels in base with a report at every area step.

    workspace_dir will contain a set of mask rasters with filenames of the
    form {area_selected}_mask_{base_id}.tif and a csv table with the
    filename {base_id}_{target_area_ha}_report.csv containing columns
    (area selected), (sum of value selected), (path to raster mask).

    Args:
        base_value_raster_path (str): path to raster with value pixels,
            preferably positive.
        workspace_dir (str): path to directory to write output files into.
        area_ha_to_step_report_list (list): list of areas in Ha to record.

    Returns:
        A tuple containing (path_to_target_area_mask_raster, maximum area
        selected), where the raster is the largest mask selected and the
        value is the area that is selected, which will either be very close
        to target_area_ha or the maximum available area.
    """
    raster_id = _raw_basename(base_value_raster_path)
    all_ones_raster_path = os.path.join(
        workspace_dir, f'all_ones_{raster_id}.tif')
    pixel_area_in_ha_raster_path = os.path.join(
        workspace_dir, f'pixel_area_in_ha_{raster_id}.tif')

    pygeoprocessing.new_raster_from_base(
        base_value_raster_path, all_ones_raster_path, gdal.GDT_Byte,
        [None], fill_value_list=[1])

    density_per_ha_to_total_per_pixel(
        all_ones_raster_path, 1.0, pixel_area_in_ha_raster_path)

    LOGGER.info(
        f'calculating greedy pixels for value raster '
        f'{base_value_raster_path} and area '
        f'{pixel_area_in_ha_raster_path}')
    pygeoprocessing.greedy_pixel_pick_by_area(
        (base_value_raster_path, 1), (pixel_area_in_ha_raster_path, 1),
        workspace_dir, area_ha_to_step_report_list)
def _replace_value_by_mask(
        base_raster_path, replacement_value, replacement_mask_raster_path,
        target_replacement_raster_path):
    """Overwrite values in raster based on mask.

    Args:
        base_raster_path (str): base raster to modify
        replacement_value (numeric): value to write into base raster
            where the mask indicates.
        replacement_mask_raster_path (str): path to raster indicating (1)
            where a pixel should be replaced in base.
        target_replacement_raster_path (str): path to a target replacement
            raster.

    Returns:
        None
    """
    base_info = pygeoprocessing.get_raster_info(base_raster_path)
    pygeoprocessing.new_raster_from_base(
        base_raster_path, target_replacement_raster_path,
        base_info['datatype'], base_info['nodata'])
    target_raster = gdal.OpenEx(
        target_replacement_raster_path, gdal.OF_RASTER | gdal.GA_Update)
    target_band = target_raster.GetRasterBand(1)

    mask_raster = gdal.OpenEx(
        replacement_mask_raster_path, gdal.OF_RASTER | gdal.GA_Update)
    mask_band = mask_raster.GetRasterBand(1)

    for offset_dict, base_block in pygeoprocessing.iterblocks(
            (base_raster_path, 1)):
        mask_block = mask_band.ReadAsArray(**offset_dict)
        base_block[mask_block == 1] = replacement_value
        target_band.WriteArray(
            base_block, xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])

    target_band = None
    target_raster = None
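# Hedged usage sketch for _replace_value_by_mask(); file names are
# hypothetical. Wherever 'stream_mask.tif' is 1 the DEM value is replaced
# with 0.0 in the new output raster; all other pixels are copied unchanged.
_replace_value_by_mask(
    'dem.tif', 0.0, 'stream_mask.tif', 'dem_streams_zeroed.tif')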
def execute(args): """Habitat Quality. Open files necessary for the portion of the habitat_quality model. Args: workspace_dir (string): a path to the directory that will write output and other temporary files (required) lulc_cur_path (string): a path to an input land use/land cover raster (required) lulc_fut_path (string): a path to an input land use/land cover raster (optional) lulc_bas_path (string): a path to an input land use/land cover raster (optional, but required for rarity calculations) threat_folder (string): a path to the directory that will contain all threat rasters (required) threats_table_path (string): a path to an input CSV containing data of all the considered threats. Each row is a degradation source and each column a different attribute of the source with the following names: 'THREAT','MAX_DIST','WEIGHT' (required). access_vector_path (string): a path to an input polygon shapefile containing data on the relative protection against threats (optional) sensitivity_table_path (string): a path to an input CSV file of LULC types, whether they are considered habitat, and their sensitivity to each threat (required) half_saturation_constant (float): a python float that determines the spread and central tendency of habitat quality scores (required) suffix (string): a python string that will be inserted into all raster path paths just before the file extension. Example Args Dictionary:: { 'workspace_dir': 'path/to/workspace_dir', 'lulc_cur_path': 'path/to/lulc_cur_raster', 'lulc_fut_path': 'path/to/lulc_fut_raster', 'lulc_bas_path': 'path/to/lulc_bas_raster', 'threat_raster_folder': 'path/to/threat_rasters/', 'threats_table_path': 'path/to/threats_csv', 'access_vector_path': 'path/to/access_shapefile', 'sensitivity_table_path': 'path/to/sensitivity_csv', 'half_saturation_constant': 0.5, 'suffix': '_results', } Returns: None """ workspace = args['workspace_dir'] # Append a _ to the suffix if it's not empty and doesn't already have one suffix = utils.make_suffix_string(args, 'suffix') # Check to see if each of the workspace folders exists. If not, create the # folder in the filesystem. inter_dir = os.path.join(workspace, 'intermediate') out_dir = os.path.join(workspace, 'output') kernel_dir = os.path.join(inter_dir, 'kernels') utils.make_directories([inter_dir, out_dir, kernel_dir]) # get a handle on the folder with the threat rasters threat_raster_dir = args['threat_raster_folder'] threat_dict = utils.build_lookup_from_csv(args['threats_table_path'], 'THREAT', to_lower=False) sensitivity_dict = utils.build_lookup_from_csv( args['sensitivity_table_path'], 'LULC', to_lower=False) # check that the required headers exist in the sensitivity table. # Raise exception if they don't. sens_header_list = sensitivity_dict.items()[0][1].keys() required_sens_header_list = ['LULC', 'NAME', 'HABITAT'] missing_sens_header_list = [ h for h in required_sens_header_list if h not in sens_header_list ] if missing_sens_header_list: raise ValueError('Column(s) %s are missing in the sensitivity table' % (', '.join(missing_sens_header_list))) # check that the threat names in the threats table match with the threats # columns in the sensitivity table. Raise exception if they don't. for threat in threat_dict: if 'L_' + threat not in sens_header_list: missing_threat_header_list = (set(sens_header_list) - set(required_sens_header_list)) raise ValueError( 'Threat "%s" does not match any column in the sensitivity ' 'table. 
Possible columns: %s' % (threat, missing_threat_header_list)) # get the half saturation constant try: half_saturation = float(args['half_saturation_constant']) except ValueError: raise ValueError('Half-saturation constant is not a numeric number.' 'It is: %s' % args['half_saturation_constant']) # declare dictionaries to store the land cover and the threat rasters # pertaining to the different threats lulc_path_dict = {} threat_path_dict = {} # also store land cover and threat rasters in a list lulc_and_threat_raster_list = [] aligned_raster_list = [] # declare a set to store unique codes from lulc rasters raster_unique_lucodes = set() # compile all the threat rasters associated with the land cover for lulc_key, lulc_args in (('_c', 'lulc_cur_path'), ('_f', 'lulc_fut_path'), ('_b', 'lulc_bas_path')): if lulc_args in args: lulc_path = args[lulc_args] lulc_path_dict[lulc_key] = lulc_path # save land cover paths in a list for alignment and resize lulc_and_threat_raster_list.append(lulc_path) aligned_raster_list.append( os.path.join( inter_dir, os.path.basename(lulc_path).replace( '.tif', '_aligned.tif'))) # save unique codes to check if it's missing in sensitivity table for _, lulc_block in pygeoprocessing.iterblocks((lulc_path, 1)): raster_unique_lucodes.update(numpy.unique(lulc_block)) # Remove the nodata value from the set of landuser codes. nodata = pygeoprocessing.get_raster_info(lulc_path)['nodata'][0] try: raster_unique_lucodes.remove(nodata) except KeyError: # KeyError when the nodata value was not encountered in the # raster's pixel values. Same result when nodata value is # None. pass # add a key to the threat dictionary that associates all threat # rasters with this land cover threat_path_dict['threat' + lulc_key] = {} # for each threat given in the CSV file try opening the associated # raster which should be found in threat_raster_folder for threat in threat_dict: # it's okay to have no threat raster for baseline scenario threat_path_dict['threat' + lulc_key][threat] = ( resolve_ambiguous_raster_path( os.path.join(threat_raster_dir, threat + lulc_key), raise_error=(lulc_key != '_b'))) # save threat paths in a list for alignment and resize threat_path = threat_path_dict['threat' + lulc_key][threat] if threat_path: lulc_and_threat_raster_list.append(threat_path) aligned_raster_list.append( os.path.join( inter_dir, os.path.basename(lulc_path).replace( '.tif', '_aligned.tif'))) # check if there's any lucode from the LULC rasters missing in the # sensitivity table table_unique_lucodes = set(sensitivity_dict.keys()) missing_lucodes = raster_unique_lucodes.difference(table_unique_lucodes) if missing_lucodes: raise ValueError( 'The following land cover codes were found in your landcover rasters ' 'but not in your sensitivity table. Check your sensitivity table ' 'to see if they are missing: %s. 
\n\n' % ', '.join([str(x) for x in sorted(missing_lucodes)])) # Align and resize all the land cover and threat rasters, # and tore them in the intermediate folder LOGGER.info('Starting aligning and resizing land cover and threat rasters') lulc_pixel_size = (pygeoprocessing.get_raster_info( args['lulc_cur_path']))['pixel_size'] aligned_raster_list = [ os.path.join(inter_dir, os.path.basename(path).replace('.tif', '_aligned.tif')) for path in lulc_and_threat_raster_list ] pygeoprocessing.align_and_resize_raster_stack( lulc_and_threat_raster_list, aligned_raster_list, ['near'] * len(lulc_and_threat_raster_list), lulc_pixel_size, 'intersection') LOGGER.info('Finished aligning and resizing land cover and threat rasters') # Modify paths in lulc_path_dict and threat_path_dict to be aligned rasters for lulc_key, lulc_path in lulc_path_dict.iteritems(): lulc_path_dict[lulc_key] = os.path.join( inter_dir, os.path.basename(lulc_path).replace('.tif', '_aligned.tif')) for threat in threat_dict: threat_path = threat_path_dict['threat' + lulc_key][threat] if threat_path in lulc_and_threat_raster_list: threat_path_dict['threat' + lulc_key][threat] = os.path.join( inter_dir, os.path.basename(threat_path).replace( '.tif', '_aligned.tif')) LOGGER.info('Starting habitat_quality biophysical calculations') # Rasterize access vector, if value is null set to 1 (fully accessible), # else set to the value according to the ACCESS attribute cur_lulc_path = lulc_path_dict['_c'] fill_value = 1.0 try: LOGGER.info('Handling Access Shape') access_raster_path = os.path.join(inter_dir, 'access_layer%s.tif' % suffix) # create a new raster based on the raster info of current land cover pygeoprocessing.new_raster_from_base(cur_lulc_path, access_raster_path, gdal.GDT_Float32, [_OUT_NODATA], fill_value_list=[fill_value]) pygeoprocessing.rasterize(args['access_vector_path'], access_raster_path, burn_values=None, option_list=['ATTRIBUTE=ACCESS']) except KeyError: LOGGER.info('No Access Shape Provided, access raster filled with 1s.') # calculate the weight sum which is the sum of all the threats' weights weight_sum = 0.0 for threat_data in threat_dict.itervalues(): # Sum weight of threats weight_sum = weight_sum + threat_data['WEIGHT'] LOGGER.debug('lulc_path_dict : %s', lulc_path_dict) # for each land cover raster provided compute habitat quality for lulc_key, lulc_path in lulc_path_dict.iteritems(): LOGGER.info('Calculating habitat quality for landuse: %s', lulc_path) # Create raster of habitat based on habitat field habitat_raster_path = os.path.join( inter_dir, 'habitat%s%s.tif' % (lulc_key, suffix)) map_raster_to_dict_values(lulc_path, habitat_raster_path, sensitivity_dict, 'HABITAT', _OUT_NODATA, values_required=False) # initialize a list that will store all the threat/threat rasters # after they have been adjusted for distance, weight, and access deg_raster_list = [] # a list to keep track of the normalized weight for each threat weight_list = numpy.array([]) # variable to indicate whether we should break out of calculations # for a land cover because a threat raster was not found exit_landcover = False # adjust each threat/threat raster for distance, weight, and access for threat, threat_data in threat_dict.iteritems(): LOGGER.info('Calculating threat: %s.\nThreat data: %s' % (threat, threat_data)) # get the threat raster for the specific threat threat_raster_path = threat_path_dict['threat' + lulc_key][threat] LOGGER.info('threat_raster_path %s', threat_raster_path) if threat_raster_path is None: LOGGER.info( 'The threat 
raster for %s could not be found for the land ' 'cover %s. Skipping Habitat Quality calculation for this ' 'land cover.' % (threat, lulc_key)) exit_landcover = True break # need the pixel size for the threat raster so we can create # an appropriate kernel for convolution threat_pixel_size = pygeoprocessing.get_raster_info( threat_raster_path)['pixel_size'] # pixel size tuple could have negative value mean_threat_pixel_size = (abs(threat_pixel_size[0]) + abs(threat_pixel_size[1])) / 2.0 # convert max distance (given in KM) to meters max_dist_m = threat_data['MAX_DIST'] * 1000.0 # convert max distance from meters to the number of pixels that # represents on the raster max_dist_pixel = max_dist_m / mean_threat_pixel_size LOGGER.debug('Max distance in pixels: %f', max_dist_pixel) # blur the threat raster based on the effect of the threat over # distance decay_type = threat_data['DECAY'] kernel_path = os.path.join( kernel_dir, 'kernel_%s%s%s.tif' % (threat, lulc_key, suffix)) if decay_type == 'linear': make_linear_decay_kernel_path(max_dist_pixel, kernel_path) elif decay_type == 'exponential': utils.exponential_decay_kernel_raster(max_dist_pixel, kernel_path) else: raise ValueError( "Unknown type of decay in biophysical table, should be " "either 'linear' or 'exponential'. Input was %s for threat" " %s." % (decay_type, threat)) filtered_threat_raster_path = os.path.join( inter_dir, 'filtered_%s%s%s.tif' % (threat, lulc_key, suffix)) pygeoprocessing.convolve_2d((threat_raster_path, 1), (kernel_path, 1), filtered_threat_raster_path) # create sensitivity raster based on threat sens_raster_path = os.path.join( inter_dir, 'sens_%s%s%s.tif' % (threat, lulc_key, suffix)) map_raster_to_dict_values(lulc_path, sens_raster_path, sensitivity_dict, 'L_' + threat, _OUT_NODATA, values_required=True) # get the normalized weight for each threat weight_avg = threat_data['WEIGHT'] / weight_sum # add the threat raster adjusted by distance and the raster # representing sensitivity to the list to be past to # vectorized_rasters below deg_raster_list.append(filtered_threat_raster_path) deg_raster_list.append(sens_raster_path) # store the normalized weight for each threat in a list that # will be used below in total_degradation weight_list = numpy.append(weight_list, weight_avg) # check to see if we got here because a threat raster was missing # and if so then we want to skip to the next landcover if exit_landcover: continue def total_degradation(*raster): """A vectorized function that computes the degradation value for each pixel based on each threat and then sums them together *rasters - a list of floats depicting the adjusted threat value per pixel based on distance and sensitivity. The values are in pairs so that the values for each threat can be tracked: [filtered_val_threat1, sens_val_threat1, filtered_val_threat2, sens_val_threat2, ...] There is an optional last value in the list which is the access_raster value, but it is only present if access_raster is not None. 
returns - the total degradation score for the pixel""" # we can not be certain how many threats the user will enter, # so we handle each filtered threat and sensitivity raster # in pairs sum_degradation = numpy.zeros(raster[0].shape) for index in range(len(raster) / 2): step = index * 2 sum_degradation += (raster[step] * raster[step + 1] * weight_list[index]) nodata_mask = numpy.empty(raster[0].shape, dtype=numpy.int8) nodata_mask[:] = 0 for array in raster: nodata_mask = nodata_mask | (array == _OUT_NODATA) # the last element in raster is access return numpy.where(nodata_mask, _OUT_NODATA, sum_degradation * raster[-1]) # add the access_raster onto the end of the collected raster list. The # access_raster will be values from the shapefile if provided or a # raster filled with all 1's if not deg_raster_list.append(access_raster_path) deg_sum_raster_path = os.path.join( out_dir, 'deg_sum' + lulc_key + suffix + '.tif') LOGGER.info('Starting raster calculation on total_degradation') deg_raster_band_list = [(path, 1) for path in deg_raster_list] pygeoprocessing.raster_calculator(deg_raster_band_list, total_degradation, deg_sum_raster_path, gdal.GDT_Float32, _OUT_NODATA) LOGGER.info('Finished raster calculation on total_degradation') # Compute habitat quality # ksq: a term used below to compute habitat quality ksq = half_saturation**_SCALING_PARAM def quality_op(degradation, habitat): """Vectorized function that computes habitat quality given a degradation and habitat value. degradation - a float from the created degradation raster above. habitat - a float indicating habitat suitability from from the habitat raster created above. returns - a float representing the habitat quality score for a pixel """ degredataion_clamped = numpy.where(degradation < 0, 0, degradation) return numpy.where( (degradation == _OUT_NODATA) | (habitat == _OUT_NODATA), _OUT_NODATA, (habitat * (1.0 - ((degredataion_clamped**_SCALING_PARAM) / (degredataion_clamped**_SCALING_PARAM + ksq))))) quality_path = os.path.join(out_dir, 'quality' + lulc_key + suffix + '.tif') LOGGER.info('Starting raster calculation on quality_op') deg_hab_raster_list = [deg_sum_raster_path, habitat_raster_path] deg_hab_raster_band_list = [(path, 1) for path in deg_hab_raster_list] pygeoprocessing.raster_calculator(deg_hab_raster_band_list, quality_op, quality_path, gdal.GDT_Float32, _OUT_NODATA) LOGGER.info('Finished raster calculation on quality_op') # Compute Rarity if user supplied baseline raster if '_b' not in lulc_path_dict: LOGGER.info('Baseline not provided to compute Rarity') else: lulc_base_path = lulc_path_dict['_b'] # get the area of a base pixel to use for computing rarity where the # pixel sizes are different between base and cur/fut rasters base_pixel_size = pygeoprocessing.get_raster_info( lulc_base_path)['pixel_size'] base_area = float(abs(base_pixel_size[0]) * abs(base_pixel_size[1])) base_nodata = pygeoprocessing.get_raster_info( lulc_base_path)['nodata'][0] lulc_code_count_b = raster_pixel_count(lulc_base_path) # compute rarity for current landscape and future (if provided) for lulc_key in ['_c', '_f']: if lulc_key not in lulc_path_dict: continue lulc_path = lulc_path_dict[lulc_key] lulc_time = 'current' if lulc_key == '_c' else 'future' # get the area of a cur/fut pixel lulc_pixel_size = pygeoprocessing.get_raster_info( lulc_path)['pixel_size'] lulc_area = float( abs(lulc_pixel_size[0]) * abs(lulc_pixel_size[1])) lulc_nodata = pygeoprocessing.get_raster_info( lulc_path)['nodata'][0] def trim_op(base, cover_x): """Trim cover_x 
to the mask of base. Parameters: base (numpy.ndarray): base raster from 'lulc_base' cover_x (numpy.ndarray): either future or current land cover raster from 'lulc_path' above Returns: _OUT_NODATA where either array has nodata, otherwise cover_x. """ return numpy.where( (base == base_nodata) | (cover_x == lulc_nodata), base_nodata, cover_x) LOGGER.info('Create new cover for %s', lulc_path) new_cover_path = os.path.join( inter_dir, 'new_cover' + lulc_key + suffix + '.tif') LOGGER.info('Starting masking %s land cover to base land cover.' % lulc_time) pygeoprocessing.raster_calculator([(lulc_base_path, 1), (lulc_path, 1)], trim_op, new_cover_path, gdal.GDT_Float32, _OUT_NODATA) LOGGER.info('Finished masking %s land cover to base land cover.' % lulc_time) LOGGER.info('Starting rarity computation on %s land cover.' % lulc_time) lulc_code_count_x = raster_pixel_count(new_cover_path) # a dictionary to map LULC types to a number that depicts how # rare they are considered code_index = {} # compute rarity index for each lulc code # define 0.0 if an lulc code is found in the cur/fut landcover # but not the baseline for code in lulc_code_count_x.iterkeys(): if code in lulc_code_count_b: numerator = lulc_code_count_x[code] * lulc_area denominator = lulc_code_count_b[code] * base_area ratio = 1.0 - (numerator / denominator) code_index[code] = ratio else: code_index[code] = 0.0 rarity_path = os.path.join(out_dir, 'rarity' + lulc_key + suffix + '.tif') pygeoprocessing.reclassify_raster((new_cover_path, 1), code_index, rarity_path, gdal.GDT_Float32, _RARITY_NODATA) LOGGER.info('Finished rarity computation on %s land cover.' % lulc_time) LOGGER.info('Finished habitat_quality biophysical calculations')
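# Standalone numpy restatement of quality_op above, included for clarity.
# The exponent used for _SCALING_PARAM here (2.5) and the input arrays are
# assumptions for illustration only.
import numpy

_SCALING_PARAM = 2.5       # assumed value
half_saturation = 0.5
ksq = half_saturation ** _SCALING_PARAM
degradation = numpy.array([0.0, 0.25, 0.5, 1.0])
habitat = numpy.array([1.0, 1.0, 1.0, 0.5])
deg_clamped = numpy.where(degradation < 0, 0, degradation)
quality = habitat * (
    1.0 - deg_clamped ** _SCALING_PARAM /
    (deg_clamped ** _SCALING_PARAM + ksq))
# quality equals the habitat suitability when degradation is 0 and falls
# toward 0 as degradation grows past the half-saturation constant.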
def test_flow_accum_mfd_with_weights(self):
    """PGP.routing: test flow accum for mfd with weights."""
    n = 11
    dem_raster_path = os.path.join(self.workspace_dir, 'dem.tif')
    dem_array = numpy.zeros((n, n), dtype=numpy.float32)
    dem_array[int(n / 2), :] = -1
    _array_to_raster(dem_array, None, dem_raster_path)

    flow_dir_path = os.path.join(self.workspace_dir, 'flow_dir.tif')
    pygeoprocessing.routing.flow_dir_mfd(
        (dem_raster_path, 1), flow_dir_path,
        working_dir=self.workspace_dir)

    flow_weight_raster_path = os.path.join(
        self.workspace_dir, 'flow_weights.tif')
    flow_weight_array = numpy.empty((n, n))
    flow_weight_constant = 2.7
    flow_weight_array[:] = flow_weight_constant
    pygeoprocessing.new_raster_from_base(
        flow_dir_path, flow_weight_raster_path, gdal.GDT_Float32, [-1.0])
    flow_weight_raster = gdal.OpenEx(
        flow_weight_raster_path, gdal.OF_RASTER | gdal.GA_Update)
    flow_weight_band = flow_weight_raster.GetRasterBand(1)
    flow_weight_band.WriteArray(flow_weight_array)
    flow_weight_band.FlushCache()
    flow_weight_band = None
    flow_weight_raster = None

    target_flow_accum_path = os.path.join(
        self.workspace_dir, 'flow_accum_mfd.tif')
    pygeoprocessing.routing.flow_accumulation_mfd(
        (flow_dir_path, 1), target_flow_accum_path,
        weight_raster_path_band=(flow_weight_raster_path, 1))
    flow_array = pygeoprocessing.raster_to_numpy_array(
        target_flow_accum_path)
    self.assertEqual(flow_array.dtype, numpy.float64)
    # this was generated from a hand-checked result with a flow weight of
    # 1, so the result should be that baseline scaled by
    # flow_weight_constant (2.7 here).
    expected_result = flow_weight_constant * numpy.array(
        [[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
         [1.88571429, 2.11428571, 2., 2., 2., 2., 2., 2., 2.,
          2.11428571, 1.88571429],
         [2.7355102, 3.23183673, 3.03265306, 3., 3., 3., 3., 3.,
          3.03265306, 3.23183673, 2.7355102],
         [3.56468805, 4.34574927, 4.08023324, 4.00932945, 4., 4., 4.,
          4.00932945, 4.08023324, 4.34574927, 3.56468805],
         [4.38045548, 5.45412012, 5.13583673, 5.02692212, 5.00266556, 5.,
          5.00266556, 5.02692212, 5.13583673, 5.45412012, 4.38045548],
         [60.5, 51.12681336, 39.01272503, 27.62141227, 16.519192,
          11.00304635, 16.519192, 27.62141227, 39.01272503, 51.12681336,
          60.5],
         [4.38045548, 5.45412012, 5.13583673, 5.02692212, 5.00266556, 5.,
          5.00266556, 5.02692212, 5.13583673, 5.45412012, 4.38045548],
         [3.56468805, 4.34574927, 4.08023324, 4.00932945, 4., 4., 4.,
          4.00932945, 4.08023324, 4.34574927, 3.56468805],
         [2.7355102, 3.23183673, 3.03265306, 3., 3., 3., 3., 3.,
          3.03265306, 3.23183673, 2.7355102],
         [1.88571429, 2.11428571, 2., 2., 2., 2., 2., 2., 2.,
          2.11428571, 1.88571429],
         [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
    numpy.testing.assert_allclose(flow_array, expected_result, rtol=1e-6)

    # try with zero weights
    zero_array = numpy.zeros(expected_result.shape, dtype=numpy.float32)
    zero_raster_path = os.path.join(self.workspace_dir, 'zero.tif')
    _array_to_raster(zero_array, None, zero_raster_path)
    pygeoprocessing.routing.flow_accumulation_mfd(
        (flow_dir_path, 1), target_flow_accum_path,
        weight_raster_path_band=(zero_raster_path, 1))
    flow_accum_array = pygeoprocessing.raster_to_numpy_array(
        target_flow_accum_path)
    self.assertEqual(flow_accum_array.dtype, numpy.float64)
    numpy.testing.assert_almost_equal(
        numpy.sum(flow_accum_array), numpy.sum(zero_array), 6)
def people_access(country_id, friction_raster_path, population_density_raster_path, habitat_raster_path, max_travel_time, target_people_access_path, target_normalized_people_access_path): """Construct a people access raster showing where people can reach. The people access raster will have a value of population count per pixel which can reach that pixel within a cutoff of `max_travel_time` or `max_travel_distance`. Parameters: country_id (str): country id just for logging friction_raster_path (str): path to a raster whose units are minutes/meter required to cross any given pixel. Values of 0 are treated as impassible. population_density_raster_path (str): path to a per-pixel population density pop/m^2 raster. max_travel_time (float): maximum time to allow to travel in mins target_people_access_path (str): raster created that will contain the count of population that can reach any given pixel within the travel time and travel distance constraints. target_normalized_people_access_path (str): raster created that contains a normalized count of population that can reach any pixel within the travel time. Population is normalized by dividing the source population by the number of pixels that it can reach such that the sum of the entire reachable area equals the original population count. Useful for aggregating of "number of people that can reach this area" and similar calculations. Returns: None. """ try: min_friction = get_min_nonzero_raster_value(friction_raster_path) max_travel_distance_in_pixels = math.ceil( 1 / min_friction * max_travel_time / TARGET_CELL_LENGTH_M) LOGGER.debug( f'min_friction: {min_friction}\n' f'max_travel_time: {max_travel_time}\n' f'max_travel_distance_in_pixels {max_travel_distance_in_pixels}') pygeoprocessing.new_raster_from_base(population_density_raster_path, target_people_access_path, gdal.GDT_Float32, [-1]) pygeoprocessing.new_raster_from_base( population_density_raster_path, target_normalized_people_access_path, gdal.GDT_Float32, [-1]) friction_raster_info = pygeoprocessing.get_raster_info( friction_raster_path) raster_x_size, raster_y_size = friction_raster_info['raster_size'] start_complete_queue = queue.Queue() status_monitor_thread = threading.Thread(target=status_monitor, args=(start_complete_queue, country_id)) status_monitor_thread.start() shortest_distances_worker_thread_list = [] work_queue = queue.Queue() result_queue = queue.Queue() for _ in range(multiprocessing.cpu_count()): shortest_distances_worker_thread = threading.Thread( target=shortest_distances_worker, args=(work_queue, result_queue, start_complete_queue, friction_raster_path, population_density_raster_path, max_travel_time)) shortest_distances_worker_thread.start() shortest_distances_worker_thread_list.append( shortest_distances_worker_thread) access_raster_stitcher_thread = threading.Thread( target=access_raster_stitcher, args=(result_queue, start_complete_queue, habitat_raster_path, target_people_access_path, target_normalized_people_access_path)) access_raster_stitcher_thread.start() n_window_x = math.ceil(raster_x_size / CORE_SIZE) n_window_y = math.ceil(raster_y_size / CORE_SIZE) n_windows = n_window_x * n_window_y start_complete_queue.put(n_windows) for window_i in range(n_window_x): i_core = window_i * CORE_SIZE i_offset = i_core - max_travel_distance_in_pixels i_size = CORE_SIZE + 2 * max_travel_distance_in_pixels i_core_size = CORE_SIZE if i_offset < 0: # shrink the size by the left margin and clamp to 0 i_size += i_offset i_offset = 0 if i_core + i_core_size >= raster_x_size: 
i_core_size -= i_core + i_core_size - raster_x_size + 1 if i_offset + i_size >= raster_x_size: i_size -= i_offset + i_size - raster_x_size + 1 for window_j in range(n_window_y): j_core = window_j * CORE_SIZE j_offset = (j_core - max_travel_distance_in_pixels) j_size = CORE_SIZE + 2 * max_travel_distance_in_pixels j_core_size = CORE_SIZE if j_offset < 0: # shrink the size by the left margin and clamp to 0 j_size += j_offset j_offset = 0 if j_core + j_core_size >= raster_y_size: j_core_size -= j_core + j_core_size - raster_y_size + 1 if j_offset + j_size >= raster_y_size: j_size -= j_offset + j_size - raster_y_size + 1 work_queue.put((i_offset, j_offset, i_size, j_size, i_core, j_core, i_core_size, j_core_size)) work_queue.put(None) for worker_thread in shortest_distances_worker_thread_list: worker_thread.join() LOGGER.info(f'done with workers') result_queue.put(None) access_raster_stitcher_thread.join() LOGGER.info( f'done with access raster worker {target_people_access_path}') except Exception: LOGGER.exception( f'something bad happened on people_access for {target_people_access_path}' )
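# Worked example of the search-radius formula used in people_access() above.
# The numbers are hypothetical: a minutes-per-meter friction value, a travel
# budget in minutes, and an assumed TARGET_CELL_LENGTH_M of 1000 m.
import math

min_friction = 0.005          # minutes per meter at the fastest pixel
max_travel_time = 60.0        # minutes
cell_length_m = 1000.0        # assumed TARGET_CELL_LENGTH_M
max_travel_distance_in_pixels = math.ceil(
    1 / min_friction * max_travel_time / cell_length_m)
# -> ceil(200 m/min * 60 min / 1000 m) = 12 pixels of margin per window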
def calculate_reef_value(shore_sample_point_vector, template_raster_path, reef_habitat_raster_path, working_dir, target_reef_value_raster_path): """Calculate habitat value. Will create rasters in the `working_dir` directory named from the `habitat_fieldname_list` values containing relative importance of global habitat. The higher the value of a pixel the more important that pixel of habitat is for protection of the coastline. Parameters: shore_sample_point_vector (str): path to CV analysis vector containing at least the fields `Rt` and `Rt_nohab_[hab]` for all habitat types under consideration. template_raster_path (str): path to an existing raster whose size and shape will be used to be the base of the raster that's created for each habitat type. habitat_fieldname_list (list): list of habitat ids to analyise. habitat_vector_path_map (dict): maps fieldnames from `habitat_fieldname_list` to 3-tuples of (path to hab vector (str), risk val (float), protective distance (float)). working_dir (str): path to directory containing habitat back projection results target_reef_value_raster_path (str): path to raster value raster for that habitat. Returns: None. """ temp_workspace_dir = os.path.join(working_dir, 'hab_value_churn') for dir_path in [working_dir, temp_workspace_dir]: try: os.makedirs(dir_path) except OSError: pass gpkg_driver = ogr.GetDriverByName('gpkg') shore_sample_point_vector = gdal.OpenEx(shore_sample_point_vector, gdal.OF_VECTOR) shore_sample_point_layer = shore_sample_point_vector.GetLayer() reef_service_id = 'Rt_habservice_reefs_all' buffer_habitat_path = os.path.join(temp_workspace_dir, 'reefs_all_buffer.gpkg') buffer_habitat_vector = gpkg_driver.CreateDataSource(buffer_habitat_path) wgs84_srs = osr.SpatialReference() wgs84_srs.ImportFromEPSG(4326) buffer_habitat_layer = (buffer_habitat_vector.CreateLayer( reef_service_id, wgs84_srs, ogr.wkbPolygon)) buffer_habitat_layer.CreateField( ogr.FieldDefn(reef_service_id, ogr.OFTReal)) buffer_habitat_layer_defn = buffer_habitat_layer.GetLayerDefn() shore_sample_point_layer.ResetReading() buffer_habitat_layer.StartTransaction() for point_index, point_feature in enumerate(shore_sample_point_layer): if point_index % 1000 == 0: LOGGER.debug( 'point buffering is %.2f%% complete', point_index / shore_sample_point_layer.GetFeatureCount() * 100.0) # for each point, convert to local UTM to buffer out a given # distance then back to wgs84 point_geom = point_feature.GetGeometryRef() if point_geom.GetX() > 178 or point_geom.GetX() < -178: continue utm_srs = calculate_utm_srs(point_geom.GetX(), point_geom.GetY()) wgs84_to_utm_transform = osr.CoordinateTransformation( wgs84_srs, utm_srs) utm_to_wgs84_transform = osr.CoordinateTransformation( utm_srs, wgs84_srs) point_geom.Transform(wgs84_to_utm_transform) buffer_poly_geom = point_geom.Buffer(REEF_PROT_DIST) buffer_poly_geom.Transform(utm_to_wgs84_transform) buffer_point_feature = ogr.Feature(buffer_habitat_layer_defn) buffer_point_feature.SetGeometry(buffer_poly_geom) buffer_point_feature.SetField( reef_service_id, point_feature.GetField('Rt_habservice_reefs_all')) buffer_habitat_layer.CreateFeature(buffer_point_feature) buffer_point_feature = None point_feature = None buffer_poly_geom = None point_geom = None # at this point every shore point has been buffered to the effective # habitat distance and the habitat service has been saved with it buffer_habitat_layer.CommitTransaction() buffer_habitat_layer = None buffer_habitat_vector = None value_coverage_raster_path = os.path.join(temp_workspace_dir, 
'reefs_all_value_cover.tif') pygeoprocessing.new_raster_from_base( template_raster_path, value_coverage_raster_path, gdal.GDT_Float32, [0], raster_driver_creation_tuple=('GTIFF', ('TILED=YES', 'BIGTIFF=YES', 'COMPRESS=LZW', 'BLOCKXSIZE=256', 'BLOCKYSIZE=256', 'SPARSE_OK=TRUE'))) pygeoprocessing.rasterize( buffer_habitat_path, value_coverage_raster_path, option_list=['ATTRIBUTE=%s' % reef_service_id, 'MERGE_ALG=ADD']) value_coverage_nodata = pygeoprocessing.get_raster_info( value_coverage_raster_path)['nodata'][0] hab_nodata = pygeoprocessing.get_raster_info( reef_habitat_raster_path)['nodata'][0] wgs84_srs = osr.SpatialReference() wgs84_srs.ImportFromEPSG(4326) aligned_value_hab_raster_path_list = align_raster_list( [value_coverage_raster_path, reef_habitat_raster_path], temp_workspace_dir, target_sr_wkt=wgs84_srs.ExportToWkt()) pygeoprocessing.raster_calculator( [(aligned_value_hab_raster_path_list[0], 1), (aligned_value_hab_raster_path_list[1], 1), (value_coverage_nodata, 'raw'), (hab_nodata, 'raw')], intersect_raster_op, target_reef_value_raster_path, gdal.GDT_Float32, value_coverage_nodata) ecoshard.build_overviews(target_reef_value_raster_path)
def _calculate_tropical_forest_edge_carbon_map( edge_distance_path, spatial_index_pickle_path, n_nearest_model_points, biomass_to_carbon_conversion_factor, tropical_forest_edge_carbon_map_path): """Calculates the carbon on the forest pixels accounting for their global position with respect to precalculated edge carbon models. Args: edge_distance_path (string): path to the a raster where each pixel contains the pixel distance to forest edge. spatial_index_pickle_path (string): path to the pickle file that contains a tuple of: kd_tree (scipy.spatial.cKDTree): a kd-tree that has indexed the valid model parameter points for fast nearest neighbor calculations. theta_model_parameters (numpy.array Nx3): parallel array of model theta parameters consistent with the order in which points were inserted into 'kd_tree' method_model_parameter (numpy.array N): parallel array of method numbers (1..3) consistent with the order in which points were inserted into 'kd_tree'. n_nearest_model_points (int): number of nearest model points to search for. biomass_to_carbon_conversion_factor (float): number by which to multiply the biomass by to get carbon. tropical_forest_edge_carbon_map_path (string): a filepath to the output raster which will contain total carbon stocks per cell of forest type. Returns: None """ # load spatial indices from pickle file # let d = number of precalculated model cells (2217 for sample data) # kd_tree.data.shape: (d, 2) # theta_model_parameters.shape: (d, 3) # method_model_parameter.shape: (d,) kd_tree, theta_model_parameters, method_model_parameter = pickle.load( open(spatial_index_pickle_path, 'rb')) # create output raster and open band for writing # fill nodata, in case we skip entire memory blocks that are non-forest pygeoprocessing.new_raster_from_base(edge_distance_path, tropical_forest_edge_carbon_map_path, gdal.GDT_Float32, band_nodata_list=[NODATA_VALUE], fill_value_list=[NODATA_VALUE]) edge_carbon_raster = gdal.OpenEx(tropical_forest_edge_carbon_map_path, gdal.GA_Update) edge_carbon_band = edge_carbon_raster.GetRasterBand(1) edge_carbon_geotransform = edge_carbon_raster.GetGeoTransform() # create edge distance band for memory block reading n_rows = edge_carbon_raster.RasterYSize n_cols = edge_carbon_raster.RasterXSize n_cells = n_rows * n_cols n_cells_processed = 0 # timer to give updates per call last_time = time.time() cell_xsize, cell_ysize = pygeoprocessing.get_raster_info( edge_distance_path)['pixel_size'] cell_size_km = (abs(cell_xsize) + abs(cell_ysize)) / 2 / 1000.0 cell_area_ha = (abs(cell_xsize) * abs(cell_ysize)) / 10000.0 # Loop memory block by memory block, calculating the forest edge carbon # for every forest pixel. for edge_distance_data, edge_distance_block in pygeoprocessing.iterblocks( (edge_distance_path, 1), largest_block=2**12): current_time = time.time() if current_time - last_time > 5.0: LOGGER.info('Carbon edge calculation approx. 
%.2f%% complete', (n_cells_processed / float(n_cells) * 100.0)) last_time = current_time n_cells_processed += (edge_distance_data['win_xsize'] * edge_distance_data['win_ysize']) # only forest pixels will have an edge distance > 0 valid_edge_distance_mask = (edge_distance_block > 0) # if no valid forest pixels to calculate, skip to the next block if not valid_edge_distance_mask.any(): continue # calculate local coordinates for each pixel so we can test for # distance to the nearest carbon model points col_range = numpy.linspace( edge_carbon_geotransform[0] + edge_carbon_geotransform[1] * edge_distance_data['xoff'], edge_carbon_geotransform[0] + edge_carbon_geotransform[1] * (edge_distance_data['xoff'] + edge_distance_data['win_xsize']), num=edge_distance_data['win_xsize'], endpoint=False) row_range = numpy.linspace( edge_carbon_geotransform[3] + edge_carbon_geotransform[5] * edge_distance_data['yoff'], edge_carbon_geotransform[3] + edge_carbon_geotransform[5] * (edge_distance_data['yoff'] + edge_distance_data['win_ysize']), num=edge_distance_data['win_ysize'], endpoint=False) col_coords, row_coords = numpy.meshgrid(col_range, row_range) # query nearest points for every point in the grid # workers=-1 means use all available CPUs coord_points = list( zip(row_coords[valid_edge_distance_mask].ravel(), col_coords[valid_edge_distance_mask].ravel())) # for each forest point x, for each of its k nearest neighbors # shape of distances and indexes: (x, k) distances, indexes = kd_tree.query( coord_points, k=n_nearest_model_points, distance_upper_bound=DISTANCE_UPPER_BOUND, workers=-1) if n_nearest_model_points == 1: distances = distances.reshape(distances.shape[0], 1) indexes = indexes.reshape(indexes.shape[0], 1) # 3 is for the 3 thetas in the carbon model. thetas shape: (x, k, 3) thetas = numpy.zeros((indexes.shape[0], indexes.shape[1], 3)) valid_index_mask = (indexes != kd_tree.n) thetas[valid_index_mask] = theta_model_parameters[ indexes[valid_index_mask]] # reshape to an N,nearest_points so we can multiply by thetas valid_edge_distances_km = numpy.repeat( edge_distance_block[valid_edge_distance_mask] * cell_size_km, n_nearest_model_points).reshape(-1, n_nearest_model_points) # For each forest pixel x, for each of its k nearest neighbors, the # chosen regression method (1, 2, or 3). 
model_index shape: (x, k) model_index = numpy.zeros(indexes.shape, dtype=numpy.int8) model_index[valid_index_mask] = ( method_model_parameter[indexes[valid_index_mask]]) # biomass shape: (x, k) biomass = numpy.zeros((indexes.shape[0], indexes.shape[1]), dtype=numpy.float32) # mask shapes: (x, k) mask_1 = model_index == 1 mask_2 = model_index == 2 mask_3 = model_index == 3 # exponential model # biomass_1 = t1 - t2 * exp(-t3 * edge_dist_km) biomass[mask_1] = ( thetas[mask_1][:, 0] - thetas[mask_1][:, 1] * numpy.exp(-thetas[mask_1][:, 2] * valid_edge_distances_km[mask_1]) ) * cell_area_ha # logarithmic model # biomass_2 = t1 + t2 * numpy.log(edge_dist_km) biomass[mask_2] = ( thetas[mask_2][:, 0] + thetas[mask_2][:, 1] * numpy.log(valid_edge_distances_km[mask_2])) * cell_area_ha # linear regression # biomass_3 = t1 + t2 * edge_dist_km biomass[mask_3] = (thetas[mask_3][:, 0] + thetas[mask_3][:, 1] * valid_edge_distances_km[mask_3]) * cell_area_ha # reshape the array so that each set of points is in a separate # dimension, here distances are distances to each valid model # point, not distance to edge of forest weights = numpy.zeros(distances.shape) valid_distance_mask = (distances > 0) & (distances < numpy.inf) weights[valid_distance_mask] = (n_nearest_model_points / distances[valid_distance_mask]) # Denominator is the sum of the weights per nearest point (axis 1) denom = numpy.sum(weights, axis=1) # To avoid a divide by 0 valid_denom = denom != 0 average_biomass = numpy.zeros(distances.shape[0]) average_biomass[valid_denom] = ( numpy.sum(weights[valid_denom] * biomass[valid_denom], axis=1) / denom[valid_denom]) # Ensure the result has nodata everywhere the distance was invalid result = numpy.full(edge_distance_block.shape, NODATA_VALUE, dtype=numpy.float32) # convert biomass to carbon in this stage result[valid_edge_distance_mask] = ( average_biomass * biomass_to_carbon_conversion_factor) edge_carbon_band.WriteArray(result, xoff=edge_distance_data['xoff'], yoff=edge_distance_data['yoff']) LOGGER.info('Carbon edge calculation 100.0% complete')
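# Standalone restatement of the three edge-biomass regressions used above
# (method 1: asymptotic exponential, 2: logarithmic, 3: linear). The theta
# values and distances are hypothetical, for illustration only.
import numpy

edge_dist_km = numpy.array([0.1, 1.0, 5.0])
t1, t2, t3 = 250.0, 200.0, 0.5                           # hypothetical thetas
biomass_exp = t1 - t2 * numpy.exp(-t3 * edge_dist_km)    # method 1
biomass_log = t1 + t2 * numpy.log(edge_dist_km)          # method 2
biomass_lin = t1 + t2 * edge_dist_km                     # method 3
# Each per-pixel estimate is then scaled by pixel area in hectares and the
# k nearest estimates are blended with inverse-distance weights.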
def alternative_index_workflow(workspace_dir, raster_input_dict, aoi_path, index_path, polygon_input_list=None): """Compute the alternative index from raw inputs. All inputs, including AOI, must be share coordinate reference system and must have roughly equivalent extents. Recommend that inputs are clipped and projected in Arc prior to running this script. Args: workspace_dir (string): path to workspace where intermediate results should be created/stored raster_input_dict (dict): a nested python dictionary containing info about raster-based inputs that should be combined. The keys in the index should be the labels for each input; values in the dictionary should be dictionaries containing the keys 'path' (path to the raster input) and 'weight' (weighting value that is applied to the normalized values in this input relative to others). EACH INDEX IS INTERPRETED AS HIGH VALUE = GOOD. aoi_path (string): path to boundary of the study area index_path (string): path to location where the index should be saved polygon_input_list (list): list of paths to polygon inputs that should be included. Each of these is assigned a weight of 1. Side effects: creates or modifies a raster at the location ``index_path`` Returns: None """ # ensure that each new input shares spatial reference vector_info = pygeoprocessing.get_vector_info(aoi_path) destination_proj = osr.SpatialReference() destination_proj.ImportFromWkt(vector_info['projection_wkt']) problem_list = [] for new_input in raster_input_dict: new_proj = osr.SpatialReference() new_proj.ImportFromWkt( pygeoprocessing.get_raster_info( raster_input_dict[new_input]['path'])['projection_wkt']) if (new_proj.IsSame(destination_proj) == 0): problem_list.append(new_input) if problem_list: raise ValueError( "Project these to match the AOI: {}".format(problem_list)) intermediate_dir = os.path.join(workspace_dir, 'intermediate') if not os.path.exists(intermediate_dir): os.makedirs(intermediate_dir) normalized_dir = os.path.join(intermediate_dir, 'normalized') if not os.path.exists(normalized_dir): os.makedirs(normalized_dir) aligned_dir = os.path.join(intermediate_dir, 'aligned') if not os.path.exists(aligned_dir): os.makedirs(aligned_dir) # normalize all raster-based inputs within AOI base_raster_path_list = [] aligned_raster_path_list = [] for new_input in raster_input_dict: value_raster_path = raster_input_dict[new_input]['path'] try: weight = raster_input_dict[new_input]['weight'] except KeyError: weight = 1 bn = os.path.basename(value_raster_path) normalized_path = os.path.join(normalized_dir, bn) aligned_path = os.path.join(aligned_dir, bn) base_raster_path_list.append(normalized_path) aligned_raster_path_list.append(aligned_path) if not os.path.exists(normalized_path): with tempfile.NamedTemporaryFile( prefix='mask_raster', delete=False, suffix='.tif', dir=normalized_dir) as clipped_raster_file: clipped_raster_path = clipped_raster_file.name pygeoprocessing.mask_raster((value_raster_path, 1), aoi_path, clipped_raster_path) normalize(clipped_raster_path, normalized_path, aoi_path, weight) os.remove(clipped_raster_path) # align and resample normalized rasters, using minimum pixel size of inputs pixel_size_list = [] for new_input in raster_input_dict: value_raster_path = raster_input_dict[new_input]['path'] raster_info = pygeoprocessing.get_raster_info(value_raster_path) pixel_size_list.append(raster_info['pixel_size']) target_pixel_size = min(pixel_size_list) min_pixel_index = pixel_size_list.index(min(pixel_size_list)) if not all([os.path.exists(f) for f in 
aligned_raster_path_list]): pygeoprocessing.align_and_resize_raster_stack( base_raster_path_list, aligned_raster_path_list, ['near'] * len(base_raster_path_list), target_pixel_size, 'intersection', raster_align_index=min_pixel_index) # rasterize polygon inputs template_raster_path = aligned_raster_path_list[0] if polygon_input_list: for vec_path in polygon_input_list: target_raster_path = os.path.join( aligned_dir, '{}.tif'.format(os.path.basename(vec_path)[:-4])) aligned_raster_path_list.append(target_raster_path) if not os.path.exists(target_raster_path): pygeoprocessing.new_raster_from_base( template_raster_path, target_raster_path, gdal.GDT_Int16, [_TARGET_NODATA], fill_value_list=[_TARGET_NODATA]) pygeoprocessing.rasterize(vec_path, target_raster_path, burn_values=[100]) # add together raster_list_sum(aligned_raster_path_list, _TARGET_NODATA, index_path, _TARGET_NODATA, nodata_remove=True)
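# normalize() is called above but not defined in this excerpt. A plausible
# minimal sketch, assuming it min-max rescales valid pixels to [0, weight]
# and that the input raster defines a nodata value; the real helper may
# differ (for example, it may also use the AOI argument to limit the
# statistics).
import numpy
import pygeoprocessing
from osgeo import gdal


def normalize_sketch(base_raster_path, target_path, aoi_path, weight):
    nodata = pygeoprocessing.get_raster_info(base_raster_path)['nodata'][0]
    raster = gdal.OpenEx(base_raster_path, gdal.OF_RASTER)
    band = raster.GetRasterBand(1)
    raster_min, raster_max, _, _ = band.GetStatistics(0, 1)
    band = None
    raster = None

    def _scale_op(array):
        result = numpy.full(array.shape, nodata, dtype=numpy.float32)
        valid = ~numpy.isclose(array, nodata)
        if raster_max > raster_min:
            result[valid] = weight * (
                (array[valid] - raster_min) / (raster_max - raster_min))
        return result

    pygeoprocessing.raster_calculator(
        [(base_raster_path, 1)], _scale_op, target_path,
        gdal.GDT_Float32, nodata)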
    habitat_vector_info = pygeoprocessing.get_vector_info(
        args.habitat_vector_path)
    habitat_vector_srs = osr.SpatialReference()
    habitat_vector_srs.ImportFromWkt(habitat_vector_info['projection_wkt'])
    habitat_vector_epsg = habitat_vector_srs.GetAttrValue(
        "PROJCS|GEOGCS|AUTHORITY", 1)
    if len(set([habitat_vector_epsg, shoreline_epsg, aoi_epsg])) > 1:
        raise ValueError(
            "AOI raster, shoreline point vector, and habitat vector do not "
            "all share the same projection")

    # Rasterize CV points w/ value using target AOI as mask
    pre_mask_point_raster_path = os.path.join(
        args.workspace_dir, 'pre_mask_shore_points.tif')
    target_pixel_size = aoi_raster_info['pixel_size']
    pygeoprocessing.new_raster_from_base(
        args.aoi_mask_raster_path, pre_mask_point_raster_path,
        gdal.GDT_Float32, [numpy.finfo(numpy.float32).min])
    pygeoprocessing.rasterize(
        args.shoreline_point_vector_path, pre_mask_point_raster_path,
        option_list=[
            f'ATTRIBUTE={args.shoreline_vector_fieldname}',
            'ALL_TOUCHED=TRUE'
        ])

    # TODO: mask out values that are not in a defined AOI.
    shore_point_raster_path = os.path.join(
        args.workspace_dir, args.target_shoreline_raster_filename)
    mask_by_nodata(
        pre_mask_point_raster_path, args.aoi_mask_raster_path,
        shore_point_raster_path)

    # Create habitat mask
def _collapse_infrastructure_layers(
        infrastructure_dir, base_raster_path, infrastructure_path, tmp_dir):
    """Collapse all GIS infrastructure layers to one raster.

    Gathers all the GIS layers in the given directory and collapses them to a
    single byte raster mask where 1 indicates a pixel that overlaps at least
    one of the original infrastructure layers, 0 indicates a pixel inside the
    bounding box that overlaps none of them, and nodata indicates a region
    where no layer has valid data.

    Parameters:
        infrastructure_dir (string): path to a directory containing maps of
            either gdal compatible rasters or OGR compatible shapefiles.
        base_raster_path (string): a path to a file that has the dimensions
            and projection of the desired output infrastructure file.
        infrastructure_path (string): (output) path to a file that will be a
            byte raster with 1s everywhere there was a GIS layer present in
            the GIS layers in `infrastructure_dir`.
        tmp_dir (string): path to folder to store intermediate datasets such
            as aligned versions of infrastructure rasters.

    Returns:
        None

    """
    # load the infrastructure layers from disk
    infrastructure_filenames = []
    infrastructure_nodata_list = []
    infrastructure_tmp_filenames = []
    # in case we need to rasterize some vector inputs:
    tmp_rasterize_dir = os.path.join(tmp_dir, 'rasterized')
    for root_directory, _, filename_list in os.walk(infrastructure_dir):
        for filename in filename_list:
            if filename.lower().endswith(".tif"):
                infrastructure_filenames.append(
                    os.path.join(root_directory, filename))
                infrastructure_nodata_list.append(
                    pygeoprocessing.get_raster_info(
                        infrastructure_filenames[-1])['nodata'][0])
            if filename.lower().endswith(".shp"):
                utils.make_directories([tmp_rasterize_dir])
                file_handle, tmp_raster_path = tempfile.mkstemp(
                    dir=tmp_rasterize_dir, suffix='.tif')
                os.close(file_handle)
                pygeoprocessing.new_raster_from_base(
                    base_raster_path, tmp_raster_path, gdal.GDT_Int32,
                    [-1.0], fill_value_list=[0])
                pygeoprocessing.rasterize(
                    os.path.join(root_directory, filename), tmp_raster_path,
                    burn_values=[1], option_list=["ALL_TOUCHED=TRUE"])
                infrastructure_filenames.append(tmp_raster_path)
                infrastructure_tmp_filenames.append(tmp_raster_path)
                infrastructure_nodata_list.append(
                    pygeoprocessing.get_raster_info(
                        infrastructure_filenames[-1])['nodata'][0])

    if len(infrastructure_filenames) == 0:
        raise ValueError(
            "infrastructure directory didn't have any rasters or "
            "vectors at %s" % infrastructure_dir)

    infrastructure_nodata = -1

    def _collapse_infrastructure_op(*infrastructure_array_list):
        """Set a pixel to 1 if any layer is > 0, nodata if all are nodata."""
        nodata_mask = (
            numpy.isclose(infrastructure_array_list[0],
                          infrastructure_nodata_list[0]))
        infrastructure_result = infrastructure_array_list[0] > 0
        for index in range(1, len(infrastructure_array_list)):
            current_nodata = numpy.isclose(
                infrastructure_array_list[index],
                infrastructure_nodata_list[index])
            infrastructure_result = (
                infrastructure_result | (
                    (infrastructure_array_list[index] > 0) &
                    ~current_nodata))
            nodata_mask = (nodata_mask & current_nodata)
        infrastructure_result[nodata_mask] = infrastructure_nodata
        return infrastructure_result

    LOGGER.info('collapse infrastructure into one raster')
    aligned_infrastructure_target_list = [
        os.path.join(tmp_dir, os.path.basename(x))
        for x in infrastructure_filenames]
    base_raster_info = pygeoprocessing.get_raster_info(base_raster_path)
    pygeoprocessing.align_and_resize_raster_stack(
        infrastructure_filenames, aligned_infrastructure_target_list,
        ['near'] * len(infrastructure_filenames),
        base_raster_info['pixel_size'], base_raster_info['bounding_box'])

    infra_filename_band_list = [
        (x, 1) for x in aligned_infrastructure_target_list]
    pygeoprocessing.raster_calculator(
        infra_filename_band_list, _collapse_infrastructure_op,
        infrastructure_path, gdal.GDT_Byte, infrastructure_nodata)

    # clean up the temporary filenames
    if os.path.isdir(tmp_rasterize_dir):
        for filename in infrastructure_tmp_filenames:
            os.remove(filename)
        os.rmdir(tmp_rasterize_dir)
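

# A small numpy-only demonstration of the collapse semantics used by
# ``_collapse_infrastructure_op`` above: a pixel becomes 1 if any input
# layer has a value > 0, stays 0 if at least one layer is valid but none
# are > 0, and becomes nodata only where every layer is nodata. The arrays
# and nodata value below are made up for illustration.
def _example_collapse_two_layers():
    import numpy
    nodata = -1
    roads = numpy.array([[1, 0, nodata], [0, 0, nodata]])
    mines = numpy.array([[0, 0, nodata], [5, nodata, nodata]])
    result = (roads > 0)
    all_nodata = numpy.isclose(roads, nodata)
    current_nodata = numpy.isclose(mines, nodata)
    result = result | ((mines > 0) & ~current_nodata)
    all_nodata &= current_nodata
    collapsed = result.astype(numpy.int8)
    collapsed[all_nodata] = nodata
    # collapsed == [[1, 0, -1], [1, 0, -1]]
    return collapsed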
def _count_and_weight_visible_structures(visibility_raster_path_list, weights, clipped_dem_path, target_path): """Count (and weight) the number of visible structures for each pixel. Args: visibility_raster_path_list (list of strings): A list of strings to perfectly overlapping visibility rasters. weights (list of numbers): A list of numeric weights to apply to each visibility raster. There must be the same number of weights in this list as there are elements in visibility_rasters. clipped_dem_path (string): String path to the DEM. target_path (string): The path to where the output raster is stored. Returns: ``None`` """ LOGGER.info('Summing and weighting %d visibility rasters', len(visibility_raster_path_list)) target_nodata = -1 dem_raster_info = pygeoprocessing.get_raster_info(clipped_dem_path) dem_nodata = dem_raster_info['nodata'][0] pygeoprocessing.new_raster_from_base( clipped_dem_path, target_path, gdal.GDT_Float32, [target_nodata], raster_driver_creation_tuple=FLOAT_GTIFF_CREATION_OPTIONS) weighted_sum_visibility_raster = gdal.OpenEx( target_path, gdal.OF_RASTER | gdal.GA_Update) weighted_sum_visibility_band = ( weighted_sum_visibility_raster.GetRasterBand(1)) dem_raster = gdal.OpenEx(clipped_dem_path, gdal.OF_RASTER) dem_band = dem_raster.GetRasterBand(1) last_log_time = time.time() n_visibility_pixels = ( dem_raster_info['raster_size'][0] * dem_raster_info['raster_size'][1] * len(visibility_raster_path_list)) n_visibility_pixels_touched = 0 for block_data in pygeoprocessing.iterblocks((clipped_dem_path, 1), offset_only=True): dem_block = dem_band.ReadAsArray(**block_data) valid_mask = ~utils.array_equals_nodata(dem_block, dem_nodata) visibility_sum = numpy.empty(dem_block.shape, dtype=numpy.float32) visibility_sum[:] = target_nodata visibility_sum[valid_mask] = 0 # Weight and sum the outputs, only opening one raster at a time. # Opening rasters one at a time avoids errors about having too many # files open at once and also avoids possible out-of-memory errors # relative to if we were to open all the incoming rasters at once. for vis_raster_path, weight in zip(visibility_raster_path_list, weights): if time.time() - last_log_time > 5.0: LOGGER.info( 'Weighting and summing approx. %.2f%% complete.', 100.0 * (n_visibility_pixels_touched / n_visibility_pixels)) last_log_time = time.time() visibility_raster = gdal.OpenEx(vis_raster_path, gdal.OF_RASTER) visibility_band = visibility_raster.GetRasterBand(1) visibility_block = visibility_band.ReadAsArray(**block_data) visible_mask = (valid_mask & (visibility_block == 1)) visibility_sum[visible_mask] += ( visibility_block[visible_mask] * weight) visibility_band = None visibility_raster = None n_visibility_pixels_touched += dem_block.size weighted_sum_visibility_band.WriteArray( visibility_sum, xoff=block_data['xoff'], yoff=block_data['yoff']) weighted_sum_visibility_band.ComputeStatistics(0) weighted_sum_visibility_band = None weighted_sum_visibility_raster = None dem_band = None dem_raster = None
def _calculate_valuation(visibility_path, viewpoint, weight, valuation_method, valuation_coefficients, max_valuation_radius, valuation_raster_path): """Calculate valuation with one of the defined methods. Args: visibility_path (string): The path to a visibility raster for a single point. The visibility raster has pixel values of 0, 1, or nodata. This raster must be projected in meters. viewpoint (tuple): The viewpoint in projected coordinates (x, y) of the visibility raster. weight (number): The numeric weight of the visibility. valuation_method (string): The valuation method to use, one of ('linear', 'logarithmic', 'exponential'). valuation_coefficients (dict): A dictionary mapping string coefficient letters to numeric coefficient values. Keys 'a' and 'b' are required. max_valuation_radius (number): Past this distance (in meters), valuation values will be set to 0. valuation_raster_path (string): The path to where the valuation raster will be saved. Returns: ``None`` """ LOGGER.info('Calculating valuation with %s method. Coefficients: %s', valuation_method, ' '.join(['%s=%g' % (k, v) for (k, v) in sorted(valuation_coefficients.items())])) # All valuation functions use coefficients a, b a = valuation_coefficients['a'] b = valuation_coefficients['b'] if valuation_method == 'linear': def _valuation(distance, visibility): valid_pixels = (visibility == 1) valuation = numpy.empty(distance.shape, dtype=numpy.float64) valuation[:] = _VALUATION_NODATA valuation[(visibility == 0) | valid_pixels] = 0 x = distance[valid_pixels] valuation[valid_pixels] = ( (a + b * x) * (weight * visibility[valid_pixels])) return valuation elif valuation_method == 'logarithmic': def _valuation(distance, visibility): valid_pixels = (visibility == 1) valuation = numpy.empty(distance.shape, dtype=numpy.float64) valuation[:] = _VALUATION_NODATA valuation[(visibility == 0) | valid_pixels] = 0 # Per Rob, this is the natural log. # Also per Rob (and Rich), we'll use log(x+1) because log of values # where 0 < x < 1 yields strange results indeed. 
valuation[valid_pixels] = ( (a + b * numpy.log(distance[valid_pixels] + 1)) * ( weight * visibility[valid_pixels])) return valuation elif valuation_method == 'exponential': def _valuation(distance, visibility): valid_pixels = (visibility == 1) valuation = numpy.empty(distance.shape, dtype=numpy.float64) valuation[:] = _VALUATION_NODATA valuation[(visibility == 0) | valid_pixels] = 0 valuation[valid_pixels] = ( (a * numpy.exp(-b * distance[valid_pixels])) * ( weight * visibility[valid_pixels])) return valuation pygeoprocessing.new_raster_from_base( visibility_path, valuation_raster_path, gdal.GDT_Float64, [_VALUATION_NODATA]) vis_raster_info = pygeoprocessing.get_raster_info(visibility_path) vis_gt = vis_raster_info['geotransform'] iy_viewpoint = int((viewpoint[1] - vis_gt[3]) / vis_gt[5]) ix_viewpoint = int((viewpoint[0] - vis_gt[0]) / vis_gt[1]) # convert the distance transform to meters spatial_reference = osr.SpatialReference() spatial_reference.ImportFromWkt(vis_raster_info['projection_wkt']) linear_units = spatial_reference.GetLinearUnits() pixel_size_in_m = utils.mean_pixel_size_and_area( vis_raster_info['pixel_size'])[0] * linear_units valuation_raster = gdal.OpenEx(valuation_raster_path, gdal.OF_RASTER | gdal.GA_Update) valuation_band = valuation_raster.GetRasterBand(1) vis_nodata = vis_raster_info['nodata'][0] for block_info, vis_block in pygeoprocessing.iterblocks( (visibility_path, 1)): visibility_value = numpy.empty(vis_block.shape, dtype=numpy.float64) visibility_value[:] = _VALUATION_NODATA x_coord = numpy.linspace( block_info['xoff'], block_info['xoff'] + block_info['win_xsize'] - 1, block_info['win_xsize']) y_coord = numpy.linspace( block_info['yoff'], block_info['yoff'] + block_info['win_ysize'] - 1, block_info['win_ysize']) ix_matrix, iy_matrix = numpy.meshgrid(x_coord, y_coord) dist_in_m = numpy.hypot(numpy.absolute(ix_matrix - ix_viewpoint), numpy.absolute(iy_matrix - iy_viewpoint), dtype=numpy.float64) * pixel_size_in_m valid_distances = (dist_in_m <= max_valuation_radius) nodata = utils.array_equals_nodata(vis_block, vis_nodata) valid_indexes = (valid_distances & (~nodata)) visibility_value[valid_indexes] = _valuation(dist_in_m[valid_indexes], vis_block[valid_indexes]) visibility_value[~valid_distances & ~nodata] = 0 valuation_band.WriteArray(visibility_value, xoff=block_info['xoff'], yoff=block_info['yoff']) # the 0 means approximate stats are not okay valuation_band.ComputeStatistics(0) valuation_band = None valuation_raster.FlushCache() valuation_raster = None
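

# A compact numpy demonstration of the three valuation forms applied by
# ``_calculate_valuation`` to visible pixels: linear (a + b*d),
# logarithmic (a + b*ln(d + 1)), and exponential (a*exp(-b*d)), each
# multiplied by ``weight * visibility``. The coefficients and distances
# below are arbitrary example values.
def _example_valuation_curves():
    import numpy
    a, b, weight = 1.0, 0.5, 2.0
    distance = numpy.array([0.0, 100.0, 1000.0])  # meters from the viewpoint
    visibility = numpy.ones_like(distance)  # all example pixels visible
    linear = (a + b * distance) * (weight * visibility)
    logarithmic = (a + b * numpy.log(distance + 1)) * (weight * visibility)
    exponential = (a * numpy.exp(-b * distance)) * (weight * visibility)
    return linear, logarithmic, exponential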
def _mask_raster_by_vector(
        base_raster_path_band, vector_path, working_dir, target_raster_path):
    """Mask pixels outside of the vector to nodata.

    Parameters:
        base_raster_path_band (tuple): path/band tuple for the raster to
            process.
        vector_path (string): path to a single layer vector that is used to
            indicate areas to preserve from the base raster. Areas outside
            of this vector are set to nodata.
        working_dir (str): path to temporary directory.
        target_raster_path (string): path to a single band raster that will
            be created of the same dimensions and data type as
            `base_raster_path_band` where any pixels that lie outside of
            `vector_path` coverage will be set to nodata.

    Returns:
        None.

    """
    # Warp input raster to be same bounding box as AOI if smaller.
    base_raster_info = pygeoprocessing.get_raster_info(
        base_raster_path_band[0])
    nodata = base_raster_info['nodata'][base_raster_path_band[1]-1]
    target_pixel_size = base_raster_info['pixel_size']
    vector_info = pygeoprocessing.get_vector_info(vector_path)
    target_bounding_box = pygeoprocessing.merge_bounding_box_list(
        [base_raster_info['bounding_box'],
         vector_info['bounding_box']], 'intersection')
    pygeoprocessing.warp_raster(
        base_raster_path_band[0], target_pixel_size, target_raster_path,
        'near', target_bb=target_bounding_box)

    # Create mask raster same size as the warped raster.
    tmp_dir = tempfile.mkdtemp(dir=working_dir)
    mask_raster_path = os.path.join(tmp_dir, 'mask.tif')
    pygeoprocessing.new_raster_from_base(
        target_raster_path, mask_raster_path, gdal.GDT_Byte, [0],
        fill_value_list=[0])

    # Rasterize the vector onto the mask raster
    pygeoprocessing.rasterize(vector_path, mask_raster_path, [1], None)

    # Parallel iterate over warped raster and mask raster to mask out
    # the original.
    target_raster = gdal.OpenEx(
        target_raster_path, gdal.GA_Update | gdal.OF_RASTER)
    target_band = target_raster.GetRasterBand(1)
    mask_raster = gdal.OpenEx(mask_raster_path, gdal.OF_RASTER)
    mask_band = mask_raster.GetRasterBand(1)
    for offset_dict in pygeoprocessing.iterblocks(
            (mask_raster_path, 1), offset_only=True):
        data_array = target_band.ReadAsArray(**offset_dict)
        mask_array = mask_band.ReadAsArray(**offset_dict)
        data_array[mask_array != 1] = nodata
        target_band.WriteArray(
            data_array, xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])
    target_band.FlushCache()
    target_band = None
    target_raster = None
    mask_band = None
    mask_raster = None
    try:
        shutil.rmtree(tmp_dir)
    except OSError:
        LOGGER.warning("Unable to delete temporary directory %s", tmp_dir)
def _calculate_tropical_forest_edge_carbon_map( edge_distance_path, spatial_index_pickle_path, n_nearest_model_points, biomass_to_carbon_conversion_factor, tropical_forest_edge_carbon_map_path): """Calculates the carbon on the forest pixels accounting for their global position with respect to precalculated edge carbon models. Parameters: edge_distance_path (string): path to the a raster where each pixel contains the pixel distance to forest edge. spatial_index_pickle_path (string): path to the pickle file that contains a tuple of: kd_tree (scipy.spatial.cKDTree): a kd-tree that has indexed the valid model parameter points for fast nearest neighbor calculations. theta_model_parameters (numpy.array Nx3): parallel array of model theta parameters consistent with the order in which points were inserted into 'kd_tree' method_model_parameter (numpy.array N): parallel array of method numbers (1..3) consistent with the order in which points were inserted into 'kd_tree'. n_nearest_model_points (int): number of nearest model points to search for. biomass_to_carbon_conversion_factor (float): number by which to multiply the biomass by to get carbon. tropical_forest_edge_carbon_map_path (string): a filepath to the output raster which will contain total carbon stocks per cell of forest type. Returns: None """ # load spatial indeces from pickle file kd_tree, theta_model_parameters, method_model_parameter = pickle.load( open(spatial_index_pickle_path, 'rb')) # create output raster and open band for writing # fill nodata, in case we skip entire memory blocks that are non-forest pygeoprocessing.new_raster_from_base( edge_distance_path, tropical_forest_edge_carbon_map_path, gdal.GDT_Float32, band_nodata_list=[CARBON_MAP_NODATA], fill_value_list=[CARBON_MAP_NODATA]) edge_carbon_raster = gdal.OpenEx( tropical_forest_edge_carbon_map_path, gdal.GA_Update) edge_carbon_band = edge_carbon_raster.GetRasterBand(1) edge_carbon_geotransform = edge_carbon_raster.GetGeoTransform() # create edge distance band for memory block reading n_rows = edge_carbon_raster.RasterYSize n_cols = edge_carbon_raster.RasterXSize n_cells = n_rows * n_cols n_cells_processed = 0 # timer to give updates per call last_time = time.time() cell_xsize, cell_ysize = pygeoprocessing.get_raster_info( edge_distance_path)['pixel_size'] cell_size_km = (abs(cell_xsize) + abs(cell_ysize))/2 / 1000.0 cell_area_ha = (abs(cell_xsize) * abs(cell_ysize)) / 10000.0 # Loop memory block by memory block, calculating the forest edge carbon # for every forest pixel. for edge_distance_data, edge_distance_block in pygeoprocessing.iterblocks( (edge_distance_path, 1), largest_block=2**12): current_time = time.time() if current_time - last_time > 5.0: LOGGER.info( 'Carbon edge calculation approx. 
%.2f%% complete', (n_cells_processed / float(n_cells) * 100.0)) last_time = current_time n_cells_processed += ( edge_distance_data['win_xsize'] * edge_distance_data['win_ysize']) valid_edge_distance_mask = (edge_distance_block > 0) # if no valid forest pixels to calculate, skip to the next block if not valid_edge_distance_mask.any(): continue # calculate local coordinates for each pixel so we can test for # distance to the nearest carbon model points col_range = numpy.linspace( edge_carbon_geotransform[0] + edge_carbon_geotransform[1] * edge_distance_data['xoff'], edge_carbon_geotransform[0] + edge_carbon_geotransform[1] * ( edge_distance_data['xoff'] + edge_distance_data['win_xsize']), num=edge_distance_data['win_xsize'], endpoint=False) row_range = numpy.linspace( edge_carbon_geotransform[3] + edge_carbon_geotransform[5] * edge_distance_data['yoff'], edge_carbon_geotransform[3] + edge_carbon_geotransform[5] * ( edge_distance_data['yoff'] + edge_distance_data['win_ysize']), num=edge_distance_data['win_ysize'], endpoint=False) col_coords, row_coords = numpy.meshgrid(col_range, row_range) # query nearest points for every point in the grid # n_jobs=-1 means use all available CPUs coord_points = zip( row_coords[valid_edge_distance_mask].ravel(), col_coords[valid_edge_distance_mask].ravel()) # note, the 'n_jobs' parameter was introduced in SciPy 0.16.0 distances, indexes = kd_tree.query( coord_points, k=n_nearest_model_points, distance_upper_bound=DISTANCE_UPPER_BOUND, n_jobs=-1) if n_nearest_model_points == 1: distances = distances.reshape(distances.shape[0], 1) indexes = indexes.reshape(indexes.shape[0], 1) # the 3 is for the 3 thetas in the carbon model thetas = numpy.zeros((indexes.shape[0], indexes.shape[1], 3)) valid_index_mask = (indexes != kd_tree.n) thetas[valid_index_mask] = theta_model_parameters[ indexes[valid_index_mask]] # the 3 is for the 3 models (asym, exp, linear) biomass_model = numpy.zeros( (indexes.shape[0], indexes.shape[1], 3)) # reshape to an N,nearest_points so we can multiply by thetas valid_edge_distances_km = numpy.repeat( edge_distance_block[valid_edge_distance_mask] * cell_size_km, n_nearest_model_points).reshape(-1, n_nearest_model_points) # asymptotic model # biomass_1 = t1 - t2 * exp(-t3 * edge_dist_km) biomass_model[:, :, 0] = ( thetas[:, :, 0] - thetas[:, :, 1] * numpy.exp( -thetas[:, :, 2] * valid_edge_distances_km) ) * cell_area_ha # logarithmic model # biomass_2 = t1 + t2 * numpy.log(edge_dist_km) biomass_model[:, :, 1] = ( thetas[:, :, 0] + thetas[:, :, 1] * numpy.log( valid_edge_distances_km)) * cell_area_ha # linear regression # biomass_3 = t1 + t2 * edge_dist_km biomass_model[:, :, 2] = ( (thetas[:, :, 0] + thetas[:, :, 1] * valid_edge_distances_km) * cell_area_ha) # Collapse the biomass down to the valid models model_index = numpy.zeros(indexes.shape, dtype=numpy.int8) model_index[valid_index_mask] = ( method_model_parameter[indexes[valid_index_mask]] - 1) # reduce the axis=1 dimensionality of the model by selecting the # appropriate value via the model_index array. 
Got this trick from # http://stackoverflow.com/questions/18702746/reduce-a-dimension-of-numpy-array-by-selecting biomass_y, biomass_x = numpy.meshgrid( numpy.arange(biomass_model.shape[1]), numpy.arange(biomass_model.shape[0])) biomass = biomass_model[biomass_x, biomass_y, model_index] # reshape the array so that each set of points is in a separate # dimension, here distances are distances to each valid model # point, not distance to edge of forest weights = numpy.zeros(distances.shape) valid_distance_mask = (distances > 0) & (distances < numpy.inf) weights[valid_distance_mask] = ( n_nearest_model_points / distances[valid_distance_mask]) # Denominator is the sum of the weights per nearest point (axis 1) denom = numpy.sum(weights, axis=1) # To avoid a divide by 0 valid_denom = denom != 0 average_biomass = numpy.zeros(distances.shape[0]) average_biomass[valid_denom] = ( numpy.sum(weights[valid_denom] * biomass[valid_denom], axis=1) / denom[valid_denom]) # Ensure the result has nodata everywhere the distance was invalid result = numpy.empty( edge_distance_block.shape, dtype=numpy.float32) result[:] = CARBON_MAP_NODATA # convert biomass to carbon in this stage result[valid_edge_distance_mask] = ( average_biomass * biomass_to_carbon_conversion_factor) edge_carbon_band.WriteArray( result, xoff=edge_distance_data['xoff'], yoff=edge_distance_data['yoff']) LOGGER.info('Carbon edge calculation 100.0% complete')
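

# A toy numpy example of the inverse-distance weighting used above to blend
# biomass predictions from the k nearest regression model points:
# w_j = k / d_j, and the blended biomass is sum(w_j * b_j) / sum(w_j).
# The distances and per-point biomass values are invented for illustration.
def _example_inverse_distance_blend():
    import numpy
    k = 3  # mirrors n_nearest_model_points
    # one forest pixel; distances to its 3 nearest model points
    distances = numpy.array([[10.0, 20.0, 40.0]])
    biomass = numpy.array([[120.0, 90.0, 60.0]])  # per-point predictions
    weights = k / distances
    blended = (
        numpy.sum(weights * biomass, axis=1) / numpy.sum(weights, axis=1))
    # closer model points dominate: blended is ~102.9, pulled toward 120
    return blended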
def _build_file_registry(C_prior_raster, transition_rasters, snapshot_years, results_suffix, do_economic_analysis, outputs_dir, intermediate_dir): """Build an output file registry. Args: C_prior_raster (str): template raster transition_rasters (list): A list of GDAL-supported rasters representing representing the landcover at transition years. May be an empty list. snapshot_years (list): years of provided snapshots to help with filenames results_suffix (str): the results file suffix do_economic_analysis (bool): whether or not to create a NPV raster outputs_dir (str): path to output directory Returns: File_Registry (dict): map to collections of output files. """ template_raster = C_prior_raster T_s_rasters = [] A_r_rasters = [] E_r_rasters = [] N_r_rasters = [] for snapshot_idx in xrange(len(snapshot_years)): snapshot_year = snapshot_years[snapshot_idx] T_s_rasters.append(_OUTPUT['carbon_stock'] % (snapshot_year)) if snapshot_idx < len(snapshot_years)-1: next_snapshot_year = snapshot_years[snapshot_idx + 1] A_r_rasters.append(_OUTPUT['carbon_accumulation'] % ( snapshot_year, next_snapshot_year)) E_r_rasters.append(_OUTPUT['cabon_emissions'] % ( snapshot_year, next_snapshot_year)) N_r_rasters.append(_OUTPUT['carbon_net_sequestration'] % ( snapshot_year, next_snapshot_year)) # Total Net Sequestration N_total_raster = 'total_net_carbon_sequestration.tif' raster_registry_dict = { 'T_s_rasters': T_s_rasters, 'A_r_rasters': A_r_rasters, 'E_r_rasters': E_r_rasters, 'N_r_rasters': N_r_rasters, 'N_total_raster': N_total_raster, } # Net Sequestration from Base Year to Analysis Year if do_economic_analysis: raster_registry_dict['NPV_raster'] = 'net_present_value.tif' file_registry = utils.build_file_registry( [(raster_registry_dict, outputs_dir), (_INTERMEDIATE, intermediate_dir)], results_suffix) LOGGER.info('Aligning and clipping incoming datasets') incoming_rasters = [C_prior_raster] + transition_rasters # If an analysis year is defined, it's appended to the snapshot_years list, # but won't have a corresponding raster. aligned_lulc_files = [file_registry['aligned_lulc_template'] % year for year in snapshot_years[:len(incoming_rasters)]] baseline_pixel_size = pygeoprocessing.get_raster_info( C_prior_raster)['pixel_size'] pygeoprocessing.align_and_resize_raster_stack( [C_prior_raster] + transition_rasters, aligned_lulc_files, ['near'] * len(aligned_lulc_files), baseline_pixel_size, 'intersection') raster_lists = ['T_s_rasters', 'A_r_rasters', 'E_r_rasters', 'N_r_rasters'] num_temporal_rasters = sum( [len(file_registry[key]) for key in raster_lists]) LOGGER.info('Creating %s temporal rasters', num_temporal_rasters) for index, raster_filepath in enumerate(itertools.chain( *[file_registry[key] for key in raster_lists])): LOGGER.info('Setting up temporal raster %s of %s at %s', index+1, num_temporal_rasters, os.path.basename(raster_filepath)) pygeoprocessing.new_raster_from_base( template_raster, raster_filepath, gdal.GDT_Float32, [NODATA_FLOAT]) for raster_key in ['N_total_raster', 'NPV_raster']: try: filepath = file_registry[raster_key] LOGGER.info('Setting up valuation raster %s', os.path.basename(filepath)) pygeoprocessing.new_raster_from_base( template_raster, filepath, gdal.GDT_Float32, [NODATA_FLOAT]) except KeyError: # KeyError raised when ``raster_key`` is not in the file registry. pass return file_registry
def normalize_by_polygon(raster_path, vector_path, percentile, clamp_range, workspace_dir, target_path): """Normalize a raster locally by regions defined by vector. Parameters: raster_path (str): path to base raster to aggregate over. vector_path (str): path to a vector that defines local regions to normalize over. Any pixels outside of these polygons will be set to nodata. percentile (float): a number in the range [0, 100] that is used to normalize the local regions defined by `vector_path`. This number will be used to calculate the percentile in each region separately clamp_range (list or tuple): a min/max range to clamp the normalized result by. workspace_dir (str): path to a workspace to create and keep intermediate files. Returns: None. """ base_dir = os.path.dirname(target_path) for dir_path in [base_dir, workspace_dir]: try: os.makedirs(dir_path) except OSError: pass vector = ogr.Open(vector_path) layer = vector.GetLayer() fid_to_percentile_pickle_path = {} for feature in layer: # clip the original layer and then mask it fid = feature.GetFID() feature_mask_path = os.path.join(workspace_dir, '%d_mask.tif' % fid) mask_raster_task = TASK_GRAPH.add_task( func=clip_and_mask_raster, args=(raster_path, vector_path, fid, feature_mask_path), target_path_list=[feature_mask_path], task_name='mask feature %d' % fid) percentile_pickle_path = os.path.join( workspace_dir, '%d_%d.pickle' % (fid, percentile)) _ = TASK_GRAPH.add_task(func=calculate_percentile, args=(feature_mask_path, [percentile], base_dir, percentile_pickle_path), target_path_list=[percentile_pickle_path], dependent_task_list=[mask_raster_task], task_name='calculating %s' % percentile_pickle_path) fid_to_percentile_pickle_path[fid] = percentile_pickle_path feature = None local_vector_path = os.path.join(workspace_dir, 'local_vector.gpkg') gpkg_driver = ogr.GetDriverByName('GPKG') local_vector = gpkg_driver.CopyDataSource(vector, local_vector_path) vector = None layer = None local_layer = local_vector.GetLayer() local_layer.CreateField(ogr.FieldDefn('norm_val', ogr.OFTReal)) global_norm_value_raster_path = os.path.join(workspace_dir, 'global_norm_values.tif') pygeoprocessing.new_raster_from_base( raster_path, global_norm_value_raster_path, gdal.GDT_Float32, [-1], raster_driver_creation_tuple=('GTIFF', ('TILED=YES', 'BIGTIFF=YES', 'COMPRESS=ZSTD', 'BLOCKXSIZE=256', 'BLOCKYSIZE=256', 'SPARSE_OK=TRUE'))) TASK_GRAPH.join() for fid, pickle_path in fid_to_percentile_pickle_path.items(): feature = local_layer.GetFeature(fid) with open(pickle_path, 'rb') as pickle_file: percentile_list = pickle.load(pickle_file) if len(percentile_list) > 0: feature.SetField('norm_val', percentile_list[0]) else: feature.SetField('norm_val', -1.0) local_layer.SetFeature(feature) feature = None local_layer = None local_vector = None pygeoprocessing.rasterize(local_vector_path, global_norm_value_raster_path, option_list=['ATTRIBUTE=norm_val']) raster_nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0] pygeoprocessing.raster_calculator([(raster_path, 1), (global_norm_value_raster_path, 1), (clamp_range, 'raw'), (raster_nodata, 'raw'), (-1, 'raw'), (-1, 'raw')], divide_op, target_path, gdal.GDT_Float32, -1)
def _clip_and_mask_dem(dem_path, aoi_path, target_path, working_dir): """Clip and mask the DEM to the AOI. Args: dem_path (string): The path to the DEM to use. Must have the same projection as the AOI. aoi_path (string): The path to the AOI to use. Must have the same projection as the DEM. target_path (string): The path on disk to where the clipped and masked raster will be saved. If a file exists at this location it will be overwritten. The raster will have a bounding box matching the intersection of the AOI and the DEM's bounding box and a spatial reference matching the AOI and the DEM. working_dir (string): A path to a directory on disk. A new temporary directory will be created within this directory for the storage of several working files. This temporary directory will be removed at the end of this function. Returns: ``None`` """ temp_dir = tempfile.mkdtemp(dir=working_dir, prefix='clip_dem') LOGGER.info('Clipping the DEM to its intersection with the AOI.') aoi_vector_info = pygeoprocessing.get_vector_info(aoi_path) dem_raster_info = pygeoprocessing.get_raster_info(dem_path) mean_pixel_size = ( abs(dem_raster_info['pixel_size'][0]) + abs(dem_raster_info['pixel_size'][1])) / 2.0 pixel_size = (mean_pixel_size, -mean_pixel_size) intersection_bbox = [op(aoi_dim, dem_dim) for (aoi_dim, dem_dim, op) in zip(aoi_vector_info['bounding_box'], dem_raster_info['bounding_box'], [max, max, min, min])] clipped_dem_path = os.path.join(temp_dir, 'clipped_dem.tif') pygeoprocessing.warp_raster( dem_path, pixel_size, clipped_dem_path, 'near', target_bb=intersection_bbox) LOGGER.info('Masking DEM pixels outside the AOI to nodata') aoi_mask_raster_path = os.path.join(temp_dir, 'aoi_mask.tif') pygeoprocessing.new_raster_from_base( clipped_dem_path, aoi_mask_raster_path, gdal.GDT_Byte, [_BYTE_NODATA], [0], raster_driver_creation_tuple=BYTE_GTIFF_CREATION_OPTIONS) pygeoprocessing.rasterize(aoi_path, aoi_mask_raster_path, [1], None) dem_nodata = dem_raster_info['nodata'][0] def _mask_op(dem, aoi_mask): valid_pixels = (~utils.array_equals_nodata(dem, dem_nodata) & (aoi_mask == 1)) masked_dem = numpy.empty(dem.shape) masked_dem[:] = dem_nodata masked_dem[valid_pixels] = dem[valid_pixels] return masked_dem pygeoprocessing.raster_calculator( [(clipped_dem_path, 1), (aoi_mask_raster_path, 1)], _mask_op, target_path, gdal.GDT_Float32, dem_nodata, raster_driver_creation_tuple=FLOAT_GTIFF_CREATION_OPTIONS) shutil.rmtree(temp_dir, ignore_errors=True)
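

# A numpy-only restatement of ``_mask_op`` above: DEM pixels keep their
# value where the rasterized AOI mask is 1 and the DEM is valid; everything
# else becomes the DEM's nodata value. The example values are arbitrary.
def _example_mask_dem_block():
    import numpy
    dem_nodata = -9999.0
    dem = numpy.array([[10.0, 12.0, dem_nodata], [15.0, 11.0, 13.0]])
    aoi_mask = numpy.array([[1, 0, 1], [1, 1, 0]])
    valid = (dem != dem_nodata) & (aoi_mask == 1)
    masked = numpy.full(dem.shape, dem_nodata)
    masked[valid] = dem[valid]
    # masked == [[10., -9999., -9999.], [15., 11., -9999.]]
    return masked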
def test_archive_extraction(self): """Datastack: test archive extraction.""" from natcap.invest import datastack from natcap.invest import utils params = { 'blank': '', 'a': 1, 'b': 'hello there', 'c': 'plain bytestring', 'foo': os.path.join(self.workspace, 'foo.txt'), 'bar': os.path.join(self.workspace, 'foo.txt'), 'data_dir': os.path.join(self.workspace, 'data_dir'), 'raster': os.path.join(DATA_DIR, 'dem'), 'vector': os.path.join(DATA_DIR, 'watersheds.shp'), 'simple_table': os.path.join(DATA_DIR, 'carbon_pools_samp.csv'), 'spatial_table': os.path.join(self.workspace, 'spatial_table.csv'), } # synthesize sample data os.makedirs(params['data_dir']) for filename in ('foo.txt', 'bar.txt', 'baz.txt'): data_filepath = os.path.join(params['data_dir'], filename) with open(data_filepath, 'w') as textfile: textfile.write(filename) with open(params['foo'], 'w') as textfile: textfile.write('hello world!') with open(params['spatial_table'], 'w') as spatial_csv: # copy existing DEM # copy existing watersheds # new raster # new vector spatial_csv.write('ID,path\n') spatial_csv.write(f"1,{params['raster']}\n") spatial_csv.write(f"2,{params['vector']}\n") # Create a raster only referenced by the CSV target_csv_raster_path = os.path.join(self.workspace, 'new_raster.tif') pygeoprocessing.new_raster_from_base(params['raster'], target_csv_raster_path, gdal.GDT_UInt16, [0]) spatial_csv.write(f'3,{target_csv_raster_path}\n') # Create a vector only referenced by the CSV target_csv_vector_path = os.path.join(self.workspace, 'new_vector.geojson') pygeoprocessing.shapely_geometry_to_vector( [shapely.geometry.Point(100, 100)], target_csv_vector_path, pygeoprocessing.get_raster_info( params['raster'])['projection_wkt'], 'GeoJSON', ogr_geom_type=ogr.wkbPoint) spatial_csv.write(f'4,{target_csv_vector_path}\n') archive_path = os.path.join(self.workspace, 'archive.invs.tar.gz') datastack.build_datastack_archive( params, 'test_datastack_modules.archive_extraction', archive_path) out_directory = os.path.join(self.workspace, 'extracted_archive') archive_params = datastack.extract_datastack_archive( archive_path, out_directory) model_array = pygeoprocessing.raster_to_numpy_array( archive_params['raster']) reg_array = pygeoprocessing.raster_to_numpy_array(params['raster']) numpy.testing.assert_allclose(model_array, reg_array) utils._assert_vectors_equal(archive_params['vector'], params['vector']) pandas.testing.assert_frame_equal( pandas.read_csv(archive_params['simple_table']), pandas.read_csv(params['simple_table'])) for key in ('blank', 'a', 'b', 'c'): self.assertEqual(archive_params[key], params[key], f'Params differ for key {key}') for key in ('foo', 'bar'): self.assertTrue( filecmp.cmp(archive_params[key], params[key], shallow=False)) spatial_csv_dict = utils.build_lookup_from_csv( archive_params['spatial_table'], 'ID', to_lower=True) spatial_csv_dir = os.path.dirname(archive_params['spatial_table']) numpy.testing.assert_allclose( pygeoprocessing.raster_to_numpy_array( os.path.join(spatial_csv_dir, spatial_csv_dict[3]['path'])), pygeoprocessing.raster_to_numpy_array(target_csv_raster_path)) utils._assert_vectors_equal( os.path.join(spatial_csv_dir, spatial_csv_dict[4]['path']), target_csv_vector_path)
def dilate_holes(base_raster_path, target_raster_path):
    """Dilate holes in raster."""
    base_raster = gdal.OpenEx(base_raster_path, gdal.OF_RASTER)
    base_band = base_raster.GetRasterBand(1)
    nodata = base_band.GetNoDataValue()
    if nodata is None:
        shutil.copyfile(base_raster_path, target_raster_path)
        return
    base_info = pygeoprocessing.get_raster_info(base_raster_path)
    pygeoprocessing.new_raster_from_base(
        base_raster_path, target_raster_path, base_info['datatype'],
        base_info['nodata'])
    target_raster = gdal.OpenEx(
        target_raster_path, gdal.OF_RASTER | gdal.GA_Update)
    target_band = target_raster.GetRasterBand(1)
    n_cols = base_band.XSize
    n_rows = base_band.YSize

    hole_kernel = numpy.array([
        [1, 1, 1],
        [1, 9, 1],
        [1, 1, 1]])
    neighbor_avg_kernel = numpy.array([
        [1, 1, 1],
        [1, 0, 1],
        [1, 1, 1]])

    for offset_dict in pygeoprocessing.iterblocks(
            (base_raster_path, 1), offset_only=True):
        ul_offset_x = 1
        ul_offset_y = 1
        grid_array = numpy.empty(
            (offset_dict['win_ysize']+2, offset_dict['win_xsize']+2))
        grid_array[:] = nodata
        LOGGER.debug(f'**** {offset_dict}')

        # expand the read window by one pixel on any side that has a
        # neighboring block so holes on block edges are detected too
        if offset_dict['win_xsize']+offset_dict['xoff'] < n_cols:
            offset_dict['win_xsize'] += 1
            LOGGER.debug(
                "if offset_dict['win_xsize']+offset_dict['xoff'] < n_cols:")
        if offset_dict['win_ysize']+offset_dict['yoff'] < n_rows:
            offset_dict['win_ysize'] += 1
            LOGGER.debug(
                "if offset_dict['win_ysize']+offset_dict['yoff'] < n_rows:")
        if offset_dict['xoff'] > 0:
            LOGGER.debug("if offset_dict['xoff'] > 0:")
            offset_dict['xoff'] -= 1
            offset_dict['win_xsize'] += 1
            ul_offset_x -= 1
        if offset_dict['yoff'] > 0:
            LOGGER.debug("if offset_dict['yoff'] > 0:")
            offset_dict['yoff'] -= 1
            offset_dict['win_ysize'] += 1
            ul_offset_y -= 1

        LOGGER.debug(offset_dict)
        LOGGER.debug(ul_offset_x)
        LOGGER.debug(ul_offset_y)
        base_array = base_band.ReadAsArray(**offset_dict)
        LOGGER.debug(base_array.shape)
        LOGGER.debug(grid_array.shape)
        LOGGER.debug(f"{ul_offset_y}:{offset_dict['win_ysize']}")
        LOGGER.debug(f"{ul_offset_x}:{offset_dict['win_xsize']}")
        grid_array[
            ul_offset_y:ul_offset_y+offset_dict['win_ysize'],
            ul_offset_x:ul_offset_x+offset_dict['win_xsize']] = base_array

        nodata_holes = numpy.isclose(grid_array, nodata)
        single_holes = scipy.signal.convolve2d(
            nodata_holes, hole_kernel, mode='same', boundary='fill',
            fillvalue=1) == 9
        # fill each single-pixel hole with the mean of its 8 valid neighbors
        neighbor_avg = scipy.signal.convolve2d(
            grid_array, neighbor_avg_kernel,
            mode='same') / neighbor_avg_kernel.sum()
        grid_array[single_holes] = neighbor_avg[single_holes]

        target_band.WriteArray(
            grid_array[
                ul_offset_y:ul_offset_y+offset_dict['win_ysize'],
                ul_offset_x:ul_offset_x+offset_dict['win_xsize']],
            xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])
    target_band = None
    target_raster = None
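

# A toy demonstration of the convolution trick used by ``dilate_holes``:
# convolving the boolean nodata mask with a 3x3 kernel whose center weight
# is 9 yields exactly 9 only where the center pixel is nodata and all eight
# neighbors are valid, i.e. a single-pixel hole. The mask is made up.
def _example_find_single_pixel_holes():
    import numpy
    import scipy.signal
    nodata_mask = numpy.array([
        [0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1, 1],
        [0, 0, 0, 1, 1]])
    hole_kernel = numpy.array([[1, 1, 1], [1, 9, 1], [1, 1, 1]])
    response = scipy.signal.convolve2d(
        nodata_mask, hole_kernel, mode='same', boundary='fill', fillvalue=1)
    single_holes = (response == 9)
    # only the isolated hole at (1, 1) is flagged; the 2x2 nodata block and
    # image-edge pixels (padded as nodata) are not
    return single_holes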
def _execute(args): """Execute the seasonal water yield model. Parameters: See the parameters for `natcap.invest.seasonal_water_yield.seasonal_wateryield.execute`. Returns: None """ LOGGER.info('prepare and test inputs for common errors') # fail early on a missing required rain events table if (not args['user_defined_local_recharge'] and not args['user_defined_climate_zones']): rain_events_lookup = ( utils.build_lookup_from_csv( args['rain_events_table_path'], 'month')) biophysical_table = utils.build_lookup_from_csv( args['biophysical_table_path'], 'lucode') if args['monthly_alpha']: # parse out the alpha lookup table of the form (month_id: alpha_val) alpha_month = dict( (key, val['alpha']) for key, val in utils.build_lookup_from_csv( args['monthly_alpha_path'], 'month').iteritems()) else: # make all 12 entries equal to args['alpha_m'] alpha_m = float(fractions.Fraction(args['alpha_m'])) alpha_month = dict( (month_index+1, alpha_m) for month_index in xrange(12)) beta_i = float(fractions.Fraction(args['beta_i'])) gamma = float(fractions.Fraction(args['gamma'])) threshold_flow_accumulation = float(args['threshold_flow_accumulation']) pixel_size = pygeoprocessing.get_raster_info( args['dem_raster_path'])['pixel_size'] file_suffix = utils.make_suffix_string(args, 'results_suffix') intermediate_output_dir = os.path.join( args['workspace_dir'], 'intermediate_outputs') output_dir = args['workspace_dir'] utils.make_directories([intermediate_output_dir, output_dir]) LOGGER.info('Building file registry') file_registry = utils.build_file_registry( [(_OUTPUT_BASE_FILES, output_dir), (_INTERMEDIATE_BASE_FILES, intermediate_output_dir), (_TMP_BASE_FILES, output_dir)], file_suffix) LOGGER.info('Checking that the AOI is not the output aggregate vector') if (os.path.normpath(args['aoi_path']) == os.path.normpath(file_registry['aggregate_vector_path'])): raise ValueError( "The input AOI is the same as the output aggregate vector, " "please choose a different workspace or move the AOI file " "out of the current workspace %s" % file_registry['aggregate_vector_path']) LOGGER.info('Aligning and clipping dataset list') input_align_list = [args['lulc_raster_path'], args['dem_raster_path']] output_align_list = [ file_registry['lulc_aligned_path'], file_registry['dem_aligned_path']] if not args['user_defined_local_recharge']: precip_path_list = [] et0_path_list = [] et0_dir_list = [ os.path.join(args['et0_dir'], f) for f in os.listdir( args['et0_dir'])] precip_dir_list = [ os.path.join(args['precip_dir'], f) for f in os.listdir( args['precip_dir'])] for month_index in range(1, N_MONTHS + 1): month_file_match = re.compile(r'.*[^\d]%d\.[^.]+$' % month_index) for data_type, dir_list, path_list in [ ('et0', et0_dir_list, et0_path_list), ('Precip', precip_dir_list, precip_path_list)]: file_list = [ month_file_path for month_file_path in dir_list if month_file_match.match(month_file_path)] if len(file_list) == 0: raise ValueError( "No %s found for month %d" % (data_type, month_index)) if len(file_list) > 1: raise ValueError( "Ambiguous set of files found for month %d: %s" % (month_index, file_list)) path_list.append(file_list[0]) input_align_list = ( precip_path_list + [args['soil_group_path']] + et0_path_list + input_align_list) output_align_list = ( file_registry['precip_path_aligned_list'] + [file_registry['soil_group_aligned_path']] + file_registry['et0_path_aligned_list'] + output_align_list) align_index = len(input_align_list) - 1 # this aligns with the DEM if args['user_defined_local_recharge']: 
input_align_list.append(args['l_path']) output_align_list.append(file_registry['l_aligned_path']) elif args['user_defined_climate_zones']: input_align_list.append(args['climate_zone_raster_path']) output_align_list.append( file_registry['cz_aligned_raster_path']) interpolate_list = ['nearest'] * len(input_align_list) pygeoprocessing.align_and_resize_raster_stack( input_align_list, output_align_list, interpolate_list, pixel_size, 'intersection', base_vector_path_list=[args['aoi_path']], raster_align_index=align_index) LOGGER.info('flow direction') natcap.invest.pygeoprocessing_0_3_3.routing.flow_direction_d_inf( file_registry['dem_aligned_path'], file_registry['flow_dir_path']) LOGGER.info('flow weights') natcap.invest.pygeoprocessing_0_3_3.routing.routing_core.calculate_flow_weights( file_registry['flow_dir_path'], file_registry['outflow_weights_path'], file_registry['outflow_direction_path']) LOGGER.info('flow accumulation') natcap.invest.pygeoprocessing_0_3_3.routing.flow_accumulation( file_registry['flow_dir_path'], file_registry['dem_aligned_path'], file_registry['flow_accum_path']) LOGGER.info('stream thresholding') natcap.invest.pygeoprocessing_0_3_3.routing.stream_threshold( file_registry['flow_accum_path'], threshold_flow_accumulation, file_registry['stream_path']) LOGGER.info('quick flow') if args['user_defined_local_recharge']: file_registry['l_path'] = file_registry['l_aligned_path'] li_nodata = pygeoprocessing.get_raster_info( file_registry['l_path'])['nodata'][0] def l_avail_op(l_array): """Calculate equation [8] L_avail = min(gamma*L, L)""" result = numpy.empty(l_array.shape) result[:] = li_nodata valid_mask = (l_array != li_nodata) result[valid_mask] = numpy.min(numpy.stack( (gamma*l_array[valid_mask], l_array[valid_mask])), axis=0) return result pygeoprocessing.raster_calculator( [(file_registry['l_path'], 1)], l_avail_op, file_registry['l_avail_path'], gdal.GDT_Float32, li_nodata) else: # user didn't predefine local recharge so calculate it LOGGER.info('loading number of monthly events') for month_id in xrange(N_MONTHS): if args['user_defined_climate_zones']: cz_rain_events_lookup = ( utils.build_lookup_from_csv( args['climate_zone_table_path'], 'cz_id')) month_label = MONTH_ID_TO_LABEL[month_id] climate_zone_rain_events_month = dict([ (cz_id, cz_rain_events_lookup[cz_id][month_label]) for cz_id in cz_rain_events_lookup]) n_events_nodata = -1 pygeoprocessing.reclassify_raster( (file_registry['cz_aligned_raster_path'], 1), climate_zone_rain_events_month, file_registry['n_events_path_list'][month_id], gdal.GDT_Float32, n_events_nodata, values_required=True) else: # rain_events_lookup defined near entry point of execute n_events = rain_events_lookup[month_id+1]['events'] pygeoprocessing.new_raster_from_base( file_registry['dem_aligned_path'], file_registry['n_events_path_list'][month_id], gdal.GDT_Float32, [TARGET_NODATA], fill_value_list=[n_events]) LOGGER.info('calculate curve number') _calculate_curve_number_raster( file_registry['lulc_aligned_path'], file_registry['soil_group_aligned_path'], biophysical_table, file_registry['cn_path']) LOGGER.info('calculate Si raster') _calculate_si_raster( file_registry['cn_path'], file_registry['stream_path'], file_registry['si_path']) for month_index in xrange(N_MONTHS): LOGGER.info('calculate quick flow for month %d', month_index+1) _calculate_monthly_quick_flow( file_registry['precip_path_aligned_list'][month_index], file_registry['lulc_aligned_path'], file_registry['cn_path'], file_registry['n_events_path_list'][month_index], 
file_registry['stream_path'], file_registry['qfm_path_list'][month_index], file_registry['si_path']) qf_nodata = -1 LOGGER.info('calculate QFi') # TODO: lose this loop def qfi_sum_op(*qf_values): """Sum the monthly qfis.""" qf_sum = numpy.zeros(qf_values[0].shape) valid_mask = qf_values[0] != qf_nodata valid_qf_sum = qf_sum[valid_mask] for index in range(len(qf_values)): valid_qf_sum += qf_values[index][valid_mask] qf_sum[:] = qf_nodata qf_sum[valid_mask] = valid_qf_sum return qf_sum pygeoprocessing.raster_calculator( [(path, 1) for path in file_registry['qfm_path_list']], qfi_sum_op, file_registry['qf_path'], gdal.GDT_Float32, qf_nodata) LOGGER.info('calculate local recharge') kc_lookup = {} LOGGER.info('classify kc') for month_index in xrange(12): kc_lookup = dict([ (lucode, biophysical_table[lucode]['kc_%d' % (month_index+1)]) for lucode in biophysical_table]) kc_nodata = -1 # a reasonable nodata value pygeoprocessing.reclassify_raster( (file_registry['lulc_aligned_path'], 1), kc_lookup, file_registry['kc_path_list'][month_index], gdal.GDT_Float32, kc_nodata) # call through to a cython function that does the necessary routing # between AET and L.sum.avail in equation [7], [4], and [3] seasonal_water_yield_core.calculate_local_recharge( file_registry['precip_path_aligned_list'], file_registry['et0_path_aligned_list'], file_registry['qfm_path_list'], file_registry['flow_dir_path'], file_registry['outflow_weights_path'], file_registry['outflow_direction_path'], file_registry['dem_aligned_path'], file_registry['lulc_aligned_path'], alpha_month, beta_i, gamma, file_registry['stream_path'], file_registry['l_path'], file_registry['l_avail_path'], file_registry['l_sum_avail_path'], file_registry['aet_path'], file_registry['kc_path_list']) #calculate Qb as the sum of local_recharge_avail over the AOI, Eq [9] qb_sum, qb_valid_count = _sum_valid(file_registry['l_path']) qb_result = 0.0 if qb_valid_count > 0: qb_result = qb_sum / qb_valid_count li_nodata = pygeoprocessing.get_raster_info( file_registry['l_path'])['nodata'][0] def vri_op(li_array): """Calculate vri index [Eq 10].""" result = numpy.empty_like(li_array) result[:] = li_nodata if qb_sum > 0: valid_mask = li_array != li_nodata result[valid_mask] = li_array[valid_mask] / qb_sum return result pygeoprocessing.raster_calculator( [(file_registry['l_path'], 1)], vri_op, file_registry['vri_path'], gdal.GDT_Float32, li_nodata) _aggregate_recharge( args['aoi_path'], file_registry['l_path'], file_registry['vri_path'], file_registry['aggregate_vector_path']) LOGGER.info('calculate L_sum') # Eq. [12] pygeoprocessing.new_raster_from_base( file_registry['dem_aligned_path'], file_registry['zero_absorption_source_path'], gdal.GDT_Float32, [TARGET_NODATA], fill_value_list=[0.0]) natcap.invest.pygeoprocessing_0_3_3.routing.route_flux( file_registry['flow_dir_path'], file_registry['dem_aligned_path'], file_registry['l_path'], file_registry['zero_absorption_source_path'], file_registry['loss_path'], file_registry['l_sum_pre_clamp'], 'flux_only', stream_uri=file_registry['stream_path']) # The result of route_flux can be slightly negative due to roundoff error # (on the order of 1e-4. 
It is acceptable to clamp those values to 0.0 l_sum_pre_clamp_nodata = pygeoprocessing.get_raster_info( file_registry['l_sum_pre_clamp'])['nodata'][0] def clamp_l_sum(l_sum_pre_clamp): """Clamp any negative values to 0.0.""" result = l_sum_pre_clamp.copy() result[ (l_sum_pre_clamp != l_sum_pre_clamp_nodata) & (l_sum_pre_clamp < 0.0)] = 0.0 return result pygeoprocessing.raster_calculator( [(file_registry['l_sum_pre_clamp'], 1)], clamp_l_sum, file_registry['l_sum_path'], gdal.GDT_Float32, l_sum_pre_clamp_nodata) LOGGER.info('calculate B_sum') seasonal_water_yield_core.route_baseflow_sum( file_registry['dem_aligned_path'], file_registry['l_path'], file_registry['l_avail_path'], file_registry['l_sum_path'], file_registry['outflow_direction_path'], file_registry['outflow_weights_path'], file_registry['stream_path'], file_registry['b_sum_path']) LOGGER.info('calculate B') b_sum_nodata = li_nodata def op_b(b_sum, l_avail, l_sum): """Calculate B=max(B_sum*Lavail/L_sum, 0).""" valid_mask = ( (b_sum != b_sum_nodata) & (l_avail != li_nodata) & (l_sum > 0) & (l_sum != l_sum_pre_clamp_nodata)) result = numpy.empty(b_sum.shape) result[:] = b_sum_nodata result[valid_mask] = ( b_sum[valid_mask] * l_avail[valid_mask] / l_sum[valid_mask]) # if l_sum is zero, it's okay to make B zero says Perrine in an email result[l_sum == 0] = 0.0 result[(result < 0) & valid_mask] = 0 return result pygeoprocessing.raster_calculator( [(file_registry['b_sum_path'], 1), (file_registry['l_path'], 1), (file_registry['l_sum_path'], 1)], op_b, file_registry['b_path'], gdal.GDT_Float32, b_sum_nodata) LOGGER.info('deleting temporary files') for file_id in _TMP_BASE_FILES: try: if isinstance(file_registry[file_id], basestring): os.remove(file_registry[file_id]) elif isinstance(file_registry[file_id], list): for index in xrange(len(file_registry[file_id])): os.remove(file_registry[file_id][index]) except OSError: # Let it go. pass LOGGER.info(' (\\w/) SWY Complete!') LOGGER.info(' (.. \\ ') LOGGER.info(' _/ ) \\______') LOGGER.info('(oo /\'\\ )`,') LOGGER.info(' `--\' (v __( / ||') LOGGER.info(' ||| ||| ||') LOGGER.info(' //_| //_|')
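

# A numpy-only restatement of three per-pixel relationships computed above
# (see ``l_avail_op``, ``vri_op``, and ``op_b``), using invented values:
# available recharge L_avail = min(gamma*L, L) (Eq. 8), the recharge
# contribution index Vri = L_i / sum(L) (the code's ``qb_sum``, Eq. 10),
# and baseflow B = max(B_sum * L_avail / L_sum, 0).
def _example_swy_per_pixel_math():
    import numpy
    gamma = 0.5
    l_array = numpy.array([5.0, -2.0, 10.0])   # local recharge L_i
    l_avail = numpy.minimum(gamma * l_array, l_array)   # [2.5, -2.0, 5.0]
    qb_sum = l_array.sum()                     # sum of L over the AOI
    vri = l_array / qb_sum                     # each pixel's share of Qb
    b_sum = numpy.array([4.0, 1.0, 6.0])       # routed upslope subsidy
    l_sum = numpy.array([8.0, 4.0, 12.0])
    baseflow = numpy.maximum(b_sum * l_avail / l_sum, 0.0)
    # baseflow == [1.25, 0.0, 2.5]; negative intermediate values clamp to 0
    return l_avail, vri, baseflow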