def test_pollination_constant_abundance(self):
    """Pollination: regression testing when abundance is all 1."""
    from natcap.invest import pollination

    input_dir = os.path.join(REGRESSION_DATA, 'input')
    args = {
        'results_suffix': '',
        'workspace_dir': self.workspace_dir,
        'landcover_raster_path': os.path.join(
            input_dir, 'clipped_landcover.tif'),
        'guild_table_path': os.path.join(
            input_dir, 'guild_table_rel_all_ones.csv'),
        'landcover_biophysical_table_path': os.path.join(
            input_dir, 'landcover_biophysical_table.csv'),
    }
    pollination.execute(args)

    abundance_raster_path = os.path.join(
        self.workspace_dir, 'pollinator_abundance_apis_spring.tif')

    # Total up band 1 of the output raster one block at a time, starting
    # from a float32 zero.
    total_abundance = numpy.float32(0.0)
    for _, abundance_block in pygeoprocessing.iterblocks(
            (abundance_raster_path, 1)):
        total_abundance += numpy.sum(abundance_block)

    # The expected value is not derived analytically: it is the sum that was
    # observed on a manually inspected run that appeared to be correct, so
    # it is only compared to two decimal places.
    self.assertAlmostEqual(total_abundance, 68.44777, places=2)
def burn_dem(
        dem_raster_path, streams_raster_path, target_burned_dem_path,
        burn_depth=10):
    """Burn streams into dem.

    Creates ``target_burned_dem_path`` from the DEM and writes a copy of the
    DEM in which every pixel whose stream value is 1 (and whose DEM value is
    not nodata) is lowered by ``burn_depth``.

    Args:
        dem_raster_path (str): path to the DEM raster; band 1 is read.
        streams_raster_path (str): path to a raster whose band 1 equals 1 on
            stream pixels. It is read with the DEM's block offsets, so it is
            assumed to share the DEM's dimensions -- TODO confirm callers
            align the two rasters.
        target_burned_dem_path (str): path to the raster to create.
        burn_depth (number): amount subtracted from the DEM on stream pixels.

    Returns:
        None.
    """
    dem_raster_info = pygeoprocessing.get_raster_info(dem_raster_path)
    dem_nodata = dem_raster_info['nodata'][0]
    pygeoprocessing.new_raster_from_base(
        dem_raster_path, target_burned_dem_path, dem_raster_info['datatype'],
        [dem_nodata])

    burned_dem_raster = gdal.OpenEx(
        target_burned_dem_path, gdal.OF_RASTER | gdal.OF_UPDATE)
    burned_dem_band = burned_dem_raster.GetRasterBand(1)
    stream_raster = gdal.OpenEx(streams_raster_path, gdal.OF_RASTER)
    stream_band = stream_raster.GetRasterBand(1)
    try:
        for offset_dict, dem_block in pygeoprocessing.iterblocks(
                (dem_raster_path, 1)):
            stream_block = stream_band.ReadAsArray(**offset_dict)
            stream_mask = stream_block == 1
            if dem_nodata is not None:
                # numpy.isclose cannot compare against None, so the nodata
                # test is only applied when the DEM defines a nodata value.
                stream_mask &= ~numpy.isclose(dem_block, dem_nodata)
            filled_block = numpy.copy(dem_block)
            filled_block[stream_mask] = filled_block[stream_mask]-burn_depth
            burned_dem_band.WriteArray(
                filled_block, xoff=offset_dict['xoff'],
                yoff=offset_dict['yoff'])
    finally:
        # Drop the GDAL handles even if a block failed so the files are
        # flushed and closed.
        stream_band = None
        stream_raster = None
        burned_dem_band = None
        burned_dem_raster = None
def _validate_inputs(lulc_snapshot_list, lulc_lookup_dict):
    """Validate inputs.

    Checks that all snapshot rasters report the same band-1 nodata value and
    that every value found in band 1 of every snapshot is either a key of
    the lookup table or that nodata value.

    Args:
        lulc_snapshot_list (list): list of snapshot raster filepaths
        lulc_lookup_dict (dict): lookup table information, keyed by raster
            value

    Raises:
        ValueError: if the rasters have different nodata values, or if a
            raster value is missing from the lookup table.
    """
    LOGGER.info('Validating inputs...')
    nodata_values = set(
        pygeoprocessing.get_raster_info(filepath)['nodata'][0]
        for filepath in lulc_snapshot_list)
    if len(nodata_values) > 1:
        raise ValueError('Provided rasters have different nodata values')

    # Collect the unique values of EVERY block of every snapshot. (Pulling
    # only the first item from each block iterator would leave most of each
    # raster unchecked.)
    raster_val_set = set()
    for snapshot in lulc_snapshot_list:
        for _, block in pygeoprocessing.iterblocks((snapshot, 1)):
            raster_val_set.update(numpy.unique(block).tolist())

    code_set = set(lulc_lookup_dict)
    # At this point there is at most one distinct nodata value; it is a
    # legal raster value even though it is not in the lookup table.
    code_set.update(nodata_values)

    missing_values = raster_val_set.difference(code_set)
    if missing_values:
        msg = "These raster values are not in the lookup table: %s" % \
            missing_values
        raise ValueError(msg)
def _calculate_vri(l_path, target_vri_path):
    """Calculate VRI as li_array / qb_sum.

    ``qb_sum`` is the sum of all non-nodata pixels in band 1 of ``l_path``.

    Parameters:
        l_path (str): path to L raster.
        target_vri_path (str): path to output Vri raster (written as
            Float32, using the L raster's nodata value).

    Returns:
        None.

    """
    l_nodata = pygeoprocessing.get_raster_info(l_path)['nodata'][0]

    qb_sum = 0.0
    for _, block in pygeoprocessing.iterblocks((l_path, 1)):
        valid_mask = block != l_nodata
        # float64 keeps the running total from losing precision.
        qb_sum += numpy.sum(block[valid_mask], dtype=numpy.float64)

    def vri_op(li_array):
        """Calculate vri index [Eq 10]."""
        # Allocate a float32 result explicitly (matching the Float32 target)
        # so an integer-typed input block cannot truncate the quotient.
        result = numpy.full(li_array.shape, l_nodata, dtype=numpy.float32)
        if qb_sum > 0:
            valid_mask = li_array != l_nodata
            result[valid_mask] = li_array[valid_mask] / qb_sum
        # When qb_sum <= 0 the output is left entirely at nodata.
        return result

    pygeoprocessing.raster_calculator(
        [(l_path, 1)], vri_op, target_vri_path, gdal.GDT_Float32,
        l_nodata)
def get_unique_values(raster_path):
    """Return the set of non-nodata unique values from `raster_path`.

    Args:
        raster_path (str): path to a raster; only band 1 is read.

    Returns:
        set of the unique band-1 values that are not close to the band's
        nodata value. If the band has no nodata value every pixel value is
        included.
    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    unique_set = set()
    for _, array in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is not None:
            # numpy.isclose raises TypeError against None, so only mask
            # when a nodata value is actually defined.
            array = array[~numpy.isclose(array, nodata)]
        unique_set.update(numpy.unique(array))
    return unique_set
def sum_of_masked_op(mask_path, value_raster_path, churn_dir):
    """Sum the value raster's pixels that lie where the mask equals 1.

    Both rasters are first aligned (nearest neighbor, intersection of their
    bounding boxes, pixel size of the value raster) into a temporary
    directory created under ``churn_dir``; that directory is removed before
    returning, including when an error occurs.

    Args:
        mask_path (str): path to the mask raster; band 1 is compared to 1.
        value_raster_path (str): path to the raster whose band 1 is summed.
        churn_dir (str): existing directory in which the temporary working
            directory is created.

    Returns:
        Sum of the aligned value pixels where the aligned mask equals 1.
        NOTE(review): value-raster nodata is not excluded from the sum --
        confirm that is intended.
    """
    temp_dir = tempfile.mkdtemp(dir=churn_dir)
    mask_raster = None
    value_raster = None
    mask_band = None
    value_band = None
    try:
        mask_align_path = os.path.join(temp_dir, 'align_mask.tif')
        value_align_path = os.path.join(temp_dir, 'value_align.tif')
        target_pixel_size = pygeoprocessing.get_raster_info(
            value_raster_path)['pixel_size']
        pygeoprocessing.align_and_resize_raster_stack(
            [mask_path, value_raster_path],
            [mask_align_path, value_align_path],
            ['near'] * 2, target_pixel_size, 'intersection')

        mask_raster = gdal.OpenEx(mask_align_path, gdal.OF_RASTER)
        value_raster = gdal.OpenEx(value_align_path, gdal.OF_RASTER)
        mask_band = mask_raster.GetRasterBand(1)
        value_band = value_raster.GetRasterBand(1)

        sum_val = 0.0
        for offset_dict in pygeoprocessing.iterblocks(
                (mask_align_path, 1), offset_only=True):
            mask_array = mask_band.ReadAsArray(**offset_dict)
            value_array = value_band.ReadAsArray(**offset_dict)
            sum_val += numpy.sum(value_array[mask_array == 1])
    finally:
        # Release the GDAL handles before deleting the files they point at,
        # and always clean up the working directory.
        mask_band = None
        value_band = None
        mask_raster = None
        value_raster = None
        shutil.rmtree(temp_dir)
    return sum_val
def _get_land_cover_transitions(raster_t1_uri, raster_t2_uri):
    """Get land cover transition.

    Args:
        raster_t1_uri (str): filepath to first raster
        raster_t2_uri (str): filepath to second raster

    Returns:
        transition_set (set): a set of all types of transitions, as
            ``(value_t1, value_t2)`` tuples, excluding any transition to or
            from a nodata value.
    """
    transition_nodata = pygeoprocessing.get_raster_info(
        raster_t1_uri)['nodata'][0]
    transition_set = set()

    for d, a1 in pygeoprocessing.iterblocks((raster_t1_uri, 1)):
        a2 = read_from_raster(raster_t2_uri, d)
        transition_set.update(zip(a1.flatten(), a2.flatten()))

    # Remove transitions to or from cells with NODATA values
    # There may be times when the user's nodata may not match NODATA_INT
    expected_nodata_values = set([NODATA_INT, transition_nodata])

    # Build the filtered set in one pass. Removing entries one nodata value
    # at a time would try to remove the same tuple twice (KeyError) when a
    # tuple contains both nodata values.
    return set(
        transition for transition in transition_set
        if not any(
            nodata_value in transition
            for nodata_value in expected_nodata_values))
def calculate_mask_area(base_mask_raster_path):
    """Calculate area of mask==1.

    Builds a per-pixel area factor in hectares (a single value for projected
    rasters, one value per row for unprojected ones), applies ``mask_op`` to
    the mask and that factor to produce a temporary area raster, and returns
    the sum of that raster.

    Args:
        base_mask_raster_path (str): path to the mask raster; band 1 is used.

    Returns:
        Sum of all pixels of the intermediate area raster.
        NOTE(review): the sum does not exclude nodata pixels, so this
        presumably relies on ``mask_op`` writing 0 outside the mask --
        confirm against ``mask_op``.
    """
    # Imported locally so this function does not depend on what the module
    # imports at top level.
    import os
    import shutil
    import tempfile

    base_raster_info = pygeoprocessing.get_raster_info(
        base_mask_raster_path)

    base_srs = osr.SpatialReference()
    base_srs.ImportFromWkt(base_raster_info['projection_wkt'])
    if base_srs.IsProjected():
        # convert m^2 of pixel size to Ha
        pixel_conversion = numpy.array([[
            abs(base_raster_info['pixel_size'][0] *
                base_raster_info['pixel_size'][1])]]) / 10000.0
    else:
        # create 1D array of pixel size vs. lat
        n_rows = base_raster_info['raster_size'][1]
        pixel_height = abs(base_raster_info['geotransform'][5])
        # the / 2 is to get in the center of the pixel
        miny = base_raster_info['bounding_box'][1] + pixel_height/2
        maxy = base_raster_info['bounding_box'][3] - pixel_height/2
        lat_vals = numpy.linspace(maxy, miny, n_rows)

        pixel_conversion = 1.0 / 10000.0 * numpy.array([
            [area_of_pixel(pixel_height, lat_val)] for lat_val in lat_vals])

    nodata = base_raster_info['nodata'][0]

    # Write the intermediate raster into a private temporary directory
    # instead of a fixed file name in the current working directory, so
    # concurrent calls cannot clobber each other and nothing is left behind.
    temp_dir = tempfile.mkdtemp(prefix='mask_area_')
    try:
        area_raster_path = os.path.join(temp_dir, 'tmp_area_mask.tif')
        pygeoprocessing.raster_calculator(
            [(base_mask_raster_path, 1), pixel_conversion], mask_op,
            area_raster_path, gdal.GDT_Float32, nodata)

        area_sum = 0.0
        for _, area_block in pygeoprocessing.iterblocks(
                (area_raster_path, 1)):
            area_sum += numpy.sum(area_block)
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
    return area_sum
def sum_raster(raster_path):
    """Sum raster and return result.

    Args:
        raster_path (str): path to a raster; only band 1 is read.

    Returns:
        Sum of the band-1 pixels that are not close to the band's nodata
        value. If the band has no nodata value every pixel is summed.
    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    sum_val = 0.0
    for _, data_array in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is not None:
            # numpy.isclose raises TypeError against None, so only mask
            # when a nodata value is actually defined.
            data_array = data_array[~numpy.isclose(data_array, nodata)]
        # Accumulate in float64 to limit round-off on float32 rasters.
        sum_val += numpy.sum(data_array, dtype=numpy.float64)
    return sum_val
def _accumulate_totals(raster_path):
    """Sum all non-nodata pixels in `raster_path` and return result.

    Args:
        raster_path (str): path to a raster; only band 1 is read.

    Returns:
        Sum of the band-1 pixels that are not close to the band's nodata
        value. If the band has no nodata value every pixel is summed.
    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    raster_sum = 0.0
    for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is not None:
            # A tolerance-based comparison still recognises nodata when the
            # reported (double precision) nodata value is not exactly
            # representable in the band's own data type.
            block = block[~numpy.isclose(block, nodata)]
        # Accumulate in float64 to limit round-off on float32 rasters.
        raster_sum += numpy.sum(block, dtype=numpy.float64)
    return raster_sum
def summarize_pixel_distribution(raster_path):
    """Summarize the distribution of pixel values in a raster.

    Convert all valid (non-nodata) band-1 pixel values to a list of values
    and summarize their distribution.

    Args:
        raster_path (string): path to raster that should be summarized

    Returns:
        dictionary with keys: 'mean', 'median', 'stdev', 'min' and 'max'.
        Every value is the string 'NA' when the raster has no valid pixels;
        'stdev' is also 'NA' when there is exactly one valid pixel, because
        a sample standard deviation needs at least two values.
    """
    value_nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]

    value_raster = gdal.OpenEx(raster_path)
    value_band = value_raster.GetRasterBand(1)

    try:
        value_list = []
        last_blocksize = None
        for block_offset in pygeoprocessing.iterblocks(
                (raster_path, 1), offset_only=True):
            blocksize = (block_offset['win_ysize'], block_offset['win_xsize'])

            # Reuse the read buffer until the block shape changes.
            if last_blocksize != blocksize:
                value_array = numpy.zeros(
                    blocksize,
                    dtype=pygeoprocessing._gdal_to_numpy_type(value_band))
                last_blocksize = blocksize

            value_data = block_offset.copy()
            value_data['buf_obj'] = value_array
            value_band.ReadAsArray(**value_data)

            if value_nodata is None:
                # No nodata defined: every pixel is valid (numpy.isclose
                # cannot compare against None).
                valid_values = value_array.ravel()
            else:
                valid_values = value_array[
                    ~numpy.isclose(value_array, value_nodata)]
            # extend() appends in place; rebuilding the list with ``+`` on
            # every block is quadratic in the number of pixels.
            value_list.extend(valid_values.tolist())
    finally:
        value_band = None
        gdal.Dataset.__swig_destroy__(value_raster)

    if len(value_list) > 0:
        summary_dict = {
            'mean': statistics.mean(value_list),
            'median': statistics.median(value_list),
            'stdev': (
                statistics.stdev(value_list) if len(value_list) > 1
                else 'NA'),
            'min': min(value_list),
            'max': max(value_list),
            }
    else:
        summary_dict = {
            'mean': 'NA',
            'median': 'NA',
            'stdev': 'NA',
            'min': 'NA',
            'max': 'NA',
            }
    return summary_dict
def calc_raster_sum(raster_path):
    """Return the sum of the values in raster_path.

    Args:
        raster_path (str): path to a raster; only band 1 is read.

    Returns:
        Sum of the band-1 pixels that are not close to the band's nodata
        value. If the band has no nodata value every pixel is summed.
    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    raster_sum = 0.0
    for _, raster_array in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is not None:
            # numpy.isclose raises TypeError against None, so only mask
            # when a nodata value is actually defined.
            raster_array = raster_array[
                ~numpy.isclose(raster_array, nodata)]
        # Accumulate in float64 to limit round-off on float32 rasters.
        raster_sum += numpy.sum(raster_array, dtype=numpy.float64)
    return raster_sum
def _sum_raster(raster_path):
    """Return sum of non-nodata values in ``raster_path``.

    Args:
        raster_path (str): path to a raster; only band 1 is read.

    Returns:
        Sum of the band-1 pixels that are not close to the band's nodata
        value. If the band has no nodata value every pixel is summed.
    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    running_sum = 0.0
    for _, raster_block in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is not None:
            # numpy.isclose raises TypeError against None, so only mask
            # when a nodata value is actually defined.
            raster_block = raster_block[
                ~numpy.isclose(raster_block, nodata)]
        # Accumulate in float64 to limit round-off on float32 rasters.
        running_sum += numpy.sum(raster_block, dtype=numpy.float64)
    return running_sum
def _make_gaussian_kernel_uri(sigma, kernel_uri):
    """Write a normalized 2D gaussian kernel to a GeoTIFF.

    Parameters:
        sigma (float): the sigma as in the classic Gaussian function
        kernel_uri (string): path to raster on disk to write the gaussian
            kernel.

    Returns:
        None.
    """
    # The kernel is cut off 3 sigma from its center, which covers over 99%
    # of the area under the gaussian curve.
    radius = sigma * 3.0
    n_pixels = int(numpy.round(radius * 2 + 1))

    gtiff_driver = gdal.GetDriverByName('GTiff')
    kernel_raster = gtiff_driver.Create(
        kernel_uri.encode('utf-8'), n_pixels, n_pixels, 1,
        gdal.GDT_Float32, options=['BIGTIFF=IF_SAFER'])

    # The georeferencing is arbitrary; it is only defined so that GIS
    # libraries reading the kernel behave well.
    kernel_raster.SetGeoTransform([444720, 30, 0, 3751320, 0, -30])
    spatial_ref = osr.SpatialReference()
    spatial_ref.SetUTM(11, 1)
    spatial_ref.SetWellKnownGeogCS('NAD27')
    kernel_raster.SetProjection(spatial_ref.ExportToWkt())

    target_band = kernel_raster.GetRasterBand(1)
    target_band.SetNoDataValue(-9999)

    column_indexes = numpy.array(xrange(n_pixels))
    kernel_total = 0.0
    for row in xrange(n_pixels):
        # Distance of every pixel in this row from the kernel center.
        squared_distance = (
            (row - radius)**2 + (column_indexes - radius)**2)
        row_distance = numpy.sqrt(squared_distance).reshape(1, n_pixels)
        gaussian_row = (
            1 / (2.0 * numpy.pi * sigma**2) *
            numpy.exp(-row_distance**2 / (2 * sigma**2)))
        # Pixels beyond the cutoff radius contribute nothing.
        kernel_row = numpy.where(row_distance > radius, 0.0, gaussian_row)
        kernel_total += numpy.sum(kernel_row)
        target_band.WriteArray(kernel_row, xoff=0, yoff=row)

    # Flush so the block reader below sees what was just written, then
    # rewrite every block divided by the kernel's total.
    kernel_raster.FlushCache()
    for block_offset, block_values in pygeoprocessing.iterblocks(kernel_uri):
        block_values /= kernel_total
        target_band.WriteArray(
            block_values, xoff=block_offset['xoff'],
            yoff=block_offset['yoff'])
def model_predict(
        model, lulc_raster_path, forest_mask_raster_path,
        aligned_predictor_list, predicted_biomass_raster_path):
    """Predict biomass given predictors.

    For every block, pixels where the forest mask equals 1 and no predictor
    equals its nodata value are passed to ``model`` (one row per pixel, one
    column per predictor, float32) and the model output is written to the
    target raster; all other pixels of a processed block are written as -1.

    Args:
        model (callable): called with a ``torch`` tensor built from the
            predictor values; its result must support
            ``.detach().numpy()``.
        lulc_raster_path (str): raster used as the template for the target
            raster and for block iteration.
        forest_mask_raster_path (str): raster whose band 1 equals 1 on
            pixels to predict.
        aligned_predictor_list (list): ``(predictor_path, nodata)`` tuples;
            a ``None`` nodata means the band's own nodata value is used.
            All rasters are read with the same block offsets, so they are
            assumed to share dimensions -- TODO confirm with callers.
        predicted_biomass_raster_path (str): path to the Float32 raster to
            create, with nodata -1.

    Returns:
        None.
    """
    pygeoprocessing.new_raster_from_base(
        lulc_raster_path, predicted_biomass_raster_path, gdal.GDT_Float32,
        [-1])
    # OF_UPDATE is the OpenEx flag for write access.
    predicted_biomass_raster = gdal.OpenEx(
        predicted_biomass_raster_path, gdal.OF_RASTER | gdal.OF_UPDATE)
    predicted_biomass_band = predicted_biomass_raster.GetRasterBand(1)

    predictor_band_nodata_list = []
    # Keep the datasets referenced so their bands stay valid.
    raster_list = []
    for predictor_path, nodata in aligned_predictor_list:
        predictor_raster = gdal.OpenEx(predictor_path, gdal.OF_RASTER)
        raster_list.append(predictor_raster)
        predictor_band = predictor_raster.GetRasterBand(1)

        if nodata is None:
            nodata = predictor_band.GetNoDataValue()
        predictor_band_nodata_list.append((predictor_band, nodata))

    forest_raster = gdal.OpenEx(forest_mask_raster_path, gdal.OF_RASTER)
    forest_band = forest_raster.GetRasterBand(1)

    for offset_dict in pygeoprocessing.iterblocks(
            (lulc_raster_path, 1), offset_only=True):
        forest_array = forest_band.ReadAsArray(**offset_dict)
        valid_mask = (forest_array == 1)
        array_list = []
        for band, nodata in predictor_band_nodata_list:
            array = band.ReadAsArray(**offset_dict)
            if nodata is not None:
                valid_mask &= array != nodata
            array_list.append(array)
        if not numpy.any(valid_mask):
            # Nothing to predict in this block; it is left unwritten.
            continue

        # One row per predictor, one column per valid pixel.
        x_vector = numpy.stack(
            [array[valid_mask].astype(numpy.float32)
             for array in array_list], axis=0)
        y_vector = model(torch.from_numpy(x_vector.T))

        # The result must be a float array: an integer-typed array would
        # silently truncate the predictions before they are written.
        result = numpy.full(forest_array.shape, -1, dtype=numpy.float32)
        result[valid_mask] = (y_vector.detach().numpy()).flatten()
        predicted_biomass_band.WriteArray(
            result, xoff=offset_dict['xoff'], yoff=offset_dict['yoff'])

    predicted_biomass_band = None
    predicted_biomass_raster = None
def _check_missing_lucodes(
        clipped_lulc_path, demand_lucodes, bio_lucodes, valid_lulc_txt_path):
    """Verify that every landcover raster value appears in the tables.

    A landcover raster containing codes that are absent from the
    biophysical or demand table is a very common input error.

    Parameters:
        clipped_lulc_path (string): file path to lulc raster
        demand_lucodes (set): codes found in args['demand_table_path'], or
            None to skip the demand table check.
        bio_lucodes (set): codes found in args['biophysical_table_path']
        valid_lulc_txt_path (string): path to a file that gets created if
            there are no missing values. serves as target_path_list for
            taskgraph.

    Returns:
        None

    Raises:
        ValueError if any landcover codes are present in the raster but
            not in both of the tables.

    """
    LOGGER.info(
        'Checking that input tables have landcover codes for every value '
        'in the landcover map.')

    # Gather every distinct value in band 1 of the raster first, then
    # compare against each table once.
    raster_codes = set()
    for _, lulc_block in pygeoprocessing.iterblocks((clipped_lulc_path, 1)):
        raster_codes.update(numpy.unique(lulc_block))

    missing_bio_lucodes = raster_codes.difference(bio_lucodes)
    if demand_lucodes is None:
        missing_demand_lucodes = set()
    else:
        missing_demand_lucodes = raster_codes.difference(demand_lucodes)

    message_parts = []
    if missing_bio_lucodes:
        bio_code_text = ', '.join(
            str(code) for code in sorted(missing_bio_lucodes))
        message_parts.append(
            'The following landcover codes were found in the landcover '
            'raster but they did not have corresponding entries in the '
            'biophysical table. Check your biophysical table to see if they '
            'are missing. %s.\n\n' % bio_code_text)
    if missing_demand_lucodes:
        demand_code_text = ', '.join(
            str(code) for code in sorted(missing_demand_lucodes))
        message_parts.append(
            'The following landcover codes were found in the landcover '
            'raster but they did not have corresponding entries in the water '
            'demand table. Check your demand table to see if they are '
            'missing. "%s".\n\n' % demand_code_text)

    missing_message = ''.join(message_parts)
    if missing_message:
        raise ValueError(missing_message)

    # An empty file marks the check as passed.
    with open(valid_lulc_txt_path, 'w') as txt_file:
        txt_file.write('')
def sum_raster(raster_path_band):
    """Sum the raster and return the result.

    Args:
        raster_path_band (tuple): ``(raster path, 1-based band index)``.

    Returns:
        Sum of that band's pixels that are not close to the band's nodata
        value. If the band has no nodata value every pixel is summed.
    """
    nodata = pygeoprocessing.get_raster_info(
        raster_path_band[0])['nodata'][raster_path_band[1]-1]
    raster_sum = 0.0
    for _, array in pygeoprocessing.iterblocks(raster_path_band):
        if nodata is not None:
            # numpy.isclose raises TypeError against None, so only mask
            # when a nodata value is actually defined.
            array = array[~numpy.isclose(array, nodata)]
        # Accumulate in float64 to limit round-off on float32 rasters.
        raster_sum += numpy.sum(array, dtype=numpy.float64)
    return raster_sum
def _make_gaussian_kernel_path(sigma, kernel_path):
    """Create a 2D Gaussian kernel.

    The kernel is truncated 3 sigma from its center and normalized so that
    its pixels sum to 1. The raster is flushed and closed before returning.

    Args:
        sigma (float): the sigma as in the classic Gaussian function
        kernel_path (string): path to raster on disk to write the gaussian
            kernel.

    Returns:
        None.
    """
    # going 3.0 times out from the sigma gives you over 99% of area under
    # the guassian curve
    max_distance = sigma * 3.0
    kernel_size = int(numpy.round(max_distance * 2 + 1))

    driver = gdal.GetDriverByName('GTiff')
    kernel_dataset = driver.Create(
        kernel_path.encode('utf-8'), kernel_size, kernel_size, 1,
        gdal.GDT_Float32, options=[
            'BIGTIFF=IF_SAFER', 'TILED=YES', 'BLOCKXSIZE=256',
            'BLOCKYSIZE=256'])

    # Make some kind of geotransform, it doesn't matter what but
    # will make GIS libraries behave better if it's all defined
    kernel_dataset.SetGeoTransform([0, 1, 0, 0, 0, -1])
    srs = osr.SpatialReference()
    srs.SetWellKnownGeogCS('WGS84')
    kernel_dataset.SetProjection(srs.ExportToWkt())

    kernel_band = kernel_dataset.GetRasterBand(1)
    kernel_band.SetNoDataValue(-9999)

    col_index = numpy.array(range(kernel_size))
    running_sum = 0.0
    for row_index in range(kernel_size):
        distance_kernel_row = numpy.sqrt(
            (row_index - max_distance) ** 2 +
            (col_index - max_distance) ** 2).reshape(1, kernel_size)
        kernel = numpy.where(
            distance_kernel_row > max_distance, 0.0,
            (1 / (2.0 * numpy.pi * sigma ** 2) *
             numpy.exp(-distance_kernel_row**2 / (2 * sigma ** 2))))
        running_sum += numpy.sum(kernel)
        kernel_band.WriteArray(kernel, xoff=0, yoff=row_index)

    # Flush so the block reader below sees the rows written above.
    kernel_dataset.FlushCache()
    for kernel_data, kernel_block in pygeoprocessing.iterblocks(
            (kernel_path, 1)):
        # divide by sum to normalize
        kernel_block /= running_sum
        kernel_band.WriteArray(
            kernel_block, xoff=kernel_data['xoff'],
            yoff=kernel_data['yoff'])

    # Explicitly flush and release the handles so the normalized kernel is
    # on disk when this function returns, rather than relying on garbage
    # collection to close the dataset.
    kernel_band.FlushCache()
    kernel_band = None
    kernel_dataset.FlushCache()
    kernel_dataset = None
def _accumulate_totals(raster_path):
    """Sum all non-nodata pixels in `raster_path` and return result.

    Args:
        raster_path (str): path to a raster; only band 1 is read.

    Returns:
        float64 sum of the band-1 pixels that are not close to the band's
        nodata value. If the band has no nodata value every pixel is
        summed.
    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    raster_sum = 0.0
    for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is not None:
            # numpy.isclose raises TypeError against None, so only mask
            # when a nodata value is actually defined.
            block = block[~numpy.isclose(block, nodata)]
        # The float64 dtype in the sum is needed to reduce numerical error in
        # the sum. Users calculated the sum with ArcGIS zonal statistics,
        # noticed a difference and wrote to us about it on the forum.
        raster_sum += numpy.sum(block, dtype=numpy.float64)
    return raster_sum
def _sum_raster(raster_path):
    """Return the sum of the raster.

    Args:
        raster_path (str): path to a raster; only band 1 is read.

    Returns:
        Sum of the band-1 pixels that are not close to the band's nodata
        value. If the band has no nodata value every pixel is summed.
    """
    running_sum = 0
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    for _, data_array in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is not None:
            valid_values = data_array[~numpy.isclose(data_array, nodata)]
        else:
            # No nodata defined: use the whole block. (Indexing with
            # ``slice(-1)`` would silently drop the block's last row.)
            valid_values = data_array
        running_sum += numpy.sum(valid_values)
    return running_sum
def calculate_cdf(raster_path, percentile_list):
    """Calculate the CDF given its percentile list.

    Args:
        raster_path (str): path to a raster; only band 1 is read.
        percentile_list (list): threshold values to compare pixels against.

    Returns:
        list with one entry per item of ``percentile_list``: the sum of the
        non-nodata pixel values that are greater than or equal to that
        threshold.
    """
    cdf_array = [0.0] * len(percentile_list)
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    for _, data_block in pygeoprocessing.iterblocks((raster_path, 1)):
        if nodata is None:
            # No nodata defined: every pixel is valid (numpy.isclose cannot
            # compare against None).
            valid_values = data_block.ravel()
        else:
            valid_values = data_block[~numpy.isclose(data_block, nodata)]
        # The valid pixels are extracted once per block rather than once
        # per threshold.
        for index, percentile_value in enumerate(percentile_list):
            cdf_array[index] += numpy.sum(
                valid_values[valid_values >= percentile_value])
    return cdf_array
def calculate_percentile(
        raster_path, percentiles_list, workspace_dir, result_pickle_path):
    """Calculate the percentile cutoffs of a given raster. Store in pickle.

    Parameters:
        raster_path (str): path to raster to calculate over.
        percentiles_list (list): sorted list of increasing percentile
            cutoffs to calculate.
        workspace_dir (str): path to a directory where this function can
            create a temporary directory to work in. The temporary
            directory is removed before returning, also on error.
        result_pickle_path (path): path to the file that will store a
            pickled dictionary with the keys
            "percentiles_list" -- original value of `percentiles_list`
            "percentile_values_list" -- list of percentile threshold values
                in the same position in `percentiles_list`.
            "percentile_sum_list" -- sum of all non-nodata values strictly
                greater than the threshold in the same position in
                "percentile_values_list".

    Returns:
        None.

    """
    churn_dir = tempfile.mkdtemp(dir=workspace_dir)
    try:
        LOGGER.debug('processing percentiles for %s', raster_path)
        heap_size = 2**28
        ffi_buffer_size = 2**10
        result_dict = {
            'percentiles_list': percentiles_list,
            'percentile_sum_list': [0.] * len(percentiles_list),
            'percentile_values_list': (
                pygeoprocessing.raster_band_percentile(
                    (raster_path, 1), churn_dir, percentiles_list,
                    heap_size, ffi_buffer_size))
        }
        LOGGER.debug('intermediate result_dict: %s', str(result_dict))
        LOGGER.debug('processing percentile sums for %s', raster_path)
        nodata_value = pygeoprocessing.get_raster_info(
            raster_path)['nodata'][0]
        for _, block_data in pygeoprocessing.iterblocks((raster_path, 1)):
            if nodata_value is None:
                # No nodata defined: every pixel is valid (numpy.isclose
                # cannot compare against None).
                valid_values = block_data.ravel()
            else:
                valid_values = block_data[
                    ~numpy.isclose(block_data, nodata_value)]
            for index, percentile_value in enumerate(
                    result_dict['percentile_values_list']):
                result_dict['percentile_sum_list'][index] += (
                    numpy.sum(valid_values[valid_values > percentile_value]))

        LOGGER.debug(
            'pickling percentile results of %s to %s', raster_path,
            result_pickle_path)
        with open(result_pickle_path, 'wb') as pickle_file:
            pickle_file.write(pickle.dumps(result_dict))
    finally:
        # Always remove the scratch directory, even when a step failed.
        shutil.rmtree(churn_dir)
def sum_valid(raster_path):
    """Sum non-nodata pixels in raster_path.

    Args:
        raster_path (str): path to arbitrary raster; only band 1 is read.

    Returns:
        sum of non-nodata pixels in raster at `raster_path`. If the band
        has no nodata value every pixel is summed.

    """
    accumulator_sum = 0.0
    raster_nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    for _, raster_block in pygeoprocessing.iterblocks((raster_path, 1)):
        if raster_nodata is not None:
            # numpy.isclose raises TypeError against None, so only mask
            # when a nodata value is actually defined.
            raster_block = raster_block[
                ~numpy.isclose(raster_block, raster_nodata)]
        # Accumulate in float64 to limit round-off on float32 rasters.
        accumulator_sum += numpy.sum(raster_block, dtype=numpy.float64)
    return accumulator_sum
def mosaic_base_into_target(
        base_raster_path, target_raster_path, target_token_complete_path):
    """Copy base into the nodata parts of target w/r/t georeference.

    Parameters:
        base_raster_path (str): a raster with the same cell size,
            coordinate system, and nodata as `target_raster_path`.
        target_raster_path (str): a raster that already exists on disk.
            After this call, every pixel of it that was nodata and is
            geographically overlapped by `base_raster_path` holds the base
            raster's value; pixels that already held data are unchanged.
            The target must define a nodata value, and the base raster is
            assumed to lie entirely inside the target -- TODO confirm with
            callers.
        target_token_complete_path (str): this file is created if the
            mosaic to target is successful. Useful for taskgraph task
            scheduling.

    Returns:
        None.

    """
    # OF_UPDATE is the OpenEx flag for write access.
    target_raster = gdal.OpenEx(
        target_raster_path, gdal.OF_RASTER | gdal.OF_UPDATE)
    target_band = target_raster.GetRasterBand(1)
    target_raster_info = pygeoprocessing.get_raster_info(target_raster_path)
    target_nodata = target_raster_info['nodata'][0]
    base_raster_info = pygeoprocessing.get_raster_info(base_raster_path)

    target_gt = target_raster_info['geotransform']
    base_gt = base_raster_info['geotransform']

    # Round to the nearest pixel: plain int() truncates, so a quotient such
    # as 9.9999999 (floating point error) would land one pixel short.
    target_x_off = int(round((base_gt[0] - target_gt[0]) / target_gt[1]))
    target_y_off = int(round((base_gt[3] - target_gt[3]) / target_gt[5]))

    for offset_dict, band_data in pygeoprocessing.iterblocks(
            (base_raster_path, 1)):
        target_block = target_band.ReadAsArray(
            xoff=offset_dict['xoff'] + target_x_off,
            yoff=offset_dict['yoff'] + target_y_off,
            win_xsize=offset_dict['win_xsize'],
            win_ysize=offset_dict['win_ysize'])
        # Only pixels that are still nodata in the target get filled.
        fill_mask = numpy.isclose(target_block, target_nodata)
        target_block[fill_mask] = band_data[fill_mask]
        target_band.WriteArray(
            target_block,
            xoff=offset_dict['xoff'] + target_x_off,
            yoff=offset_dict['yoff'] + target_y_off)

    target_band.FlushCache()
    target_band = None
    target_raster = None

    with open(target_token_complete_path, 'w') as token_file:
        token_file.write('complete!')
def raster_pixel_count(raster_path):
    """Tally how often each pixel value occurs in band 1 of a raster.

    Parameters:
        raster_path (string): path to a raster

    Returns:
        dict of pixel values to frequency. Values equal to the band's
        nodata value are left out.

    """
    nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    counts = collections.defaultdict(int)
    for _, raster_block in pygeoprocessing.iterblocks((raster_path, 1)):
        block_values, block_frequencies = numpy.unique(
            raster_block, return_counts=True)
        for pixel_value, frequency in zip(block_values, block_frequencies):
            if not (pixel_value == nodata):
                counts[pixel_value] += frequency
    return counts
def test_iterblocks_multiband(self):
    """PGP.geoprocessing: multiband iterblocks on identical blocks."""
    nodata = 0
    ones_matrix = numpy.ones((1000, 1000))
    # The second band is twice the first so the two bands can be told
    # apart in the blocks that come back.
    band_matrices = [ones_matrix, 2 * ones_matrix]
    srs = sampledata.SRS_COLOMBIA

    pygeoprocessing.testing.create_raster_on_disk(
        band_matrices, srs.origin, srs.projection, nodata,
        srs.pixel_size(30), filename=self.raster_filename,
        dataset_opts=['TILED=YES'])

    for block_tuple in pygeoprocessing.iterblocks(self.raster_filename):
        _, first_band_block, second_band_block = block_tuple
        numpy.testing.assert_almost_equal(
            first_band_block * 2, second_band_block)
def main():
    """Compare two rasters: print their r2 score and show a scatter plot.

    Reads band 1 of both rasters given on the command line using raster A's
    block layout, keeps the pixels where both values lie strictly between 0
    and 500, and reports on a random sample of up to 10000 of those pixels.
    """
    parser = argparse.ArgumentParser(description='Run CE model')
    parser.add_argument('raster_a_path')
    parser.add_argument('raster_b_path')
    args = parser.parse_args()

    raster_a = gdal.OpenEx(args.raster_a_path, gdal.OF_RASTER)
    raster_b = gdal.OpenEx(args.raster_b_path, gdal.OF_RASTER)
    band_a = raster_a.GetRasterBand(1)
    band_b = raster_b.GetRasterBand(1)

    a_nodata = pygeoprocessing.get_raster_info(args.raster_a_path)['nodata'][0]
    b_nodata = pygeoprocessing.get_raster_info(args.raster_b_path)['nodata'][0]
    print(a_nodata, b_nodata)

    # Collect per-block pieces and concatenate once at the end; growing an
    # array with numpy.append on every block copies it each time. The
    # leading empty float array keeps the result float64, as before.
    valid_a_chunks = [numpy.array([])]
    valid_b_chunks = [numpy.array([])]
    # Only the offsets are needed: both bands are read explicitly below, so
    # asking the iterator for the data too would read raster A twice.
    for offset_dict in pygeoprocessing.iterblocks(
            (args.raster_a_path, 1), offset_only=True):
        array_a = band_a.ReadAsArray(**offset_dict)
        array_b = band_b.ReadAsArray(**offset_dict)
        valid_mask = (
            (array_a > 0) & (array_a < 500) &
            (array_b > 0) & (array_b < 500))
        valid_a_chunks.append(array_a[valid_mask])
        valid_b_chunks.append(array_b[valid_mask])
    valid_a = numpy.concatenate(valid_a_chunks)
    valid_b = numpy.concatenate(valid_b_chunks)

    # Random sample of at most n valid pixels.
    n = 10000
    arr = numpy.arange(valid_a.size)
    numpy.random.shuffle(arr)
    index = arr[:n]

    r2 = r2_score(valid_a[index], valid_b[index],
                  multioutput='variance_weighted')
    print(f'r2: {r2}')
    print(numpy.sum(valid_b / valid_a) / valid_a.size)

    max_val = numpy.max(valid_a)
    print(f'max val: {max_val} {numpy.max(valid_b)}')

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(valid_b[index], valid_a[index], s=1, alpha=1)
    # 1:1 reference line.
    ax.plot(numpy.arange(max_val), numpy.arange(max_val),
            linewidth=0.5, c='b')
    ax.set_xlim([0, max_val])
    ax.set_ylim([0, max_val])
    plt.show()
def test_delineateit_willamette_detect_pour_points(self):
    """DelineateIt: regression testing full run with pour point detection."""
    from natcap.invest.delineateit import delineateit

    input_dir = os.path.join(REGRESSION_DATA, 'input')
    args = {
        'dem_path': os.path.join(input_dir, 'dem.tif'),
        'outlet_vector_path': os.path.join(input_dir, 'outlets.shp'),
        'workspace_dir': self.workspace_dir,
        'detect_pour_points': True,
        'results_suffix': 'w',
        'n_workers': None,  # Trigger error and default to -1
    }
    delineateit.execute(args)

    watersheds_path = os.path.join(
        args['workspace_dir'], 'watersheds_w.gpkg')
    vector = gdal.OpenEx(watersheds_path, gdal.OF_VECTOR)
    # The layer name carries the results suffix.
    layer = vector.GetLayer('watersheds_w')
    self.assertEqual(layer.GetFeatureCount(), 102)

    # Every valid DEM pixel should be covered by a watershed, so the area
    # of the valid pixels must match the total watershed area.
    raster_info = pygeoprocessing.get_raster_info(args['dem_path'])
    pixel_width, pixel_height = raster_info['pixel_size']
    pixel_area = abs(pixel_width * pixel_height)
    nodata = raster_info['nodata'][0]

    n_pixels = sum(
        numpy.count_nonzero(~numpy.isclose(block, nodata))
        for _, block in pygeoprocessing.iterblocks((args['dem_path'], 1)))
    valid_pixel_area = n_pixels * pixel_area

    total_area = sum(
        feature.GetGeometryRef().Area() for feature in layer)
    self.assertAlmostEqual(valid_pixel_area, total_area, 4)
def test_iterblocks(self):
    """PGP.geoprocessing: Sum a 1000**2 raster using iterblocks."""
    nodata = 0
    ones_matrix = numpy.ones((1000, 1000))
    srs = sampledata.SRS_COLOMBIA
    pygeoprocessing.testing.create_raster_on_disk(
        [ones_matrix], srs.origin, srs.projection, nodata,
        srs.pixel_size(30), filename=self.raster_filename,
        dataset_opts=['TILED=YES'])

    # A 1000 x 1000 raster of ones must add up to one million once every
    # block has been visited.
    raster_sum = sum(
        memblock.sum()
        for _, memblock in pygeoprocessing.iterblocks(self.raster_filename))

    self.assertEqual(raster_sum, 1000000)
def _sum_valid(raster_path):
    """Total and count the non-nodata pixels of a raster.

    Parameters:
        raster_path (string): path to raster on disk

    Returns:
        (sum, n_pixels) tuple where sum is the sum of the pixels that
        differ from the band's nodata value and n_pixels is how many such
        pixels there are.

    """
    nodata_value = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    total = 0
    n_valid = 0
    block_iterator = pygeoprocessing.iterblocks(
        raster_path, band_index_list=[1])
    for _, pixel_block in block_iterator:
        is_valid = pixel_block != nodata_value
        n_valid += numpy.count_nonzero(is_valid)
        total += numpy.sum(pixel_block[is_valid])
    return total, n_valid