def calculate_percentile(
        raster_path, percentiles_list, workspace_dir, result_pickle_path):
    """Calculate the percentile cutoffs of a given raster. Store as pickle.

    Parameters:
        raster_path (str): path to raster to calculate over.
        percentiles_list (list): sorted list of increasing percentile
            cutoffs to calculate.
        workspace_dir (str): path to a directory where this function can
            create a temporary directory to work in.
        result_pickle_path (str): path to a pickle file that will store a
            list of percentile threshold values in the same position as in
            `percentiles_list`.

    Returns:
        None.

    """
    churn_dir = tempfile.mkdtemp(dir=workspace_dir)
    LOGGER.debug('processing percentiles for %s', raster_path)
    heap_size = 2**28
    ffi_buffer_size = 2**10
    percentile_values_list = pygeoprocessing.raster_band_percentile(
        (raster_path, 1), churn_dir, percentiles_list, heap_size,
        ffi_buffer_size)
    with open(result_pickle_path, 'wb') as pickle_file:
        pickle.dump(percentile_values_list, pickle_file)
        pickle_file.flush()
    shutil.rmtree(churn_dir)
def calculate_percentile(
        raster_path, percentiles_list, workspace_dir, result_pickle_path):
    """Calculate the percentile cutoffs of a given raster. Store as pickle.

    Parameters:
        raster_path (str): path to raster to calculate over.
        percentiles_list (list): sorted list of increasing percentile
            cutoffs to calculate.
        workspace_dir (str): path to a directory where this function can
            create a temporary directory to work in.
        result_pickle_path (str): path to a pickle file that will store a
            dict with these keys:
                "percentiles_list" -- original value of `percentiles_list`.
                "percentile_values_list" -- list of percentile threshold
                    values in the same position as in `percentiles_list`.
                "percentile_sum_list" -- sum of all non-nodata values above
                    the given percentile threshold, in the same position as
                    in `percentiles_list`.

    Returns:
        None.

    """
    churn_dir = tempfile.mkdtemp(dir=workspace_dir)
    LOGGER.debug('processing percentiles for %s', raster_path)
    heap_size = 2**28
    ffi_buffer_size = 2**10
    result_dict = {
        'percentiles_list': percentiles_list,
        'percentile_sum_list': [0.] * len(percentiles_list),
        'percentile_values_list': pygeoprocessing.raster_band_percentile(
            (raster_path, 1), churn_dir, percentiles_list, heap_size,
            ffi_buffer_size)
    }
    LOGGER.debug('intermediate result_dict: %s', str(result_dict))
    LOGGER.debug('processing percentile sums for %s', raster_path)
    nodata_value = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    for _, block_data in pygeoprocessing.iterblocks((raster_path, 1)):
        nodata_mask = numpy.isclose(block_data, nodata_value)
        # accumulate the sum of valid values above each percentile cutoff
        for index, percentile_value in enumerate(
                result_dict['percentile_values_list']):
            mask = (block_data > percentile_value) & (~nodata_mask)
            result_dict['percentile_sum_list'][index] += (
                numpy.sum(block_data[mask]))
    LOGGER.debug(
        'pickling percentile results of %s to %s', raster_path,
        result_pickle_path)
    with open(result_pickle_path, 'wb') as pickle_file:
        pickle_file.write(pickle.dumps(result_dict))
    shutil.rmtree(churn_dir)
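# A minimal sketch (not part of the original script) of reading the pickled
# result back; it assumes the dict keys written by calculate_percentile above
# and a hypothetical result path.
import pickle


def _example_read_percentile_pickle(
        result_pickle_path='percentile_results.pkl'):
    """Illustrative only: print the cutoffs, thresholds, and sums."""
    with open(result_pickle_path, 'rb') as pickle_file:
        result_dict = pickle.load(pickle_file)
    for cutoff, value, partial_sum in zip(
            result_dict['percentiles_list'],
            result_dict['percentile_values_list'],
            result_dict['percentile_sum_list']):
        print('%5.1f%%: threshold %g, sum above threshold %g' % (
            cutoff, value, partial_sum))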
def main():
    """Write your expression here."""
    path = r"C:\Users\Becky\Documents\carbon_edge_model\error_regression_baccini_biomass.tif"
    # temporary working directory because raster_band_percentile needs
    # scratch space for its heap files
    percentile_working_dir = r"C:\Users\Becky\Documents\raster_calculations\percentile_working_dir"
    try:
        # make the directory if it doesn't already exist; if it does, do
        # nothing
        os.makedirs(percentile_working_dir)
    except OSError:
        pass
    # (path, 1) indicates the first band of the raster at `path`; the 2nd
    # argument is the working dir; the third is the list of percentiles to
    # calculate
    percentile_values_list = pygeoprocessing.raster_band_percentile(
        # (path, 1), percentile_working_dir,
        # [0, 1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75,
        #  80, 85, 90, 95, 99, 99.5, 99.9, 99.99, 99.999, 100])
        (path, 1), percentile_working_dir, list(range(0, 101, 1)))
    # remove the temporary working directory
    shutil.rmtree(percentile_working_dir)
    print(percentile_values_list)
def calculate_percentiles(
        raster_path, percentile_list, target_percentile_pickle_path):
    """Calculate percentiles and save to a pickle file.

    Parameters:
        raster_path (str): path to raster.
        percentile_list (list): list of increasing-order percentile
            thresholds in the range 0-100.
        target_percentile_pickle_path (str): the result of the percentile
            function will be saved as a list that is pickled to this file.

    Returns:
        None.

    """
    working_dir = os.path.dirname(target_percentile_pickle_path)
    heapfile_dir = tempfile.mkdtemp(dir=working_dir)
    percentile_values = pygeoprocessing.raster_band_percentile(
        (raster_path, 1), heapfile_dir, percentile_list)
    with open(target_percentile_pickle_path, 'wb') as pickle_file:
        pickle.dump(percentile_values, pickle_file)
    shutil.rmtree(heapfile_dir)
def _calculate_visual_quality(source_raster_path, working_dir, target_path):
    """Calculate visual quality based on a raster.

    Visual quality is based on the nearest-rank method for breaking pixel
    values from the source raster into percentiles.

    Args:
        source_raster_path (string): The path to a raster from which
            percentiles should be calculated. Nodata values and pixel
            values of 0 are ignored.
        working_dir (string): A directory where working files can be saved.
            A new temporary directory will be created within. This new
            temporary directory will be removed at the end of the function.
        target_path (string): The path to where the output raster will be
            written.

    Returns:
        ``None``

    """
    # Using the nearest-rank method.
    LOGGER.info('Calculating visual quality')

    raster_info = pygeoprocessing.get_raster_info(source_raster_path)
    raster_nodata = raster_info['nodata'][0]

    temp_dir = tempfile.mkdtemp(dir=working_dir, prefix='visual_quality')

    # phase 1: calculate percentiles from the visible_structures raster
    LOGGER.info(
        'Determining percentiles for %s',
        os.path.basename(source_raster_path))

    def _mask_zeros(valuation_matrix):
        """Assign zeros to nodata, excluding them from percentile calc."""
        valid_mask = ~numpy.isclose(valuation_matrix, 0.0)
        if raster_nodata is not None:
            valid_mask &= ~utils.array_equals_nodata(
                valuation_matrix, raster_nodata)
        visual_quality = numpy.empty(
            valuation_matrix.shape, dtype=numpy.float64)
        visual_quality[:] = _VALUATION_NODATA
        visual_quality[valid_mask] = valuation_matrix[valid_mask]
        return visual_quality

    masked_raster_path = os.path.join(temp_dir, 'zeros_masked.tif')
    pygeoprocessing.raster_calculator(
        [(source_raster_path, 1)], _mask_zeros, masked_raster_path,
        gdal.GDT_Float64, _VALUATION_NODATA,
        raster_driver_creation_tuple=FLOAT_GTIFF_CREATION_OPTIONS)

    percentile_values = pygeoprocessing.raster_band_percentile(
        (masked_raster_path, 1), temp_dir, [0., 25., 50., 75.])

    shutil.rmtree(temp_dir, ignore_errors=True)

    # Phase 2: map values to their bins to indicate visual quality.
    percentile_bins = numpy.array(percentile_values)
    LOGGER.info('Mapping percentile breaks %s', percentile_bins)

    def _map_percentiles(valuation_matrix):
        nonzero = (valuation_matrix != 0)
        nodata = utils.array_equals_nodata(valuation_matrix, raster_nodata)
        valid_indexes = (~nodata & nonzero)
        visual_quality = numpy.empty(
            valuation_matrix.shape, dtype=numpy.int8)
        visual_quality[:] = _BYTE_NODATA
        visual_quality[~nonzero & ~nodata] = 0
        visual_quality[valid_indexes] = numpy.digitize(
            valuation_matrix[valid_indexes], percentile_bins)
        return visual_quality

    pygeoprocessing.raster_calculator(
        [(source_raster_path, 1)], _map_percentiles, target_path,
        gdal.GDT_Byte, _BYTE_NODATA,
        raster_driver_creation_tuple=BYTE_GTIFF_CREATION_OPTIONS)
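# Quick illustration (not from the original module) of how numpy.digitize
# maps valid pixel values onto the four visual-quality classes; the break
# values below are made up.
import numpy


def _example_digitize_percentiles():
    """Illustrative only: bin values against hypothetical quartile breaks."""
    percentile_bins = numpy.array([0.5, 2.0, 5.0, 9.0])
    values = numpy.array([0.7, 1.9, 5.0, 12.0])
    # each value gets the index of the bin it falls into, i.e. class 1..4
    print(numpy.digitize(values, percentile_bins))  # [1 1 3 4]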
def _evaluate_expression(
        processed_raster_list_file_path, symbol_to_path_band_map, args,
        workspace_dir):
    """Evaluate expression once rasters have been processed."""
    LOGGER.debug(processed_raster_list_file_path)
    with open(processed_raster_list_file_path, 'rb') as (
            processed_raster_list_file):
        processed_raster_path_list = pickle.load(processed_raster_list_file)

    for symbol, raster_path in zip(
            symbol_to_path_band_map, processed_raster_path_list):
        path_band_id = symbol_to_path_band_map[symbol][1]
        symbol_to_path_band_map[symbol] = (raster_path, path_band_id)

    # this sets a common target sr, pixel size, and resample method
    args.update({
        'churn_dir': workspace_dir,
        'symbol_to_path_band_map': symbol_to_path_band_map,
    })
    del args['symbol_to_path_map']
    if 'build_overview' in args:
        del args['build_overview']

    default_nan = None
    default_inf = None
    if 'default_nan' in args:
        default_nan = args['default_nan']
    if 'default_inf' in args:
        default_inf = args['default_inf']

    expression = args['expression']
    # search for percentile functions
    match_obj = re.match(
        r'(.*)(percentile\(([^,]*), ([^)]*)\))(.*)', expression)
    if match_obj:
        base_raster_path_band = args['symbol_to_path_band_map'][
            match_obj.group(3)]
        percentile_threshold = float(match_obj.group(4))
        working_sort_directory = tempfile.mkdtemp(dir=workspace_dir)
        LOGGER.debug(
            'doing percentile of %s to %s', base_raster_path_band,
            percentile_threshold)
        percentile_val = pygeoprocessing.raster_band_percentile(
            base_raster_path_band, working_sort_directory,
            [percentile_threshold])[0]
        expression = '%s%f%s' % (
            match_obj.group(1), percentile_val, match_obj.group(5))
        LOGGER.debug('new expression: %s', expression)

    if not expression.startswith('mask(raster'):
        pygeoprocessing.symbolic.evaluate_raster_calculator_expression(
            expression, args['symbol_to_path_band_map'],
            args['target_nodata'], args['target_raster_path'],
            default_nan=default_nan, default_inf=default_inf)
    else:
        # parse out array
        arg_list = expression.split(',')
        # the first 1 to n-1 args must be integers
        mask_val_list = [int(val) for val in arg_list[1:-1]]
        # the last argument could be 'invert=?'
        if 'invert' in arg_list[-1]:
            invert = 'True' in arg_list[-1]
        else:
            # if it's not, it'll be another integer
            mask_val_list.append(int(arg_list[-1][:-1]))
            invert = False
        LOGGER.debug('mask raster %s by %s -> %s' % (
            symbol_to_path_band_map['raster'], str(mask_val_list),
            args['target_raster_path']))
        mask_raster_by_array(
            symbol_to_path_band_map['raster'], numpy.array(mask_val_list),
            args['target_raster_path'], invert)
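# A standalone sketch (hypothetical expression and stand-in threshold value)
# of the percentile() substitution performed above: the regex pulls out the
# symbol and threshold, and the computed percentile value is spliced back
# into the expression string before evaluation.
import re


def _example_percentile_substitution():
    """Illustrative only: rewrite percentile(symbol, threshold) in place."""
    expression = '(biomass > percentile(biomass, 90)) * biomass'
    match_obj = re.match(
        r'(.*)(percentile\(([^,]*), ([^)]*)\))(.*)', expression)
    if match_obj:
        # group(3) is the symbol ('biomass'), group(4) the threshold ('90');
        # 123.45 stands in for the raster_band_percentile result
        percentile_val = 123.45
        expression = '%s%f%s' % (
            match_obj.group(1), percentile_val, match_obj.group(5))
    print(expression)  # '(biomass > 123.450000) * biomass'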
def main():
    """Entry point."""
    #for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
    #    try:
    #        os.makedirs(dir_path)
    #    except OSError:
    #        pass
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_wb_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')

    raster_path = os.path.join(WORKSPACE_DIR, os.path.basename(RASTER_URL))
    download_raster_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(RASTER_URL, raster_path),
        target_path_list=[raster_path],
        task_name='download raster')

    #world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    #world_borders_layer = world_borders_vector.GetLayer()
    #wgs84_srs = osr.SpatialReference()
    #wgs84_srs.ImportFromEPSG(4326)

    # mask out everything that's not a country
    masked_raster_path = os.path.join(
        WORKSPACE_DIR, '%s_masked%s' % os.path.splitext(
            os.path.basename(raster_path)))
    # we need to define this because otherwise no nodata value is defined
    mask_nodata = -1
    mask_task = task_graph.add_task(
        func=pygeoprocessing.mask_raster,
        args=(
            (raster_path, 1), world_borders_path, masked_raster_path),
        kwargs={
            'raster_driver_creation_tuple': GTIFF_CREATION_TUPLE_OPTIONS,
            'target_mask_value': mask_nodata,
        },
        target_path_list=[masked_raster_path],
        dependent_task_list=[download_wb_task, download_raster_task],
        task_name='mask raster')

    download_raster_task.join()
    raster_info = pygeoprocessing.get_raster_info(raster_path)

    country_name = "Global"
    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    country_threshold_table_file = open(country_threshold_table_path, 'w')
    country_threshold_table_file.write(
        'country,percentile at 90% max,pixel count\n')

    target_percentile_pickle_path = os.path.join(
        WORKSPACE_DIR, '%s.pkl' % (
            os.path.basename(os.path.splitext(raster_path)[0])))
    calculate_percentiles_task = task_graph.add_task(
        func=calculate_percentiles,
        args=(
            raster_path, PERCENTILE_LIST, target_percentile_pickle_path),
        target_path_list=[target_percentile_pickle_path],
        dependent_task_list=[mask_task],
        task_name='calculate percentiles')
    calculate_percentiles_task.join()

    with open(target_percentile_pickle_path, 'rb') as pickle_file:
        percentile_values = pickle.load(pickle_file)
    LOGGER.debug(
        "len percentile_values: %d len PERCENTILE_LIST: %d",
        len(percentile_values), len(PERCENTILE_LIST))

    cdf_array = [0.0] * len(percentile_values)

    raster_info = pygeoprocessing.get_raster_info(raster_path)
    nodata = raster_info['nodata'][0]
    valid_pixel_count = 0
    total_pixel_count = 0
    total_pixels = (
        raster_info['raster_size'][0] * raster_info['raster_size'][1])
    for _, data_block in pygeoprocessing.iterblocks(
            (raster_path, 1), largest_block=2**28):
        nodata_mask = ~numpy.isclose(data_block, nodata)
        nonzero_count = numpy.count_nonzero(nodata_mask)
        if nonzero_count == 0:
            continue
        valid_pixel_count += nonzero_count
        for index, percentile_value in enumerate(percentile_values):
            cdf_array[index] += numpy.sum((data_block[
                nodata_mask & (data_block >= percentile_value)]).astype(
                    numpy.float32))
        total_pixel_count += data_block.size
        LOGGER.debug(
            '%.2f%% complete', (100.0 * total_pixel_count) / total_pixels)
        LOGGER.debug('current cdf array: %s', cdf_array)

    # threshold is at 90% says Becky
    threshold_limit = 0.9 * cdf_array[2]

    LOGGER.debug(cdf_array)
    fig, ax = matplotlib.pyplot.subplots()
    ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
    f = scipy.interpolate.interp1d(
        cdf_array, list(reversed(PERCENTILE_LIST)))
    try:
        cdf_threshold = f(threshold_limit)
    except ValueError:
        LOGGER.exception(
            "error when passing threshold_limit: %s\ncdf_array: %s" % (
                threshold_limit, cdf_array))
        cdf_threshold = cdf_array[2]

    ax.plot([0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
    ax.plot([cdf_threshold, cdf_threshold],
            [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)
    ax.grid(True, linestyle='-.')
    ax.set_title(
        '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
            country_name, threshold_limit, cdf_threshold, valid_pixel_count))
    ax.set_ylabel(
        'Sum of %s up to 100-percentile' % os.path.basename(raster_path))
    ax.set_xlabel('100-percentile')
    ax.tick_params(labelcolor='r', labelsize='medium', width=3)
    matplotlib.pyplot.autoscale(enable=True, tight=True)
    matplotlib.pyplot.savefig(
        os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
    country_threshold_table_file.write(
        '%s, %f, %d\n' % (country_name, cdf_threshold, valid_pixel_count))
    country_threshold_table_file.flush()
    country_threshold_table_file.close()
    return

    # everything below this `return` is the per-country version and is not
    # executed as written
    for world_border_feature in world_borders_layer:
        country_name = world_border_feature.GetField('nev_name')
        country_name = country_name.replace('.', '')
        LOGGER.debug(country_name)
        country_workspace = os.path.join(COUNTRY_WORKSPACES, country_name)
        try:
            os.makedirs(country_workspace)
        except OSError:
            pass

        country_vector = os.path.join(
            country_workspace, '%s.gpkg' % country_name)
        country_vector_complete_token = os.path.join(
            country_workspace, '%s.COMPLETE' % country_name)
        extract_feature(
            world_borders_path, world_border_feature.GetFID(),
            wgs84_srs.ExportToWkt(), country_vector,
            country_vector_complete_token)

        country_raster_path = os.path.join(country_workspace, '%s_%s' % (
            country_name, os.path.basename(RASTER_PATH)))

        country_vector_info = pygeoprocessing.get_vector_info(country_vector)
        pygeoprocessing.warp_raster(
            RASTER_PATH, raster_info['pixel_size'], country_raster_path,
            'near', target_bb=country_vector_info['bounding_box'],
            vector_mask_options={'mask_vector_path': country_vector},
            working_dir=country_workspace)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
        if len(percentile_values) != len(PERCENTILE_LIST):
            continue
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)

        nodata = pygeoprocessing.get_raster_info(
            country_raster_path)['nodata'][0]
        valid_pixel_count = 0
        for _, data_block in pygeoprocessing.iterblocks(
                (country_raster_path, 1)):
            nodata_mask = ~numpy.isclose(data_block, nodata)
            valid_pixel_count += numpy.count_nonzero(nodata_mask)
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum(data_block[
                    nodata_mask & (data_block >= percentile_value)])

        # threshold is at 90% says Becky
        threshold_limit = 0.9 * cdf_array[2]
        LOGGER.debug(cdf_array)
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
        f = scipy.interpolate.interp1d(
            cdf_array, list(reversed(PERCENTILE_LIST)))
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s" % (
                    threshold_limit, cdf_array))
            cdf_threshold = cdf_array[2]

        ax.plot(
            [0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
        ax.plot(
            [cdf_threshold, cdf_threshold],
            [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)
        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
                country_name, threshold_limit, cdf_threshold,
                valid_pixel_count))
        ax.set_ylabel(
            'Sum of %s up to 100-percentile' % os.path.basename(RASTER_PATH))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        country_threshold_table_file.write(
            '%s, %f, %d\n' % (
                country_name, cdf_threshold, valid_pixel_count))
        country_threshold_table_file.flush()
    country_threshold_table_file.close()
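# Toy sketch (made-up numbers) of the threshold logic above: take 90% of one
# of the cumulative sums and interpolate the CDF backwards to find the
# percentile at which that sum is reached.
import scipy.interpolate


def _example_cdf_threshold():
    """Illustrative only: invert a tiny CDF to find the 90%-of-max cutoff."""
    percentile_list = [0, 50, 90, 95, 99, 100]
    # sum of pixel values at or above each cutoff, so it shrinks as the
    # cutoff rises
    cdf_array = [1000.0, 900.0, 400.0, 250.0, 60.0, 5.0]
    threshold_limit = 0.9 * cdf_array[2]  # 360.0
    f = scipy.interpolate.interp1d(
        cdf_array, list(reversed(percentile_list)))
    cdf_threshold = f(threshold_limit)
    print(threshold_limit, float(cdf_threshold))  # 360.0, ~93.7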
def main():
    """Entry point."""
    for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
        try:
            os.makedirs(dir_path)
        except OSError:
            pass

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))

    download_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')
    download_task.join()

    world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    world_borders_layer = world_borders_vector.GetLayer()

    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)

    raster_info = pygeoprocessing.get_raster_info(RASTER_PATH)

    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    country_threshold_table_file = open(country_threshold_table_path, 'w')
    country_threshold_table_file.write(
        'country,percentile at 90% max,pixel count\n')
    for world_border_feature in world_borders_layer:
        country_name = world_border_feature.GetField('NAME')
        if country_name != 'Canada':
            continue
        LOGGER.debug(country_name)
        country_workspace = os.path.join(COUNTRY_WORKSPACES, country_name)
        try:
            os.makedirs(country_workspace)
        except OSError:
            pass

        country_vector = os.path.join(
            country_workspace, '%s.gpkg' % country_name)
        country_vector_complete_token = os.path.join(
            country_workspace, '%s.COMPLETE' % country_name)
        extract_feature(
            world_borders_path, world_border_feature.GetFID(),
            wgs84_srs.ExportToWkt(), country_vector,
            country_vector_complete_token)

        country_raster_path = os.path.join(country_workspace, '%s_%s' % (
            country_name, os.path.basename(RASTER_PATH)))

        country_vector_info = pygeoprocessing.get_vector_info(country_vector)
        pygeoprocessing.warp_raster(
            RASTER_PATH, raster_info['pixel_size'], country_raster_path,
            'near', target_bb=country_vector_info['bounding_box'],
            vector_mask_options={'mask_vector_path': country_vector},
            working_dir=country_workspace)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
        if len(percentile_values) != len(PERCENTILE_LIST):
            continue
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)

        nodata = pygeoprocessing.get_raster_info(
            country_raster_path)['nodata'][0]
        pixel_count = 0
        for _, data_block in pygeoprocessing.iterblocks(
                (country_raster_path, 1)):
            nodata_mask = ~numpy.isclose(data_block, nodata)
            pixel_count += numpy.count_nonzero(nodata_mask)
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum(data_block[
                    nodata_mask & (data_block >= percentile_value)])

        # threshold is at 90% says Becky
        threshold_limit = 0.9 * cdf_array[2]
        LOGGER.debug(cdf_array)
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(list(reversed(PERCENTILE_LIST)), cdf_array)
        f = scipy.interpolate.interp1d(
            cdf_array, list(reversed(PERCENTILE_LIST)))
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s" % (
                    threshold_limit, cdf_array))
            cdf_threshold = cdf_array[2]

        ax.plot(
            [0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
        ax.plot(
            [cdf_threshold, cdf_threshold],
            [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)
        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
                country_name, threshold_limit, cdf_threshold, pixel_count))
        ax.set_ylabel(
            'Sum of %s up to 100-percentile' % os.path.basename(RASTER_PATH))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        country_threshold_table_file.write(
            '%s, %f, %d\n' % (country_name, cdf_threshold, pixel_count))
        country_threshold_table_file.flush()
    country_threshold_table_file.close()
def main():
    # POTENTIAL
    # [0.0, 8.223874317755279e-18, 0.06352668319825519, 0.6784644064412253,
    #  1.2982949910007597, 1.4329746715109062, 1.5756065342319365,
    #  1.7761127919757702, 2.040984541853515, 2.344609197149186,
    #  2.55102265792189, 2.8146687301480546, 5.87844488615983]
    path = r"C:\Users\Becky\Documents\raster_calculations\aggregate_potential_ES_score_nspwng.tif"
    nodata_value = pygeoprocessing.get_raster_info(path)['nodata'][0]
    top2_sum = 0.0
    top5_sum = 0.0
    top10_sum = 0.0
    top20_sum = 0.0
    top30_sum = 0.0
    top40_sum = 0.0
    top50_sum = 0.0
    top60_sum = 0.0
    top70_sum = 0.0
    top80_sum = 0.0
    top90_sum = 0.0
    full_sum = 0.0
    for _, block_data in pygeoprocessing.iterblocks((path, 1)):
        nodata_mask = numpy.isclose(block_data, nodata_value)
        top2_mask = block_data > 2.8146687301480546
        top2_sum += numpy.sum(block_data[top2_mask & ~nodata_mask])
        top5_mask = block_data > 2.55102265792189
        top5_sum += numpy.sum(block_data[top5_mask & ~nodata_mask])
        top10_mask = block_data > 2.344609197149186
        top10_sum += numpy.sum(block_data[top10_mask & ~nodata_mask])
        top20_mask = block_data > 2.040984541853515
        top20_sum += numpy.sum(block_data[top20_mask & ~nodata_mask])
        top30_mask = block_data > 1.7761127919757702
        top30_sum += numpy.sum(block_data[top30_mask & ~nodata_mask])
        top40_mask = block_data > 1.5756065342319365
        top40_sum += numpy.sum(block_data[top40_mask & ~nodata_mask])
        top50_mask = block_data > 1.4329746715109062
        top50_sum += numpy.sum(block_data[top50_mask & ~nodata_mask])
        top60_mask = block_data > 1.2982949910007597
        top60_sum += numpy.sum(block_data[top60_mask & ~nodata_mask])
        top70_mask = block_data > 0.6784644064412253
        top70_sum += numpy.sum(block_data[top70_mask & ~nodata_mask])
        top80_mask = block_data > 0.06352668319825519
        top80_sum += numpy.sum(block_data[top80_mask & ~nodata_mask])
        top90_mask = block_data > 8.223874317755279e-18
        top90_sum += numpy.sum(block_data[top90_mask & ~nodata_mask])
        nonzero_mask = block_data != 0
        full_sum += numpy.sum(block_data[nonzero_mask & ~nodata_mask])
    print(
        'Pixel sum stats from %s\n'
        '2 pct sum:   %14.2f\n'
        '5 pct sum:   %14.2f\n'
        '10 pct sum:  %14.2f\n'
        '20 pct sum:  %14.2f\n'
        '30 pct sum:  %14.2f\n'
        '40 pct sum:  %14.2f\n'
        '50 pct sum:  %14.2f\n'
        '60 pct sum:  %14.2f\n'
        '70 pct sum:  %14.2f\n'
        '80 pct sum:  %14.2f\n'
        '90 pct sum:  %14.2f\n'
        '100 pct sum: %14.2f\n' % (
            path, top2_sum, top5_sum, top10_sum, top20_sum, top30_sum,
            top40_sum, top50_sum, top60_sum, top70_sum, top80_sum,
            top90_sum, full_sum))
    #Pixel sum stats from C:\Users\Becky\Documents\raster_calculations\aggregate_potential_ES_score_nspwpg.tif
    # This layer only had 5 services so isn't fully comparable to realized
    # (no surrogate for non-wood foraged products, which should have just
    # been all natural habitat for potential)
    #
    # [8.223874317755279e-18, 0.06277088660611055, 0.31905198201749124,
    #  0.43141886583982053, 0.5513050308982201, 0.7021776828519225,
    #  0.8801414329582294, 1.0867488999270096, 1.3572950878165897,
    #  1.5653558772021574, 2.14759821821794, 4.87844488615983]
    #1 pct sum:      34709125.64
    #5 pct sum:     135799124.58
    #10 pct sum:    237768933.24
    #20 pct sum:    410918702.08
    #30 pct sum:    549156818.78
    #40 pct sum:    660915846.65
    #50 pct sum:    749234063.39
    #60 pct sum:    818545087.19
    #70 pct sum:    871880286.70
    #80 pct sum:    901438219.50
    #90 pct sum:    903098652.38
    #100 pct sum:   903098652.38
    return

    # REALIZED
    # [0.0, 0.0, 2.6564152339677546e-05, 0.00449669105901578,
    #  0.026592994668002544, 0.08908325455615322, 0.21252896986988581,
    #  0.4257240946680402, 0.8519801985470177, 1.1987215681382737,
    #  1.54221074228756]
r"C:\Users\Becky\Documents\raster_calculations\aggregate_realized_ES_score_nspntg_renorm_md5_f788b5b627aa06c4028a2277da9d8dc0.tif" nodata_value = pygeoprocessing.get_raster_info(path)['nodata'][0] top2_sum = 0.0 top5_sum = 0.0 top10_sum = 0.0 top20_sum = 0.0 top30_sum = 0.0 top40_sum = 0.0 top50_sum = 0.0 top60_sum = 0.0 top70_sum = 0.0 top80_sum = 0.0 top90_sum = 0.0 full_sum = 0.0 for _, block_data in pygeoprocessing.iterblocks((path, 1)): nodata_mask = numpy.isclose(block_data, nodata_value) top2_mask = block_data > 1.54221074228756 top2_sum += numpy.sum(block_data[top2_mask & ~nodata_mask]) top5_mask = block_data > 1.1987215681382737 top5_sum += numpy.sum(block_data[top5_mask & ~nodata_mask]) top10_mask = block_data > 0.8519801985470177 top10_sum += numpy.sum(block_data[top10_mask & ~nodata_mask]) top20_mask = block_data > 0.4257240946680402 top20_sum += numpy.sum(block_data[top20_mask & ~nodata_mask]) top30_mask = block_data > 0.21252896986988581 top30_sum += numpy.sum(block_data[top30_mask & ~nodata_mask]) top40_mask = block_data > 0.08908325455615322 top40_sum += numpy.sum(block_data[top40_mask & ~nodata_mask]) top50_mask = block_data > 0.026592994668002544 top50_sum += numpy.sum(block_data[top50_mask & ~nodata_mask]) top60_mask = block_data > 0.00449669105901578 top60_sum += numpy.sum(block_data[top60_mask & ~nodata_mask]) top70_mask = block_data > 2.6564152339677546e-05 top70_sum += numpy.sum(block_data[top70_mask & ~nodata_mask]) nonzero_mask = block_data != 0 full_sum += numpy.sum(block_data[nonzero_mask & ~nodata_mask]) print('Pixel sum stats from %s\n' '2.5 pct sum: %14.2f\n' '5 pct sum: %14.2f\n' '10 pct sum: %14.2f\n' '20 pct sum: %14.2f\n' '30 pct sum: %14.2f\n' '40 pct sum: %14.2f\n' '50 pct sum: %14.2f\n' '60 pct sum: %14.2f\n' '70 pct sum: %14.2f\n' '100 pct sum: %14.2f\n' % (path, top2_sum, top5_sum, top10_sum, top20_sum, top30_sum, top40_sum, top50_sum, top60_sum, top70_sum, full_sum)) #2.5 pct sum: 77750003.43 #5 pct sum: 130085623.90 #10 pct sum: 209758688.42 #20 pct sum: 304675563.91 #30 pct sum: 352506707.61 #40 pct sum: 375005156.25 #50 pct sum: 383134918.72 #60 pct sum: 385359011.24 #70 pct sum: 385546722.25 #100 pct sum: 385546979.30 return # terminates at this point #path = r"C:\Users\Becky\Documents\raster_calculations\aggregate_realized_ES_score_nspntg_renorm_md5_f788b5b627aa06c4028a2277da9d8dc0.tif" path = r"C:\Users\Becky\Documents\raster_calculations\CNC_workspace\masked_nathab_esa_md5_40577bae3ef60519b1043bb8582a07af.tif" # gets the nodata value from the first band ([0]) of `path` nodata_value = pygeoprocessing.get_raster_info(path)['nodata'][0] # loop over all memory blocks of the first band of path (indicated by # the (path, 1) tuple, and ignore the second argument from iterblocks that # shows what block it is (that's the `_`) nonzero_count = 0 total_pixels = 0 nodata_count = 0 running_sum = 0.0 for _, block_data in pygeoprocessing.iterblocks((path, 1)): # we'll use this nodata mask to mask only valid nonzero counts and # also to count the number of nodata in the raster nodata_mask = numpy.isclose(block_data, nodata_value) # make a mask where the raster block is != 0 AND is not equal to a # nodata value nonzero_mask = block_data != 0 nonzero_count += numpy.count_nonzero(nonzero_mask & ~nodata_mask) # only get the valid numbers for the sum running_sum += numpy.sum(block_data[nonzero_mask & ~nodata_mask]) # count # of nodata pixels nodata_count += numpy.count_nonzero(nodata_mask) # and count for the total size of the block total_pixels += block_data.size 
    # this is fine:
    print(
        'Pixel count stats from %s\n'
        'total pixels:                   %11d\n'
        'nonzero non-nodata pixel count: %11d\n'
        'nodata count:                   %11d\n'
        'sum:                            %14.2f\n' % (
            path, total_pixels, nonzero_count, nodata_count, running_sum))
    return

    #print(
    #    'Pixel count stats from %s\n'
    #    'total pixels: %11d\n'
    #    'nonzero non-nodata pixel count: %11d\n' % (
    #        path, total_pixels, nonzero_count))

    ## for aggregate_realized_ES_score_nspntg_renorm_md5_f788b5b627aa06c4028a2277da9d8dc0
    #total pixels: 6531840000
    #nonzero non-nodata pixel count: 1133004447
    #nodata count: 5118894498

    ## for masked_nathab_esa_md5_40577bae3ef60519b1043bb8582a07af.tif
    #total pixels: 8398080000
    #nonzero non-nodata pixel count: 1257421938
    #nodata count: 0
    #sum: 0.00

    # So 1/10 of 1257421938 is 125742194 <-- the number of pixels at this
    # resolution making up 10% of the remaining natural habitat land area.
    # For aggregate ES, that corresponds to 125742194/1133004447 =
    # 0.1109812007648722. So if we want the top ~11% of pixels we need the
    # 89th percentile cutoff (100 - 11).

    nathab_path = r"C:\Users\Becky\Documents\raster_calculations\CNC_workspace\masked_nathab_esa_md5_40577bae3ef60519b1043bb8582a07af.tif"
    nathab_nodata_value = pygeoprocessing.get_raster_info(
        nathab_path)['nodata'][0]
    nathab_nonzero_count = 0
    for _, nathab_block_data in pygeoprocessing.iterblocks((nathab_path, 1)):
        nathab_nodata_mask = numpy.isclose(
            nathab_block_data, nathab_nodata_value)
        nathab_nonzero_mask = nathab_block_data != 0
        nathab_nonzero_count += numpy.count_nonzero(
            nathab_nonzero_mask & ~nathab_nodata_mask)

    pct_path = r"C:\Users\Becky\Documents\raster_calculations\aggregate_realized_ES_score_nspntg_renorm_md5_f788b5b627aa06c4028a2277da9d8dc0.tif"
    percentile_working_dir = r"C:\Users\Becky\Documents\raster_calculations\percentile_working_dir"
    try:
        os.makedirs(percentile_working_dir)
    except OSError:
        pass
    percentile_values_list = pygeoprocessing.raster_band_percentile(
        (pct_path, 1), percentile_working_dir,
        [1, 12, 23, 34, 45, 56, 67, 78, 89, 94.5, 97.25])
    shutil.rmtree(percentile_working_dir)
    print(percentile_values_list)
    # aggregate_realized_ES_score_nspntg_renorm_md5_f788b5b627aa06c4028a2277da9d8dc0
    # [0.0, 0.0, 2.6564152339677546e-05, 0.00449669105901578,
    #  0.026592994668002544, 0.08908325455615322, 0.21252896986988581,
    #  0.4257240946680402, 0.8519801985470177, 1.1987215681382737,
    #  1.54221074228756]

    pct_path = r"C:\Users\Becky\Documents\raster_calculations\aggregate_potential_ES_score_nspwpg.tif"
    percentile_working_dir = r"C:\Users\Becky\Documents\raster_calculations\percentile_working_dir"
    try:
        os.makedirs(percentile_working_dir)
    except OSError:
        pass
    percentile_values_list = pygeoprocessing.raster_band_percentile(
        (pct_path, 1), percentile_working_dir,
        [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99, 100])
    shutil.rmtree(percentile_working_dir)
    print(percentile_values_list)
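# Small arithmetic sketch (illustrative helper, pixel counts copied from the
# comments above) of how the 89th-percentile cutoff is derived from the
# natural-habitat pixel count.
def _example_top_fraction_to_percentile():
    """Illustrative only: convert a target pixel count to a percentile."""
    nathab_pixels = 1257421938    # nonzero non-nodata natural habitat pixels
    es_valid_pixels = 1133004447  # nonzero non-nodata aggregate ES pixels
    target = round(nathab_pixels / 10)  # 10% of natural habitat area
    top_fraction = target / es_valid_pixels  # ~0.111
    percentile_cutoff = 100.0 * (1.0 - top_fraction)
    print(percentile_cutoff)  # ~88.9, i.e. roughly the 89th percentile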