def force_geotiff_to_match_projection_ndv_and_datatype(input_path, match_path, output_path, output_datatype=None, output_ndv=None):
    """Rather than actually reprojecting, just change the metadata so it matches the match raster exactly.

    This is only useful if there was a data error and something got a projection defined when the
    underlying data wasn't actually transformed into that shape. NOTE that the output keeps the same
    geotransform as the input; only the projection, NDV and datatype change.
    """
    if output_datatype is None:
        output_datatype = hb.get_datatype_from_uri(match_path)
    if output_ndv is None:
        output_ndv = hb.get_ndv_from_path(match_path)
    match_wkt = hb.get_dataset_projection_wkt_uri(match_path)
    input_geotransform = hb.get_geotransform_uri(input_path)

    # Load the array, but use numpy to convert it to the new datatype
    input_array = hb.as_array(input_path).astype(hb.gdal_number_to_numpy_type[output_datatype])

    # Fall back to a conventional NDV if the match raster did not define one.
    if output_ndv is None:
        output_ndv = -9999

    hb.save_array_as_geotiff(input_array, output_path,
                             data_type=output_datatype,
                             ndv=output_ndv,
                             geotransform_override=input_geotransform,
                             projection_override=match_wkt)
def force_global_angular_data_to_equal_area_earth_grid(input_path, output_path):
    output_datatype = hb.get_datatype_from_uri(input_path)
    output_ndv = hb.get_ndv_from_path(input_path)

    # Override the input's projection with World Equal Area (EPSG 6933).
    match_wkt = hb.get_wkt_from_epsg_code(6933)

    input_geotransform = hb.get_geotransform_uri(input_path)
    output_geotransform = list(hb.common_geotransforms['wec_30s'])

    # Convert the angular pixel size (degrees) to a linear one (meters at the equator).
    output_geotransform[1] = input_geotransform[1] * hb.size_of_one_arcdegree_at_equator_in_meters
    output_geotransform[5] = input_geotransform[5] * hb.size_of_one_arcdegree_at_equator_in_meters

    # Load the array, but use numpy to convert it to the new datatype
    input_array = hb.as_array(input_path).astype(hb.gdal_number_to_numpy_type[output_datatype])

    if output_ndv is None:
        output_ndv = -9999

    hb.save_array_as_geotiff(input_array, output_path,
                             data_type=output_datatype,
                             ndv=output_ndv,
                             geotransform_override=output_geotransform,
                             projection_override=match_wkt)
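# Worked example of the degree-to-meter scaling above (values are illustrative
# assumptions: a 30 arc-second pixel and ~111,320 m per degree at the equator,
# mirroring hb.size_of_one_arcdegree_at_equator_in_meters).
size_of_one_arcdegree_at_equator_in_meters = 111320.0
pixel_size_degrees = 1.0 / 120.0                       # 30 arc-seconds
pixel_size_meters = pixel_size_degrees * size_of_one_arcdegree_at_equator_in_meters
print(round(pixel_size_meters, 2))                     # -> 927.67 m per pixel in the 6933 grid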
def resample_raster_preserving_sum(input_raster, match_raster, intermediate_raster, output_raster):
    rasterArray = gdal_array.LoadFile(input_raster)

    # Replace ndv (e.g. -3.4028230607370965e+38) with 0 and sum the population raster
    raster_info = pygeoprocessing.get_raster_info(input_raster)
    ndv = raster_info['nodata'][0]
    rasterArray[rasterArray == ndv] = 0
    total_pop = rasterArray.sum()
    print('Sum of pixels: ' + str(total_pop / 1e9) + ' billion')

    # Resample population with bilinear interpolation
    hb.spatial_utils.align_dataset_to_match(input_raster, match_raster, intermediate_raster,
                                            resample_method='bilinear')

    rasterArray = gdal_array.LoadFile(intermediate_raster)

    # Replace ndv with 0 and sum the resampled population
    raster_info = pygeoprocessing.get_raster_info(intermediate_raster)
    ndv = raster_info['nodata'][0]
    rasterArray[rasterArray == ndv] = 0
    total_new_fake_pop = rasterArray.sum()

    # Write output raster = intermediate raster * total_pop / total_new_fake_pop
    print('Writing raster at ' + output_raster)
    outputArray = rasterArray * (total_pop / total_new_fake_pop)
    hb.save_array_as_geotiff(outputArray, output_raster, geotiff_uri_to_match=match_raster)
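# Minimal numpy sketch of the sum-preserving rescale used above: resampling changes
# the pixel count (and thus the raw sum), so the resampled values are scaled by
# original_total / resampled_total to restore the original mass. Arrays are toy data.
import numpy as np

original = np.array([[4.0, 4.0], [4.0, 4.0]])          # total mass = 16
resampled = np.full((4, 4), 4.0)                       # naive resample: total = 64
rescaled = resampled * (original.sum() / resampled.sum())
assert np.isclose(rescaled.sum(), original.sum())      # total mass preserved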
def calc_percent_of_overall_forest_cover_within_distance_threshold():
    global p
    convolution_edge_size = int(round(float(p.distance_threshold) / float(p.resampling_threshold), 0)) * 2 + 1
    convolution_array = np.zeros((convolution_edge_size, convolution_edge_size), dtype=np.int8)
    convolution_array = circle_from_ogrid(convolution_array, convolution_edge_size)

    convolution_path = os.path.join(p.cur_dir, 'convolution.tif')
    hb.save_array_as_geotiff(convolution_array, convolution_path, p.resampled_lulc_path,
                             data_type=1, ndv=255,
                             n_cols_override=convolution_edge_size,
                             n_rows_override=convolution_edge_size)

    p.percent_of_overall_forest_cover_within_threshold_path = os.path.join(
        p.cur_dir, 'percent_of_overall_forest_cover_within_threshold.tif')
    hb.convolve_2d((p.forest_binary_path, 1), (convolution_path, 1),
                   p.percent_of_overall_forest_cover_within_threshold_path,
                   ignore_nodata=True, mask_nodata=True, normalize_kernel=False,
                   target_datatype=gdal.GDT_Float64, target_nodata=255,
                   gtiff_creation_options=hb.DEFAULT_GTIFF_CREATION_OPTIONS,
                   n_threads=1, working_dir=None)
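# circle_from_ogrid is defined elsewhere in the project; a minimal sketch of the
# presumed behavior, assuming it stamps a filled circular (disk) kernel into the
# array using np.ogrid. The name and signature here are assumptions for illustration.
import numpy as np

def circle_from_ogrid_sketch(array, edge_size):
    radius = edge_size // 2
    y, x = np.ogrid[-radius:radius + 1, -radius:radius + 1]
    array[x ** 2 + y ** 2 <= radius ** 2] = 1  # 1 inside the disk, 0 outside
    return array

kernel = circle_from_ogrid_sketch(np.zeros((5, 5), dtype=np.int8), 5)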
def create_conversion_eligibility():
    global p
    p.conversion_eligibility_dir = p.cur_dir
    if p.run_this and p.run_this_zone:
        # Prevent illogical conversion, e.g. new ag onto existing ag, or new ag onto urban
        conversion_eligibility_params = hb.file_to_python_object(p.conversion_eligibility_path,
                                                                 declare_type='DD',
                                                                 output_key_data_type=str,
                                                                 output_value_data_type=int)
        simplified_lulc_array = hb.as_array(p.lulc_simplified_path)
        for i in p.classes_with_change:
            conversion_eligibility_raster_path = os.path.join(
                p.conversion_eligibility_dir, str(i) + '_conversion_eligibility.tif')
            conversion_eligibility_array = np.zeros(simplified_lulc_array.shape).astype(np.float64)
            for j in p.classes_with_effect:
                conversion_eligibility_array = np.where(simplified_lulc_array == j,
                                                        conversion_eligibility_params[str(j)][str(i)],
                                                        conversion_eligibility_array)
            hb.save_array_as_geotiff(conversion_eligibility_array,
                                     conversion_eligibility_raster_path,
                                     p.match_int_path, compress=True)
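# Sketch of the nested dict-of-dicts ('DD') lookup used above, with toy values:
# eligibility_params[from_class][to_class] is 1 if the transition is allowed, else 0.
# The class ids and values here are assumptions for illustration only.
import numpy as np

eligibility_params = {'1': {'2': 0}, '3': {'2': 1}}    # e.g. ag->ag blocked, natural->ag allowed
lulc = np.array([[1, 3], [3, 1]])
eligibility = np.zeros(lulc.shape, dtype=np.float64)
for from_class in ('1', '3'):
    eligibility = np.where(lulc == int(from_class), eligibility_params[from_class]['2'], eligibility)
# eligibility is now 1 where the convertible class sits, 0 on existing ag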
def create_overall_suitability():
    global p
    p.overall_suitability_dir = p.cur_dir
    hb.create_directories(p.overall_suitability_dir)
    p.overall_suitability_paths = []
    if p.run_this and p.run_this_zone:
        # NOTE: the methods here assume ONLY crops will change insofar as physical suitability
        # is defined with respect to crops; 0 is 1 because 0 was already removed from the unique values.
        physical_suitability_array = hb.as_array(p.current_physical_suitability_path)
        for i in p.classes_with_change:
            suitability_path = hb.ruri(os.path.join(p.overall_suitability_dir,
                                                    'overall_suitability_' + str(i) + '.tif'))
            p.overall_suitability_paths.append(suitability_path)
            combined_adjacency_effect_path = os.path.join(
                p.convolution_inputs_dir, 'combined_adjacency_effect_' + str(i) + '.tif')
            adjacency_effect_array = hb.as_array(combined_adjacency_effect_path)
            # Didn't put this in HB because didn't want to redo the 0.4.0 release.
            adjacency_effect_array = seals_utils.normalize_array(adjacency_effect_array)

            conversion_eligibility_raster_path = os.path.join(
                p.create_conversion_eligibility_dir, str(i) + '_conversion_eligibility.tif')
            conversion_eligibility_array = hb.as_array(conversion_eligibility_raster_path)

            try:
                physical_suitability_importance = float(p.physical_suitability_importance)
            except (TypeError, ValueError):
                physical_suitability_importance = 0.5
                L.warning('Could not interpret physical suitability importance. Using default of 0.5')

            physical_suitability_array = seals_utils.normalize_array(physical_suitability_array)
            overall_suitability_array = (adjacency_effect_array
                                         + (physical_suitability_importance * physical_suitability_array)) \
                                        * conversion_eligibility_array
            overall_suitability_array = np.where(np.isnan(overall_suitability_array), 0,
                                                 overall_suitability_array)
            overall_suitability_array = np.where(overall_suitability_array < 0, 0,
                                                 overall_suitability_array)
            hb.save_array_as_geotiff(overall_suitability_array, suitability_path,
                                     p.match_float_path, compress=True)
def arrayframe_load_and_save():
    input_array = np.arange(0, 18, 1).reshape((3, 6))
    input_uri = hb.temp('.tif', remove_at_exit=False)
    geotransform = hb.calc_cylindrical_geotransform_from_array(input_array)
    # projection = hb.get_wkt_from_epsg_code(hb.common_epsg_codes_by_name['plate_carree'])
    projection = 'plate_carree'
    hb.save_array_as_geotiff(input_array, input_uri,
                             geotransform_override=geotransform,
                             projection_override=projection)
    hb.ArrayFrame(input_uri)
def test_arrayframe_load_and_save(self):
    input_array = np.arange(0, 18, 1).reshape((3, 6))
    input_uri = hb.temp('.tif', remove_at_exit=True)
    geotransform = hb.calc_cylindrical_geotransform_from_array(input_array)
    # projection = hb.get_wkt_from_epsg_code(hb.common_epsg_codes_by_name['plate_carree'])
    projection = 'wgs84'
    ndv = 255
    data_type = 1
    hb.save_array_as_geotiff(input_array, input_uri,
                             geotransform_override=geotransform,
                             projection_override=projection,
                             ndv=ndv, data_type=data_type)
    hb.ArrayFrame(input_uri)
def create_calories_per_ha(p):
    uris = hb.get_list_of_file_uris_recursively('earthstat/crop_production',
                                                filter_extensions='.tif',
                                                filter_strings='YieldPerHectare')
    nutritional_content_uri = 'crop_nutritional_contents.csv'
    nutritional_content_odict = hb.file_to_python_object(nutritional_content_uri, declare_type='DD')

    total_calories_per_ha_masked_uri = os.path.join(p.cur_dir, 'total_calories_per_ha_masked.tif')
    total_calories_per_ha_masked_array = np.zeros(hb.as_array(uris[0]).shape).astype(np.float64)

    for uri in uris:
        earthstat_name = hb.explode_uri(uri)['file_root'].split('_')[0]
        kcal_per_ton = float(nutritional_content_odict[earthstat_name]["Kcal/Kg"]) * 1000.0
        yield_per_ha = hb.as_array(uri).astype(np.float64)
        output_array = yield_per_ha * kcal_per_ton

        # Mask out cells where almost none of the area is harvested for this crop.
        mask_uri = uri.replace('YieldPerHectare', 'HarvestedAreaFraction')
        mask_array = hb.as_array(mask_uri)
        mask_array = np.where(mask_array < 0.01, 0, 1)
        output_array *= mask_array

        print(output_array.dtype, earthstat_name)
        output_uri = os.path.join(p.cur_dir, earthstat_name + '_calories_per_ha_masked.tif')
        hb.save_array_as_geotiff(output_array, output_uri, uri, data_type=7)
        total_calories_per_ha_masked_array += output_array

    hb.save_array_as_geotiff(total_calories_per_ha_masked_array,
                             total_calories_per_ha_masked_uri, uris[0], data_type=7)
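# Worked example of the kcal-per-hectare conversion above. Yield rasters are in
# tons/ha and the CSV column is Kcal/Kg, hence the *1000 kg-per-ton factor.
# The yield and kcal values below are illustrative assumptions, not EarthStat data.
yield_tons_per_ha = 3.0
kcal_per_kg = 3284.0
kcal_per_ha = yield_tons_per_ha * (kcal_per_kg * 1000.0)   # kcal/ton = kcal/kg * 1000
print(kcal_per_ha)                                         # -> 9,852,000 kcal per hectare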
def convert_states_to_ag_extent(p):
    if p.tasks['convert_states_to_ag_extent']:
        def add_crop_layers_from_dir(input_dir):
            crop_layer_names = [
                "c4per ^ area_fraction ^ C4 perennial crops.tif",
                "c4ann ^ area_fraction ^ C4 annual crops.tif",
                "c3per ^ area_fraction ^ C3 perennial crops.tif",
                "c3nfx ^ area_fraction ^ C3 nitrogen-fixing crops.tif",
                "c3ann ^ area_fraction ^ C3 annual crops.tif",
            ]
            uris_to_combine = [os.path.join(input_dir, i) for i in crop_layer_names]
            print('uris_to_combine', uris_to_combine)
            match_af = hb.ArrayFrame(uris_to_combine[0])
            proportion_cultivated = np.zeros(match_af.shape)
            mask = np.where((match_af.data >= 0.0) & (match_af.data <= 1.0))
            for uri in uris_to_combine:
                proportion_cultivated[mask] += hb.ArrayFrame(uri).data[mask]
            return proportion_cultivated

        match_path = os.path.join(p.task_dirs['extract_lulc'], p.scenario_names[0], str(p.years[0]),
                                  "c4per ^ area_fraction ^ C4 perennial crops.tif")
        for scenario_name in p.scenario_names:
            print('task_dirs', p.task_dirs['extract_lulc'])
            scenario_dir = os.path.join(p.task_dirs['extract_lulc'], scenario_name)
            for year in p.years:
                input_dir = os.path.join(p.task_dirs['extract_lulc'], scenario_name, str(year))
                print(input_dir)
                array = add_crop_layers_from_dir(input_dir)
                output_dir = os.path.join(p.task_dirs['convert_states_to_ag_extent'],
                                          scenario_name, str(year))
                hb.create_dirs(output_dir)
                output_path = os.path.join(output_dir, 'proportion_ag.tif')
                hb.save_array_as_geotiff(array, output_path, match_path)
    else:
        pass
def convert_simplified_to_original_classes():
    global p
    lulc_class_types_odict = hb.file_to_python_object(p.lulc_class_types_path, declare_type='DD')
    p.simple_classes_to_projected_original_classes = OrderedDict()
    for original_class, csv_odict in lulc_class_types_odict.items():
        if csv_odict['output_class_id'] != '':
            p.simple_classes_to_projected_original_classes[int(csv_odict['lulc_class_type'])] = \
                int(csv_odict['output_class_id'])

    if p.run_this and p.run_this_zone:
        lulc_original_classes_array = hb.as_array(p.base_year_current_zone_lulc_path)
        for c, path in enumerate(p.change_array_paths):
            change_array = hb.as_array(path)
            change_array_ndv = hb.get_nodata_from_uri(path)
            lulc_projected_original_classes_array = np.where(
                (change_array > 0) & (change_array != change_array_ndv),
                p.simple_classes_to_projected_original_classes[p.classes_projected_to_change[c]],
                lulc_original_classes_array)

        p.lulc_projected_original_classes_path = os.path.join(
            p.cur_dir, 'lulc_projected_original_classes.tif')
        hb.save_array_as_geotiff(lulc_projected_original_classes_array,
                                 p.lulc_projected_original_classes_path,
                                 p.match_int_path)
        p.layers_to_stitch.append(p.lulc_projected_original_classes_path)

    # ALSO NOTE that we only return this once, because separate batched tasks are appending to it.
    # WARNING: the only intended use of returns in a task is if it is a return resource to be
    # synced among parallel tasks.
    return ('layers_to_stitch', 'append_to_list', p.layers_to_stitch)
def aggregate_crops_by_type(**kw):
    """CMIP6 and the land-use harmonization project have centered on 5 crop types:
    c3 annual, c3 perennial, c4 annual, c4 perennial, nitrogen fixer.

    Aggregate the individual crops to those five categories by modifying the
    baseline_regression_data."""
    vars_names_to_aggregate = [
        'production_value_per_ha',
        'calories_per_ha',
        'proportion_cultivated',
        'PotassiumApplication_Rate',
        'PhosphorusApplication_Rate',
        'NitrogenApplication_Rate',
    ]
    crop_membership = OrderedDict()
    crop_membership['c3_annual'] = [
        'aniseetc', 'artichoke', 'asparagus', 'bambara', 'barley', 'buckwheat', 'cabbage',
        'canaryseed', 'carob', 'carrot', 'cassava', 'cauliflower', 'cerealnes', 'chestnut',
        'cinnamon', 'cucumberetc', 'currant', 'date', 'eggplant', 'fonio', 'garlic', 'ginger',
        'mixedgrain', 'hazelnut', 'hempseed', 'hop', 'kapokseed', 'linseed', 'mango', 'mate',
        'mustard', 'nutmeg', 'okra', 'onion', 'greenonion', 'peppermint', 'potato', 'pumpkinetc',
        'pyrethrum', 'ramie', 'rapeseed', 'rice', 'safflower', 'sisal', 'sorghumfor',
        'sourcherry', 'spinach', 'sugarbeet', 'sunflower', 'taro', 'tobacco', 'tomato',
        'triticale', 'tung', 'vanilla', 'vetch', 'walnut', 'watermelon', 'wheat', 'yam',
        'yautia',
    ]
    crop_membership['c3_perennial'] = [
        'almond', 'apple', 'apricot', 'areca', 'avocado', 'banana', 'blueberry', 'brazil',
        'cashewapple', 'cashew', 'cherry', 'chicory', 'chilleetc', 'citrusnes', 'clove',
        'cocoa', 'coconut', 'coffee', 'cotton', 'cranberry', 'fig', 'flax', 'grapefruitetc',
        'grape', 'jute', 'karite', 'kiwi', 'kolanut', 'lemonlime', 'lettuce', 'abaca',
        'melonetc', 'melonseed', 'oats', 'oilpalm', 'oilseedfor', 'olive', 'orange', 'papaya',
        'peachetc', 'pear', 'pepper', 'persimmon', 'pineapple', 'pistachio', 'plantain',
        'plum', 'poppy', 'quince', 'quinoa', 'rasberry', 'rubber', 'rye', 'stonefruitnes',
        'strawberry', 'stringbean', 'sweetpotato', 'tangetc', 'tea',
    ]
    crop_membership['c4_annual'] = [
        'maize', 'millet', 'sorghum',
    ]
    crop_membership['c4_perennial'] = [
        'greencorn', 'sugarcane',
    ]
    crop_membership['nitrogen_fixer'] = [
        'bean', 'greenbean', 'soybean', 'chickpea', 'clover', 'cowpea', 'groundnut', 'lupin',
        'pea', 'greenpea', 'pigeonpea', 'lentil', 'legumenes', 'broadbean', 'castor',
    ]

    match_path = kw['5min_floats_match_path']
    match_array = hb.as_array(match_path)

    # Iterate through crop_types
    if kw['runtime_conditionals']['aggregate_crops_by_type']:
        df = pd.DataFrame(index=range(1, 100), columns=crop_membership.keys())
        for crop_type, crops in crop_membership.items():
            L.info('Aggregating ' + str(crop_type) + ' ' + str(crops))
            crop_type_calories_output_path = os.path.join(dirs['aggregate_crops_by_type'],
                                                          crop_type + '_calories.tif')
            crop_type_calories_array = np.zeros(match_array.shape)
            current_crop_calories_array = None
            for crop in crops:
                crop_calories_path = os.path.join(hb.BASE_DATA_DIR, 'crops/crop_calories',
                                                  crop + '_calories_per_ha_masked.tif')
                current_crop_calories_array = hb.as_array(crop_calories_path)
                # Zero out NaNs, implausibly large values, and negatives before summing.
                current_crop_calories_array[np.isnan(current_crop_calories_array)] = 0.0
                current_crop_calories_array[current_crop_calories_array > 1e+14] = 0.0
                current_crop_calories_array[current_crop_calories_array < 0] = 0.0

                current_crop_climate_bins_path = os.path.join(
                    hb.BASE_DATA_DIR,
                    'crops/invest/extended_climate_bin_maps/extendedclimatebins' + crop + '.tif')
                current_crop_climate_bins = hb.as_array(current_crop_climate_bins_path)
                for i in range(1, 101):
                    # NOTE: sum_ is computed per climate bin but never stored; presumably it
                    # was meant to populate df above.
                    sum_ = np.sum(np.where(current_crop_climate_bins == i,
                                           current_crop_calories_array, 0))
                    # print(np.sum(current_crop_climate_bins))
                crop_type_calories_array += current_crop_calories_array
                # print('crop_calories_path', crop_calories_path, np.sum(current_crop_calories_array), np.sum(crop_type_calories_array))
            # print(crop_type, np.sum(crop_type_calories_array))
            hb.save_array_as_geotiff(crop_type_calories_array,
                                     crop_type_calories_output_path, match_path)
    return kw
def create_af_from_array(input_array, af_path, match_af, compress=False):
    if not os.path.exists(os.path.split(af_path)[0]):
        hb.create_directories(os.path.split(af_path)[0])
    hb.save_array_as_geotiff(input_array, af_path, match_af.path, compress=compress)
    return hb.ArrayFrame(af_path)
def calc_standardized_ecological_uncertainty():
    global p
    if not os.path.exists(p.output_dir):
        hb.create_directories(p.output_dir)

    p.standardized_ecological_uncertainty_analysis_res_path = os.path.join(
        p.cur_dir, 'standardized_ecological_uncertainty_analysis_res.tif')
    hb.raster_calculator_hb([(p.percent_of_overall_forest_cover_within_threshold_path, 1)],
                            standardized_ecological_uncertainty,
                            p.standardized_ecological_uncertainty_analysis_res_path,
                            7, -9999.0,
                            gtiff_creation_options=hb.DEFAULT_GTIFF_CREATION_OPTIONS)

    p.standardized_ecological_uncertainty_unmasked_path = os.path.join(
        p.cur_dir, 'standardized_ecological_uncertainty_unmasked.tif')
    hb.resample_to_match(p.standardized_ecological_uncertainty_analysis_res_path,
                         p.clipped_lulc_path,
                         p.standardized_ecological_uncertainty_unmasked_path)

    def mask_op(x, y):
        return np.where(x != 255, x * y, 255)

    p.standardized_ecological_uncertainty_unnormalized_path = os.path.join(
        p.cur_dir, 'standardized_ecological_uncertainty_unnormalized.tif')
    hb.raster_calculator_hb([(p.standardized_ecological_uncertainty_unmasked_path, 1),
                             (p.is_restorable_path, 1)],
                            mask_op,
                            p.standardized_ecological_uncertainty_unnormalized_path,
                            7, -9999.0,
                            gtiff_creation_options=hb.DEFAULT_GTIFF_CREATION_OPTIONS)

    # Normalize so the global max is 100.
    x = hb.as_array(p.standardized_ecological_uncertainty_unnormalized_path)
    max_value = np.max(x)
    desired_max = 100.0
    scalar = desired_max / max_value
    o = np.where(x != -9999.0, x * scalar, -9999.0)
    x = None

    p.standardized_ecological_uncertainty_path = os.path.join(
        p.cur_dir, 'standardized_ecological_uncertainty.tif')
    hb.save_array_as_geotiff(o, p.standardized_ecological_uncertainty_path,
                             p.standardized_ecological_uncertainty_unnormalized_path,
                             data_type=7, ndv=-9999.0)

    r = hb.as_array(p.is_restorable_path)
    # Not the NDV, because we're calculating deciles of the actually restorable land.
    keys_where = np.where(r == 1)
    size = len(keys_where[0])
    output = np.ones(o.shape) * 255
    stride = int(size / 10.0)

    sorted_keys_1dim = o[keys_where].argsort(axis=None)
    sorted_keys = (keys_where[0][sorted_keys_1dim], keys_where[1][sorted_keys_1dim])
    for i in range(10):
        L.info('Calculating percentile ' + str((i + 1) * 10))
        output[sorted_keys[0][i * stride:(i + 1) * stride],
               sorted_keys[1][i * stride:(i + 1) * stride]] = i + 1
    output = output.reshape(o.shape)

    p.restoration_success_deciles_pre_final_mask_path = os.path.join(
        p.cur_dir, 'restoration_success_deciles_pre_final_mask.tif')
    hb.save_array_as_geotiff(output, p.restoration_success_deciles_pre_final_mask_path,
                             p.standardized_ecological_uncertainty_unnormalized_path,
                             data_type=1, ndv=255)

    ## NOTE FOR NEXT RELEASE: The following section was written to be memory-safe and fast via
    ## raster_calculator_hb. However, that made each tile in the calculation independent of the
    ## others, which incorrectly identified the max value as the LOCAL rather than the global
    ## value. This resulted in tiling artifacts, and the percentile calculation was broken too.
    ## Thus, for time's sake, I reverted to non-memory-safe numpy arrays.

    # def normalize_op(x):
    #     min = np.min(x)
    #     max = np.max(x)
    #     desired_max = 100.0
    #     scalar = desired_max / max
    #     return np.where(x != -9999.0, x * scalar, -9999.0)
    #
    # p.standardized_ecological_uncertainty_path = os.path.join(p.cur_dir, 'standardized_ecological_uncertainty.tif')
    # hb.raster_calculator_hb([(p.standardized_ecological_uncertainty_unnormalized_path, 1)], normalize_op, p.standardized_ecological_uncertainty_path, 7, -9999.0)
    #
    # def make_deciles(x, y):
    #     keys_where = np.where(y == 1)  # Not the NDV, because we're calculating deciles of the actually restorable land
    #     # keys_where = np.where(x != -9999.0)
    #     size = len(keys_where[0])
    #     output = np.ones(x.shape) * -9999.0
    #     stride = int(size / 10.0)
    #
    #     sorted_keys_1dim = x[keys_where].argsort(axis=None)
    #     sorted_keys = (keys_where[0][sorted_keys_1dim], keys_where[1][sorted_keys_1dim])
    #     for i in range(10):
    #         L.info('Calculating percentile ' + str((i + 1) * 10))
    #         output[sorted_keys[0][i * stride: (i + 1) * stride], sorted_keys[1][i * stride: (i + 1) * stride]] = i + 1
    #     output = output.reshape(x.shape)
    #     return output
    #
    # p.restoration_success_deciles_pre_final_mask_path = os.path.join(p.cur_dir, 'restoration_success_deciles_pre_final_mask.tif')
    # hb.raster_calculator_hb([(p.standardized_ecological_uncertainty_path, 1),
    #                          (p.is_restorable_path, 1),
    #                          ], make_deciles, p.restoration_success_deciles_pre_final_mask_path, 1, 255)

    p.restoration_success_deciles_path = os.path.join(p.output_dir,
                                                      'restoration_success_deciles.tif')
    hb.set_ndv_by_mask_path(p.restoration_success_deciles_pre_final_mask_path,
                            p.valid_mask_input_res_path,
                            p.restoration_success_deciles_path)

    def cast_int(x):
        return np.byte(x)

    p.standardized_ecological_uncertainty_ints_pre_final_mask_path = os.path.join(
        p.cur_dir, 'standardized_ecological_uncertainty_ints_pre_final_mask.tif')
    hb.raster_calculator_hb([(p.standardized_ecological_uncertainty_path, 1)],
                            cast_int,
                            p.standardized_ecological_uncertainty_ints_pre_final_mask_path,
                            1, 255)

    p.standardized_ecological_uncertainty_ints_path = os.path.join(
        p.output_dir, 'standardized_ecological_uncertainty_percent.tif')
    hb.set_ndv_by_mask_path(p.standardized_ecological_uncertainty_ints_pre_final_mask_path,
                            p.valid_mask_input_res_path,
                            p.standardized_ecological_uncertainty_ints_path)
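# Minimal numpy sketch of the decile binning used above: argsort the valid values,
# then assign labels 1..10 to successive tenths of the sorted cells. Toy data only.
import numpy as np

values = np.random.default_rng(0).random((10, 10))
valid = np.where(values > 0.05)                       # stand-in for the restorable mask
order = values[valid].argsort()
deciles = np.zeros(values.shape, dtype=np.uint8)      # 0 acts as the nodata label here
stride = len(order) // 10
for i in range(10):
    rows = valid[0][order[i * stride:(i + 1) * stride]]
    cols = valid[1][order[i * stride:(i + 1) * stride]]
    deciles[rows, cols] = i + 1                       # 1 = lowest tenth, 10 = highest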
def caloric_production_change(**kw):
    if kw['runtime_conditionals']['caloric_production_change']:
        base_year = 2015
        for scenario in kw['scenario_names']:
            for year in kw['years']:
                if year != base_year:
                    for c, crop_type in enumerate(kw['crop_types_short']):
                        base_dir = os.path.join(dirs['resample_lulc'], scenario, str(base_year))
                        base_year_path = hb.list_filtered_paths_recursively(
                            base_dir, include_strings=crop_type, include_extensions='.tif')[0]
                        base_year_array = hb.as_array(base_year_path)
                        # Zero out NaNs, implausibly large values, and negatives.
                        base_year_array[np.isnan(base_year_array)] = 0.0
                        base_year_array[base_year_array > 1e+14] = 0.0
                        base_year_array[base_year_array < 0] = 0.0

                        input_dir = os.path.join(dirs['resample_lulc'], scenario, str(year))
                        input_path = hb.list_filtered_paths_recursively(
                            input_dir, include_strings=crop_type, include_extensions='.tif')[0]
                        input_array = hb.as_array(input_path)
                        input_array[np.isnan(input_array)] = 0.0
                        input_array[input_array > 1e+14] = 0.0
                        input_array[input_array < 0] = 0.0

                        calories_per_ha_array = hb.as_array(
                            os.path.join(dirs['aggregate_crops_by_type'],
                                         kw['crop_types'][c] + '_calories.tif'))
                        calories_per_ha_array[np.isnan(calories_per_ha_array)] = 0.0
                        calories_per_ha_array[calories_per_ha_array > 1e+14] = 0.0
                        calories_per_ha_array[calories_per_ha_array < 0] = 0.0

                        ha_per_cell_array = hb.as_array(
                            os.path.join(hb.BASE_DATA_DIR, 'misc', 'ha_per_cell_5m.tif'))

                        extent_difference_array = base_year_array - input_array
                        baseline_calorie_provision = calories_per_ha_array * ha_per_cell_array * base_year_array
                        calorie_provision_per_cell = calories_per_ha_array * ha_per_cell_array * input_array
                        caloric_change_per_cell = calories_per_ha_array * ha_per_cell_array * extent_difference_array

                        # calorie_provision_percent_change = (calorie_provision_per_cell / baseline_calorie_provision) * 100.0 - 100.0
                        # Computed stepwise with out/where to avoid dividing by zero where there is no baseline.
                        calorie_provision_percent_change = np.divide(
                            calorie_provision_per_cell, baseline_calorie_provision,
                            out=np.zeros_like(calorie_provision_per_cell),
                            where=baseline_calorie_provision != 0)
                        calorie_provision_percent_change = np.multiply(
                            calorie_provision_percent_change, 100.0,
                            out=np.zeros_like(calorie_provision_per_cell),
                            where=baseline_calorie_provision != 0)
                        calorie_provision_percent_change = np.subtract(
                            calorie_provision_percent_change, 100.0,
                            out=np.zeros_like(calorie_provision_per_cell),
                            where=baseline_calorie_provision != 0)

                        hb.create_dirs(os.path.join(dirs['caloric_production_change'],
                                                    scenario, str(year)))

                        extent_difference_path = os.path.join(
                            dirs['caloric_production_change'], scenario, str(year),
                            crop_type + '_extent_difference.tif')
                        hb.save_array_as_geotiff(extent_difference_array, extent_difference_path,
                                                 kw['5min_floats_match_path'],
                                                 no_data_value_override=-9999.0)

                        caloric_change_per_cell_path = os.path.join(
                            dirs['caloric_production_change'], scenario, str(year),
                            crop_type + '_caloric_change_per_cell.tif')
                        hb.save_array_as_geotiff(caloric_change_per_cell,
                                                 caloric_change_per_cell_path,
                                                 kw['5min_floats_match_path'],
                                                 no_data_value_override=-9999.0)

                        caloric_production_per_cell_path = os.path.join(
                            dirs['caloric_production_change'], scenario, str(year),
                            crop_type + '_calories_per_cell.tif')
                        hb.save_array_as_geotiff(calorie_provision_per_cell,
                                                 caloric_production_per_cell_path,
                                                 kw['5min_floats_match_path'],
                                                 no_data_value_override=-9999.0)

                        calorie_provision_percent_change_path = os.path.join(
                            dirs['caloric_production_change'], scenario, str(year),
                            crop_type + '_calories_percent_change.tif')
                        hb.save_array_as_geotiff(calorie_provision_percent_change,
                                                 calorie_provision_percent_change_path,
                                                 kw['5min_floats_match_path'],
                                                 no_data_value_override=-9999.0,
                                                 data_type_override=6)

                        produce_final = True
                        if produce_final:
                            overlay_shp_uri = os.path.join(hb.BASE_DATA_DIR, 'misc', 'countries')
                            scenario_string = (scenario.split('-')[1][0:4].upper()
                                               + 'xRCP' + scenario.split('-')[1][4]
                                               + '.' + scenario.split('-')[1][5]
                                               + '_' + scenario.split('-')[0]
                                               + '_global_' + str(year))
                            kw['output_dir'] = kw['output_dir'].replace('\\', '/')

                            output_path = os.path.join(
                                kw['output_dir'],
                                scenario_string + '_' + crop_type + '_kcal_production_per_cell.tif')
                            shutil.copy(caloric_production_per_cell_path, output_path)
                            ge.show_raster_uri(
                                output_path,
                                output_uri=output_path.replace('.tif', '.png'),
                                title=hb.explode_uri(output_path)['file_root'].replace('_', ' ').title(),
                                cbar_label='Kcal production per grid-cell given 2050 land-use',
                                cbar_percentiles=[2, 50, 98],
                                overlay_shp_uri=overlay_shp_uri,
                                use_basemap=True,
                                bounding_box='clip_poles')  # cbar_percentiles=[1, 50, 99],

                            output_path = os.path.join(
                                kw['output_dir'],
                                scenario_string + '_' + crop_type + '_percent_change.tif')
                            shutil.copy(calorie_provision_percent_change_path, output_path)
                            ge.show_raster_uri(
                                output_path,
                                output_uri=output_path.replace('.tif', '.png'),
                                title=hb.explode_uri(output_path)['file_root'].replace('_', ' ').title(),
                                cbar_label='Percent change in kcal production from land-use change',
                                vmin=-50, vmid=0, vmax=50,
                                overlay_shp_uri=overlay_shp_uri,
                                use_basemap=True,
                                bounding_box='clip_poles')  # cbar_percentiles=[1, 50, 99],
    return kw
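# Minimal numpy sketch of the zero-safe percent-change pattern used above: divide only
# where the baseline is nonzero, leaving 0 elsewhere instead of inf/NaN. Toy data only.
import numpy as np

baseline = np.array([0.0, 50.0, 200.0])
current = np.array([10.0, 75.0, 100.0])
percent_change = np.divide(current, baseline,
                           out=np.zeros_like(current),
                           where=baseline != 0) * 100.0 - 100.0
# -> [-100.,  50., -50.]; the first cell is an artifact of the zero baseline,
# which is why the original applies the *100 and -100 steps only where baseline != 0.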
def create_lulc():
    global p
    L.info('Creating class-types lulc.')
    p.name_from_iterator_replacements = hb.file_root(p.area_of_interest_path)
    p.base_year_current_zone_lulc_path = os.path.join(
        p.cur_dir, 'base_year_' + p.name_from_iterator_replacements + '.tif')

    # Create match paths of both data types
    p.match_int_path = p.base_year_current_zone_lulc_path
    p.lulc_simplified_path = os.path.join(p.cur_dir, 'lulc_simplified.tif')
    # p.lulc_simplified_path = p.base_year_current_zone_lulc_path
    p.valid_mask_path = os.path.join(p.cur_dir, 'valid_mask_high_res.tif')
    p.proportion_valid_fine_per_coarse_cell_path = os.path.join(
        p.cur_dir, 'proportion_valid_fine_per_coarse_cell.tif')

    if p.run_this:
        hb.clip_while_aligning_to_coarser(
            p.base_year_lulc_path, p.base_year_current_zone_lulc_path,
            p.area_of_interest_path, p.current_change_map_paths[0],
            resample_method='nearest', output_data_type=1, nodata_target=255,
            all_touched=True, verbose=True, ensure_fits=True,
            gtiff_creation_options=hb.DEFAULT_GTIFF_CREATION_OPTIONS)

        # Set NDV masking based on AOI of current zone.
        hb.create_valid_mask_from_vector_path(p.area_of_interest_path,
                                              p.base_year_current_zone_lulc_path,
                                              p.valid_mask_path)
        p.valid_mask = hb.as_array(p.valid_mask_path)
        hb.set_ndv_by_mask_path(p.base_year_current_zone_lulc_path, p.valid_mask_path)

        p.proportion_valid_fine_per_coarse_cell = hazelbean.pyramids.calc_proportion_of_coarse_res_with_valid_fine_res(
            p.current_change_map_paths[0], p.valid_mask_path)
        hb.save_array_as_geotiff(p.proportion_valid_fine_per_coarse_cell,
                                 p.proportion_valid_fine_per_coarse_cell_path,
                                 p.current_change_map_paths[0])

        lulc_ds = gdal.Open(p.base_year_current_zone_lulc_path)
        lulc_band = lulc_ds.GetRasterBand(1)
        lulc_array = lulc_band.ReadAsArray().astype(int)  # np.int is deprecated; use the builtin

        p.scaled_proportion_to_allocate_paths = []
        for path in p.current_change_map_paths:
            unscaled = hb.as_array(path).astype(np.float64)
            scaled_proportion_to_allocate = p.proportion_valid_fine_per_coarse_cell * unscaled
            scaled_proportion_to_allocate_path = os.path.join(
                p.cur_dir, os.path.split(hb.suri(path, 'scaled'))[1])
            hb.save_array_as_geotiff(scaled_proportion_to_allocate,
                                     scaled_proportion_to_allocate_path, path,
                                     ndv=-9999.0, data_type=7)
            p.scaled_proportion_to_allocate_paths.append(scaled_proportion_to_allocate_path)

        if os.path.exists(p.lulc_class_types_path):
            # Load the simplified class correspondence as a nested dictionary.
            lulc_class_types_odict = hb.file_to_python_object(p.lulc_class_types_path,
                                                              declare_type='DD')

            # For cythonization reasons, ensure this comes in as ints.
            lulc_class_types_ints_dict = dict()
            p.lulc_unsimplified_classes_list = []
            for row_name in lulc_class_types_odict.keys():
                lulc_class_types_ints_dict[int(row_name)] = int(
                    lulc_class_types_odict[row_name]['lulc_class_type'])
                p.lulc_unsimplified_classes_list.append(int(row_name))

            p.max_unsimplified_lulc_classes = max(p.lulc_unsimplified_classes_list)
            # DOCUMENTATION: new classes are defined here by adding one order of magnitude.
            p.new_unsimplified_lulc_addition_value = 10 ** (
                len(str(p.max_unsimplified_lulc_classes)) + 1) / 10

            # 1 is agriculture, 2 is mixed ag/natural, 3 is natural, 4 is urban, 0 is no data
            lulc_simplified_array = hb.reclassify_int_array_by_dict_to_ints(
                lulc_array, lulc_class_types_ints_dict)

            no_data_value_override = hb.get_nodata_from_uri(p.base_year_current_zone_lulc_path)
            hb.save_array_as_geotiff(lulc_simplified_array, p.lulc_simplified_path,
                                     p.base_year_current_zone_lulc_path,
                                     data_type=1, set_inf_to_no_data_value=False,
                                     ndv=no_data_value_override, compress=True)
        else:
            L.warning('No lulc_class_types_path specified. Assuming you want to run every class uniquely.')

    # If we don't run this zone, we know we will need to use the unmodified lulc when
    # stitching everything back together.
    if p.run_this_zone is False:
        p.layers_to_stitch.append(p.base_year_current_zone_lulc_path)
    else:
        p.lulc_simplified_path = p.base_year_current_zone_lulc_path
def create_physical_suitability():
    global p
    L.info('Creating physical suitability layer from base data.')
    # Physical suitability calculations; for speed this is included as a base datum.
    dem_unaligned_path = hb.temp('.tif', folder=p.workspace_dir, remove_at_exit=True)
    stats_to_calculate = ['TRI']
    hb.clip_hydrosheds_dem_from_aoi(dem_unaligned_path, p.area_of_interest_path, p.match_float_path)
    hb.calculate_topographic_stats_from_dem(dem_unaligned_path, p.physical_suitability_dir,
                                            stats_to_calculate=stats_to_calculate,
                                            output_suffix='unaligned')
    dem_path = os.path.join(p.physical_suitability_dir, 'dem.tif')
    hb.align_dataset_to_match(dem_unaligned_path, p.match_float_path, dem_path,
                              aoi_uri=p.area_of_interest_path)
    for stat in stats_to_calculate:
        stat_unaligned_path = os.path.join(p.physical_suitability_dir, stat + '_unaligned.tif')
        hb.delete_path_at_exit(stat_unaligned_path)
        stat_path = os.path.join(p.physical_suitability_dir, stat + '.tif')
        hb.align_dataset_to_match(stat_unaligned_path, p.match_float_path, stat_path,
                                  resample_method='bilinear', align_to_match=True,
                                  aoi_uri=p.area_of_interest_path)

    soc_path = os.path.join(p.physical_suitability_dir, 'soc.tif')
    hb.align_dataset_to_match(p.base_data_soc_path, p.match_int_path, soc_path,
                              aoi_uri=p.area_of_interest_path, output_data_type=7)
    tri_path = os.path.join(p.physical_suitability_dir, 'tri.tif')
    hb.align_dataset_to_match(p.base_data_tri_path, p.match_int_path, tri_path,
                              aoi_uri=p.area_of_interest_path, output_data_type=7)

    # TODOO Create cythonized array_sum_product()
    p.physical_suitability_path = os.path.join(p.physical_suitability_dir, 'physical_suitability.tif')
    soc_array = hb.as_array(soc_path)
    tri_array = hb.as_array(tri_path)
    physical_suitability_array = np.log(soc_array) - np.log(tri_array)

    # p.global_physical_suitability_path = os.path.join(p.model_base_data_dir, 'physical_suitability_compressed.tif')
    p.clipped_physical_suitability_path = os.path.join(p.cur_dir, 'physical_suitability.tif')
    if p.run_this and p.run_this_zone:
        # hb.clip_raster_by_vector(p.global_physical_suitability_path, p.physical_suitability_path, p.coarse_res_aoi_path, all_touched=True)
        hb.clip_while_aligning_to_coarser(
            p.physical_suitability_path, p.clipped_physical_suitability_path,
            p.area_of_interest_path, p.current_change_map_paths[0],
            resample_method='nearest', all_touched=True, verbose=True, ensure_fits=True,
            gtiff_creation_options=hb.DEFAULT_GTIFF_CREATION_OPTIONS)

        p.current_physical_suitability_path = p.clipped_physical_suitability_path  # NOTE awkward naming
        # hb.clip_dataset_uri(p.global_physical_suitability_path, p.coarse_res_aoi_path, p.physical_suitability_path, False, all_touched=False)
        physical_suitability_array = hb.as_array(p.current_physical_suitability_path)
        p.match_float_path = p.current_physical_suitability_path

        np.seterr(divide='ignore', invalid='ignore')
        # Zero out extreme values outside a plausible suitability range.
        physical_suitability_array = np.where(physical_suitability_array > -1000,
                                              physical_suitability_array, 0)
        physical_suitability_array = np.where(physical_suitability_array < 100000000,
                                              physical_suitability_array, 0)
        hb.save_array_as_geotiff(physical_suitability_array,
                                 p.current_physical_suitability_path,
                                 p.match_float_path, compress=True)
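# Minimal numpy sketch of the log-difference suitability above: high soil carbon and
# low terrain ruggedness give high suitability. np.errstate mirrors the np.seterr call,
# suppressing warnings where soc or tri is 0. Values are toy data.
import numpy as np

soc = np.array([[50.0, 10.0], [0.0, 80.0]])   # soil organic carbon
tri = np.array([[2.0, 20.0], [5.0, 1.0]])     # terrain ruggedness index
with np.errstate(divide='ignore', invalid='ignore'):
    suitability = np.log(soc) - np.log(tri)   # equivalent to log(soc / tri)
suitability = np.where(np.isfinite(suitability), suitability, 0)  # clean up -inf from log(0)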
ssp5_urban_change_array = ssp5_urban_array - base_urban_array
ssp1_pasture_change_array = ssp1_pasture_array - base_pasture_array
ssp3_pasture_change_array = ssp3_pasture_array - base_pasture_array
ssp5_pasture_change_array = ssp5_pasture_array - base_pasture_array

ssp1_ag_change_path = os.path.join(run_dir, 'ssp1_ag_change.tif')
ssp3_ag_change_path = os.path.join(run_dir, 'ssp3_ag_change.tif')
ssp5_ag_change_path = os.path.join(run_dir, 'ssp5_ag_change.tif')
ssp1_urban_change_path = os.path.join(run_dir, 'ssp1_urban_change.tif')
ssp3_urban_change_path = os.path.join(run_dir, 'ssp3_urban_change.tif')
ssp5_urban_change_path = os.path.join(run_dir, 'ssp5_urban_change.tif')
ssp1_pasture_change_path = os.path.join(run_dir, 'ssp1_pasture_change.tif')
ssp3_pasture_change_path = os.path.join(run_dir, 'ssp3_pasture_change.tif')
ssp5_pasture_change_path = os.path.join(run_dir, 'ssp5_pasture_change.tif')

hb.save_array_as_geotiff(ssp1_ag_change_array, ssp1_ag_change_path, match_30km_path)
hb.save_array_as_geotiff(ssp3_ag_change_array, ssp3_ag_change_path, match_30km_path)
hb.save_array_as_geotiff(ssp5_ag_change_array, ssp5_ag_change_path, match_30km_path)
hb.save_array_as_geotiff(ssp1_urban_change_array, ssp1_urban_change_path, match_30km_path)
hb.save_array_as_geotiff(ssp3_urban_change_array, ssp3_urban_change_path, match_30km_path)
hb.save_array_as_geotiff(ssp5_urban_change_array, ssp5_urban_change_path, match_30km_path)
hb.save_array_as_geotiff(ssp1_pasture_change_array, ssp1_pasture_change_path, match_30km_path)
hb.save_array_as_geotiff(ssp3_pasture_change_array, ssp3_pasture_change_path, match_30km_path)
hb.save_array_as_geotiff(ssp5_pasture_change_array, ssp5_pasture_change_path, match_30km_path)
def create_allocation_from_change_map():
    global p
    p.projected_lulc_simplified_path = hb.ruri(os.path.join(p.cur_dir, 'projected_lulc_simplified.tif'))

    # AGROSERVE shortcut note: assumed that change happens in SEQUENCE, first cropland then pasture.
    if p.run_this and p.run_this_zone:
        lulc_array = hb.as_array(p.lulc_simplified_path)
        new_lulc_array = np.copy(lulc_array)
        p.change_array_paths = []
        for change_map_index, change_map_path in enumerate(p.scaled_proportion_to_allocate_paths):
            change_to_allocate_array = hb.as_array(change_map_path)

            # Often the number of cells to be allocated is greater than the number of high-res
            # cells actually available for conversion. This happens only if the
            # conversion_elligibility.csv rules out cells (it will not happen if only adjacency
            # and physical suitability are used, as there will be SOME places, albeit terrible ones).
            num_cells_skipped = np.zeros(change_to_allocate_array.shape)

            class_to_allocate = int(os.path.split(change_map_path)[1].split('_')[0])
            current_overall_suitability_path = p.overall_suitability_paths[change_map_index]
            overall_suitability_array = hb.as_array(current_overall_suitability_path)

            # Test that map resolutions are workable multiples of each other
            aspect_ratio_test_result = int(round(overall_suitability_array.shape[0]
                                                 / change_to_allocate_array.shape[0])) \
                == int(round(overall_suitability_array.shape[1]
                             / change_to_allocate_array.shape[1]))
            if not aspect_ratio_test_result:
                warnings.warn('aspect_ratio_test_value FAILED.')
            aspect_ratio = int(round(overall_suitability_array.shape[0]
                                     / change_to_allocate_array.shape[0]))
            L.info('Beginning allocation using allocation ratio of ' + str(aspect_ratio))
            L.info('Sizes involved: overall_suitability_array, '
                   + str(overall_suitability_array.shape)
                   + ' change_to_allocate_array, ' + str(change_to_allocate_array.shape))

            ha_per_source_cell = 300 ** 2 / 100 ** 2  # (300 m)^2 / (100 m)^2 = 9 ha per source cell
            change_array = np.zeros(lulc_array.shape)
            combined_rank_array = np.zeros(lulc_array.shape).astype(np.int64)

            # TODOO Note that I ignored smaller-than-chunk shards.
            for change_map_region_row in range(change_to_allocate_array.shape[0]):
                L.info('Starting horizontal row ' + str(change_map_region_row))
                for change_map_region_col in range(change_to_allocate_array.shape[1]):
                    if not change_to_allocate_array[change_map_region_row, change_map_region_col] > 0:
                        num_cells_to_allocate = 0
                    else:
                        num_cells_to_allocate = int(round(
                            change_to_allocate_array[change_map_region_row, change_map_region_col]
                            / ha_per_source_cell))
                    if num_cells_to_allocate > 0:
                        source_map_starting_row = change_map_region_row * aspect_ratio
                        source_map_starting_col = change_map_region_col * aspect_ratio
                        combined_adjacency_effect_chunk = overall_suitability_array[
                            source_map_starting_row:source_map_starting_row + aspect_ratio,
                            source_map_starting_col:source_map_starting_col + aspect_ratio]
                        ranked_chunk, sorted_keys = hb.get_rank_array_and_keys(
                            combined_adjacency_effect_chunk, ndv=0)
                        if num_cells_to_allocate > len(sorted_keys[0]):
                            previous_num_cells_to_allocate = num_cells_to_allocate
                            num_skipped = num_cells_to_allocate - len(sorted_keys[0])
                            num_cells_to_allocate = len(sorted_keys[0])
                            L.warning('Allocation algorithm requested to allocate more cells than '
                                      'were available for transition given the suitability constraints. '
                                      'Num requested: ' + str(previous_num_cells_to_allocate)
                                      + ', Num allocated: ' + str(len(sorted_keys[0]))
                                      + ', Num skipped ' + str(num_skipped))
                            num_cells_skipped[change_map_region_row, change_map_region_col] = num_skipped
                        sorted_keys_array = np.array(sorted_keys)

                        # Create a tuple (ready for use as a numpy key) of the top allocation_amount keys
                        keys_to_change = (sorted_keys_array[0][0:num_cells_to_allocate],
                                          sorted_keys_array[1][0:num_cells_to_allocate])
                        change_chunk = np.zeros(ranked_chunk.shape)
                        change_chunk[keys_to_change] = 1

                        ## TODOO This was useful but there was a 29x29 vs 30x30 error. Re-enable after fix.
                        # For visualization purposes, show what all the ranked zones look like together when mosaicked.
                        combined_rank_array[
                            source_map_starting_row:source_map_starting_row + aspect_ratio,
                            source_map_starting_col:source_map_starting_col + aspect_ratio] = ranked_chunk

                        # TODOO BUG: there's a slight shift to the right that comes in here.
                        change_array[
                            source_map_starting_row:source_map_starting_row + aspect_ratio,
                            source_map_starting_col:source_map_starting_col + aspect_ratio] = change_chunk

            L.info('Processing outputted results.')
            p.new_classes_int_list = [13]
            p.final_lulc_addition_value = 13
            # NOTE: pasture will be 8, thus crops 9.
            new_lulc_array = np.where(change_array == 1, p.final_lulc_addition_value, new_lulc_array)

            change_array_path = os.path.join(p.cur_dir, str(class_to_allocate) + '_change_array.tif')
            p.change_array_paths.append(change_array_path)
            hb.save_array_as_geotiff(change_array, change_array_path, p.match_int_path, compress=True)

            p.num_cells_skipped_path = hb.ruri(os.path.join(
                p.cur_dir, str(class_to_allocate) + '_num_cells_skipped.tif'))
            hb.save_array_as_geotiff(num_cells_skipped, p.num_cells_skipped_path,
                                     change_map_path, compress=True)

            p.combined_rank_array_path = hb.ruri(os.path.join(
                p.cur_dir, str(class_to_allocate) + '_combined_rank_array.tif'))
            hb.save_array_as_geotiff(combined_rank_array, p.combined_rank_array_path,
                                     p.match_int_path, compress=True, data_type=7)

        hb.save_array_as_geotiff(new_lulc_array, p.projected_lulc_simplified_path,
                                 p.match_int_path, compress=True)
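# Minimal sketch of the per-chunk allocation step above, with a plain-numpy stand-in for
# hb.get_rank_array_and_keys (a hypothetical reimplementation, not the library's code):
# rank a suitability chunk, then convert its top-k cells. Toy data and k only.
import numpy as np

chunk = np.array([[0.9, 0.1, 0.0],
                  [0.4, 0.8, 0.2],
                  [0.0, 0.5, 0.7]])
valid = np.where(chunk != 0)                           # ndv=0 cells are never allocated
order = np.argsort(chunk[valid])[::-1]                 # best suitability first
k = min(4, len(order))                                 # clamp, as the skip-counting branch does
keys_to_change = (valid[0][order[:k]], valid[1][order[:k]])
change_chunk = np.zeros(chunk.shape)
change_chunk[keys_to_change] = 1                       # 1 marks cells converted this step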
def create_convolution_inputs():
    global p
    p.convolution_inputs_dir = p.cur_dir
    if p.run_this and p.run_this_zone:
        lulc_array = hb.as_array(p.lulc_simplified_path)
        ndv = hb.get_nodata_from_uri(p.lulc_simplified_path)

        # Get which values exist in simplified_lulc
        unique_values = list(hb.enumerate_array_as_odict(lulc_array).keys())
        unique_values = [int(i) for i in unique_values]

        try:
            p.classes_to_ignore = [int(i) for i in p.classes_to_ignore.split(' ')]
        except (AttributeError, ValueError):
            p.classes_to_ignore = []
        # TODOO A better approach than ignoring classes would be to encode ALL such information
        # into the different CSVs. This would allow more granular control over how, e.g.,
        # water DOES have an attraction effect but does not necessarily expand.
        ignore_values = [ndv] + p.classes_to_ignore
        p.simplified_lulc_classes = [i for i in unique_values if i not in ignore_values]

        # HACK
        p.classes_to_ignore = [0]
        p.classes_with_effect = [i for i in p.simplified_lulc_classes
                                 if i not in p.classes_to_ignore]
        L.info('Creating binaries for classes ' + str(p.classes_with_effect))

        try:
            p.max_simplified_lulc_classes = max(p.simplified_lulc_classes)
        except ValueError:
            p.max_simplified_lulc_classes = 20
        # DOCUMENTATION: new classes are defined here by adding a value one order of magnitude
        # larger (2 becomes 12 if the max is 5; 2 becomes 102 if the max is 15).
        p.new_simplified_lulc_addition_value = 10 ** (
            len(str(p.max_simplified_lulc_classes)) + 1) / 10

        p.classes_with_change = [int(os.path.split(i)[1].split('_')[0])
                                 for i in p.current_change_map_paths]

        binary_paths = []
        for unique_value in p.classes_with_effect:
            # binary_array = np.zeros(lulc_array.shape)
            binary_array = np.where(lulc_array == unique_value, 1, 0).astype(np.uint8)
            binary_path = os.path.join(p.convolution_inputs_dir,
                                       'class_' + str(unique_value) + '_binary.tif')
            binary_paths.append(binary_path)
            hb.save_array_as_geotiff(binary_array, binary_path,
                                     p.lulc_simplified_path, compress=True)

        convolution_params = hb.file_to_python_object(p.class_proximity_parameters_path,
                                                      declare_type='DD',
                                                      output_key_data_type=str,
                                                      output_value_data_type=float)
        convolution_paths = []
        for i, v in enumerate(p.classes_with_effect):
            L.info('Calculating convolution for class ' + str(v))
            binary_array = hb.as_array(binary_paths[i])
            convolution_metric = seals_utils.distance_from_blurred_threshold(
                binary_array, convolution_params[str(v)]['clustering'], 0.5,
                convolution_params[str(v)]['decay'])
            convolution_path = os.path.join(
                p.convolution_inputs_dir,
                'class_' + str(p.classes_with_effect[i]) + '_convolution.tif')
            convolution_paths.append(convolution_path)
            hb.save_array_as_geotiff(convolution_metric, convolution_path,
                                     p.match_float_path, compress=True)

        pairwise_params = hb.file_to_python_object(p.pairwise_class_relationships_path,
                                                   declare_type='DD',
                                                   output_key_data_type=str,
                                                   output_value_data_type=float)
        for i in p.classes_with_effect:
            i_convolution_path = os.path.join(p.convolution_inputs_dir,
                                              'class_' + str(i) + '_convolution.tif')
            i_convolution_array = hb.as_array(i_convolution_path)
            for j in p.classes_with_change:
                L.info('Processing effect of ' + str(i) + ' on ' + str(j))
                adjacency_effect_path = os.path.join(
                    p.convolution_inputs_dir,
                    'adjacency_effect_of_' + str(i) + '_on_' + str(j) + '.tif')
                adjacency_effect_array = i_convolution_array * pairwise_params[str(i)][str(j)]
                hb.save_array_as_geotiff(adjacency_effect_array, adjacency_effect_path,
                                         p.match_float_path, compress=True)

        for i in p.classes_with_change:
            L.info('Combining adjacency effects for class ' + str(i))
            combined_adjacency_effect_array = np.ones(lulc_array.shape)
            combined_adjacency_effect_path = os.path.join(
                p.convolution_inputs_dir, 'combined_adjacency_effect_' + str(i) + '.tif')
            for j in p.classes_with_effect:
                current_uri = os.path.join(
                    p.convolution_inputs_dir,
                    'adjacency_effect_of_' + str(j) + '_on_' + str(i) + '.tif')  # NOTICE SWITCHED I and J
                current_effect = hb.as_array(current_uri)
                combined_adjacency_effect_array *= current_effect + 1.0  # Center on 1 so that 0.0 has no effect
            hb.save_array_as_geotiff(combined_adjacency_effect_array,
                                     combined_adjacency_effect_path,
                                     p.match_float_path, compress=True)
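# Minimal numpy sketch of the multiplicative combination above: each pairwise effect is
# shifted to be centered on 1 before multiplying, so an effect of 0.0 is neutral rather
# than zeroing out the product. Toy arrays only.
import numpy as np

effect_a = np.array([0.0, 0.5, -0.25])   # e.g. attraction of class a
effect_b = np.array([0.2, 0.0, 0.4])     # e.g. attraction of class b
combined = np.ones(3)
for effect in (effect_a, effect_b):
    combined *= effect + 1.0              # 0.0 contributes a factor of exactly 1
# combined -> [1.2, 1.5, 1.05]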