def calc_emissions(tile_id, emitted_pools, sensit_type, folder, no_upload):

    uu.print_log("Calculating gross emissions for", tile_id, "using", sensit_type, "model type...")

    start = datetime.datetime.now()

    # Runs the correct C++ script given the emitted_pools (biomass+soil or soil_only) and model type selected.
    # soil_only, no_shifting_ag, and convert_to_grassland have special gross emissions C++ scripts.
    # The other sensitivity analyses and the standard model all use the same gross emissions C++ script.
    if (emitted_pools == 'soil_only') & (sensit_type == 'std'):
        cmd = ['{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst), tile_id, sensit_type, folder]

    elif (emitted_pools == 'biomass_soil') & (sensit_type in ['convert_to_grassland', 'no_shifting_ag']):
        cmd = ['{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type), tile_id, sensit_type, folder]

    # This C++ script has an extra argument that names the input carbon emitted_pools and output emissions correctly
    elif (emitted_pools == 'biomass_soil') & (sensit_type not in ['no_shifting_ag', 'convert_to_grassland']):
        cmd = ['{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst), tile_id, sensit_type, folder]

    else:
        uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid')

    uu.log_subprocess_output_full(cmd)

    # Identifies which pattern to use for counting tile completion
    pattern = cn.pattern_gross_emis_commod_biomass_soil
    if (emitted_pools == 'biomass_soil') & (sensit_type == 'std'):
        pattern = pattern
    elif (emitted_pools == 'biomass_soil') & (sensit_type != 'std'):
        pattern = pattern + "_" + sensit_type
    elif emitted_pools == 'soil_only':
        pattern = pattern.replace('biomass_soil', 'soil_only')
    else:
        uu.exception_log(no_upload, 'Pool option not valid')

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, pattern, no_upload)
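# Illustrative only: a hedged sketch of a one-off, single-tile call to calc_emissions() for local testing.
# The tile ID is a hypothetical example; this assumes the relevant C++ binary has already been compiled into
# cn.c_emis_compile_dst and that the per-tile input rasters are already present in the folder passed in.
def _example_calc_emissions_call():  # hypothetical helper, not called by the model
    # Standard biomass+soil run: resolves to calc_gross_emissions_generic.exe and counts completion with
    # cn.pattern_gross_emis_commod_biomass_soil (unchanged, because sensit_type is 'std').
    calc_emissions('00N_110E', 'biomass_soil', 'std', cn.docker_base_dir, no_upload=True)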
def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list_outer = uu.tile_list_s3(cn.net_flux_dir, sensit_type)
    # Otherwise, uses the tile list provided as the argument
    else:
        tile_id_list_outer = tile_id_list

    uu.print_log(tile_id_list_outer)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list_outer))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # List of output directories and output file name patterns.
    # Outputs must be in the same order as the download dictionary above, and the outputs for each input
    # must follow the same order. Currently, it's: per pixel full extent, per hectare forest extent, per pixel forest extent.
    output_dir_list = [
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir,
        cn.net_flux_per_pixel_full_extent_dir,
        cn.net_flux_forest_extent_dir,
        cn.net_flux_per_pixel_forest_extent_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent,
        cn.pattern_net_flux_per_pixel_full_extent,
        cn.pattern_net_flux_forest_extent,
        cn.pattern_net_flux_per_pixel_forest_extent
    ]

    # Pixel area tiles -- necessary for calculating per pixel values
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_base_dir, sensit_type, tile_id_list_outer)
    # Tree cover density, Hansen gain, and mangrove biomass tiles -- necessary for masking to forest extent
    uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir, sensit_type, tile_id_list_outer)
    uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir, sensit_type, tile_id_list_outer)
    uu.s3_flexible_download(cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000, cn.docker_base_dir,
                            sensit_type, tile_id_list_outer)

    uu.print_log("Model outputs to process are:", download_dict)

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through input tile sets
    for key, values in download_dict.items():

        # Sets the directory and pattern for the input being processed
        input_dir = key
        input_pattern = values[0]

        # If a full model run is specified, the correct set of tiles for the particular script is listed.
        # A new list is named so that tile_id_list stays as the command line argument.
        if tile_id_list == 'all':
            # List of tiles to run in the model
            tile_id_list_input = uu.tile_list_s3(input_dir, sensit_type)
        else:
            tile_id_list_input = tile_id_list

        uu.print_log(tile_id_list_input)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list_input))) + "\n")

        uu.print_log("Downloading tiles from", input_dir)
        uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir, sensit_type, tile_id_list_input)

        # Blank list of output patterns, populated below
        output_patterns = []

        # Matches the output patterns with the input pattern.
        # This requires that the output patterns be grouped by input pattern and be in the order described in
        # the comment above.
        if "gross_removals" in input_pattern:
            output_patterns = output_pattern_list[0:3]
        elif "gross_emis" in input_pattern:
            output_patterns = output_pattern_list[3:6]
        elif "net_flux" in input_pattern:
            output_patterns = output_pattern_list[6:9]
        else:
            uu.exception_log("No output patterns found for input pattern. Please check.")

        uu.print_log("Input pattern:", input_pattern)
        uu.print_log("Output patterns:", output_patterns)

        # Gross removals: 20 processors = >740 GB peak; 15 = 570 GB peak; 17 = 660 GB peak; 18 = 670 GB peak
        # Gross emissions: 17 processors = 660 GB peak; 18 = 710 GB peak
        if cn.count == 96:
            processes = 18
        else:
            processes = 2
        uu.print_log("Creating derivative outputs for {0} with {1} processors...".format(input_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(create_supplementary_outputs.create_supplementary_outputs, input_pattern=input_pattern,
                         output_patterns=output_patterns, sensit_type=sensit_type), tile_id_list_input)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list_input:
        #     create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type)

        # Checks the two forest extent output tiles created from each input tile for whether there is data in them.
        # Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent
        # version may not have pixels in the forest extent version.
        for output_pattern in output_patterns[1:3]:

            if cn.count <= 2:  # For local tests
                processes = 1
                uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
                pool = multiprocessing.Pool(processes)
                pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list_input)
                pool.close()
                pool.join()
            else:
                processes = 55  # 50 processors = 560 GB peak for gross removals; 55 = XXX GB peak
                uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
                pool = multiprocessing.Pool(processes)
                pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list_input)
                pool.close()
                pool.join()

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
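# Illustrative only: a hedged sketch of the input-pattern-to-output-pattern grouping used above, so the
# slicing is easier to follow. The literal pattern strings are hypothetical placeholders, not the actual
# values in constants_and_names.
def _example_output_pattern_grouping():  # hypothetical helper, not called by the model
    output_pattern_list = ['removals_pp_full', 'removals_ha_forest', 'removals_pp_forest',
                           'emis_pp_full', 'emis_ha_forest', 'emis_pp_forest',
                           'net_flux_pp_full', 'net_flux_ha_forest', 'net_flux_pp_forest']
    # Each input contributes three consecutive outputs, in the fixed order
    # (per pixel full extent, per hectare forest extent, per pixel forest extent):
    assert output_pattern_list[0:3] == ['removals_pp_full', 'removals_ha_forest', 'removals_pp_forest']
    assert output_pattern_list[3:6] == ['emis_pp_full', 'emis_ha_forest', 'emis_pp_forest']
    assert output_pattern_list[6:9] == ['net_flux_pp_full', 'net_flux_ha_forest', 'net_flux_pp_forest']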
def mp_plantation_preparation(gadm_index_shp, planted_index_shp):

    os.chdir(cn.docker_base_dir)

    # ## Not actually using this but leaving it here in case I want to add this functionality eventually. This
    # # was to allow users to run plantations for a select (contiguous) area rather than for the whole planet.
    # # List of bounding box coordinates
    # bound_list = args.bounding_box
    # # Checks if bounding box coordinates are in multiples of 10 (10 degree tiles). If they're not, the script stops.
    # for bound in bound_list:
    #     if bound % 10:
    #         uu.exception_log(bound, 'not a multiple of 10. Please make sure bounding box coordinates are multiples of 10.')

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (gadm_index_path not in cn.gadm_plant_1x1_index_dir or planted_index_path not in cn.gadm_plant_1x1_index_dir):
        uu.exception_log('Invalid inputs. Please provide None or s3 shapefile locations for both arguments.')

    # List of all possible 10x10 Hansen tiles except for those at very extreme latitudes (not just WHRC biomass tiles)
    total_tile_list = uu.tile_list_s3(cn.pixel_area_dir)
    uu.print_log("Number of possible 10x10 tiles to evaluate:", len(total_tile_list))

    # Removes the latitude bands that don't have any planted forests in them according to Liz Goldman.
    # i.e., Liz Goldman said by Slack on 1/2/19 that the northernmost planted forest is 69.5146 and the southernmost is -46.938968.
    # This creates a more focused list of 10x10 tiles to iterate through (removes ones that definitely don't have planted forest).
    # NOTE: If the planted forest gdb is updated, the list of latitudes to exclude below may need to be changed to not exclude certain latitude bands.
    planted_lat_tile_list = [tile for tile in total_tile_list if '90N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '50S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '60S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '70S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80S' not in tile]
    # planted_lat_tile_list = ['10N_080W']

    uu.print_log(planted_lat_tile_list)
    uu.print_log("Number of 10x10 tiles to evaluate after extreme latitudes have been removed:", len(planted_lat_tile_list))

    # If a planted forest extent 1x1 tile index shapefile isn't supplied
    if 'None' in args.planted_tile_index:

        ### Entry point 1:
        # If no shapefile of 1x1 tiles for countries with planted forests is supplied, 1x1 tiles of country extents will be created.
        # This runs the process from the very beginning and will take a few days.
        if 'None' in args.gadm_tile_index:

            uu.print_log("No GADM 1x1 tile index shapefile provided. Creating 1x1 planted forest country tiles from scratch...")

            # Downloads and unzips the GADM shapefile, which will be used to create 1x1 tiles of land areas
            uu.s3_file_download(cn.gadm_path, cn.docker_base_dir)
            cmd = ['unzip', cn.gadm_zip]
            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Creates a new GADM shapefile with just the countries that have planted forests in them.
            # This limits creation of 1x1 rasters of land area to the countries that have planted forests rather than all countries.
            # NOTE: If the planted forest gdb is updated and has new countries added to it, the planted forest country list
            # in constants_and_names.py must be updated, too.
            uu.print_log("Creating shapefile of countries with planted forests...")
            os.system('''ogr2ogr -sql "SELECT * FROM gadm_3_6_adm2_final WHERE iso IN ({0})" {1} gadm_3_6_adm2_final.shp'''.format(
                str(cn.plantation_countries)[1:-1], cn.gadm_iso))

            # Creates 1x1 degree tiles of countries that have planted forests in them.
            # I think this can handle using 50 processors because it's not trying to upload files to s3 and the tiles are small.
            # This takes several days to run because it iterates through at least 250 10x10 tiles.
            # For multiprocessor use.
            processes = 50
            uu.print_log('Rasterize GADM 1x1 max processors=', processes)
            pool = Pool(processes)
            pool.map(plantation_preparation.rasterize_gadm_1x1, planted_lat_tile_list)
            pool.close()
            pool.join()

            # # Creates 1x1 degree tiles of countries that have planted forests in them.
            # # For single processor use.
            # for tile in planted_lat_tile_list:
            #     plantation_preparation.rasterize_gadm_1x1(tile)

            # Creates a shapefile of the boundaries of the 1x1 GADM tiles in countries with planted forests
            os.system('''gdaltindex {0}_{1}.shp GADM_*.tif'''.format(cn.pattern_gadm_1x1_index, uu.date_time_today))
            cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir,
                   '--exclude', '*', '--include', '{}*'.format(cn.pattern_gadm_1x1_index), '--recursive']
            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # # Saves the 1x1 country extent tiles to s3
            # # Only use if the entire process can't run in one go on the spot machine
            # cmd = ['aws', 's3', 'cp', cn.docker_base_dir, 's3://gfw2-data/climate/carbon_model/temp_spotmachine_output/',
            #        '--exclude', '*', '--include', 'GADM_*.tif', '--recursive']
            # # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            # process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            # with process.stdout:
            #     uu.log_subprocess_output(process.stdout)

            # Delete the aux.xml files
            os.system('''rm GADM*.tif.*''')

            # List of all 1x1 degree country extent tiles created
            gadm_list_1x1 = uu.tile_list_spot_machine(".", "GADM_")
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log(len(gadm_list_1x1))

        ### Entry point 2:
        # If a shapefile of the boundaries of 1x1 degree tiles of countries with planted forests is supplied,
        # a list of the 1x1 tiles is created from the shapefile.
        # This avoids creating the 1x1 country extent tiles all over again because the relevant tile extents are supplied
        # in the shapefile.
        elif cn.gadm_plant_1x1_index_dir in args.gadm_tile_index:

            uu.print_log("Country extent 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest tiles...")

            uu.print_log('{}/'.format(gadm_index_path))

            # Copies the shapefile of 1x1 tiles of extent of countries with planted forests
            cmd = ['aws', 's3', 'cp', '{}/'.format(gadm_index_path), cn.docker_base_dir, '--recursive',
                   '--exclude', '*', '--include', '{}*'.format(gadm_index_shp)]
            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Gets the attribute table of the country extent 1x1 tile shapefile
            gadm = glob.glob('{}*.dbf'.format(cn.pattern_gadm_1x1_index))[0]

            # Converts the attribute table to a dataframe
            dbf = Dbf5(gadm)
            df = dbf.to_dataframe()

            # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
            gadm_list_1x1 = df['location'].tolist()
            gadm_list_1x1 = [str(y) for y in gadm_list_1x1]
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log("There are", len(gadm_list_1x1), "1x1 country extent tiles to iterate through.")

        # In case some other arguments are provided
        else:
            uu.exception_log('Invalid GADM tile index shapefile provided. Please provide a valid shapefile.')

        # Creates 1x1 degree tiles of plantation growth wherever there are plantations.
        # Because this is iterating through all 1x1 tiles in countries with planted forests, it first checks
        # whether each 1x1 tile intersects planted forests before creating a 1x1 planted forest tile for that
        # 1x1 country extent tile.
        # 55 processors seems to use about 350 GB of memory, which seems fine. But there was some error about
        # "PQconnectdb failed -- sorry, too many clients already". So, moved the number of processors down to 48.
        # For multiprocessor use
        processes = 48
        uu.print_log('Create 1x1 plantation from 1x1 gadm max processors=', processes)
        pool = Pool(processes)
        pool.map(plantation_preparation.create_1x1_plantation_from_1x1_gadm, gadm_list_1x1)
        pool.close()
        pool.join()

        # # Creates 1x1 degree tiles of plantation growth wherever there are plantations
        # # For single processor use
        # for tile in gadm_list_1x1:
        #     plantation_preparation.create_1x1_plantation(tile)

        # Creates a shapefile in which each feature is the extent of a plantation extent tile.
        # This index shapefile can be used the next time this process is run if starting with Entry Point 3.
        os.system('''gdaltindex {0}_{1}.shp plant_gain_*.tif'''.format(cn.pattern_plant_1x1_index, uu.date_time_today))
        cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir,
               '--exclude', '*', '--include', '{}*'.format(cn.pattern_plant_1x1_index), '--recursive']
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    ### Entry point 3
    # If a shapefile of the extents of 1x1 planted forest tiles is provided.
    # This is the part that actually creates the sequestration rate and forest type tiles.
    if cn.pattern_plant_1x1_index in args.planted_tile_index:

        uu.print_log("Planted forest 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest growth rate and forest type tiles...")
        # Copies the shapefile of 1x1 tiles of extent of planted forests
        cmd = ['aws', 's3', 'cp', '{}/'.format(planted_index_path), cn.docker_base_dir, '--recursive',
               '--exclude', '*', '--include', '{}*'.format(planted_index_shp)]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # Gets the attribute table of the planted forest extent 1x1 tile shapefile
        gadm = glob.glob('{}*.dbf'.format(cn.pattern_plant_1x1_index))[0]

        # Converts the attribute table to a dataframe
        dbf = Dbf5(gadm)
        df = dbf.to_dataframe()

        # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
        planted_list_1x1 = df['location'].tolist()
        planted_list_1x1 = [str(y) for y in planted_list_1x1]
        uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", planted_list_1x1)
        uu.print_log("There are", len(planted_list_1x1), "1x1 planted forest extent tiles to iterate through.")

        # Creates 1x1 degree tiles of plantation growth and type wherever there are plantations.
        # Because this is iterating through only 1x1 tiles that are known to have planted forests (from a previous run
        # of this script), it does not need to check whether there are planted forests in this tile. It goes directly
        # to intersecting the planted forest table with the 1x1 tile.

        # # For single processor use
        # for tile in planted_list_1x1:
        #     plantation_preparation.create_1x1_plantation_growth_from_1x1_planted(tile)

        # For multiprocessor use
        # processes=40 uses about 360 GB of memory. Works on r4.16xlarge with space to spare
        # processes=52 uses about 465 GB of memory (quite stably), so this is basically the max.
        num_of_processes = 52
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_growth_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()

        # Creates 1x1 degree tiles of plantation type wherever there are plantations.
        # This works with 50 processors on an r4.16xlarge machine. Uses about 430 GB out of 480 GB.
        processes = 50
        uu.print_log('Create 1x1 plantation type max processors=', processes)
        pool = Pool(processes)
        pool.map(plantation_preparation.create_1x1_plantation_type_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()

        # This rasterizes the plantation removal factor standard deviations
        # processes=50 peaks at about 450 GB
        num_of_processes = 50
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_stdev_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()
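# Illustrative only: a hedged sketch of how the country filter above is formatted into the ogr2ogr SQL.
# The ISO codes below are hypothetical placeholders; the real list lives in constants_and_names.plantation_countries.
def _example_plantation_country_sql():  # hypothetical helper, not called by the model
    plantation_countries = ['BRA', 'IDN', 'USA']  # hypothetical subset
    # str(list)[1:-1] drops the surrounding brackets, leaving a quoted, comma-separated string
    # that can be dropped straight into a SQL IN (...) clause.
    sql = "SELECT * FROM gadm_3_6_adm2_final WHERE iso IN ({})".format(str(plantation_countries)[1:-1])
    return sql  # "SELECT * FROM gadm_3_6_adm2_final WHERE iso IN ('BRA', 'IDN', 'USA')"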
def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date=None):

    os.chdir(cn.docker_base_dir)

    folder = cn.docker_base_dir

    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # If the tile_list argument is an s3 folder, the list of tiles in it is created
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year],
        cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year],
        cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000],
        cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000],
        cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000],
        cn.peat_mask_dir: [cn.pattern_peat_mask],
        cn.ifl_primary_processed_dir: [cn.pattern_ifl_primary],
        cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked],
        cn.drivers_processed_dir: [cn.pattern_drivers],
        cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        cn.burn_year_dir: [cn.pattern_burn_year]
    }

    # Special loss tiles for the Brazil and Mekong sensitivity analyses
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Checks the validity of the emitted_pools argument
    if (emitted_pools not in ['soil_only', 'biomass_soil']):
        uu.exception_log('Invalid pool input. Please choose soil_only or biomass_soil.')

    # Checks if the correct C++ script has been compiled for the pool option selected
    if emitted_pools == 'biomass_soil':

        # Output file directories for biomass+soil. Must be in same order as output pattern directories.
        output_dir_list = [
            cn.gross_emis_commod_biomass_soil_dir,
            cn.gross_emis_shifting_ag_biomass_soil_dir,
            cn.gross_emis_forestry_biomass_soil_dir,
            cn.gross_emis_wildfire_biomass_soil_dir,
            cn.gross_emis_urban_biomass_soil_dir,
            cn.gross_emis_no_driver_biomass_soil_dir,
            cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
            cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
            cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
            cn.gross_emis_nodes_biomass_soil_dir
        ]

        output_pattern_list = [
            cn.pattern_gross_emis_commod_biomass_soil,
            cn.pattern_gross_emis_shifting_ag_biomass_soil,
            cn.pattern_gross_emis_forestry_biomass_soil,
            cn.pattern_gross_emis_wildfire_biomass_soil,
            cn.pattern_gross_emis_urban_biomass_soil,
            cn.pattern_gross_emis_no_driver_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil,
            cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil,
            cn.pattern_gross_emis_nodes_biomass_soil
        ]

        # Some sensitivity analyses have specific gross emissions scripts.
        # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
        if sensit_type in ['no_shifting_ag', 'convert_to_grassland']:
            # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(sensit_type)):
            if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type)):
                uu.print_log("C++ for {} already compiled.".format(sensit_type))
            else:
                uu.exception_log('Must compile {} model C++...'.format(sensit_type))
        else:
            if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log('Must compile generic emissions C++...')

    elif (emitted_pools == 'soil_only') & (sensit_type == 'std'):
        if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst)):
            uu.print_log("C++ for soil_only already compiled.")

            # Output file directories for soil_only. Must be in same order as output pattern directories.
            output_dir_list = [
                cn.gross_emis_commod_soil_only_dir,
                cn.gross_emis_shifting_ag_soil_only_dir,
                cn.gross_emis_forestry_soil_only_dir,
                cn.gross_emis_wildfire_soil_only_dir,
                cn.gross_emis_urban_soil_only_dir,
                cn.gross_emis_no_driver_soil_only_dir,
                cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                cn.gross_emis_nodes_soil_only_dir
            ]

            output_pattern_list = [
                cn.pattern_gross_emis_commod_soil_only,
                cn.pattern_gross_emis_shifting_ag_soil_only,
                cn.pattern_gross_emis_forestry_soil_only,
                cn.pattern_gross_emis_wildfire_soil_only,
                cn.pattern_gross_emis_urban_soil_only,
                cn.pattern_gross_emis_no_driver_soil_only,
                cn.pattern_gross_emis_all_gases_all_drivers_soil_only,
                cn.pattern_gross_emis_co2_only_all_drivers_soil_only,
                cn.pattern_gross_emis_non_co2_all_drivers_soil_only,
                cn.pattern_gross_emis_nodes_soil_only
            ]

        else:
            uu.exception_log('Must compile soil_only C++...')

    else:
        uu.exception_log('Pool and/or sensitivity analysis option not valid')

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, folder, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    uu.print_log(output_dir_list)
    uu.print_log(output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # The C++ code expects certain tiles for every input 10x10.
    # However, not all Hansen tiles have all of these inputs.
    # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent tiles.
    # That way, the C++ script gets all the necessary input files.
    # If it doesn't get the necessary inputs, it skips that tile.
uu.print_log("Making blank tiles for inputs that don't currently exist") # All of the inputs that need to have dummy tiles made in order to match the tile list of the carbon emitted_pools pattern_list = [ cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask, cn.pattern_ifl_primary, cn.pattern_drivers, cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year, cn.pattern_climate_zone, cn.pattern_soil_C_emis_year_2000 ] # textfile that stores the names of the blank tiles that are created for processing. # This will be iterated through to delete the tiles at the end of the script. uu.create_blank_tile_txt() for pattern in pattern_list: pool = multiprocessing.Pool(processes=60) # 60 = 100 GB peak pool.map( partial(uu.make_blank_tile, pattern=pattern, folder=folder, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for pattern in pattern_list: # for tile in tile_id_list: # uu.make_blank_tile(tile, pattern, folder, sensit_type) # Calculates gross emissions for each tile # count/4 uses about 390 GB on a r4.16xlarge spot machine. # processes=18 uses about 440 GB on an r4.16xlarge spot machine. if cn.count == 96: if sensit_type == 'biomass_swap': processes = 15 # 15 processors = XXX GB peak else: processes = 19 # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 714 GB peak else: processes = 9 uu.print_log('Gross emissions max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(calculate_gross_emissions.calc_emissions, emitted_pools=emitted_pools, sensit_type=sensit_type, folder=folder), tile_id_list) pool.close() pool.join() # # For single processor use # for tile in tile_id_list: # calculate_gross_emissions.calc_emissions(tile, emitted_pools, sensit_type, folder) # Print the list of blank created tiles, delete the tiles, and delete their text file uu.list_and_delete_blank_tiles() for i in range(0, len(output_pattern_list)): pattern = output_pattern_list[i] uu.print_log("Adding metadata tags for pattern {}".format(pattern)) if cn.count == 96: processes = 45 # 45 processors = XXX GB peak else: processes = 9 uu.print_log('Adding metadata tags max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(calculate_gross_emissions.add_metadata_tags, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # for tile_id in tile_id_list: # calculate_gross_emissions.add_metadata_tags(tile_id, pattern, sensit_type) # Uploads emissions to appropriate directory for the carbon emitted_pools chosen for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def main():

    no_upload = False

    sensit_type = "legal_Amazon_loss"

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    Brazil_stages = ['all', 'create_forest_extent', 'create_loss']

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description='Create tiles of forest extent in legal Amazon in 2000 and annual loss according to PRODES')
    parser.add_argument('--stages', '-s', required=True,
                        help='Stages of creating Brazil legal Amazon-specific gross cumulative removals. Options are {}'.format(Brazil_stages))
    parser.add_argument('--run_through', '-r', required=True,
                        help='Options: true or false. true: run named stage and following stages. false: run only named stage.')
    args = parser.parse_args()
    stage_input = args.stages
    run_through = args.run_through

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (stage_input not in Brazil_stages):
        uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', Brazil_stages)
    else:
        pass
    if (run_through not in ['true', 'false']):
        uu.exception_log(no_upload, 'Invalid run through option. Please enter true or false.')
    else:
        pass

    actual_stages = uu.analysis_stages(Brazil_stages, stage_input, run_through, sensit_type)
    uu.print_log(actual_stages)

    # By definition, this script is for the Brazil legal Amazon loss sensitivity analysis
    sensit_type = 'legal_Amazon_loss'

    # List of output directories and output file name patterns
    master_output_dir_list = [cn.Brazil_forest_extent_2000_processed_dir, cn.Brazil_annual_loss_processed_dir]
    master_output_pattern_list = [cn.pattern_Brazil_forest_extent_2000_processed, cn.pattern_Brazil_annual_loss_processed]

    # Creates forest extent 2000 raster from multiple PRODES forest extent rasters
    ### NOTE: Didn't redo this for model v1.2.0, so I don't know if it still works.
    if 'create_forest_extent' in actual_stages:

        uu.print_log('Creating forest extent tiles')

        # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
        tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
        # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E",
        #                 "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"]  # test tiles
        # tile_id_list = ['50N_130W']  # test tiles
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input rasters and lists them
        uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir, cn.docker_base_dir, sensit_type)
        raw_forest_extent_inputs = glob.glob('*_AMZ_warped_*tif')  # The list of tiles to merge

        # Gets the resolution of a more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('*2019_AMZ_warped_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log(pixelSizeX)
        uu.print_log(pixelSizeY)

        # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out
        # at about 300 GB as it progresses about 15% each time; then the memory drops back to 0 and slowly increases.
        cmd = ['gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged),
               '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
               '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
               raw_forest_extent_inputs[0], raw_forest_extent_inputs[1], raw_forest_extent_inputs[2],
               raw_forest_extent_inputs[3], raw_forest_extent_inputs[4], raw_forest_extent_inputs[5]]
        uu.log_subprocess_output_full(cmd)

        # Uploads the merged forest extent raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_forest_extent_2000_merged_dir, cn.pattern_Brazil_forest_extent_2000_merged)

        # Creates legal Amazon extent 2000 tiles
        source_raster = '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged)
        out_pattern = cn.pattern_Brazil_forest_extent_2000_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern,
                         dt=dt, no_upload=no_upload), tile_id_list)

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        upload_dir = master_output_dir_list[0]
        pattern = master_output_pattern_list[0]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)

    # Creates annual loss raster for 2001-2019 from multiple PRODES rasters
    if 'create_loss' in actual_stages:

        uu.print_log('Creating annual PRODES loss tiles')

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input rasters and lists them
        cmd = ['aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.', '--recursive']
        uu.log_subprocess_output_full(cmd)
        uu.print_log("Input loss rasters downloaded. Getting resolution of recent raster...")

        # Gets the resolution of the more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('Prodes2019_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log("  Recent raster resolution: {0} by {1}".format(pixelSizeX, pixelSizeY))

        # This merges both loss rasters together, so it takes a lot of memory and time. It seems to max out
        # at about 180 GB, then go back to 0.
        # This took about 8 minutes.
uu.print_log( "Merging input loss rasters into a composite for all years...") cmd = [ 'gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged), '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY), 'Prodes2019_annual_loss_2008_2019.tif', 'Prodes2014_annual_loss_2001_2007.tif' ] uu.log_subprocess_output_full(cmd) uu.print_log(" Loss rasters combined into composite") # Uploads the merged loss raster to s3 for future reference uu.upload_final_set(cn.Brazil_annual_loss_merged_dir, cn.pattern_Brazil_annual_loss_merged) # Creates annual loss 2001-2015 tiles uu.print_log("Warping composite PRODES loss to Hansen tiles...") source_raster = '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged) out_pattern = cn.pattern_Brazil_annual_loss_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) uu.print_log(" PRODES composite loss raster warped to Hansen tiles") # Checks if each tile has data in it. Only tiles with data are uploaded. # In practice, every Amazon tile has loss in it but I figured I'd do this just to be thorough. upload_dir = master_output_dir_list[1] pattern = master_output_pattern_list[1] pool = multiprocessing.Pool(cn.count - 5) pool.map( partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list) # Creates forest age category tiles if 'forest_age_category' in actual_stages: uu.print_log('Creating forest age category tiles') # Files to download for this script. download_dict = { cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed], cn.gain_dir: [cn.pattern_gain], cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted], cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.Brazil_forest_extent_2000_processed_dir: [cn.pattern_Brazil_forest_extent_2000_processed] } tile_id_list = uu.tile_list_s3( cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list) stage_output_pattern_list = uu.alter_patterns( sensit_type, master_output_pattern_list) output_pattern = stage_output_pattern_list[2] # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html # With processes=30, peak usage was about 350 GB using WHRC AGB. # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that. 
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(partial(legal_AMZ_loss.legal_Amazon_forest_age_category, sensit_type=sensit_type,
                         output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern)

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[2], stage_output_pattern_list[2])

    # Creates tiles of the number of years of removals
    if 'gain_year_count' in actual_stages:

        uu.print_log('Creating gain year count tiles for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir: [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[3]

        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only, sensit_type=sensit_type),
                 tile_id_list)

        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change, sensit_type=sensit_type),
                 tile_id_list)

        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard, sensit_type=sensit_type),
                 tile_id_list)

        pool = multiprocessing.Pool(int(cn.count / 8))  # count/5 uses more than 160 GB of memory. count/8 uses about 120 GB of memory.
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge, output_pattern=output_pattern),
                 tile_id_list)

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern)

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_and_gain")

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3])

    # Creates tiles of annual AGB and BGB gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'annual_removals' in actual_stages:

        uu.print_log('Creating annual removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (annual removals).
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[4:6])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[4:6])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # Table with IPCC Table 4.9 default gain rates
        cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="natrl fores gain, for std model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

        # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
        gain_table_cont_eco_age = pd.melt(gain_table_simplified, id_vars=['gainEcoCon'],
                                          value_vars=['growth_primary', 'growth_secondary_greater_20',
                                                      'growth_secondary_less_20'])
        gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

        # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
        # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
        # Assigns removal rate of 0 when there's no age category.
        gain_table_con_eco_only = gain_table_cont_eco_age
        gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(subset='gainEcoCon', keep='first')
        gain_table_con_eco_only['value'] = 0
        gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only['gainEcoCon']

        # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
        age_dict = {'growth_primary': 10000, 'growth_secondary_greater_20': 20000, 'growth_secondary_less_20': 30000}

        # Creates a unique value for each continent-ecozone-age category
        gain_table_cont_eco_age = gain_table_cont_eco_age.replace({"variable": age_dict})
        gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age['gainEcoCon'] + gain_table_cont_eco_age['variable']

        # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
        gain_table_all_combos = pd.concat([gain_table_con_eco_only, gain_table_cont_eco_age])

        # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
        gain_table_dict = pd.Series(gain_table_all_combos.value.values,
                                    index=gain_table_all_combos.cont_eco_age).to_dict()

        # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
        gain_table_dict[0] = 0

        # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
        for key, value in age_dict.items():
            gain_table_dict[value] = 0

        # Converts all the keys (continent-ecozone-age codes) to float type
        gain_table_dict = {float(key): value for key, value in gain_table_dict.items()}

        uu.print_log(gain_table_dict)

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function.
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # processes=24 peaks at about 440 GB of memory on an r4.16xlarge machine
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(partial(annual_gain_rate_natrl_forest.annual_gain_rate, sensit_type=sensit_type,
                         gain_table_dict=gain_table_dict, output_pattern_list=output_pattern_list),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_id_list:
        #     annual_gain_rate_natrl_forest.annual_gain_rate(tile, sensit_type, gain_table_dict, stage_output_pattern_list)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i])

    # Creates tiles of cumulative AGCO2 and BGCO2 gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'cumulative_removals' in actual_stages:

        uu.print_log('Creating cumulative removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_natrl_forest_dir: [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.gain_year_count_natrl_forest_dir: [cn.pattern_gain_year_count_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[6:8])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[6:8])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # Calculates cumulative aboveground carbon gain in non-mangrove, non-planted natural forests
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(partial(cumulative_gain_natrl_forest.cumulative_gain_AGCO2, output_pattern_list=output_pattern_list,
                         sensit_type=sensit_type), tile_id_list)

        # Calculates cumulative belowground carbon gain in non-mangrove, non-planted natural forests
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(partial(cumulative_gain_natrl_forest.cumulative_gain_BGCO2, output_pattern_list=output_pattern_list,
                         sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_AGCO2(tile_id, stage_output_pattern_list[0], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_BGCO2(tile_id, stage_output_pattern_list[1], sensit_type)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i])

    # Creates tiles of annual gain rate and cumulative removals for all forest types (above + belowground)
    if 'removals_merged' in actual_stages:

        uu.print_log('Creating annual and cumulative removals for all forest types combined (above + belowground)')

        # Files to download for this script
        download_dict = {
            cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove],
            cn.annual_gain_BGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_BGB_planted_forest_non_mangrove],
            cn.annual_gain_BGB_natrl_forest_dir: [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.cumul_gain_AGCO2_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.cumul_gain_BGCO2_mangrove_dir: [cn.pattern_cumul_gain_BGCO2_mangrove],
            cn.cumul_gain_BGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_BGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_BGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_BGCO2_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (merged removals).
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[8:10])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[8:10])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # For multiprocessing
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(partial(merge_cumulative_annual_gain_all_forest_types.gain_merge, output_pattern_list=output_pattern_list,
                         sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     merge_cumulative_annual_gain_all_forest_types.gain_merge(tile_id, output_pattern_list, sensit_type)

        # Uploads output tiles to s3
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i])

    # Creates carbon emitted_pools in loss year
    if 'carbon_pools' in actual_stages:

        uu.print_log('Creating emissions year carbon emitted_pools')

        # Specifies that carbon emitted_pools are created for loss year rather than in 2000
        extent = 'loss'

        # Files to download for this script
        download_dict = {
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.cumul_gain_AGCO2_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        else:
            download_dict[cn.loss_dir] = ['']

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[10:16])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[10:16])

        # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
        cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="mangrove gain, for model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

        mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                          cn.below_to_above_trop_dry_mang,
                                                                          cn.below_to_above_trop_wet_mang,
                                                                          cn.below_to_above_subtrop_mang)

        mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                               cn.deadwood_to_above_trop_dry_mang,
                                                                               cn.deadwood_to_above_trop_wet_mang,
                                                                               cn.deadwood_to_above_subtrop_mang)

        mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                             cn.litter_to_above_trop_dry_mang,
                                                                             cn.litter_to_above_trop_wet_mang,
                                                                             cn.litter_to_above_subtrop_mang)

        if extent == 'loss':

            uu.print_log("Creating tiles of emitted aboveground carbon (carbon 2000 + carbon accumulation until loss year)")

            # 16 processors seems to use more than 460 GB -- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 410-415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(int(cn.count / 4))
            pool.map(partial(create_carbon_pools.create_emitted_AGC, pattern=pattern, sensit_type=sensit_type),
                     tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_emitted_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0])

        elif extent == '2000':

            uu.print_log("Creating tiles of aboveground carbon in 2000")

            # 16 processors seems to use more than 460 GB -- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(processes=14)
            pool.map(partial(create_carbon_pools.create_2000_AGC, pattern=pattern, sensit_type=sensit_type),
                     tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_2000_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0])

        else:
            uu.exception_log(no_upload, "Extent argument not valid")
create_carbon_pools.create_2000_AGC(tile_id, output_pattern_list[0], sensit_type) uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0]) else: uu.exception_log(no_upload, "Extent argument not valid") uu.print_log("Creating tiles of belowground carbon") # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[1] pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, extent, stage_output_pattern_list[1], sensit_type) uu.upload_final_set(stage_output_dir_list[1], stage_output_pattern_list[1]) uu.print_log("Creating tiles of deadwood carbon") # processes=16 maxes out at about 430 GB # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[2] pool = multiprocessing.Pool(int(cn.count / 4)) pool.map( partial(create_carbon_pools.create_deadwood, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_deadwood(tile_id, mang_deadwood_AGB_ratio, extent, stage_output_pattern_list[2], sensit_type) uu.upload_final_set(stage_output_dir_list[2], stage_output_pattern_list[2]) uu.print_log("Creating tiles of litter carbon") # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[3] pool = multiprocessing.Pool(int(cn.count / 4)) pool.map( partial(create_carbon_pools.create_litter, mang_litter_AGB_ratio=mang_litter_AGB_ratio, extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_litter(tile_id, mang_litter_AGB_ratio, extent, stage_output_pattern_list[3], sensit_type) uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3]) if extent == 'loss': uu.print_log("Creating tiles of soil carbon") # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[4] pool = multiprocessing.Pool(int(cn.count / 3)) pool.map( partial(create_carbon_pools.create_soil, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_soil(tile_id, stage_output_pattern_list[4], sensit_type) uu.upload_final_set(stage_output_dir_list[4], stage_output_pattern_list[4]) elif extent == '2000': uu.print_log("Skipping soil for 2000 carbon pool calculation") else: uu.exception_log(no_upload, "Extent argument not valid") uu.print_log("Creating tiles of total carbon") # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory # at peak. Probably could've handled 16 processors on an r4.16xlarge machine but I didn't feel like taking the time to check. 
# Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[5] pool = multiprocessing.Pool(int(cn.count / 4)) pool.map( partial(create_carbon_pools.create_total_C, extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_total_C(tile_id, extent, stage_output_pattern_list[5], sensit_type) uu.upload_final_set(stage_output_dir_list[5], stage_output_pattern_list[5])
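# Note added for clarity: each carbon-pool step above repeats the same structure -- pick an output
# pattern, fan the tile list out over a multiprocessing.Pool with functools.partial, then upload the
# finished tiles with uu.upload_final_set(). The commented sketch below shows that pattern in
# isolation. It is illustrative only: example_worker, run_stage, and the tile/pattern values are
# hypothetical placeholders, not functions or outputs of this model.
#
# import multiprocessing
# from functools import partial
#
# def example_worker(tile_id, pattern, sensit_type):
#     # Stand-in for a per-tile function such as create_carbon_pools.create_BGC
#     print('Processing {0} with pattern {1} ({2})'.format(tile_id, pattern, sensit_type))
#
# def run_stage(tile_ids, pattern, sensit_type, processes=4):
#     # partial() fixes the keyword arguments so pool.map only has to supply each tile id
#     pool = multiprocessing.Pool(processes)
#     pool.map(partial(example_worker, pattern=pattern, sensit_type=sensit_type), tile_ids)
#     pool.close()
#     pool.join()
#
# run_stage(['00N_110E', '00N_120E'], 'example_pattern', 'std')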
def main(): os.chdir(cn.docker_base_dir) # List of possible model stages to run (not including mangrove and planted forest stages) model_stages = [ 'all', 'model_extent', 'forest_age_category_IPCC', 'annual_removals_IPCC', 'annual_removals_all_forest_types', 'gain_year_count', 'gross_removals_all_forest_types', 'carbon_pools', 'gross_emissions', 'net_flux', 'aggregate' ] # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run parser = argparse.ArgumentParser( description='Run the full carbon flux model') parser.add_argument('--model-type', '-t', required=True, help='{}'.format(cn.model_type_arg_help)) parser.add_argument( '--stages', '-s', required=True, help='Stages for running the flux model. Options are {}'.format( model_stages)) parser.add_argument( '--run-through', '-r', required=True, help= 'Options: true or false. true: run named stage and following stages. false: run only named stage.' ) parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') parser.add_argument( '--tile-id-list', '-l', required=True, help= 'List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.' ) parser.add_argument( '--carbon-pool-extent', '-ce', required=False, help= 'Time period for which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss' ) parser.add_argument( '--emitted-pools-to-use', '-p', required=False, help= 'Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.' ) parser.add_argument( '--tcd-threshold', '-tcd', required=False, help= 'Tree cover density threshold above which pixels will be included in the aggregation.' ) parser.add_argument( '--std-net-flux-aggreg', '-sagg', required=False, help= 'The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map' ) parser.add_argument( '--mangroves', '-ma', required=False, help= 'Include mangrove removal rate and standard deviation tile creation step (before model extent). true or false.' ) parser.add_argument( '--us-rates', '-us', required=False, help= 'Include US removal rate and standard deviation tile creation step (before model extent). true or false.' ) parser.add_argument( '--per-pixel-results', '-ppr', required=False, help= 'Include per pixel result calculations for gross emissions (all gases, all pools), gross removals, and net flux. true or false.' 
) parser.add_argument('--log-note', '-ln', required=False, help='Note to include in log header about model run.') args = parser.parse_args() sensit_type = args.model_type stage_input = args.stages run_through = args.run_through run_date = args.run_date tile_id_list = args.tile_id_list carbon_pool_extent = args.carbon_pool_extent emitted_pools = args.emitted_pools_to_use thresh = args.tcd_threshold if thresh is not None: thresh = int(thresh) std_net_flux = args.std_net_flux_aggreg include_mangroves = args.mangroves include_us = args.us_rates include_per_pixel = args.per_pixel_results log_note = args.log_note # Start time for script script_start = datetime.datetime.now() # Create the output log uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, stage_input=stage_input, run_through=run_through, carbon_pool_extent=carbon_pool_extent, emitted_pools=emitted_pools, thresh=thresh, std_net_flux=std_net_flux, include_mangroves=include_mangroves, include_us=include_us, include_per_pixel=include_per_pixel, log_note=log_note) # Checks the validity of the model stage arguments. If either one is invalid, the script ends. if (stage_input not in model_stages): uu.exception_log( 'Invalid stage selection. Please provide a stage from', model_stages) else: pass if (run_through not in ['true', 'false']): uu.exception_log( 'Invalid run through option. Please enter true or false.') else: pass # Generates the list of stages to run actual_stages = uu.analysis_stages(model_stages, stage_input, run_through, include_mangroves=include_mangroves, include_us=include_us, include_per_pixel=include_per_pixel) uu.print_log("Analysis stages to run:", actual_stages) # Reports how much storage is being used with files uu.check_storage() # Checks whether the sensitivity analysis argument is valid uu.check_sensit_type(sensit_type) # Checks if the carbon pool type is specified if the stages to run includes carbon pool generation. # Does this up front so the user knows before the run begins that information is missing. if ('carbon_pools' in actual_stages) & (carbon_pool_extent not in [ 'loss', '2000', 'loss,2000', '2000,loss' ]): uu.exception_log( "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss." ) # Checks if the correct c++ script has been compiled for the pool option selected. # Does this up front so that the user is prompted to compile the C++ before the script starts running, if necessary. if 'gross_emissions' in actual_stages: if emitted_pools == 'biomass_soil': # Some sensitivity analyses have specific gross emissions scripts. # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script. 
if sensit_type in ['no_shifting_ag', 'convert_to_grassland']: if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format( cn.c_emis_compile_dst, sensit_type)): uu.print_log( "C++ for {} already compiled.".format(sensit_type)) else: uu.exception_log( 'Must compile {} model C++...'.format( sensit_type)) else: if os.path.exists( '{0}/calc_gross_emissions_generic.exe'.format( cn.c_emis_compile_dst)): uu.print_log("C++ for generic emissions already compiled.") else: uu.exception_log('Must compile generic emissions C++...') elif (emitted_pools == 'soil_only') & (sensit_type == 'std'): if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format( cn.c_emis_compile_dst)): uu.print_log("C++ for soil_only emissions already compiled.") else: uu.exception_log('Must compile soil_only C++...') else: uu.exception_log( 'Pool and/or sensitivity analysis option not valid for gross emissions' ) # Checks whether the canopy cover argument is valid up front. # The threshold must be provided and must be an integer between 0 and 99. if 'aggregate' in actual_stages: if thresh is None or thresh < 0 or thresh > 99: uu.exception_log( 'Invalid tcd. Please provide an integer between 0 and 99.') else: pass # If the tile_list argument is an s3 folder, the list of tiles in it is created if 's3://' in tile_id_list: tile_id_list = uu.tile_list_s3(tile_id_list, 'std') uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))), "\n") # Otherwise, check that the tile list argument is valid. "all" is the way to specify that all tiles should be processed else: tile_id_list = uu.tile_id_list_check(tile_id_list) # List of output directories and output file name patterns. # The directory list is only used for counting tiles in output folders at the end of the model output_dir_list = [ cn.model_extent_dir, cn.age_cat_IPCC_dir, cn.annual_gain_AGB_IPCC_defaults_dir, cn.annual_gain_BGB_IPCC_defaults_dir, cn.stdev_annual_gain_AGB_IPCC_defaults_dir, cn.removal_forest_type_dir, cn.annual_gain_AGC_all_types_dir, cn.annual_gain_BGC_all_types_dir, cn.annual_gain_AGC_BGC_all_types_dir, cn.stdev_annual_gain_AGC_all_types_dir, cn.gain_year_count_dir, cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_dir ] # Prepends the mangrove and US output directories if those stages are included if 'annual_removals_mangrove' in actual_stages: output_dir_list = [ cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir, cn.stdev_annual_gain_AGB_mangrove_dir ] + output_dir_list if 'annual_removals_us' in actual_stages: output_dir_list = [ cn.annual_gain_AGC_BGC_natrl_forest_US_dir, cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir ] + output_dir_list # Adds the carbon directories depending on which carbon pools are being generated: 2000 and/or emissions year if 'carbon_pools' in actual_stages: if 'loss' in carbon_pool_extent: output_dir_list = output_dir_list + [ cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir ] if '2000' in carbon_pool_extent: output_dir_list = output_dir_list + [ cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir, cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir ] # Adds the biomass_soil output directories or the soil_only output directories depending on the model run if 'gross_emissions' in actual_stages: if emitted_pools == 'biomass_soil': output_dir_list = output_dir_list + [ cn.gross_emis_commod_biomass_soil_dir, cn.gross_emis_shifting_ag_biomass_soil_dir,
cn.gross_emis_forestry_biomass_soil_dir, cn.gross_emis_wildfire_biomass_soil_dir, cn.gross_emis_urban_biomass_soil_dir, cn.gross_emis_no_driver_biomass_soil_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_dir, cn.gross_emis_co2_only_all_drivers_biomass_soil_dir, cn.gross_emis_non_co2_all_drivers_biomass_soil_dir, cn.gross_emis_nodes_biomass_soil_dir ] else: output_dir_list = output_dir_list + [ cn.gross_emis_commod_soil_only_dir, cn.gross_emis_shifting_ag_soil_only_dir, cn.gross_emis_forestry_soil_only_dir, cn.gross_emis_wildfire_soil_only_dir, cn.gross_emis_urban_soil_only_dir, cn.gross_emis_no_driver_soil_only_dir, cn.gross_emis_all_gases_all_drivers_soil_only_dir, cn.gross_emis_co2_only_all_drivers_soil_only_dir, cn.gross_emis_non_co2_all_drivers_soil_only_dir, cn.gross_emis_nodes_soil_only_dir ] output_dir_list = output_dir_list + [ cn.net_flux_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_dir, cn.net_flux_per_pixel_dir ] # Output patterns aren't actually used in the script-- here just for reference. output_pattern_list = [ cn.pattern_model_extent, cn.pattern_age_cat_IPCC, cn.pattern_annual_gain_AGB_IPCC_defaults, cn.pattern_annual_gain_BGB_IPCC_defaults, cn.pattern_stdev_annual_gain_AGB_IPCC_defaults, cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types, cn.pattern_annual_gain_BGC_all_types, cn.pattern_annual_gain_AGC_BGC_all_types, cn.pattern_stdev_annual_gain_AGC_all_types, cn.pattern_gain_year_count, cn.pattern_cumul_gain_AGCO2_all_types, cn.pattern_cumul_gain_BGCO2_all_types, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types ] # Prepends the mangrove and US output pattern if mangroves are included if 'annual_removals_mangrove' in actual_stages: output_pattern_list = [ cn.pattern_annual_gain_AGB_mangrove, cn.pattern_annual_gain_BGB_mangrove, cn.pattern_stdev_annual_gain_AGB_mangrove ] + output_pattern_list if 'annual_removals_us' in actual_stages: output_pattern_list = [ cn.pattern_annual_gain_AGC_BGC_natrl_forest_US, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US ] + output_pattern_list # Adds the soil carbon patterns depending on which carbon emitted_pools are being generated: 2000 and/or emissions year if 'carbon_pools' in actual_stages: if 'loss' in carbon_pool_extent: output_pattern_list = output_pattern_list + [ cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000, cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year ] if '2000' in carbon_pool_extent: output_pattern_list = output_pattern_list + [ cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000, cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000 ] # Adds the biomass_soil output patterns or the soil_only output directories depending on the model run if 'gross_emissions' in actual_stages: if emitted_pools == 'biomass_soil': output_pattern_list = output_pattern_list + [ cn.pattern_gross_emis_commod_biomass_soil, cn.pattern_gross_emis_shifting_ag_biomass_soil, cn.pattern_gross_emis_forestry_biomass_soil, cn.pattern_gross_emis_wildfire_biomass_soil, cn.pattern_gross_emis_urban_biomass_soil, cn.pattern_gross_emis_no_driver_biomass_soil, cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil, cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil ] else: output_pattern_list = output_pattern_list + [ cn.pattern_gross_emis_commod_soil_only, 
cn.pattern_gross_emis_shifting_ag_soil_only, cn.pattern_gross_emis_forestry_soil_only, cn.pattern_gross_emis_wildfire_soil_only, cn.pattern_gross_emis_urban_soil_only, cn.pattern_gross_emis_no_driver_soil_only, cn.pattern_gross_emis_all_gases_all_drivers_soil_only, cn.pattern_gross_emis_co2_only_all_drivers_soil_only, cn.pattern_gross_emis_non_co2_all_drivers_soil_only, cn.pattern_gross_emis_nodes_soil_only ] output_pattern_list = output_pattern_list + [ cn.pattern_net_flux, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel, cn.pattern_net_flux_per_pixel ] # Creates tiles of annual AGB and BGB gain rate and AGB stdev for mangroves using the standard model # removal function if 'annual_removals_mangrove' in actual_stages: uu.print_log(":::::Creating tiles of annual removals for mangrove") start = datetime.datetime.now() mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_mangrove:", elapsed_time, "\n") # Creates tiles of annual AGC+BGC gain rate and AGC stdev for US-specific removals using the standard model # removal function if 'annual_removals_us' in actual_stages: uu.print_log(":::::Creating tiles of annual removals for US") start = datetime.datetime.now() mp_US_removal_rates(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_us:", elapsed_time, "\n") # Creates model extent tiles if 'model_extent' in actual_stages: uu.print_log(":::::Creating tiles of model extent") start = datetime.datetime.now() mp_model_extent(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for model_extent:", elapsed_time, "\n", "\n") # Creates age category tiles for natural forests if 'forest_age_category_IPCC' in actual_stages: uu.print_log( ":::::Creating tiles of forest age categories for IPCC removal rates" ) start = datetime.datetime.now() mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for forest_age_category_IPCC:", elapsed_time, "\n", "\n") # Creates tiles of annual AGB and BGB gain rates using IPCC Table 4.9 defaults if 'annual_removals_IPCC' in actual_stages: uu.print_log( ":::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults" ) start = datetime.datetime.now() mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_IPCC:", elapsed_time, "\n", "\n") # Creates tiles of annual AGC and BGC removal factors for the entire model, combining removal factors from all forest types if 'annual_removals_all_forest_types' in actual_stages: uu.print_log( ":::::Creating tiles of annual aboveground and belowground removal rates for all forest types" ) start = datetime.datetime.now() mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log( ":::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types:", elapsed_time, 
"\n", "\n") # Creates tiles of the number of years of removals for all model pixels (across all forest types) if 'gain_year_count' in actual_stages: uu.print_log( ":::::Freeing up memory for gain year count creation by deleting unneeded tiles" ) tiles_to_delete = [] tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_BGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_all_types))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log( ":::::Creating tiles of gain year count for all removal pixels") start = datetime.datetime.now() mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for gain_year_count:", elapsed_time, "\n", "\n") # Creates tiles of gross removals for all forest types (aboveground, belowground, and above+belowground) if 'gross_removals_all_forest_types' in actual_stages: uu.print_log( ":::::Creating gross removals for all forest types combined (above + belowground) tiles'" ) start = datetime.datetime.now() mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log( ":::::Processing time for gross_removals_all_forest_types:", elapsed_time, "\n", "\n") # Creates carbon emitted_pools in loss year if 
'carbon_pools' in actual_stages: uu.print_log( ":::::Freeing up memory for carbon pool creation by deleting unneeded tiles" ) tiles_to_delete = [] tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_model_extent))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_BGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGC_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_all_types))) tiles_to_delete.extend(glob.glob('*growth_years*tif')) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_gain_year_count))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_BGCO2_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_cumul_gain_AGCO2_BGCO2_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_all_types))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating carbon pool tiles") start = datetime.datetime.now() mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for create_carbon_pools:", elapsed_time, "\n", "\n") # Creates gross emissions tiles by driver, gas, and all emissions combined if 'gross_emissions' in actual_stages: uu.print_log( ":::::Freeing up memory for gross emissions creation by deleting unneeded tiles" ) tiles_to_delete = [] # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) tiles_to_delete.extend(glob.glob('*{}*tif'.format( cn.pattern_AGC_2000))) 
tiles_to_delete.extend(glob.glob('*{}*tif'.format( cn.pattern_BGC_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_deadwood_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_litter_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_total_C_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_elevation))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") uu.print_log(tiles_to_delete) for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating gross emissions tiles") start = datetime.datetime.now() mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for gross_emissions:", elapsed_time, "\n", "\n") # Creates net flux tiles (gross emissions - gross removals) if 'net_flux' in actual_stages: uu.print_log( ":::::Freeing up memory for net flux creation by deleting unneeded tiles" ) tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_commod_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_shifting_ag_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_forestry_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_wildfire_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_urban_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_no_driver_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_nodes_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_AGC_emis_year))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_BGC_emis_year))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_deadwood_emis_year_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_litter_emis_year_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_soil_C_emis_year_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_total_C_emis_year))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_peat_mask))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_drivers))) 
tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_climate_zone))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_bor_tem_trop_processed))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_burn_year))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating net flux tiles") start = datetime.datetime.now() mp_net_flux(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for net_flux:", elapsed_time, "\n", "\n") # Aggregates gross emissions, gross removals, and net flux to coarser resolution. # For sensitivity analyses, creates percent difference and sign change maps compared to standard model net flux. if 'aggregate' in actual_stages: uu.print_log(":::::Creating 4x4 km aggregate maps") start = datetime.datetime.now() mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux=std_net_flux, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for aggregate:", elapsed_time, "\n", "\n") # Converts gross emissions, gross removals and net flux from per hectare rasters to per pixel rasters if 'per_pixel_results' in actual_stages: uu.print_log(":::::Creating per pixel versions of main model outputs") start = datetime.datetime.now() mp_output_per_pixel(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for per pixel raster creation:", elapsed_time, "\n", "\n") uu.print_log(":::::Counting tiles output to each folder") # Modifies output directory names to make them match those used during the model run. # The tiles in each of these directories and counted and logged. # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Modifying output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) # Changes the date in the output directories. This date was used during the model run. # This replaces the date in constants_and_names. if run_date: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) for output in output_dir_list: tile_count = uu.count_tiles_s3(output) uu.print_log("Total tiles in", output, ": ", tile_count) script_end = datetime.datetime.now() script_elapsed_time = script_end - script_start uu.print_log(":::::Processing time for entire run:", script_elapsed_time, "\n")
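# Note added for clarity: main() above is driven entirely by the argparse flags it defines. The script
# that contains it presumably ends with a standard entry-point guard like the commented sketch below
# (an assumption -- the guard is not shown in this excerpt), and the example invocation uses a
# hypothetical file name and illustrative flag values.
#
# if __name__ == '__main__':
#     main()
#
# Example invocation (the file name run_full_model.py is hypothetical):
#   python run_full_model.py -t std -s all -r true -l all -ce loss -p biomass_soil -tcd 30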
def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux=None, run_date=None, no_upload=None): os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Files to download for this script download_dict = { cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil], cn.net_flux_dir: [cn.pattern_net_flux] } # Checks whether the canopy cover argument is valid if thresh < 0 or thresh > 99: uu.exception_log( no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.') if uu.check_aws_creds(): # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_base_dir, sensit_type, tile_id_list) # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir, sensit_type, tile_id_list) uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir, sensit_type, tile_id_list) uu.s3_flexible_download(cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000, cn.docker_base_dir, sensit_type, tile_id_list) uu.print_log("Model outputs to process are:", download_dict) # List of output directories. Modified later for sensitivity analysis. # Output pattern is determined later. output_dir_list = [cn.output_aggreg_dir] # If the model run isn't the standard one, the output directory is changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Iterates through the types of tiles to be processed for dir, download_pattern in list(download_dict.items()): download_pattern_name = download_pattern[0] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): uu.s3_flexible_download(dir, download_pattern_name, cn.docker_base_dir, sensit_type, tile_id_list) # Gets an actual tile id to use as a dummy in creating the actual tile pattern local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir, download_pattern_name) sample_tile_id = uu.get_tile_id(local_tile_list[0]) # Renames the tiles according to the sensitivity analysis before creating dummy tiles. # The renaming function requires a whole tile name, so this passes a dummy time name that is then stripped a few # lines later. tile_id = sample_tile_id # a dummy tile id (but it has to be a real tile id). It is removed later. 
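# Note added for clarity on the slicing just below: uu.sensit_tile_rename() returns a full tile name
# such as '00N_000E_some_pattern.tif' (the example name is illustrative, not an actual model output).
# Tile IDs are always 8 characters plus an underscore ('00N_000E_'), so output_pattern[9:-4] drops that
# 9-character prefix and the trailing '.tif', leaving just the file name pattern:
#   '00N_000E_some_pattern.tif'[9:-4]  ->  'some_pattern'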
output_pattern = uu.sensit_tile_rename(sensit_type, tile_id, download_pattern_name) pattern = output_pattern[9:-4] # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis if (sensit_type != 'std') & (sensit_type not in pattern): uu.print_log( "{} not a sensitivity analysis output. Skipping aggregation..." .format(pattern)) uu.print_log("") continue # Lists the tiles of the particular type that is being iterated through. # Excludes all intermediate files tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern)) # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring tile_list = [i for i in tile_list if not ('hanson_2013' in i)] tile_list = [i for i in tile_list if not ('rewindow' in i)] tile_list = [i for i in tile_list if not ('0_4deg' in i)] tile_list = [i for i in tile_list if not ('.ovr' in i)] # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif'] # test tiles uu.print_log("There are {0} tiles to process for pattern {1}".format( str(len(tile_list)), download_pattern) + "\n") uu.print_log("Processing:", dir, "; ", pattern) # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 400x400 pixels, # which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed. # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay. if cn.count == 96: if sensit_type == 'biomass_swap': processes = 12 # 12 processors = XXX GB peak else: processes = 16 # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out) else: processes = 8 uu.print_log('Rewindow max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload), tile_list) # Added these in response to errno 12 (Cannot allocate memory) errors. # This fix was mentioned here: https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory pool.close() pool.join() # # For single processor use # for tile in tile_list: # # aggregate_results_to_4_km.rewindow(tile, no_upload) # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel) # and sums those values in each 400x400 pixel window. # The sum for each 400x400 pixel window is stored in a 2D array, which is then converted back into a raster at # 0.1x0.1 degree resolution (approximately 10 km in the tropics). # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha). # The 0.1x0.1 degree tile is output. # For multiprocessor use.
This used about 450 GB of memory with count/2, so it's okay on an r4.16xlarge. if cn.count == 96: if sensit_type == 'biomass_swap': processes = 10 # 10 processors = XXX GB peak else: processes = 12 # 12 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out) else: processes = 8 uu.print_log('Conversion to per pixel and aggregate max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(aggregate_results_to_4_km.aggregate, thresh=thresh, sensit_type=sensit_type, no_upload=no_upload), tile_list) pool.close() pool.join() # # For single processor use # for tile in tile_list: # # aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload) # Makes a vrt of all the output 10x10 tiles (10 km resolution) out_vrt = "{}_0_4deg.vrt".format(pattern) os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format( out_vrt, pattern)) # Creates the output name for the aggregated map out_pattern = uu.name_aggregated_output(download_pattern_name, thresh, sensit_type) uu.print_log(out_pattern) # Produces a single raster of all the 10x10 tiles (0.04 degree resolution) cmd = [ 'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0', '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt, '{}.tif'.format(out_pattern) ] uu.log_subprocess_output_full(cmd) # Adds metadata tags to output rasters uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern), sensit_type) # Units are different for annual removal factor, so metadata has to reflect that if 'annual_removal_factor' in out_pattern: cmd = [ 'gdal_edit.py', '-mo', 'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees', '-mo', 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', '-mo', 'extent=Global', '-mo', 'scale=negative values are removals', '-mo', 'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation)' .format(thresh), '{0}.tif'.format(out_pattern) ] uu.log_subprocess_output_full(cmd) else: cmd = [ 'gdal_edit.py', '-mo', 'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees', '-mo', 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', '-mo', 'extent=Global', '-mo', 'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation)' .format(thresh), '{0}.tif'.format(out_pattern) ] uu.log_subprocess_output_full(cmd) # If no_upload flag is not activated, output is uploaded if not no_upload: uu.print_log("Tiles processed. Uploading to s3 now...") uu.upload_final_set(output_dir_list[0], out_pattern) # Cleans up the folder before starting on the next raster type vrtList = glob.glob('*vrt') for vrt in vrtList: os.remove(vrt) for tile_name in tile_list: tile_id = uu.get_tile_id(tile_name) # os.remove('{0}_{1}.tif'.format(tile_id, pattern)) os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern)) os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern)) # Compares the net flux from the standard model and the sensitivity analysis in two ways. # This does not work for comparing the raw outputs of the biomass_swap and US_removals sensitivity models because their # extents are different from the standard model's extent (tropics and US tiles vs. global). # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000.
# Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the # code below should work. if sensit_type not in [ 'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss' ]: if std_net_flux: uu.print_log( "Standard aggregated flux results provided. Creating comparison maps." ) # Downloads the standard model aggregation output from s3 to the spot machine. Only net flux is used, though. uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type) # Identifies the standard model net flux map std_aggreg_flux = os.path.split(std_net_flux)[1] try: # Identifies the sensitivity model net flux map sensit_aggreg_flux = glob.glob( 'net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0] uu.print_log("Standard model net flux:", std_aggreg_flux) uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux) except IndexError: uu.print_log( 'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.' ) # Without both rasters there is nothing to compare, so the function exits here return uu.print_log( "Creating map of percent difference between standard and {} net flux" .format(sensit_type)) aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload) uu.print_log( "Creating map of which pixels change sign and which stay the same between standard and {}" .format(sensit_type)) aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff) uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change) else: uu.print_log( "No standard aggregated flux results provided. Not creating comparison maps." )
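# Note added for clarity: the percent-difference and sign-change maps produced above conceptually
# compare the aggregated sensitivity net flux against the aggregated standard net flux pixel by pixel.
# The commented sketch below illustrates those two calculations with rasterio/numpy; it is not the
# actual aggregate_results_to_4_km implementation, and it assumes both rasters share the same grid.
#
# import numpy as np
# import rasterio
#
# def percent_diff_sketch(std_raster, sensit_raster, out_raster):
#     with rasterio.open(std_raster) as std_src, rasterio.open(sensit_raster) as sensit_src:
#         std = std_src.read(1).astype('float32')
#         sensit = sensit_src.read(1).astype('float32')
#         profile = std_src.profile
#     # Percent difference of the sensitivity run relative to the standard run, avoiding divide-by-zero
#     perc_diff = np.where(std != 0, (sensit - std) / std * 100, 0)
#     profile.update(dtype='float32')
#     with rasterio.open(out_raster, 'w', **profile) as dst:
#         dst.write(perc_diff.astype('float32'), 1)
#
# def sign_change_sketch(std_raster, sensit_raster, out_raster):
#     with rasterio.open(std_raster) as std_src, rasterio.open(sensit_raster) as sensit_src:
#         std = std_src.read(1)
#         sensit = sensit_src.read(1)
#         profile = std_src.profile
#     # 1 where the two runs agree on the sign of net flux, 2 where they disagree
#     same_sign = np.where(np.sign(std) == np.sign(sensit), 1, 2).astype('uint8')
#     profile.update(dtype='uint8')
#     with rasterio.open(out_raster, 'w', **profile) as dst:
#         dst.write(same_sign, 1)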
def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date = None): os.chdir(cn.docker_base_dir) if (sensit_type != 'std') & (carbon_pool_extent != 'loss'): uu.exception_log("Sensitivity analysis run must use 'loss' extent") # Checks the validity of the carbon_pool_extent argument if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']): uu.exception_log("Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.") # If a full model run is specified, the correct set of tiles for the particular script is listed. # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant. if (tile_id_list == 'all') & (carbon_pool_extent == 'loss'): # Lists the tiles that have both model extent and loss pixels model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type) loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type) uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:") tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list)) # For runs generating carbon pools in 2000, all model extent tiles are relevant. if (tile_id_list == 'all') & (carbon_pool_extent != 'loss'): tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type) uu.print_log(tile_id_list) uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") output_dir_list = [] output_pattern_list = [] # Output files and patterns and files to download if carbon emitted_pools for 2000 are being generated if '2000' in carbon_pool_extent: # List of output directories and output file name patterns output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir, cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir] output_pattern_list = output_pattern_list + [cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000, cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000] # Files to download for this script download_dict = { cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.cont_eco_dir: [cn.pattern_cont_eco_processed], cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed], cn.precip_processed_dir: [cn.pattern_precip], cn.elevation_processed_dir: [cn.pattern_elevation], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], cn.gain_dir: [cn.pattern_gain], } # Adds the correct AGB tiles to the download dictionary depending on the model run if sensit_type == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] # Adds the correct loss tile to the download dictionary depending on the model run if sensit_type == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] elif sensit_type == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] # Output files and patterns and files to download if carbon emitted_pools for loss year are being generated if 'loss' in carbon_pool_extent: # List of output directories and output file name patterns output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, 
cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir] output_pattern_list = output_pattern_list + [cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000, cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year] # Files to download for this script. This has the same items as the download_dict for 2000 pools plus # other tiles. download_dict = { cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.cont_eco_dir: [cn.pattern_cont_eco_processed], cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed], cn.precip_processed_dir: [cn.pattern_precip], cn.elevation_processed_dir: [cn.pattern_elevation], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], cn.gain_dir: [cn.pattern_gain], cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types] } # Adds the correct AGB tiles to the download dictionary depending on the model run if sensit_type == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] # Adds the correct loss tile to the download dictionary depending on the model run if sensit_type == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] elif sensit_type == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) else: uu.print_log("Output directory list for standard model:", output_dir_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir] uu.log_subprocess_output_full(cmd) pd.options.mode.chained_assignment = None # Imports the table with the ecozone-continent codes and the carbon gain rates gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="mangrove gain, for model") # Removes rows with duplicate codes (N. and S. 
America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified, cn.below_to_above_trop_dry_mang, cn.below_to_above_trop_wet_mang, cn.below_to_above_subtrop_mang) mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified, cn.deadwood_to_above_trop_dry_mang, cn.deadwood_to_above_trop_wet_mang, cn.deadwood_to_above_subtrop_mang) mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified, cn.litter_to_above_trop_dry_mang, cn.litter_to_above_trop_wet_mang, cn.litter_to_above_subtrop_mang) uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent)) if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 16 # 16 processors = XXX GB peak else: processes = 20 # 25 processors > 750 GB peak; 16 = 560 GB peak; # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 670 GB peak; 21 > 750 GB peak else: # For 2000, or loss & 2000 processes = 15 # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak else: processes = 2 uu.print_log('AGC loss year max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(create_carbon_pools.create_AGC, sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent) if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) else: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) uu.upload_final_set(output_dir_list[6], output_pattern_list[6]) uu.check_storage() uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles") tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types)) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent)) # Creates a single filename pattern to pass to the multiprocessor call if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 30 # 30 processors = XXX GB peak else: processes = 38 # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 700 GB peak else: # For 2000, or loss & 2000 processes = 30 # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak else: processes = 2 uu.print_log('BGC max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, carbon_pool_extent=carbon_pool_extent, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type) if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[1], output_pattern_list[1]) else: 
uu.upload_final_set(output_dir_list[1], output_pattern_list[1]) uu.upload_final_set(output_dir_list[7], output_pattern_list[7]) uu.check_storage() # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on. # Thus must delete AGC, BGC, and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine # for total C 2000 calculation. if '2000' in carbon_pool_extent: uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles") tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent)) if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 10 # 10 processors = XXX GB peak else: processes = 14 # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 650 GB peak; 15 = 700 GB peak else: # For 2000, or loss & 2000 ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced. ### There wouldn't have been enough room for all deadwood and litter otherwise. ### For example, when deadwood and litter generation started getting up to around 50N, I deleted ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S. 
processes = 16 # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak else: processes = 2 uu.print_log('Deadwood and litter max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, mang_litter_AGB_ratio=mang_litter_AGB_ratio, carbon_pool_extent=carbon_pool_extent, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type) if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[2], output_pattern_list[2]) # deadwood uu.upload_final_set(output_dir_list[3], output_pattern_list[3]) # litter else: uu.upload_final_set(output_dir_list[2], output_pattern_list[2]) # deadwood uu.upload_final_set(output_dir_list[3], output_pattern_list[3]) # litter uu.upload_final_set(output_dir_list[8], output_pattern_list[8]) # deadwood uu.upload_final_set(output_dir_list[9], output_pattern_list[9]) # litter uu.check_storage() uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles") tiles_to_delete = [] tiles_to_delete .extend(glob.glob('*{}*tif'.format(cn.pattern_elevation))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() if 'loss' in carbon_pool_extent: uu.print_log("Creating tiles of soil carbon in loss extent") # If pools in 2000 weren't generated, soil carbon in emissions extent is 4. # If pools in 2000 were generated, soil carbon in emissions extent is 10. if '2000' not in carbon_pool_extent: pattern = output_pattern_list[4] else: pattern = output_pattern_list[10] if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 36 # 36 processors = XXX GB peak else: processes = 42 # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = XXX GB peak else: # For 2000, or loss & 2000 processes = 12 # 12 processors = XXX GB peak else: processes = 2 uu.print_log('Soil carbon loss year max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type) # If pools in 2000 weren't generated, soil carbon in emissions extent is 4. # If pools in 2000 were generated, soil carbon in emissions extent is 10. if '2000' not in carbon_pool_extent: uu.upload_final_set(output_dir_list[4], output_pattern_list[4]) else: uu.upload_final_set(output_dir_list[10], output_pattern_list[10]) uu.check_storage() if '2000' in carbon_pool_extent: uu.print_log("Skipping soil for 2000 carbon pool calculation. 
Soil carbon in 2000 already created.") uu.check_storage() # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on. # Thus must delete BGC and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine # for total C 2000 calculation. if '2000' in carbon_pool_extent: # Files to download for total C 2000. Previously deleted to save space download_dict = { cn.BGC_2000_dir: [cn.pattern_BGC_2000], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000] } for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) uu.print_log("Creating tiles of total carbon") if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 14 # 14 processors = XXX GB peak else: processes = 18 # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = XXX GB peak else: # For 2000, or loss & 2000 processes = 12 # 12 processors = XXX GB peak else: processes = 2 uu.print_log('Total carbon loss year max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type) if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[5], output_pattern_list[5]) else: uu.upload_final_set(output_dir_list[5], output_pattern_list[5]) uu.upload_final_set(output_dir_list[11], output_pattern_list[11]) uu.check_storage()
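# Note added for clarity: total carbon is conceptually the per-pixel sum of the five pools created
# above (aboveground, belowground, deadwood, litter, and soil carbon). The commented sketch below
# illustrates that sum with rasterio/numpy; it is not the actual create_carbon_pools.create_total_C
# code, and the file names are placeholders.
#
# import numpy as np
# import rasterio
#
# def total_carbon_sketch(pool_rasters, out_raster):
#     # pool_rasters: paths to the AGC, BGC, deadwood, litter, and soil C tiles, all on the same grid
#     with rasterio.open(pool_rasters[0]) as src:
#         profile = src.profile
#         total = np.zeros((src.height, src.width), dtype='float32')
#     for path in pool_rasters:
#         with rasterio.open(path) as src:
#             total += src.read(1).astype('float32')
#     profile.update(dtype='float32')
#     with rasterio.open(out_raster, 'w', **profile) as dst:
#         dst.write(total, 1)
#
# total_carbon_sketch(['00N_000E_agc.tif', '00N_000E_bgc.tif', '00N_000E_deadwood.tif',
#                      '00N_000E_litter.tif', '00N_000E_soil.tif'], '00N_000E_total_c.tif')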