def main():

    no_upload = False

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # The list of tiles to iterate through
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
    # tile_id_list = ['00N_110E'] # test tile
    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # By definition, this script is for the biomass swap analysis (replacing WHRC AGB with Saatchi/JPL AGB)
    sensit_type = 'biomass_swap'

    # Downloads a pan-tropical raster that has the erroneous integer values in the oceans removed
    uu.s3_file_download(cn.JPL_raw_dir, cn.JPL_raw_name, sensit_type)

    # Converts the Saatchi AGB vrt to Hansen tiles
    source_raster = cn.JPL_raw_name
    out_pattern = cn.pattern_JPL_unmasked_processed
    dt = 'Float32'
    pool = multiprocessing.Pool(cn.count - 5)  # count-5 peaks at 320GB of memory
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt,
                     no_upload=no_upload), tile_id_list)

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.JPL_processed_dir
    pattern = cn.pattern_JPL_unmasked_processed
    pool = multiprocessing.Pool(cn.count - 5)  # count-5 peaks at 410GB of memory
    pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
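    # A minimal single-processor fallback, shown as a sketch for debugging only; it assumes uu.mp_warp_to_Hansen
    # accepts the tile id as its first positional argument, as the partial() call above implies.
    # for tile_id in tile_id_list:
    #     uu.mp_warp_to_Hansen(tile_id, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload)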
                        '-l', required=True,
                        help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.')
    parser.add_argument('--run-date', '-d', required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list
    run_date = args.run_date

    # Disables upload to s3 if no AWS credentials are found in environment
    if not uu.check_aws_creds():
        no_upload = True
        uu.print_log("s3 credentials not found. Uploading to s3 disabled.")

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)
    tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_annual_gain_rate_mangrove(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date)
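    # Example invocation (a sketch: the script file name and tile ids are illustrative, and the -t short flag
    # for --model-type is assumed from the other mp_ scripts in this repo):
    #   python mp_annual_gain_rate_mangrove.py -t std -l 00N_110E,00N_120E -d 20200101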
                        '-l', required=True,
                        help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.')
    parser.add_argument('--run-date', '-d', required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    parser.add_argument('--no-upload', '-nu', action='store_true',
                        help='Disables uploading of outputs to s3')
    args = parser.parse_args()
    tile_id_list = args.tile_id_list
    run_date = args.run_date
    no_upload = args.no_upload

    # Disables upload to s3 if no AWS credentials are found in environment
    if not uu.check_aws_creds():
        no_upload = True
        uu.print_log("s3 credentials not found. Uploading to s3 disabled.")

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload)

    mp_mangrove_processing(tile_id_list=tile_id_list, run_date=run_date, no_upload=no_upload)
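    # Example invocation (a sketch; the script file name and tile ids are illustrative):
    #   python mp_mangrove_processing.py -l 00N_110E,00N_120E -d 20200101 -nu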
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
    pool.close()
    pool.join()

    uu.print_log('\n')

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Create tiles of the other inputs needed for the carbon flux model')
    parser.add_argument('--tile_id_list', '-l', required=True,
                        help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.')
    parser.add_argument('--run-date', '-d', required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    args = parser.parse_args()
    tile_id_list = args.tile_id_list
    run_date = args.run_date

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list, run_date=run_date)

    # Checks whether the tile_id_list argument is valid
    tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_prep_other_inputs(tile_id_list=tile_id_list, run_date=run_date)
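    # Example invocation (a sketch; the script file name and tile ids are illustrative; note this parser
    # spells the long flag --tile_id_list with underscores):
    #   python mp_prep_other_inputs.py -l 00N_000E,00N_010E -d 20200101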
def main():

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # List of tiles that could be run
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ['50N_130W'] # test tiles
    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Assumed sensitivity analysis type for this script; sensit_type is not defined in the original excerpt
    sensit_type = 'Mekong_loss'

    # Downloads the Mekong loss folder. Each year of loss has its own raster
    uu.s3_folder_download(cn.Mekong_loss_raw_dir, cn.docker_base_dir, sensit_type)

    # The list of all annual loss rasters
    annual_loss_list = glob.glob('Loss_20*tif')
    uu.print_log(annual_loss_list)

    uu.print_log("Creating first year of loss Hansen tiles for Mekong region...")

    # Recodes raw loss rasters with their loss year (for model years only)
    pool = multiprocessing.Pool(int(cn.count / 2))
    pool.map(Mekong_loss.recode_tiles, annual_loss_list)

    # Makes a single raster of all first loss year pixels in the Mekong (i.e. where loss occurred in multiple years,
    # the earlier loss year takes precedence)
    uu.print_log("Merging all loss years within model range...")
    loss_composite = "Mekong_loss_2001_2015.tif"
    cmd = ['gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-ot', 'Byte',
           "Mekong_loss_recoded_2015.tif", "Mekong_loss_recoded_2014.tif", "Mekong_loss_recoded_2013.tif",
           "Mekong_loss_recoded_2012.tif", "Mekong_loss_recoded_2011.tif", "Mekong_loss_recoded_2010.tif",
           "Mekong_loss_recoded_2009.tif", "Mekong_loss_recoded_2008.tif", "Mekong_loss_recoded_2007.tif",
           "Mekong_loss_recoded_2006.tif", "Mekong_loss_recoded_2005.tif", "Mekong_loss_recoded_2004.tif",
           "Mekong_loss_recoded_2003.tif", "Mekong_loss_recoded_2002.tif", "Mekong_loss_recoded_2001.tif"]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Creates Hansen tiles out of the composite Mekong loss
    source_raster = loss_composite
    out_pattern = cn.pattern_Mekong_loss_processed
    dt = 'Byte'
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt),
             tile_id_list)

    # This is necessary for changing NoData values to 0s (so they are recognized as 0s)
    pool.map(Mekong_loss.recode_tiles, tile_id_list)

    # Only uploads tiles that actually have Mekong loss in them
    upload_dir = cn.Mekong_loss_processed_dir
    pattern = cn.pattern_Mekong_loss_processed
    pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
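    # Sketch of the worker-pool cleanup used by the other mp_ scripts in this repo (close the pool, then join);
    # assumed to apply here as well once all of the map() calls above have finished.
    # pool.close()
    # pool.join()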
    pool.join()


if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Create planted forest carbon gain rate tiles')
    parser.add_argument('--gadm-tile-index', '-gi', required=True,
                        help='Shapefile of 1x1 degree tiles of countries that contain planted forests (i.e. countries with planted forests rasterized to 1x1 deg). If no shapefile, write None.')
    parser.add_argument('--planted-tile-index', '-pi', required=True,
                        help='Shapefile of 1x1 degree tiles that contain planted forests (i.e. planted forest extent rasterized to 1x1 deg). If no shapefile, write None.')
    # # This is the beginning of adding a way to have the model run on a selected area, rather than globally. I didn't finish implementing it, though.
    # parser.add_argument('--bounding-box', '-bb', required=False, type=int, nargs='+',
    #                     help='The bounding box of the tiles to be updated, supplied in the order min-x, max-x, min-y, max-y. They must be at 10 degree increments.')
    args = parser.parse_args()

    # Creates the directory and shapefile names for the two possible arguments (index shapefiles)
    gadm_index = os.path.split(args.gadm_tile_index)
    gadm_index_path = gadm_index[0]
    gadm_index_shp = gadm_index[1]
    gadm_index_shp = gadm_index_shp[:-4]
    planted_index = os.path.split(args.planted_tile_index)
    planted_index_path = planted_index[0]
    planted_index_shp = planted_index[1]
    planted_index_shp = planted_index_shp[:-4]

    # Create the output log
    uu.initiate_log()

    mp_plantation_preparation(gadm_index_shp=gadm_index_shp, planted_index_shp=planted_index_shp)
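    # Example invocation (a sketch; the script file name and shapefile paths are illustrative placeholders):
    #   python mp_plantation_preparation.py -gi gadm_index_1x1.shp -pi planted_index_1x1.shp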
                        required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    parser.add_argument('--no-upload', '-nu', action='store_true',
                        help='Disables uploading of outputs to s3')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list
    run_date = args.run_date
    no_upload = args.no_upload

    # Disables upload to s3 if no AWS credentials are found in environment
    if not uu.check_aws_creds():
        no_upload = True
        uu.print_log("s3 credentials not found. Uploading to s3 disabled.")

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)
    tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_annual_gain_rate_IPCC_defaults(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date,
                                      no_upload=no_upload)
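    # Example invocation (a sketch; the script file name is illustrative and the -t/-l flags are assumed
    # from the part of this parser elided above, matching the other mp_ scripts):
    #   python mp_annual_gain_rate_IPCC_defaults.py -t std -l all -d 20200101 -nu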
                        '-t', required=True,
                        help='{}'.format(cn.model_type_arg_help))
    parser.add_argument('--run-date', '-d', required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list
    emitted_pools = args.emitted_pools_to_use
    run_date = args.run_date

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, emitted_pools=emitted_pools)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)

    if 's3://' in tile_id_list:
        tile_id_list = uu.tile_list_s3(tile_id_list, 'std')
    else:
        tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_calculate_gross_emissions(sensit_type=sensit_type,
def main():

    no_upload = False

    # By definition, this script is for the legal Amazon loss sensitivity analysis
    sensit_type = "legal_Amazon_loss"

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    Brazil_stages = ['all', 'create_forest_extent', 'create_loss']

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description='Create tiles of forest extent in legal Amazon in 2000 and annual loss according to PRODES')
    parser.add_argument('--stages', '-s', required=True,
                        help='Stages of creating Brazil legal Amazon-specific gross cumulative removals. Options are {}'.format(Brazil_stages))
    parser.add_argument('--run_through', '-r', required=True,
                        help='Options: true or false. true: run named stage and following stages. false: run only named stage.')
    args = parser.parse_args()
    stage_input = args.stages
    run_through = args.run_through

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if stage_input not in Brazil_stages:
        uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', Brazil_stages)
    if run_through not in ['true', 'false']:
        uu.exception_log(no_upload, 'Invalid run through option. Please enter true or false.')

    actual_stages = uu.analysis_stages(Brazil_stages, stage_input, run_through, sensit_type)
    uu.print_log(actual_stages)

    # List of output directories and output file name patterns
    master_output_dir_list = [cn.Brazil_forest_extent_2000_processed_dir,
                              cn.Brazil_annual_loss_processed_dir]
    master_output_pattern_list = [cn.pattern_Brazil_forest_extent_2000_processed,
                                  cn.pattern_Brazil_annual_loss_processed]

    # Creates forest extent 2000 raster from multiple PRODES forest extent rasters
    ### NOTE: Didn't redo this for model v1.2.0, so I don't know if it still works.
    if 'create_forest_extent' in actual_stages:

        uu.print_log('Creating forest extent tiles')

        # List of tiles that could be run
        tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
        # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
        # tile_id_list = ['50N_130W'] # test tiles
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input rasters and lists them
        uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir, cn.docker_base_dir, sensit_type)
        raw_forest_extent_inputs = glob.glob('*_AMZ_warped_*tif')  # The list of tiles to merge

        # Gets the resolution of a more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('*2019_AMZ_warped_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log(pixelSizeX)
        uu.print_log(pixelSizeY)

        # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out
        # at about 300 GB as it progresses about 15% each time; then the memory drops back to 0 and slowly increases.
cmd = [ 'gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged), '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY), raw_forest_extent_inputs[0], raw_forest_extent_inputs[1], raw_forest_extent_inputs[2], raw_forest_extent_inputs[3], raw_forest_extent_inputs[4], raw_forest_extent_inputs[5] ] uu.log_subprocess_output_full(cmd) # Uploads the merged forest extent raster to s3 for future reference uu.upload_final_set(cn.Brazil_forest_extent_2000_merged_dir, cn.pattern_Brazil_forest_extent_2000_merged) # Creates legal Amazon extent 2000 tiles source_raster = '{}.tif'.format( cn.pattern_Brazil_forest_extent_2000_merged) out_pattern = cn.pattern_Brazil_forest_extent_2000_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) # Checks if each tile has data in it. Only tiles with data are uploaded. upload_dir = master_output_dir_list[0] pattern = master_output_pattern_list[0] pool = multiprocessing.Pool(cn.count - 5) pool.map( partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list) # Creates annual loss raster for 2001-2019 from multiples PRODES rasters if 'create_loss' in actual_stages: uu.print_log('Creating annual PRODES loss tiles') tile_id_list = uu.tile_list_s3( cn.Brazil_forest_extent_2000_processed_dir) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Downloads input rasters and lists them cmd = [ 'aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.', '--recursive' ] uu.log_subprocess_output_full(cmd) uu.print_log( "Input loss rasters downloaded. Getting resolution of recent raster..." ) # Gets the resolution of the more recent PRODES raster, which has a higher resolution. The merged output matches that. raw_forest_extent_input_2019 = glob.glob('Prodes2019_*tif') prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0]) transform_2019 = prodes_2019.GetGeoTransform() pixelSizeX = transform_2019[1] pixelSizeY = -transform_2019[5] uu.print_log(" Recent raster resolution: {0} by {1}".format( pixelSizeX, pixelSizeY)) # This merges both loss rasters together, so it takes a lot of memory and time. It seems to max out # at about 180 GB, then go back to 0. # This took about 8 minutes. 
uu.print_log( "Merging input loss rasters into a composite for all years...") cmd = [ 'gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged), '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY), 'Prodes2019_annual_loss_2008_2019.tif', 'Prodes2014_annual_loss_2001_2007.tif' ] uu.log_subprocess_output_full(cmd) uu.print_log(" Loss rasters combined into composite") # Uploads the merged loss raster to s3 for future reference uu.upload_final_set(cn.Brazil_annual_loss_merged_dir, cn.pattern_Brazil_annual_loss_merged) # Creates annual loss 2001-2015 tiles uu.print_log("Warping composite PRODES loss to Hansen tiles...") source_raster = '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged) out_pattern = cn.pattern_Brazil_annual_loss_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list) uu.print_log(" PRODES composite loss raster warped to Hansen tiles") # Checks if each tile has data in it. Only tiles with data are uploaded. # In practice, every Amazon tile has loss in it but I figured I'd do this just to be thorough. upload_dir = master_output_dir_list[1] pattern = master_output_pattern_list[1] pool = multiprocessing.Pool(cn.count - 5) pool.map( partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list) # Creates forest age category tiles if 'forest_age_category' in actual_stages: uu.print_log('Creating forest age category tiles') # Files to download for this script. download_dict = { cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed], cn.gain_dir: [cn.pattern_gain], cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted], cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.Brazil_forest_extent_2000_processed_dir: [cn.pattern_Brazil_forest_extent_2000_processed] } tile_id_list = uu.tile_list_s3( cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list) stage_output_pattern_list = uu.alter_patterns( sensit_type, master_output_pattern_list) output_pattern = stage_output_pattern_list[2] # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html # With processes=30, peak usage was about 350 GB using WHRC AGB. # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that. 
pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial(legal_AMZ_loss.legal_Amazon_forest_age_category, sensit_type=sensit_type, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # # legal_AMZ_loss.legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern) # Uploads output from this stage uu.upload_final_set(stage_output_dir_list[2], stage_output_pattern_list[2]) # Creates tiles of the number of years of removals if 'gain_year_count' in actual_stages: uu.print_log('Creating gain year count tiles for natural forest') # Files to download for this script. download_dict = { cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed], cn.gain_dir: [cn.pattern_gain], cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted], cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.Brazil_forest_extent_2000_processed_dir: [cn.pattern_Brazil_forest_extent_2000_processed] } tile_id_list = uu.tile_list_s3( cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list) stage_output_pattern_list = uu.alter_patterns( sensit_type, master_output_pattern_list) output_pattern = stage_output_pattern_list[3] pool = multiprocessing.Pool(int(cn.count / 3)) pool.map( partial( legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only, sensit_type=sensit_type), tile_id_list) pool.map( partial( legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change, sensit_type=sensit_type), tile_id_list) pool.map( partial(legal_AMZ_loss. legal_Amazon_create_gain_year_count_loss_and_gain_standard, sensit_type=sensit_type), tile_id_list) pool = multiprocessing.Pool( int(cn.count / 8) ) # count/5 uses more than 160GB of memory. count/8 uses about 120GB of memory. 
pool.map( partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge, output_pattern=output_pattern), tile_id_list) # # For single processor use # for tile_id in tile_id_list: # legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type) # # for tile_id in tile_id_list: # legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type) # # for tile_id in tile_id_list: # legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type) # # for tile_id in tile_id_list: # legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern) # Intermediate output tiles for checking outputs uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only") uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only") uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change") uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_and_gain") # Uploads output from this stage uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3]) # Creates tiles of annual AGB and BGB gain rate for non-mangrove, non-planted forest using the standard model # removal function if 'annual_removals' in actual_stages: uu.print_log('Creating annual removals for natural forest') # Files to download for this script. download_dict = { cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC], cn.cont_eco_dir: [cn.pattern_cont_eco_processed], cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000] } tile_id_list = uu.tile_list_s3( cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # If the model run isn't the standard one, the output directory and file names are changed. # This adapts just the relevant items in the output directory and pattern lists (annual removals). if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[4:6]) stage_output_pattern_list = uu.alter_patterns( sensit_type, master_output_pattern_list[4:6]) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # Table with IPCC Table 4.9 default gain rates cmd = [ 'aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir ] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: uu.log_subprocess_output(process.stdout) pd.options.mode.chained_assignment = None # Imports the table with the ecozone-continent codes and the carbon gain rates gain_table = pd.read_excel( "{}".format(cn.gain_spreadsheet), sheet_name="natrl fores gain, for std model") # Removes rows with duplicate codes (N. and S. 
America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') # Converts gain table from wide to long, so each continent-ecozone-age category has its own row gain_table_cont_eco_age = pd.melt(gain_table_simplified, id_vars=['gainEcoCon'], value_vars=[ 'growth_primary', 'growth_secondary_greater_20', 'growth_secondary_less_20' ]) gain_table_cont_eco_age = gain_table_cont_eco_age.dropna() # Creates a table that has just the continent-ecozone combinations for adding to the dictionary. # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel. # Assigns removal rate of 0 when there's no age category. gain_table_con_eco_only = gain_table_cont_eco_age gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates( subset='gainEcoCon', keep='first') gain_table_con_eco_only['value'] = 0 gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only[ 'gainEcoCon'] # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value age_dict = { 'growth_primary': 10000, 'growth_secondary_greater_20': 20000, 'growth_secondary_less_20': 30000 } # Creates a unique value for each continent-ecozone-age category gain_table_cont_eco_age = gain_table_cont_eco_age.replace( {"variable": age_dict}) gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age[ 'gainEcoCon'] + gain_table_cont_eco_age['variable'] # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes gain_table_all_combos = pd.concat( [gain_table_con_eco_only, gain_table_cont_eco_age]) # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary gain_table_dict = pd.Series( gain_table_all_combos.value.values, index=gain_table_all_combos.cont_eco_age).to_dict() # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent) gain_table_dict[0] = 0 # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone for key, value in age_dict.items(): gain_table_dict[value] = 0 # Converts all the keys (continent-ecozone-age codes) to float type gain_table_dict = { float(key): value for key, value in gain_table_dict.items() } uu.print_log(gain_table_dict) # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html # processes=24 peaks at about 440 GB of memory on an r4.16xlarge machine output_pattern_list = stage_output_pattern_list pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial(annual_gain_rate_natrl_forest.annual_gain_rate, sensit_type=sensit_type, gain_table_dict=gain_table_dict, output_pattern_list=output_pattern_list), tile_id_list) pool.close() pool.join() # # For single processor use # for tile in tile_id_list: # # annual_gain_rate_natrl_forest.annual_gain_rate(tile, sensit_type, gain_table_dict, stage_output_pattern_list) # Uploads outputs from this stage for i in range(0, len(stage_output_dir_list)): uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i]) # Creates tiles of cumulative AGCO2 and BGCO2 gain rate for non-mangrove, non-planted forest using the standard model # removal function if 'cumulative_removals' in actual_stages: uu.print_log('Creating cumulative removals for natural forest') # Files to download for this script. 
download_dict = { cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults], cn.annual_gain_BGB_natrl_forest_dir: [cn.pattern_annual_gain_BGB_natrl_forest], cn.gain_year_count_natrl_forest_dir: [cn.pattern_gain_year_count_natrl_forest] } tile_id_list = uu.tile_list_s3( cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # If the model run isn't the standard one, the output directory and file names are changed. # This adapts just the relevant items in the output directory and pattern lists (cumulative removals). if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[6:8]) stage_output_pattern_list = uu.alter_patterns( sensit_type, master_output_pattern_list[6:8]) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # Calculates cumulative aboveground carbon gain in non-mangrove planted forests output_pattern_list = stage_output_pattern_list pool = multiprocessing.Pool(int(cn.count / 3)) pool.map( partial(cumulative_gain_natrl_forest.cumulative_gain_AGCO2, output_pattern_list=output_pattern_list, sensit_type=sensit_type), tile_id_list) # Calculates cumulative belowground carbon gain in non-mangrove planted forests pool = multiprocessing.Pool(int(cn.count / 3)) pool.map( partial(cumulative_gain_natrl_forest.cumulative_gain_BGCO2, output_pattern_list=output_pattern_list, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # cumulative_gain_natrl_forest.cumulative_gain_AGCO2(tile_id, stage_output_pattern_list[0], sensit_type) # # for tile_id in tile_id_list: # cumulative_gain_natrl_forest.cumulative_gain_BGCO2(tile_id, stage_output_pattern_list[1], sensit_type) # Uploads outputs from this stage for i in range(0, len(stage_output_dir_list)): uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i]) # Creates tiles of annual gain rate and cumulative removals for all forest types (above + belowground) if 'removals_merged' in actual_stages: uu.print_log( 'Creating annual and cumulative removals for all forest types combined (above + belowground)' ) # Files to download for this script download_dict = { cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove], cn.annual_gain_AGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove], cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults], cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove], cn.annual_gain_BGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_BGB_planted_forest_non_mangrove], cn.annual_gain_BGB_natrl_forest_dir: [cn.pattern_annual_gain_BGB_natrl_forest], cn.cumul_gain_AGCO2_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_mangrove], cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove], cn.cumul_gain_AGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_AGCO2_natrl_forest], cn.cumul_gain_BGCO2_mangrove_dir: [cn.pattern_cumul_gain_BGCO2_mangrove], cn.cumul_gain_BGCO2_planted_forest_non_mangrove_dir: 
[cn.pattern_cumul_gain_BGCO2_planted_forest_non_mangrove], cn.cumul_gain_BGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_BGCO2_natrl_forest] } tile_id_list = uu.tile_list_s3( cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # If the model run isn't the standard one, the output directory and file names are changed. # This adapts just the relevant items in the output directory and pattern lists (cumulative removals). if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[8:10]) stage_output_pattern_list = uu.alter_patterns( sensit_type, master_output_pattern_list[8:10]) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # For multiprocessing output_pattern_list = stage_output_pattern_list pool = multiprocessing.Pool(int(cn.count / 3)) pool.map( partial(merge_cumulative_annual_gain_all_forest_types.gain_merge, output_pattern_list=output_pattern_list, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # merge_cumulative_annual_gain_all_forest_types.gain_merge(tile_id, output_pattern_list, sensit_type) # Uploads output tiles to s3 for i in range(0, len(stage_output_dir_list)): uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i]) # Creates carbon emitted_pools in loss year if 'carbon_pools' in actual_stages: uu.print_log('Creating emissions year carbon emitted_pools') # Specifies that carbon emitted_pools are created for loss year rather than in 2000 extent = 'loss' # Files to download for this script download_dict = { cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.cont_eco_dir: [cn.pattern_cont_eco_processed], cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed], cn.precip_processed_dir: [cn.pattern_precip], cn.elevation_processed_dir: [cn.pattern_elevation], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], cn.gain_dir: [cn.pattern_gain], cn.cumul_gain_AGCO2_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_mangrove], cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove], cn.cumul_gain_AGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_AGCO2_natrl_forest], cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove], cn.annual_gain_AGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove], cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults] } # Adds the correct AGB tiles to the download dictionary depending on the model run if sensit_type == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [ cn.pattern_JPL_unmasked_processed ] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [ cn.pattern_WHRC_biomass_2000_unmasked ] # Adds the correct loss tile to the download dictionary depending on the model run if sensit_type == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [ cn.pattern_Brazil_annual_loss_processed ] else: download_dict[cn.loss_dir] = [''] tile_id_list = uu.tile_list_s3( 
cn.Brazil_forest_extent_2000_processed_dir) # tile_id_list = ['00N_050W'] uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) stage_output_dir_list = uu.alter_dirs( sensit_type, master_output_dir_list[10:16]) stage_output_pattern_list = uu.alter_patterns( sensit_type, master_output_pattern_list[10:16]) # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates cmd = [ 'aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir ] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: uu.log_subprocess_output(process.stdout) pd.options.mode.chained_assignment = None # Imports the table with the ecozone-continent codes and the carbon gain rates gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="mangrove gain, for model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict( gain_table_simplified, cn.below_to_above_trop_dry_mang, cn.below_to_above_trop_wet_mang, cn.below_to_above_subtrop_mang) mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict( gain_table_simplified, cn.deadwood_to_above_trop_dry_mang, cn.deadwood_to_above_trop_wet_mang, cn.deadwood_to_above_subtrop_mang) mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict( gain_table_simplified, cn.litter_to_above_trop_dry_mang, cn.litter_to_above_trop_wet_mang, cn.litter_to_above_subtrop_mang) if extent == 'loss': uu.print_log( "Creating tiles of emitted aboveground carbon (carbon 2000 + carbon accumulation until loss year)" ) # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460 # 14 processors maxes out at 410-415 GB # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[0] pool = multiprocessing.Pool(int(cn.count / 4)) pool.map( partial(create_carbon_pools.create_emitted_AGC, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_emitted_AGC(tile_id, stage_output_pattern_list[0], sensit_type) uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0]) elif extent == '2000': uu.print_log("Creating tiles of aboveground carbon in 2000") # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460 # 14 processors maxes out at 415 GB # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[0] pool = multiprocessing.Pool(processes=14) pool.map( partial(create_carbon_pools.create_2000_AGC, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # 
create_carbon_pools.create_2000_AGC(tile_id, output_pattern_list[0], sensit_type) uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0]) else: uu.exception_log(no_upload, "Extent argument not valid") uu.print_log("Creating tiles of belowground carbon") # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[1] pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, extent, stage_output_pattern_list[1], sensit_type) uu.upload_final_set(stage_output_dir_list[1], stage_output_pattern_list[1]) uu.print_log("Creating tiles of deadwood carbon") # processes=16 maxes out at about 430 GB # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[2] pool = multiprocessing.Pool(int(cn.count / 4)) pool.map( partial(create_carbon_pools.create_deadwood, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_deadwood(tile_id, mang_deadwood_AGB_ratio, extent, stage_output_pattern_list[2], sensit_type) uu.upload_final_set(stage_output_dir_list[2], stage_output_pattern_list[2]) uu.print_log("Creating tiles of litter carbon") # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[3] pool = multiprocessing.Pool(int(cn.count / 4)) pool.map( partial(create_carbon_pools.create_litter, mang_litter_AGB_ratio=mang_litter_AGB_ratio, extent=extent, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_litter(tile_id, mang_litter_AGB_ratio, extent, stage_output_pattern_list[3], sensit_type) uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3]) if extent == 'loss': uu.print_log("Creating tiles of soil carbon") # Creates a single filename pattern to pass to the multiprocessor call pattern = stage_output_pattern_list[4] pool = multiprocessing.Pool(int(cn.count / 3)) pool.map( partial(create_carbon_pools.create_soil, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_soil(tile_id, stage_output_pattern_list[4], sensit_type) uu.upload_final_set(stage_output_dir_list[4], stage_output_pattern_list[4]) elif extent == '2000': uu.print_log("Skipping soil for 2000 carbon pool calculation") else: uu.exception_log(no_upload, "Extent argument not valid") uu.print_log("Creating tiles of total carbon") # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory # at peak. Probably could've handled 16 processors on an r4.16xlarge machine but I didn't feel like taking the time to check. 
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[5]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(partial(create_carbon_pools.create_total_C, extent=extent, pattern=pattern, sensit_type=sensit_type),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_total_C(tile_id, extent, stage_output_pattern_list[5], sensit_type)

        uu.upload_final_set(stage_output_dir_list[5], stage_output_pattern_list[5])
def main(): os.chdir(cn.docker_base_dir) # List of possible model stages to run (not including mangrove and planted forest stages) model_stages = [ 'all', 'model_extent', 'forest_age_category_IPCC', 'annual_removals_IPCC', 'annual_removals_all_forest_types', 'gain_year_count', 'gross_removals_all_forest_types', 'carbon_pools', 'gross_emissions', 'net_flux', 'aggregate' ] # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run parser = argparse.ArgumentParser( description='Run the full carbon flux model') parser.add_argument('--model-type', '-t', required=True, help='{}'.format(cn.model_type_arg_help)) parser.add_argument( '--stages', '-s', required=True, help='Stages for running the flux model. Options are {}'.format( model_stages)) parser.add_argument( '--run-through', '-r', required=True, help= 'Options: true or false. true: run named stage and following stages. false: run only named stage.' ) parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') parser.add_argument( '--tile-id-list', '-l', required=True, help= 'List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.' ) parser.add_argument( '--carbon-pool-extent', '-ce', required=False, help= 'Time period for which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss' ) parser.add_argument( '--emitted-pools-to-use', '-p', required=False, help= 'Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.' ) parser.add_argument( '--tcd-threshold', '-tcd', required=False, help= 'Tree cover density threshold above which pixels will be included in the aggregation.' ) parser.add_argument( '--std-net-flux-aggreg', '-sagg', required=False, help= 'The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map' ) parser.add_argument( '--mangroves', '-ma', required=False, help= 'Include mangrove removal rate and standard deviation tile creation step (before model extent). true or false.' ) parser.add_argument( '--us-rates', '-us', required=False, help= 'Include US removal rate and standard deviation tile creation step (before model extent). true or false.' ) parser.add_argument( '--per-pixel-results', '-ppr', required=False, help= 'Include per pixel result calculations for gross emissions (all gases, all pools), gross removals, and net flux. true or false.' 
) parser.add_argument('--log-note', '-ln', required=False, help='Note to include in log header about model run.') args = parser.parse_args() sensit_type = args.model_type stage_input = args.stages run_through = args.run_through run_date = args.run_date tile_id_list = args.tile_id_list carbon_pool_extent = args.carbon_pool_extent emitted_pools = args.emitted_pools_to_use thresh = args.tcd_threshold if thresh is not None: thresh = int(thresh) std_net_flux = args.std_net_flux_aggreg include_mangroves = args.mangroves include_us = args.us_rates include_per_pixel = args.per_pixel_results log_note = args.log_note # Start time for script script_start = datetime.datetime.now() # Create the output log uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, stage_input=stage_input, run_through=run_through, carbon_pool_extent=carbon_pool_extent, emitted_pools=emitted_pools, thresh=thresh, std_net_flux=std_net_flux, include_mangroves=include_mangroves, include_us=include_us, include_per_pixel=include_per_pixel, log_note=log_note) # Checks the validity of the model stage arguments. If either one is invalid, the script ends. if (stage_input not in model_stages): uu.exception_log( 'Invalid stage selection. Please provide a stage from', model_stages) else: pass if (run_through not in ['true', 'false']): uu.exception_log( 'Invalid run through option. Please enter true or false.') else: pass # Generates the list of stages to run actual_stages = uu.analysis_stages(model_stages, stage_input, run_through, include_mangroves=include_mangroves, include_us=include_us, include_per_pixel=include_per_pixel) uu.print_log("Analysis stages to run:", actual_stages) # Reports how much storage is being used with files uu.check_storage() # Checks whether the sensitivity analysis argument is valid uu.check_sensit_type(sensit_type) # Checks if the carbon pool type is specified if the stages to run includes carbon pool generation. # Does this up front so the user knows before the run begins that information is missing. if ('carbon_pools' in actual_stages) & (carbon_pool_extent not in [ 'loss', '2000', 'loss,2000', '2000,loss' ]): uu.exception_log( "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss." ) # Checks if the correct c++ script has been compiled for the pool option selected. # Does this up front so that the user is prompted to compile the C++ before the script starts running, if necessary. if 'gross_emissions' in actual_stages: if emitted_pools == 'biomass_soil': # Some sensitivity analyses have specific gross emissions scripts. # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script. 
if sensit_type in ['no_shifting_ag', 'convert_to_grassland']: if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format( cn.c_emis_compile_dst, sensit_type)): uu.print_log( "C++ for {} already compiled.".format(sensit_type)) else: uu.exception_log( 'Must compile standard {} model C++...'.format( sensit_type)) else: if os.path.exists( '{0}/calc_gross_emissions_generic.exe'.format( cn.c_emis_compile_dst)): uu.print_log("C++ for generic emissions already compiled.") else: uu.exception_log('Must compile generic emissions C++...') elif (emitted_pools == 'soil_only') & (sensit_type == 'std'): if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format( cn.c_emis_compile_dst)): uu.print_log("C++ for generic emissions already compiled.") else: uu.exception_log('Must compile soil_only C++...') else: uu.exception_log( 'Pool and/or sensitivity analysis option not valid for gross emissions' ) # Checks whether the canopy cover argument is valid up front. if 'aggregate' in actual_stages: if thresh < 0 or thresh > 99: uu.exception_log( 'Invalid tcd. Please provide an integer between 0 and 99.') else: pass # If the tile_list argument is an s3 folder, the list of tiles in it is created if 's3://' in tile_id_list: tile_id_list = uu.tile_list_s3(tile_id_list, 'std') uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))), "\n") # Otherwise, check that the tile list argument is valid. "all" is the way to specify that all tiles should be processed else: tile_id_list = uu.tile_id_list_check(tile_id_list) # List of output directories and output file name patterns. # The directory list is only used for counting tiles in output folders at the end of the model output_dir_list = [ cn.model_extent_dir, cn.age_cat_IPCC_dir, cn.annual_gain_AGB_IPCC_defaults_dir, cn.annual_gain_BGB_IPCC_defaults_dir, cn.stdev_annual_gain_AGB_IPCC_defaults_dir, cn.removal_forest_type_dir, cn.annual_gain_AGC_all_types_dir, cn.annual_gain_BGC_all_types_dir, cn.annual_gain_AGC_BGC_all_types_dir, cn.stdev_annual_gain_AGC_all_types_dir, cn.gain_year_count_dir, cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_dir ] # Prepends the mangrove and US output directories if mangroves are included if 'annual_removals_mangrove' in actual_stages: output_dir_list = [ cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir, cn.stdev_annual_gain_AGB_mangrove_dir ] + output_dir_list if 'annual_removals_us' in actual_stages: output_dir_list = [ cn.annual_gain_AGC_BGC_natrl_forest_US_dir, cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir ] + output_dir_list # Adds the carbon directories depending on which carbon emitted_pools are being generated: 2000 and/or emissions year if 'carbon_pools' in actual_stages: if 'loss' in carbon_pool_extent: output_dir_list = output_dir_list + [ cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir ] if '2000' in carbon_pool_extent: output_dir_list = output_dir_list + [ cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir, cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir ] # Adds the biomass_soil output directories or the soil_only output directories depending on the model run if 'gross_emissions' in actual_stages: if emitted_pools == 'biomass_soil': output_dir_list = output_dir_list + [ cn.gross_emis_commod_biomass_soil_dir, cn.gross_emis_shifting_ag_biomass_soil_dir, 
cn.gross_emis_forestry_biomass_soil_dir, cn.gross_emis_wildfire_biomass_soil_dir, cn.gross_emis_urban_biomass_soil_dir, cn.gross_emis_no_driver_biomass_soil_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_dir, cn.gross_emis_co2_only_all_drivers_biomass_soil_dir, cn.gross_emis_non_co2_all_drivers_biomass_soil_dir, cn.gross_emis_nodes_biomass_soil_dir ] else: output_dir_list = output_dir_list + [ cn.gross_emis_commod_soil_only_dir, cn.gross_emis_shifting_ag_soil_only_dir, cn.gross_emis_forestry_soil_only_dir, cn.gross_emis_wildfire_soil_only_dir, cn.gross_emis_urban_soil_only_dir, cn.gross_emis_no_driver_soil_only_dir, cn.gross_emis_all_gases_all_drivers_soil_only_dir, cn.gross_emis_co2_only_all_drivers_soil_only_dir, cn.gross_emis_non_co2_all_drivers_soil_only_dir, cn.gross_emis_nodes_soil_only_dir ] output_dir_list = output_dir_list + [ cn.net_flux_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_dir, cn.net_flux_per_pixel_dir ] # Output patterns aren't actually used in the script-- here just for reference. output_pattern_list = [ cn.pattern_model_extent, cn.pattern_age_cat_IPCC, cn.pattern_annual_gain_AGB_IPCC_defaults, cn.pattern_annual_gain_BGB_IPCC_defaults, cn.pattern_stdev_annual_gain_AGB_IPCC_defaults, cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types, cn.pattern_annual_gain_BGC_all_types, cn.pattern_annual_gain_AGC_BGC_all_types, cn.pattern_stdev_annual_gain_AGC_all_types, cn.pattern_gain_year_count, cn.pattern_cumul_gain_AGCO2_all_types, cn.pattern_cumul_gain_BGCO2_all_types, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types ] # Prepends the mangrove and US output pattern if mangroves are included if 'annual_removals_mangrove' in actual_stages: output_pattern_list = [ cn.pattern_annual_gain_AGB_mangrove, cn.pattern_annual_gain_BGB_mangrove, cn.pattern_stdev_annual_gain_AGB_mangrove ] + output_pattern_list if 'annual_removals_us' in actual_stages: output_pattern_list = [ cn.pattern_annual_gain_AGC_BGC_natrl_forest_US, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US ] + output_pattern_list # Adds the soil carbon patterns depending on which carbon emitted_pools are being generated: 2000 and/or emissions year if 'carbon_pools' in actual_stages: if 'loss' in carbon_pool_extent: output_pattern_list = output_pattern_list + [ cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000, cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year ] if '2000' in carbon_pool_extent: output_pattern_list = output_pattern_list + [ cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000, cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000 ] # Adds the biomass_soil output patterns or the soil_only output directories depending on the model run if 'gross_emissions' in actual_stages: if emitted_pools == 'biomass_soil': output_pattern_list = output_pattern_list + [ cn.pattern_gross_emis_commod_biomass_soil, cn.pattern_gross_emis_shifting_ag_biomass_soil, cn.pattern_gross_emis_forestry_biomass_soil, cn.pattern_gross_emis_wildfire_biomass_soil, cn.pattern_gross_emis_urban_biomass_soil, cn.pattern_gross_emis_no_driver_biomass_soil, cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil, cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil ] else: output_pattern_list = output_pattern_list + [ cn.pattern_gross_emis_commod_soil_only, 
cn.pattern_gross_emis_shifting_ag_soil_only, cn.pattern_gross_emis_forestry_soil_only, cn.pattern_gross_emis_wildfire_soil_only, cn.pattern_gross_emis_urban_soil_only, cn.pattern_gross_emis_no_driver_soil_only, cn.pattern_gross_emis_all_gases_all_drivers_soil_only, cn.pattern_gross_emis_co2_only_all_drivers_soil_only, cn.pattern_gross_emis_non_co2_all_drivers_soil_only, cn.pattern_gross_emis_nodes_soil_only ] output_pattern_list = output_pattern_list + [ cn.pattern_net_flux, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel, cn.pattern_net_flux_per_pixel ] # Creates tiles of annual AGB and BGB gain rate and AGB stdev for mangroves using the standard model # removal function if 'annual_removals_mangrove' in actual_stages: uu.print_log(":::::Creating tiles of annual removals for mangrove") start = datetime.datetime.now() mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_mangrove:", elapsed_time, "\n") # Creates tiles of annual AGC+BGC gain rate and AGC stdev for US-specific removals using the standard model # removal function if 'annual_removals_us' in actual_stages: uu.print_log(":::::Creating tiles of annual removals for US") start = datetime.datetime.now() mp_US_removal_rates(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_us:", elapsed_time, "\n") # Creates model extent tiles if 'model_extent' in actual_stages: uu.print_log(":::::Creating tiles of model extent") start = datetime.datetime.now() mp_model_extent(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for model_extent:", elapsed_time, "\n", "\n") # Creates age category tiles for natural forests if 'forest_age_category_IPCC' in actual_stages: uu.print_log( ":::::Creating tiles of forest age categories for IPCC removal rates" ) start = datetime.datetime.now() mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for forest_age_category_IPCC:", elapsed_time, "\n", "\n") # Creates tiles of annual AGB and BGB gain rates using IPCC Table 4.9 defaults if 'annual_removals_IPCC' in actual_stages: uu.print_log( ":::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults" ) start = datetime.datetime.now() mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_IPCC:", elapsed_time, "\n", "\n") # Creates tiles of annual AGC and BGC removal factors for the entire model, combining removal factors from all forest types if 'annual_removals_all_forest_types' in actual_stages: uu.print_log( ":::::Creating tiles of annual aboveground and belowground removal rates for all forest types" ) start = datetime.datetime.now() mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log( ":::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types:", elapsed_time, 
"\n", "\n") # Creates tiles of the number of years of removals for all model pixels (across all forest types) if 'gain_year_count' in actual_stages: uu.print_log( ":::::Freeing up memory for gain year count creation by deleting unneeded tiles" ) tiles_to_delete = [] tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_BGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_all_types))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log( ":::::Creating tiles of gain year count for all removal pixels") start = datetime.datetime.now() mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for gain_year_count:", elapsed_time, "\n", "\n") # Creates tiles of gross removals for all forest types (aboveground, belowground, and above+belowground) if 'gross_removals_all_forest_types' in actual_stages: uu.print_log( ":::::Creating gross removals for all forest types combined (above + belowground) tiles'" ) start = datetime.datetime.now() mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log( ":::::Processing time for gross_removals_all_forest_types:", elapsed_time, "\n", "\n") # Creates carbon emitted_pools in loss year if 
'carbon_pools' in actual_stages: uu.print_log( ":::::Freeing up memory for carbon pool creation by deleting unneeded tiles" ) tiles_to_delete = [] tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_model_extent))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_BGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGC_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_annual_gain_AGC_BGC_all_types))) tiles_to_delete.extend(glob.glob('*growth_years*tif')) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_gain_year_count))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_BGCO2_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_cumul_gain_AGCO2_BGCO2_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGB_mangrove))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_stdev_annual_gain_AGC_all_types))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating carbon pool tiles") start = datetime.datetime.now() mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for create_carbon_pools:", elapsed_time, "\n", "\n") # Creates gross emissions tiles by driver, gas, and all emissions combined if 'gross_emissions' in actual_stages: uu.print_log( ":::::Freeing up memory for gross emissions creation by deleting unneeded tiles" ) tiles_to_delete = [] # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) tiles_to_delete.extend(glob.glob('*{}*tif'.format( cn.pattern_AGC_2000))) 
tiles_to_delete.extend(glob.glob('*{}*tif'.format( cn.pattern_BGC_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_deadwood_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_litter_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_total_C_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_elevation))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") uu.print_log(tiles_to_delete) for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating gross emissions tiles") start = datetime.datetime.now() mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for gross_emissions:", elapsed_time, "\n", "\n") # Creates net flux tiles (gross emissions - gross removals) if 'net_flux' in actual_stages: uu.print_log( ":::::Freeing up memory for net flux creation by deleting unneeded tiles" ) tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_commod_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_shifting_ag_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_forestry_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_wildfire_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_urban_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_no_driver_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_gross_emis_nodes_biomass_soil))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_AGC_emis_year))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_BGC_emis_year))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_deadwood_emis_year_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_litter_emis_year_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_soil_C_emis_year_2000))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_total_C_emis_year))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_peat_mask))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) tiles_to_delete.extend( glob.glob('*{}*tif'.format( cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_drivers))) 
tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_climate_zone))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_bor_tem_trop_processed))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_burn_year))) tiles_to_delete.extend( glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating net flux tiles") start = datetime.datetime.now() mp_net_flux(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for net_flux:", elapsed_time, "\n", "\n") # Aggregates gross emissions, gross removals, and net flux to coarser resolution. # For sensitivity analyses, creates percent difference and sign change maps compared to standard model net flux. if 'aggregate' in actual_stages: uu.print_log(":::::Creating 4x4 km aggregate maps") start = datetime.datetime.now() mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux=std_net_flux, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for aggregate:", elapsed_time, "\n", "\n") # Converts gross emissions, gross removals and net flux from per hectare rasters to per pixel rasters if 'per_pixel_results' in actual_stages: uu.print_log(":::::Creating per pixel versions of main model outputs") start = datetime.datetime.now() mp_output_per_pixel(sensit_type, tile_id_list, run_date=run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for per pixel raster creation:", elapsed_time, "\n", "\n") uu.print_log(":::::Counting tiles output to each folder") # Modifies output directory names to make them match those used during the model run. # The tiles in each of these directories and counted and logged. # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Modifying output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) # Changes the date in the output directories. This date was used during the model run. # This replaces the date in constants_and_names. if run_date: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) for output in output_dir_list: tile_count = uu.count_tiles_s3(output) uu.print_log("Total tiles in", output, ": ", tile_count) script_end = datetime.datetime.now() script_elapsed_time = script_end - script_start uu.print_log(":::::Processing time for entire run:", script_elapsed_time, "\n")
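# --- Illustrative sketches (not part of the model code). Two patterns repeat throughout the
# orchestration above: (1) each stage is wrapped in the same start/end timing and storage-check
# boilerplate, and (2) unneeded tiles are deleted by extending a list with glob matches for many
# file name patterns. The helpers below show each pattern once; run_stage() and
# delete_tiles_matching() are hypothetical names, not functions in the repository.

import datetime
import glob
import os


def run_stage(stage_name, stage_fn, *args, **kwargs):
    """Runs one model stage and reports how long it took (sketch of the repeated pattern above)."""
    print(":::::Starting {}".format(stage_name))
    start = datetime.datetime.now()
    stage_fn(*args, **kwargs)   # e.g. mp_model_extent(sensit_type, tile_id_list, run_date=run_date)
    elapsed_time = datetime.datetime.now() - start
    print(":::::Processing time for {}: {}".format(stage_name, elapsed_time))


def delete_tiles_matching(patterns):
    """Deletes local .tif tiles whose names contain any of the given patterns (sketch only)."""
    tiles_to_delete = []
    for pattern in patterns:
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(pattern)))
    print("  Deleting {} tiles...".format(len(tiles_to_delete)))
    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)

# Hypothetical usage:
# run_stage('model_extent', mp_model_extent, sensit_type, tile_id_list, run_date=run_date)
# delete_tiles_matching([cn.pattern_mangrove_biomass_2000, cn.pattern_age_cat_IPCC])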
if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Create statistics for each model tile')
    parser.add_argument('--model-type', '-t', required=True,
                        help='{}'.format(cn.model_type_arg_help))
    parser.add_argument('--tile_id_list', '-l', required=True,
                        help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list

    # Create the output log
    uu.initiate_log(sensit_type=sensit_type, tile_id_list=tile_id_list)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)
    tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_tile_statistics(sensit_type=sensit_type, tile_id_list=tile_id_list)
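# --- Illustrative sketch (not part of the model code): uu.tile_id_list_check() validates the -l
# argument, which the help text says must be 'all' or a comma-separated list of ids such as
# 00N_110E. A plausible check written from that help text alone is shown below; the real helper
# (for example, expanding 'all' into every tile on s3) may do more.

import re


def tile_id_list_check(tile_id_list_arg):
    """Returns a list of validated tile ids, or the string 'all' (sketch only)."""
    if tile_id_list_arg == 'all':
        return 'all'
    tile_ids = tile_id_list_arg.split(',')
    for tile_id in tile_ids:
        if not re.fullmatch(r'\d{2}[NS]_\d{3}[EW]', tile_id):
            raise ValueError('Invalid tile id: {}'.format(tile_id))
    return tile_ids

# tile_id_list_check('00N_110E,00N_120E') -> ['00N_110E', '00N_120E']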
def main(): # Create the output log uu.initiate_log() os.chdir(cn.docker_base_dir) # Files to download for this script. download_dict = { cn.gain_dir: [cn.pattern_gain], cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults] } # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist. tile_id_list = uu.tile_list_s3(cn.annual_gain_AGB_IPCC_defaults_dir) # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles # tile_id_list = ['50N_130W'] # test tiles # List of output directories and output file name patterns output_dir_list = [ cn.US_annual_gain_AGB_natrl_forest_dir, cn.US_annual_gain_BGB_natrl_forest_dir ] output_pattern_list = [ cn.pattern_US_annual_gain_AGB_natrl_forest, cn.pattern_US_annual_gain_BGB_natrl_forest ] # By definition, this script is for US-specific removals sensit_type = 'US_removals' # Counts how many processed FIA region tiles there are on s3 already. 16 tiles cover the continental US. FIA_regions_tile_count = uu.count_tiles_s3(cn.FIA_regions_processed_dir) # Only creates FIA region tiles if they don't already exist on s3. if FIA_regions_tile_count == 16: uu.print_log("FIA region tiles already created. Copying to s3 now...") uu.s3_flexible_download(cn.FIA_regions_processed_dir, cn.pattern_FIA_regions_processed, cn.docker_base_dir, 'std', 'all') else: uu.print_log( "FIA region tiles do not exist. Creating tiles, then copying to s3 for future use..." ) uu.s3_file_download( os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, 'std') cmd = ['unzip', '-o', '-j', cn.name_FIA_regions_raw] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: uu.log_subprocess_output(process.stdout) # Converts the region shapefile to Hansen tiles pool = multiprocessing.Pool(int(cn.count / 2)) pool.map(US_removal_rates.prep_FIA_regions, tile_id_list) # List of FIA region tiles on the spot machine. Only this list is used for the rest of the script. US_tile_list = uu.tile_list_spot_machine( cn.docker_base_dir, '{}.tif'.format(cn.pattern_FIA_regions_processed)) US_tile_id_list = [i[0:8] for i in US_tile_list] # US_tile_id_list = ['50N_130W'] # For testing uu.print_log(US_tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(US_tile_id_list))) + "\n") # Counts how many processed forest age category tiles there are on s3 already. 16 tiles cover the continental US. US_age_tile_count = uu.count_tiles_s3(cn.US_forest_age_cat_processed_dir) # Only creates FIA forest age category tiles if they don't already exist on s3. if US_age_tile_count == 16: uu.print_log( "Forest age category tiles already created. Copying to spot machine now..." ) uu.s3_flexible_download(cn.US_forest_age_cat_processed_dir, cn.pattern_US_forest_age_cat_processed, '', 'std', US_tile_id_list) else: uu.print_log( "Southern forest age category tiles do not exist. Creating tiles, then copying to s3 for future use..." 
) uu.s3_file_download( os.path.join(cn.US_forest_age_cat_raw_dir, cn.name_US_forest_age_cat_raw), cn.docker_base_dir, 'std') # Converts the national forest age category raster to Hansen tiles source_raster = cn.name_US_forest_age_cat_raw out_pattern = cn.pattern_US_forest_age_cat_processed dt = 'Int16' pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), US_tile_id_list) uu.upload_final_set(cn.US_forest_age_cat_processed_dir, cn.pattern_US_forest_age_cat_processed) # Counts how many processed FIA forest group tiles there are on s3 already. 16 tiles cover the continental US. FIA_forest_group_tile_count = uu.count_tiles_s3( cn.FIA_forest_group_processed_dir) # Only creates FIA forest group tiles if they don't already exist on s3. if FIA_forest_group_tile_count == 16: uu.print_log( "FIA forest group tiles already created. Copying to spot machine now..." ) uu.s3_flexible_download(cn.FIA_forest_group_processed_dir, cn.pattern_FIA_forest_group_processed, '', 'std', US_tile_id_list) else: uu.print_log( "FIA forest group tiles do not exist. Creating tiles, then copying to s3 for future use..." ) uu.s3_file_download( os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, 'std') # Converts the national forest group raster to Hansen forest group tiles source_raster = cn.name_FIA_forest_group_raw out_pattern = cn.pattern_FIA_forest_group_processed dt = 'Byte' pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), US_tile_id_list) uu.upload_final_set(cn.FIA_forest_group_processed_dir, cn.pattern_FIA_forest_group_processed) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, US_tile_id_list) # Table with US-specific removal rates cmd = [ 'aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_base_dir ] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: uu.log_subprocess_output(process.stdout) # Imports the table with the region-group-age AGB removal rates gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate), sheet_name="US_rates_for_model") # Converts gain table from wide to long, so each region-group-age category has its own row gain_table_group_region_by_age = pd.melt( gain_table, id_vars=['FIA_region_code', 'forest_group_code'], value_vars=['growth_young', 'growth_middle', 'growth_old']) gain_table_group_region_by_age = gain_table_group_region_by_age.dropna() # In the forest age category raster, each category has this value age_dict = { 'growth_young': 1000, 'growth_middle': 2000, 'growth_old': 3000 } # Creates a unique value for each forest group-region-age category in the table. # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for # pixels that have Hansen gain (see below). 
gain_table_group_region_age = gain_table_group_region_by_age.replace( {"variable": age_dict}) gain_table_group_region_age[ 'age_cat'] = gain_table_group_region_age['variable'] * 10 gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \ gain_table_group_region_age['forest_group_code']*100 + \ gain_table_group_region_age['FIA_region_code'] # Converts the forest group-region-age codes and corresponding gain rates to a dictionary, # where the key is the unique group-region-age code and the value is the AGB removal rate. gain_table_group_region_age_dict = pd.Series( gain_table_group_region_age.value.values, index=gain_table_group_region_age.group_region_age_combined).to_dict() uu.print_log(gain_table_group_region_age_dict) # Creates a unique value for each forest group-region category using just young forest rates. # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the # forest age category raster. gain_table_group_region = gain_table_group_region_age.drop( gain_table_group_region_age[ gain_table_group_region_age.age_cat != 10000].index) gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \ gain_table_group_region['FIA_region_code'] # Converts the forest group-region codes and corresponding gain rates to a dictionary, # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate. gain_table_group_region_dict = pd.Series( gain_table_group_region.value.values, index=gain_table_group_region.group_region_combined).to_dict() uu.print_log(gain_table_group_region_dict) # count/2 on a m4.16xlarge maxes out at about 230 GB of memory (processing 16 tiles at once), so it's okay on an m4.16xlarge pool = multiprocessing.Pool(int(cn.count / 2)) pool.map( partial( US_removal_rates.US_removal_rate_calc, gain_table_group_region_age_dict=gain_table_group_region_age_dict, gain_table_group_region_dict=gain_table_group_region_dict, output_pattern_list=output_pattern_list, sensit_type=sensit_type), US_tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in US_tile_id_list: # # US_removal_rates.US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_group_region_dict, # output_pattern_list, sensit_type) # Uploads output tiles to s3 for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
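# --- Illustrative sketch (not part of the model code): a tiny worked example of the two steps
# above, the wide-to-long reshape of the removal-rate table and the group-region-age key that is
# built from it. Column names match the table read from the spreadsheet; the region, group, and
# rate values are invented purely for illustration.

import pandas as pd

age_dict = {'growth_young': 1000, 'growth_middle': 2000, 'growth_old': 3000}

wide = pd.DataFrame({
    'FIA_region_code': [3],
    'forest_group_code': [5],
    'growth_young': [2.5],    # example rates, not real values
    'growth_middle': [1.8],
    'growth_old': [0.9],
})

# pd.melt() puts the age column names in 'variable' and the rates in 'value';
# age_dict then maps the names to the 1000/2000/3000 codes used in the age category raster.
long = pd.melt(wide, id_vars=['FIA_region_code', 'forest_group_code'],
               value_vars=['growth_young', 'growth_middle', 'growth_old']).dropna()
long = long.replace({'variable': age_dict})

# The unique key combines age category, forest group, and FIA region:
# age_cat = variable * 10, plus forest_group_code * 100, plus FIA_region_code.
# For the young-forest row above: 1000 * 10 + 5 * 100 + 3 = 10503.
long['age_cat'] = long['variable'] * 10
long['group_region_age_combined'] = (long['age_cat']
                                     + long['forest_group_code'] * 100
                                     + long['FIA_region_code'])
print(pd.Series(long.value.values, index=long.group_region_age_combined).to_dict())
# -> {10503: 2.5, 20503: 1.8, 30503: 0.9}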
action='store_true', help='Disables uploading of outputs to s3') args = parser.parse_args() sensit_type = args.model_type tile_id_list = args.tile_id_list std_net_flux = args.std_net_flux_aggreg thresh = args.tcd_threshold thresh = int(thresh) no_upload = args.no_upload # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True uu.print_log("s3 credentials not found. Uploading to s3 disabled.") # Create the output log uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, thresh=thresh, std_net_flux=std_net_flux, no_upload=no_upload) # Checks whether the sensitivity analysis and tile_id_list arguments are valid uu.check_sensit_type(sensit_type) tile_id_list = uu.tile_id_list_check(tile_id_list) mp_aggregate_results_to_4_km(sensit_type=sensit_type, tile_id_list=tile_id_list, thresh=thresh, std_net_flux=std_net_flux, no_upload=no_upload)
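# --- Illustrative sketch (not part of the model code): several of these entry points disable
# uploads when uu.check_aws_creds() finds no AWS credentials. One plausible way to perform that
# check with boto3 is shown below; the real helper may test credentials differently.

import boto3


def check_aws_creds():
    """Returns True if boto3 can resolve AWS credentials from the environment (sketch only)."""
    return boto3.Session().get_credentials() is not None

# no_upload = not check_aws_creds()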
    uu.check_storage()


if __name__ == '__main__':

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description='Create tiles of carbon pools over the specified extent')
    parser.add_argument('--model-type', '-t', required=True,
                        help='{}'.format(cn.model_type_arg_help))
    parser.add_argument('--tile_id_list', '-l', required=True,
                        help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.')
    parser.add_argument('--carbon_pool_extent', '-ce', required=True,
                        help='Extent over which carbon pools should be calculated: loss, 2000, loss,2000, or 2000,loss')
    parser.add_argument('--run-date', '-d', required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list
    carbon_pool_extent = args.carbon_pool_extent  # Tells the pool creation functions whether to calculate carbon pools as of the year of loss, as of 2000, or both
    run_date = args.run_date

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date,
                    carbon_pool_extent=carbon_pool_extent)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)
    tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_create_carbon_pools(sensit_type=sensit_type, tile_id_list=tile_id_list,
                           carbon_pool_extent=carbon_pool_extent, run_date=run_date)
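# --- Illustrative sketch (not part of the model code): the -ce argument accepts 'loss', '2000',
# 'loss,2000', or '2000,loss'. A plausible check that turns it into a list of extents is shown
# below; the real mp_create_carbon_pools() may validate the argument elsewhere or differently.


def parse_carbon_pool_extent(carbon_pool_extent):
    """Splits and validates the carbon pool extent argument (sketch only)."""
    extents = carbon_pool_extent.split(',')
    if any(extent not in ('loss', '2000') for extent in extents):
        raise ValueError("carbon_pool_extent must be 'loss', '2000', 'loss,2000', or '2000,loss'")
    return extents

# parse_carbon_pool_extent('loss,2000') -> ['loss', '2000']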
                        help='Extent over which carbon pools should be calculated: loss, 2000, loss,2000, or 2000,loss')
    parser.add_argument('--run-date', '-d', required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    parser.add_argument('--no-upload', '-nu', action='store_true',
                        help='Disables uploading of outputs to s3')
    parser.add_argument('--save-intermediates', '-si', action='store_true',
                        help='Saves intermediate model outputs rather than deleting them to save storage')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list
    carbon_pool_extent = args.carbon_pool_extent  # Tells the pool creation functions whether to calculate carbon pools as of the year of loss, as of 2000, or both
    run_date = args.run_date
    no_upload = args.no_upload
    save_intermediates = args.save_intermediates

    # Disables upload to s3 if no AWS credentials are found in environment
    if not uu.check_aws_creds():
        no_upload = True
        uu.print_log("s3 credentials not found. Uploading to s3 disabled.")

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date,
                    carbon_pool_extent=carbon_pool_extent, no_upload=no_upload,
                    save_intermediates=save_intermediates)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)
    tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_create_carbon_pools(sensit_type=sensit_type, tile_id_list=tile_id_list,
                           carbon_pool_extent=carbon_pool_extent, run_date=run_date,
                           no_upload=no_upload, save_intermediates=save_intermediates)
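# --- Illustrative sketch (not part of the model code): --save-intermediates keeps intermediate
# outputs instead of deleting them to free storage. A plausible way the flag could gate the
# tile-deletion step is shown below; maybe_free_storage() is a hypothetical name, not a function
# in the repository.

import glob
import os


def maybe_free_storage(patterns, save_intermediates):
    """Deletes intermediate tiles unless the user asked to keep them (sketch only)."""
    if save_intermediates:
        print("Keeping intermediate outputs (--save-intermediates)")
        return
    for pattern in patterns:
        for tile_to_delete in glob.glob('*{}*tif'.format(pattern)):
            os.remove(tile_to_delete)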