                        '-l', required=True,
                        help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.')
    parser.add_argument('--run-date', '-d', required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list
    run_date = args.run_date

    # Disables upload to s3 if no AWS credentials are found in the environment
    if not uu.check_aws_creds():
        no_upload = True
        uu.print_log("s3 credentials not found. Uploading to s3 disabled.")

    # Creates the output log
    uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)
    tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_annual_gain_rate_mangrove(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date)
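# Illustrative sketch (not part of the model): the --tile-id-list argument above accepts either 'all' or a
# comma-separated list of tile ids like '00N_110E'. The helper below shows one way such a string could be
# validated with a regex. The real check is uu.tile_id_list_check; this standalone function and its name are
# assumptions used only to make the format in the help text concrete.
def _example_parse_tile_id_list(arg):
    import re
    if arg == 'all':
        return 'all'
    tile_ids = arg.split(',')
    for tile_id in tile_ids:
        # Tile ids are <2-digit latitude><N|S>_<3-digit longitude><E|W>, e.g. 00N_110E
        if not re.fullmatch(r'\d{2}[NS]_\d{3}[EW]', tile_id):
            raise ValueError('Invalid tile id: {}'.format(tile_id))
    return tile_ids

# Example: _example_parse_tile_id_list('00N_110E,00N_120E') returns ['00N_110E', '00N_120E']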
def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=None): os.chdir(cn.docker_base_dir) pd.options.mode.chained_assignment = None # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # Lists the tiles that have both mangrove biomass and FAO ecozone information because both of these are necessary for # calculating mangrove gain mangrove_biomass_tile_list = uu.tile_list_s3( cn.mangrove_biomass_2000_dir) ecozone_tile_list = uu.tile_list_s3(cn.cont_eco_dir) tile_id_list = list( set(mangrove_biomass_tile_list).intersection(ecozone_tile_list)) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") download_dict = { cn.cont_eco_dir: [cn.pattern_cont_eco_processed], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000] } # List of output directories and output file name patterns output_dir_list = [ cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir, cn.stdev_annual_gain_AGB_mangrove_dir ] output_pattern_list = [ cn.pattern_annual_gain_AGB_mangrove, cn.pattern_annual_gain_BGB_mangrove, cn.pattern_stdev_annual_gain_AGB_mangrove ] # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates cmd = [ 'aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir ] uu.log_subprocess_output_full(cmd) ### To make the removal factor dictionaries # Imports the table with the ecozone-continent codes and the carbon gain rates gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="mangrove gain, for model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') # Creates belowground:aboveground biomass ratio dictionary for the three mangrove types, where the keys correspond to # the "mangType" field in the gain rate spreadsheet. # If the assignment of mangTypes to ecozones changes, that column in the spreadsheet may need to change and the # keys in this dictionary would need to change accordingly. 
type_ratio_dict = { '1': cn.below_to_above_trop_dry_mang, '2': cn.below_to_above_trop_wet_mang, '3': cn.below_to_above_subtrop_mang } type_ratio_dict_final = { int(k): float(v) for k, v in list(type_ratio_dict.items()) } # Applies the belowground:aboveground biomass ratios for the three mangrove types to the annual aboveground gain rates to get # a column of belowground annual gain rates by mangrove type gain_table_simplified['BGB_AGB_ratio'] = gain_table_simplified[ 'mangType'].map(type_ratio_dict_final) gain_table_simplified[ 'BGB_annual_rate'] = gain_table_simplified.AGB_gain_tons_ha_yr * gain_table_simplified.BGB_AGB_ratio # Converts the continent-ecozone codes and corresponding gain rates to dictionaries for aboveground and belowground gain rates gain_above_dict = pd.Series( gain_table_simplified.AGB_gain_tons_ha_yr.values, index=gain_table_simplified.gainEcoCon).to_dict() gain_below_dict = pd.Series( gain_table_simplified.BGB_annual_rate.values, index=gain_table_simplified.gainEcoCon).to_dict() # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent) gain_above_dict[0] = 0 gain_below_dict[0] = 0 # Converts all the keys (continent-ecozone codes) to float type gain_above_dict = { float(key): value for key, value in gain_above_dict.items() } gain_below_dict = { float(key): value for key, value in gain_below_dict.items() } ### To make the removal factor standard deviation dictionary # Imports the table with the ecozone-continent codes and the carbon gain rates stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="mangrove stdev, for model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon', keep='first') # Converts the continent-ecozone codes and corresponding gain rate standard deviations to dictionaries for aboveground and belowground gain rate stdevs stdev_dict = pd.Series( stdev_table_simplified.AGB_gain_stdev_tons_ha_yr.values, index=stdev_table_simplified.gainEcoCon).to_dict() # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent) stdev_dict[0] = 0 # Converts all the keys (continent-ecozone codes) to float type stdev_dict = {float(key): value for key, value in stdev_dict.items()} # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html # Ran with 18 processors on r4.16xlarge (430 GB memory peak) if cn.count == 96: processes = 20 #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak else: processes = 4 uu.print_log('Mangrove annual gain rate max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(annual_gain_rate_mangrove.annual_gain_rate, sensit_type=sensit_type, output_pattern_list=output_pattern_list, gain_above_dict=gain_above_dict, gain_below_dict=gain_below_dict, stdev_dict=stdev_dict), tile_id_list) pool.close() pool.join() # # For single processor use # for tile in tile_id_list: # # annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list, # gain_above_dict, gain_below_dict, stdev_dict) for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
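# Minimal sketch (toy codes and rates, not used by the model) of the dictionary-building pattern above:
# a two-column table of continent-ecozone codes and removal rates is turned into a lookup dictionary with
# pd.Series(values, index=keys).to_dict(), a 0 entry is added for pixels outside any continent-ecozone,
# and the keys are cast to float to match the raster values they are compared against.
def _example_rate_dict_from_table():
    import pandas as pd
    # Toy stand-in for gain_table_simplified; the codes and rates are made up for illustration only
    table = pd.DataFrame({'gainEcoCon': [1401, 2405], 'AGB_gain_tons_ha_yr': [2.5, 1.8]})
    rate_dict = pd.Series(table.AGB_gain_tons_ha_yr.values, index=table.gainEcoCon).to_dict()
    rate_dict[0] = 0                                          # pixels with no continent-ecozone code
    rate_dict = {float(k): v for k, v in rate_dict.items()}   # float keys, as in the code above
    return rate_dict   # {1401.0: 2.5, 2405.0: 1.8, 0.0: 0}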
def mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date=None, no_upload=None): os.chdir(cn.docker_base_dir) pd.options.mode.chained_assignment = None # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Files to download for this script. download_dict = { cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC], cn.cont_eco_dir: [cn.pattern_cont_eco_processed] } # List of output directories and output file name patterns output_dir_list = [ cn.annual_gain_AGB_IPCC_defaults_dir, cn.annual_gain_BGB_IPCC_defaults_dir, cn.stdev_annual_gain_AGB_IPCC_defaults_dir ] output_pattern_list = [ cn.pattern_annual_gain_AGB_IPCC_defaults, cn.pattern_annual_gain_BGB_IPCC_defaults, cn.pattern_stdev_annual_gain_AGB_IPCC_defaults ] # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) if uu.check_aws_creds(): # Table with IPCC Table 4.9 default gain rates cmd = [ 'aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir ] uu.log_subprocess_output_full(cmd) ### To make the removal factor dictionaries # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0 if sensit_type == 'no_primary_gain': # Imports the table with the ecozone-continent codes and the carbon gain rates gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="natrl fores gain, no_prim_gain") uu.print_log( "Using no_primary_gain IPCC default rates for tile creation") # All other analyses use the standard removal rates else: # Imports the table with the ecozone-continent codes and the biomass gain rates gain_table = pd.read_excel( "{}".format(cn.gain_spreadsheet), sheet_name="natrl fores gain, for std model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') # Converts gain table from wide to long, so each continent-ecozone-age category has its own row gain_table_cont_eco_age = pd.melt(gain_table_simplified, id_vars=['gainEcoCon'], value_vars=[ 'growth_primary', 'growth_secondary_greater_20', 'growth_secondary_less_20' ]) gain_table_cont_eco_age = gain_table_cont_eco_age.dropna() # Creates a table that has just the continent-ecozone combinations for adding to the dictionary. # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel. 
# Assigns removal rate of 0 when there's no age category. gain_table_con_eco_only = gain_table_cont_eco_age gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates( subset='gainEcoCon', keep='first') gain_table_con_eco_only['value'] = 0 gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only[ 'gainEcoCon'] # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value rate_age_dict = { 'growth_secondary_less_20': 10000, 'growth_secondary_greater_20': 20000, 'growth_primary': 30000 } # Creates a unique value for each continent-ecozone-age category gain_table_cont_eco_age = gain_table_cont_eco_age.replace( {"variable": rate_age_dict}) gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age[ 'gainEcoCon'] + gain_table_cont_eco_age['variable'] # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes gain_table_all_combos = pd.concat( [gain_table_con_eco_only, gain_table_cont_eco_age]) # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary gain_table_dict = pd.Series( gain_table_all_combos.value.values, index=gain_table_all_combos.cont_eco_age).to_dict() # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent) gain_table_dict[0] = 0 # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone for key, value in rate_age_dict.items(): gain_table_dict[value] = 0 # Converts all the keys (continent-ecozone-age codes) to float type gain_table_dict = { float(key): value for key, value in gain_table_dict.items() } ### To make the removal factor standard deviation dictionary # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0 if sensit_type == 'no_primary_gain': # Imports the table with the ecozone-continent codes and the carbon gain rates stdev_table = pd.read_excel( "{}".format(cn.gain_spreadsheet), sheet_name="natrl fores stdv, no_prim_gain") uu.print_log( "Using no_primary_gain IPCC default standard deviations for tile creation" ) # All other analyses use the standard removal rates else: # Imports the table with the ecozone-continent codes and the biomass gain rate standard deviations stdev_table = pd.read_excel( "{}".format(cn.gain_spreadsheet), sheet_name="natrl fores stdv, for std model") # Removes rows with duplicate codes (N. and S. America for the same ecozone) stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon', keep='first') # Converts gain table from wide to long, so each continent-ecozone-age category has its own row stdev_table_cont_eco_age = pd.melt(stdev_table_simplified, id_vars=['gainEcoCon'], value_vars=[ 'stdev_primary', 'stdev_secondary_greater_20', 'stdev_secondary_less_20' ]) stdev_table_cont_eco_age = stdev_table_cont_eco_age.dropna() # Creates a table that has just the continent-ecozone combinations for adding to the dictionary. # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel. # Assigns removal rate of 0 when there's no age category. 
stdev_table_con_eco_only = stdev_table_cont_eco_age stdev_table_con_eco_only = stdev_table_con_eco_only.drop_duplicates( subset='gainEcoCon', keep='first') stdev_table_con_eco_only['value'] = 0 stdev_table_con_eco_only['cont_eco_age'] = stdev_table_con_eco_only[ 'gainEcoCon'] # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value stdev_age_dict = { 'stdev_secondary_less_20': 10000, 'stdev_secondary_greater_20': 20000, 'stdev_primary': 30000 } # Creates a unique value for each continent-ecozone-age category stdev_table_cont_eco_age = stdev_table_cont_eco_age.replace( {"variable": stdev_age_dict}) stdev_table_cont_eco_age['cont_eco_age'] = stdev_table_cont_eco_age[ 'gainEcoCon'] + stdev_table_cont_eco_age['variable'] # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes stdev_table_all_combos = pd.concat( [stdev_table_con_eco_only, stdev_table_cont_eco_age]) # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary stdev_table_dict = pd.Series( stdev_table_all_combos.value.values, index=stdev_table_all_combos.cont_eco_age).to_dict() # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent) stdev_table_dict[0] = 0 # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone for key, value in stdev_age_dict.items(): stdev_table_dict[value] = 0 # Converts all the keys (continent-ecozone-age codes) to float type stdev_table_dict = { float(key): value for key, value in stdev_table_dict.items() } # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html if cn.count == 96: if sensit_type == 'biomass_swap': processes = 24 # 24 processors = 590 GB peak else: processes = 30 # 30 processors = 725 GB peak else: processes = 2 uu.print_log('Annual gain rate natural forest max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(annual_gain_rate_IPCC_defaults.annual_gain_rate, sensit_type=sensit_type, gain_table_dict=gain_table_dict, stdev_table_dict=stdev_table_dict, output_pattern_list=output_pattern_list, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # # annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, sensit_type, # gain_table_dict, stdev_table_dict, output_pattern_list, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
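# Minimal sketch (toy numbers, illustration only) of the wide-to-long step used above: pd.melt gives one row
# per continent-ecozone-age combination, the age-category column names are swapped for numeric offsets
# (10000/20000/30000), and adding the offset to the gainEcoCon code yields a unique cont_eco_age key.
def _example_cont_eco_age_codes():
    import pandas as pd
    wide = pd.DataFrame({'gainEcoCon': [1401],
                         'growth_primary': [1.0],
                         'growth_secondary_greater_20': [2.0],
                         'growth_secondary_less_20': [3.0]})
    long = pd.melt(wide, id_vars=['gainEcoCon'],
                   value_vars=['growth_primary', 'growth_secondary_greater_20', 'growth_secondary_less_20'])
    long = long.replace({'variable': {'growth_secondary_less_20': 10000,
                                      'growth_secondary_greater_20': 20000,
                                      'growth_primary': 30000}})
    long['cont_eco_age'] = long['gainEcoCon'] + long['variable']
    # cont_eco_age is now 31401, 21401, 11401: the same ecozone-continent code under three age categories
    return pd.Series(long.value.values, index=long.cont_eco_age).to_dict()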
def mp_US_removal_rates(sensit_type, tile_id_list, run_date): os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': tile_id_list = uu.tile_list_s3(cn.FIA_regions_processed_dir) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Files to download for this script download_dict = { cn.gain_dir: [cn.pattern_gain], cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed], cn.FIA_forest_group_processed_dir: [cn.pattern_FIA_forest_group_processed], cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US] } # List of output directories and output file name patterns output_dir_list = [ cn.annual_gain_AGC_BGC_natrl_forest_US_dir, cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir ] output_pattern_list = [ cn.pattern_annual_gain_AGC_BGC_natrl_forest_US, cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US ] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Table with US-specific removal rates cmd = [ 'aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_base_dir ] uu.log_subprocess_output_full(cmd) ### To make the removal factor dictionaries # Imports the table with the region-group-age AGC+BGC removal rates gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate), sheet_name="US_rates_AGC+BGC") # Converts gain table from wide to long, so each region-group-age category has its own row gain_table_group_region_by_age = pd.melt( gain_table, id_vars=['FIA_region_code', 'forest_group_code'], value_vars=['growth_young', 'growth_middle', 'growth_old']) gain_table_group_region_by_age = gain_table_group_region_by_age.dropna() # In the forest age category raster, each category has this value age_dict = { 'growth_young': 1000, 'growth_middle': 2000, 'growth_old': 3000 } # Creates a unique value for each forest group-region-age category in the table. # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for # pixels that have Hansen gain (see below). gain_table_group_region_age = gain_table_group_region_by_age.replace( {"variable": age_dict}) gain_table_group_region_age[ 'age_cat'] = gain_table_group_region_age['variable'] * 10 gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \ gain_table_group_region_age['forest_group_code']*100 + \ gain_table_group_region_age['FIA_region_code'] # Converts the forest group-region-age codes and corresponding gain rates to a dictionary, # where the key is the unique group-region-age code and the value is the AGB removal rate. 
gain_table_group_region_age_dict = pd.Series( gain_table_group_region_age.value.values, index=gain_table_group_region_age.group_region_age_combined).to_dict() uu.print_log(gain_table_group_region_age_dict) # Creates a unique value for each forest group-region category using just young forest rates. # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the # forest age category raster. gain_table_group_region = gain_table_group_region_age.drop( gain_table_group_region_age[ gain_table_group_region_age.age_cat != 10000].index) gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \ gain_table_group_region['FIA_region_code'] # Converts the forest group-region codes and corresponding gain rates to a dictionary, # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate. gain_table_group_region_dict = pd.Series( gain_table_group_region.value.values, index=gain_table_group_region.group_region_combined).to_dict() uu.print_log(gain_table_group_region_dict) ### To make the removal factor standard deviation dictionaries # Converts gain table from wide to long, so each region-group-age category has its own row stdev_table_group_region_by_age = pd.melt( gain_table, id_vars=['FIA_region_code', 'forest_group_code'], value_vars=['SD_young', 'SD_middle', 'SD_old']) stdev_table_group_region_by_age = stdev_table_group_region_by_age.dropna() # In the forest age category raster, each category has this value stdev_dict = {'SD_young': 1000, 'SD_middle': 2000, 'SD_old': 3000} # Creates a unique value for each forest group-region-age category in the table. # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for # pixels that have Hansen gain (see below). stdev_table_group_region_age = stdev_table_group_region_by_age.replace( {"variable": stdev_dict}) stdev_table_group_region_age[ 'age_cat'] = stdev_table_group_region_age['variable'] * 10 stdev_table_group_region_age['group_region_age_combined'] = stdev_table_group_region_age['age_cat'] + \ stdev_table_group_region_age['forest_group_code'] * 100 + \ stdev_table_group_region_age['FIA_region_code'] # Converts the forest group-region-age codes and corresponding gain rates to a dictionary, # where the key is the unique group-region-age code and the value is the AGB removal rate. stdev_table_group_region_age_dict = pd.Series( stdev_table_group_region_age.value.values, index=stdev_table_group_region_age.group_region_age_combined).to_dict( ) uu.print_log(stdev_table_group_region_age_dict) # Creates a unique value for each forest group-region category using just young forest rates. # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the # forest age category raster. stdev_table_group_region = stdev_table_group_region_age.drop( stdev_table_group_region_age[ stdev_table_group_region_age.age_cat != 10000].index) stdev_table_group_region['group_region_combined'] = stdev_table_group_region['forest_group_code'] * 100 + \ stdev_table_group_region['FIA_region_code'] # Converts the forest group-region codes and corresponding gain rates to a dictionary, # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate. 
    stdev_table_group_region_dict = pd.Series(stdev_table_group_region.value.values,
                                              index=stdev_table_group_region.group_region_combined).to_dict()
    uu.print_log(stdev_table_group_region_dict)

    if cn.count == 96:
        processes = 68  # 68 processors (only 16 tiles though) = 310 GB peak
    else:
        processes = 24
    uu.print_log('US natural forest AGC+BGC removal rate max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(US_removal_rates.US_removal_rate_calc,
                     gain_table_group_region_age_dict=gain_table_group_region_age_dict,
                     gain_table_group_region_dict=gain_table_group_region_dict,
                     stdev_table_group_region_age_dict=stdev_table_group_region_age_dict,
                     stdev_table_group_region_dict=stdev_table_group_region_dict,
                     output_pattern_list=output_pattern_list),
             tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id,
    #                                           gain_table_group_region_age_dict,
    #                                           gain_table_group_region_dict,
    #                                           stdev_table_group_region_age_dict,
    #                                           stdev_table_group_region_dict,
    #                                           output_pattern_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
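# Worked example (toy numbers, illustration only) of the code arithmetic above: the age offset
# (variable * 10 = 10000/20000/30000), the forest group code scaled by 100, and the FIA region code are
# summed, so every group-region-age combination maps to a distinct integer key as long as the group and
# region codes stay below 100.
def _example_group_region_age_code(age_value=1000, forest_group_code=5, fia_region_code=3):
    age_cat = age_value * 10                                   # growth_young (1000) -> 10000
    combined = age_cat + forest_group_code * 100 + fia_region_code
    return age_cat, combined                                   # (10000, 10503) for the defaults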
def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date=None, no_upload=None): os.chdir(cn.docker_base_dir) tile_id_list_outer = tile_id_list # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list_outer == 'all': # List of tiles to run in the model tile_id_list_outer = uu.tile_list_s3(cn.net_flux_dir, sensit_type) uu.print_log(tile_id_list_outer) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list_outer))) + "\n") # Files to download for this script download_dict = { cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil], cn.net_flux_dir: [cn.pattern_net_flux] } # List of output directories and output file name patterns. # Outputs must be in the same order as the download dictionary above, and then follow the same order for all outputs. # Currently, it's: per pixel full extent, per hectare forest extent, per pixel forest extent. output_dir_list = [ cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir, cn. gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir, cn. gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir, cn.net_flux_per_pixel_full_extent_dir, cn.net_flux_forest_extent_dir, cn.net_flux_per_pixel_forest_extent_dir ] output_pattern_list = [ cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent, cn. pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent, cn. pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent, cn.pattern_net_flux_per_pixel_full_extent, cn.pattern_net_flux_forest_extent, cn.pattern_net_flux_per_pixel_forest_extent ] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): # Pixel area tiles-- necessary for calculating per pixel values uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_base_dir, sensit_type, tile_id_list_outer) # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for masking to forest extent uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir, sensit_type, tile_id_list_outer) uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir, sensit_type, tile_id_list_outer) uu.s3_flexible_download(cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000, cn.docker_base_dir, sensit_type, tile_id_list_outer) uu.print_log("Model outputs to process are:", download_dict) # If the model run isn't the standard one, the output directory is changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. 
if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Iterates through input tile sets for key, values in download_dict.items(): # Sets the directory and pattern for the input being processed input_dir = key input_pattern = values[0] # If a full model run is specified, the correct set of tiles for the particular script is listed. # A new list is named so that tile_id_list stays as the command line argument. if tile_id_list == 'all': # List of tiles to run in the model tile_id_list_input = uu.tile_list_s3(input_dir, sensit_type) else: tile_id_list_input = tile_id_list_outer uu.print_log(tile_id_list_input) uu.print_log("There are {} tiles to process".format( str(len(tile_id_list_input))) + "\n") # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): uu.print_log("Downloading tiles from", input_dir) uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir, sensit_type, tile_id_list_input) # Blank list of output patterns, populated below output_patterns = [] # Matches the output patterns with the input pattern. # This requires that the output patterns be grouped by input pattern and be in the order described in # the comment above. if "gross_removals" in input_pattern: output_patterns = output_pattern_list[0:3] elif "gross_emis" in input_pattern: output_patterns = output_pattern_list[3:6] elif "net_flux" in input_pattern: output_patterns = output_pattern_list[6:9] else: uu.exception_log( no_upload, "No output patterns found for input pattern. Please check.") uu.print_log("Input pattern:", input_pattern) uu.print_log("Output patterns:", output_patterns) # Gross removals: 20 processors = >740 GB peak; 15 = 570 GB peak; 17 = 660 GB peak; 18 = 670 GB peak # Gross emissions: 17 processors = 660 GB peak; 18 = 710 GB peak if cn.count == 96: processes = 18 else: processes = 2 uu.print_log( "Creating derivative outputs for {0} with {1} processors...". format(input_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map( partial(create_supplementary_outputs.create_supplementary_outputs, input_pattern=input_pattern, output_patterns=output_patterns, sensit_type=sensit_type, no_upload=no_upload), tile_id_list_input) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list_input: # create_supplementary_outputs.create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit_type, no_upload) # Checks the two forest extent output tiles created from each input tile for whether there is data in them. # Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent # version may not have pixels in the forest extent version. for output_pattern in output_patterns[1:3]: if cn.count <= 2: # For local tests processes = 1 uu.print_log( "Checking for empty tiles of {0} pattern with {1} processors using light function..." .format(output_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map( partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list_input) pool.close() pool.join() else: processes = 55 # 50 processors = 560 GB peak for gross removals; 55 = XXX GB peak uu.print_log( "Checking for empty tiles of {0} pattern with {1} processors..." 
                             .format(output_pattern, processes))
                pool = multiprocessing.Pool(processes)
                pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern),
                         tile_id_list_input)
                pool.close()
                pool.join()

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:
        for i in range(0, len(output_dir_list)):
            uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
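# Minimal numpy sketch (assumption-labeled, not the repo's implementation) of the per-pixel supplementary
# outputs described above: a per-hectare flux raster is multiplied by a pixel-area raster, assumed here to be
# in square meters, divided by 10,000 m2/ha, giving the flux per pixel. The real per-tile work happens in
# create_supplementary_outputs.create_supplementary_outputs; this shows only the core arithmetic.
def _example_per_ha_to_per_pixel(per_ha_array, pixel_area_m2_array):
    import numpy as np
    per_ha = np.asarray(per_ha_array, dtype='float32')
    pixel_area_ha = np.asarray(pixel_area_m2_array, dtype='float32') / 10000.0
    return per_ha * pixel_area_ha

# Example: a pixel with 1 Mg CO2e/ha that covers 767 m2 holds 1 * 767/10000 = 0.0767 Mg CO2e/pixel.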
def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = None, no_upload = True): os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # No point in making gain year count tiles for tiles that don't have annual removals tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type) uu.print_log(tile_id_list) uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Files to download for this script. 'true'/'false' says whether the input directory and pattern should be # changed for a sensitivity analysis. This does not need to change based on what run is being done; # this assignment should be true for all sensitivity analyses and the standard model. download_dict = { cn.gain_dir: [cn.pattern_gain], cn.model_extent_dir: [cn.pattern_model_extent] } # Adds the correct loss tile to the download dictionary depending on the model run if sensit_type == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] elif sensit_type == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] output_dir_list = [cn.gain_year_count_dir] output_pattern_list = [cn.pattern_gain_year_count] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] # Creates gain year count tiles using only pixels that had only loss # count/3 maxes out at about 300 GB if cn.count == 96: processes = 90 # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak else: processes = int(cn.count/2) uu.print_log('Gain year count loss only pixels max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) if cn.count == 96: processes = 90 # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak else: processes = int(cn.count/2) uu.print_log('Gain year count gain only pixels max processors=', processes) pool = multiprocessing.Pool(processes) if sensit_type == 'maxgain': # Creates gain year count tiles using only pixels that had only gain pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) if sensit_type == 'legal_Amazon_loss': uu.print_log("Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. 
Skipping this step.") else: # Creates gain year count tiles using only pixels that had only gain pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) # Creates gain year count tiles using only pixels that had neither loss nor gain pixels if cn.count == 96: processes = 90 # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak else: processes = int(cn.count/2) uu.print_log('Gain year count no change pixels max processors=', processes) pool = multiprocessing.Pool(processes) if sensit_type == 'legal_Amazon_loss': pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) else: pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) if cn.count == 96: processes = 90 # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak else: processes = int(cn.count/2) uu.print_log('Gain year count loss & gain pixels max processors=', processes) pool = multiprocessing.Pool(processes) if sensit_type == 'maxgain': # Creates gain year count tiles using only pixels that had only gain pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) else: # Creates gain year count tiles using only pixels that had only gain pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) # Combines the four above gain year count tiles for each Hansen tile into a single output tile if cn.count == 96: processes = 84 # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak elif cn.count < 4: processes = 1 else: processes = int(cn.count/4) uu.print_log('Gain year count gain merge all combos max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge, pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id, no_upload) # # for tile_id in tile_id_list: # if sensit_type == 'maxgain': # gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id, no_upload) # else: # gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id, no_upload) # # for tile_id in tile_id_list: # gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id, no_upload) # # for tile_id in tile_id_list: # if sensit_type == 'maxgain': # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id, no_upload) # else: # gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id, no_upload) # # for tile_id in tile_id_list: # gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: # Intermediate output tiles for checking outputs uu.upload_final_set(output_dir_list[0], "growth_years_loss_only") uu.upload_final_set(output_dir_list[0], "growth_years_gain_only") uu.upload_final_set(output_dir_list[0], "growth_years_no_change") 
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_and_gain")

        # This is the final output used later in the model
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
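# Hedged sketch (not the model's actual merge, which lives in
# gain_year_count_all_forest_types.create_gain_year_count_merge): the four intermediates uploaded above --
# loss-only, gain-only, no-change, and loss-and-gain gain year counts -- are intended to be mutually
# exclusive per pixel, so one simple way to combine them into a single gain-year-count raster is to sum them.
# The uint8 dtype is an assumption that the counts stay small.
def _example_merge_gain_year_counts(loss_only, gain_only, no_change, loss_and_gain):
    import numpy as np
    arrays = [np.asarray(a, dtype='uint8') for a in (loss_only, gain_only, no_change, loss_and_gain)]
    return arrays[0] + arrays[1] + arrays[2] + arrays[3]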
def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux=None, run_date=None, no_upload=None): os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Files to download for this script download_dict = { cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil], cn.net_flux_dir: [cn.pattern_net_flux] } # Checks whether the canopy cover argument is valid if thresh < 0 or thresh > 99: uu.exception_log( no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.') if uu.check_aws_creds(): # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area, cn.docker_base_dir, sensit_type, tile_id_list) # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir, sensit_type, tile_id_list) uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir, sensit_type, tile_id_list) uu.s3_flexible_download(cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000, cn.docker_base_dir, sensit_type, tile_id_list) uu.print_log("Model outputs to process are:", download_dict) # List of output directories. Modified later for sensitivity analysis. # Output pattern is determined later. output_dir_list = [cn.output_aggreg_dir] # If the model run isn't the standard one, the output directory is changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Iterates through the types of tiles to be processed for dir, download_pattern in list(download_dict.items()): download_pattern_name = download_pattern[0] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): uu.s3_flexible_download(dir, download_pattern_name, cn.docker_base_dir, sensit_type, tile_id_list) # Gets an actual tile id to use as a dummy in creating the actual tile pattern local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir, download_pattern_name) sample_tile_id = uu.get_tile_id(local_tile_list[0]) # Renames the tiles according to the sensitivity analysis before creating dummy tiles. # The renaming function requires a whole tile name, so this passes a dummy time name that is then stripped a few # lines later. tile_id = sample_tile_id # a dummy tile id (but it has to be a real tile id). It is removed later. 
        output_pattern = uu.sensit_tile_rename(sensit_type, tile_id, download_pattern_name)
        pattern = output_pattern[9:-4]

        # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis
        if (sensit_type != 'std') & (sensit_type not in pattern):
            uu.print_log("{} not a sensitivity analysis output. Skipping aggregation...".format(pattern))
            uu.print_log("")
            continue

        # Lists the tiles of the particular type that is being iterated through.
        # Excludes all intermediate files
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [i for i in tile_list if not ('hanson_2013' in i)]
        tile_list = [i for i in tile_list if not ('rewindow' in i)]
        tile_list = [i for i in tile_list if not ('0_4deg' in i)]
        tile_list = [i for i in tile_list if not ('.ovr' in i)]
        # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif']  # test tiles

        uu.print_log("There are {0} tiles to process for pattern {1}".format(str(len(tile_list)), download_pattern) + "\n")
        uu.print_log("Processing:", dir, "; ", pattern)

        # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 400x400 pixels,
        # which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed.
        # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12  # 12 processors = XXX GB peak
            else:
                processes = 16  # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload), tile_list)
        # Added these in response to errno 12: Cannot allocate memory error.
        # This fix was mentioned here: https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.rewindow(tile, no_upload)

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
        # and sums those values in each 400x400 pixel window.
        # The sum for each 400x400 pixel window is stored in a 2D array, which is then converted back into a raster at
        # 0.1x0.1 degree resolution (approximately 10 km in the tropics).
        # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha).
        # The 0.1x0.1 degree tile is output.
        # For multiprocessor use.
This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge if cn.count == 96: if sensit_type == 'biomass_swap': processes = 10 # 10 processors = XXX GB peak else: processes = 12 # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out) else: processes = 8 uu.print_log('Conversion to per pixel and aggregate max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(aggregate_results_to_4_km.aggregate, thresh=thresh, sensit_type=sensit_type, no_upload=no_upload), tile_list) pool.close() pool.join() # # For single processor use # for tile in tile_list: # # aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload) # Makes a vrt of all the output 10x10 tiles (10 km resolution) out_vrt = "{}_0_4deg.vrt".format(pattern) os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format( out_vrt, pattern)) # Creates the output name for the 10km map out_pattern = uu.name_aggregated_output(download_pattern_name, thresh, sensit_type) uu.print_log(out_pattern) # Produces a single raster of all the 10x10 tiles (0.4 degree resolution) cmd = [ 'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0', '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt, '{}.tif'.format(out_pattern) ] uu.log_subprocess_output_full(cmd) # Adds metadata tags to output rasters uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern), sensit_type) # Units are different for annual removal factor, so metadata has to reflect that if 'annual_removal_factor' in out_pattern: cmd = [ 'gdal_edit.py', '-mo', 'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees', '-mo', 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', '-mo', 'extent=Global', '-mo', 'scale=negative values are removals', '-mo', 'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation' .format(thresh), '{0}.tif'.format(out_pattern) ] uu.log_subprocess_output_full(cmd) else: cmd = [ 'gdal_edit.py', '-mo', 'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees', '-mo', 'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels', '-mo', 'extent=Global', '-mo', 'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation' .format(thresh), '{0}.tif'.format(out_pattern) ] uu.log_subprocess_output_full(cmd) # If no_upload flag is not activated, output is uploaded if not no_upload: uu.print_log("Tiles processed. Uploading to s3 now...") uu.upload_final_set(output_dir_list[0], out_pattern) # Cleans up the folder before starting on the next raster type vrtList = glob.glob('*vrt') for vrt in vrtList: os.remove(vrt) for tile_name in tile_list: tile_id = uu.get_tile_id(tile_name) # os.remove('{0}_{1}.tif'.format(tile_id, pattern)) os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern)) os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern)) # Compares the net flux from the standard model and the sensitivity analysis in two ways. # This does not work for compariing the raw outputs of the biomass_swap and US_removals sensitivity models because their # extents are different from the standard model's extent (tropics and US tiles vs. global). # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000. 
# Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the # code below should work. if sensit_type not in [ 'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss' ]: if std_net_flux: uu.print_log( "Standard aggregated flux results provided. Creating comparison maps." ) # Copies the standard model aggregation outputs to s3. Only net flux is used, though. uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type) # Identifies the standard model net flux map std_aggreg_flux = os.path.split(std_net_flux)[1] try: # Identifies the sensitivity model net flux map sensit_aggreg_flux = glob.glob( 'net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0] uu.print_log("Standard model net flux:", std_aggreg_flux) uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux) except: uu.print_log( 'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.' ) uu.print_log( "Creating map of percent difference between standard and {} net flux" .format(sensit_type)) aggregate_results_to_4_km.percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload) uu.print_log( "Creating map of which pixels change sign and which stay the same between standard and {}" .format(sensit_type)) aggregate_results_to_4_km.sign_change(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_perc_diff) uu.upload_final_set(output_dir_list[0], cn.pattern_aggreg_sensit_sign_change) else: uu.print_log( "No standard aggregated flux results provided. Not creating comparison maps." )
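# Minimal numpy sketch (illustration only) of the aggregation described in the comments above: per-hectare
# values are converted to per-pixel values with a pixel-area raster, then summed over square windows of the
# fine-resolution grid (400x400 pixels per the comments above) so each window becomes one coarse-resolution
# cell. The window size and the assumption that the pixel-area raster is in m2 are taken from those comments;
# the model's real implementation is aggregate_results_to_4_km.aggregate.
def _example_block_aggregate(per_ha_array, pixel_area_m2_array, window=400):
    import numpy as np
    per_pixel = np.asarray(per_ha_array, dtype='float64') * np.asarray(pixel_area_m2_array, dtype='float64') / 10000.0
    rows, cols = per_pixel.shape
    # Requires tile dimensions to be exact multiples of the window size (Hansen tiles are 40000x40000 pixels)
    blocks = per_pixel.reshape(rows // window, window, cols // window, window)
    return blocks.sum(axis=(1, 3))   # one summed value per window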
def mp_model_extent(sensit_type, tile_id_list, run_date=None, no_upload=None): os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model. Which biomass tiles to use depends on sensitivity analysis if sensit_type == 'biomass_swap': tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, sensit_type) elif sensit_type == 'legal_Amazon_loss': tile_id_list = uu.tile_list_s3( cn.Brazil_forest_extent_2000_processed_dir, sensit_type) else: tile_id_list = uu.create_combined_tile_list( cn.WHRC_biomass_2000_unmasked_dir, cn.mangrove_biomass_2000_dir, cn.gain_dir, cn.tcd_dir) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Files to download for this script. download_dict = { cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.gain_dir: [cn.pattern_gain], cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000] } if sensit_type == 'legal_Amazon_loss': download_dict[cn.Brazil_forest_extent_2000_processed_dir] = [ cn.pattern_Brazil_forest_extent_2000_processed ] else: download_dict[cn.tcd_dir] = [cn.pattern_tcd] if sensit_type == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [ cn.pattern_JPL_unmasked_processed ] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [ cn.pattern_WHRC_biomass_2000_unmasked ] # List of output directories and output file name patterns output_dir_list = [cn.model_extent_dir] output_pattern_list = [cn.pattern_model_extent] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. 
if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Creates a single filename pattern to pass to the multiprocessor call pattern = output_pattern_list[0] # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html if cn.count == 96: if sensit_type == 'biomass_swap': processes = 38 else: processes = 42 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases); # 36 = 550 GB peak; 40 = 590 GB peak; 42 = XXX GB peak else: processes = 3 uu.print_log('Model extent processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(model_extent.model_extent, pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # model_extent.model_extent(tile_id, pattern, sensit_type, no_upload) output_pattern = output_pattern_list[0] if cn.count <= 2: # For local tests processes = 1 uu.print_log( "Checking for empty tiles of {0} pattern with {1} processors using light function..." .format(output_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map( partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() else: processes = 58 # 50 processors = 620 GB peak; 55 = 640 GB; 58 = 650 GB (continues to increase very slowly several hundred tiles in) uu.print_log( "Checking for empty tiles of {0} pattern with {1} processors...". format(output_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map( partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() # If no_upload flag is not activated, output is uploaded if not no_upload: uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
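# Hedged sketch (not uu.replace_output_dir_date itself) of the run-date substitution used throughout these
# scripts: each output directory is assumed here to end in a YYYYMMDD date, and when --run-date is supplied
# that trailing date is swapped for the provided one. The regex and example path are illustrative assumptions
# about the directory layout, shown only to make the behavior concrete.
def _example_replace_output_dir_date(output_dir_list, run_date):
    import re
    return [re.sub(r'\d{8}(/?)$', run_date + r'\1', d) for d in output_dir_list]

# Example: _example_replace_output_dir_date(['s3://bucket/model_extent/20201123/'], '20210226')
# returns ['s3://bucket/model_extent/20210226/']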
def main (): os.chdir(cn.docker_base_dir) # List of possible model stages to run (not including mangrove and planted forest stages) model_stages = ['all', 'model_extent', 'forest_age_category_IPCC', 'annual_removals_IPCC', 'annual_removals_all_forest_types', 'gain_year_count', 'gross_removals_all_forest_types', 'carbon_pools', 'gross_emissions', 'net_flux', 'aggregate', 'create_supplementary_outputs'] # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run parser = argparse.ArgumentParser(description='Run the full carbon flux model') parser.add_argument('--model-type', '-t', required=True, help='{}'.format(cn.model_type_arg_help)) parser.add_argument('--stages', '-s', required=True, help='Stages for running the flux model. Options are {}'.format(model_stages)) parser.add_argument('--run-through', '-r', action='store_true', help='If activated, run named stage and all following stages. If not activated, run the selected stage only.') parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') parser.add_argument('--tile-id-list', '-l', required=True, help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.') parser.add_argument('--carbon-pool-extent', '-ce', required=False, help='Time period for which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss') parser.add_argument('--emitted-pools-to-use', '-p', required=False, help='Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.') parser.add_argument('--tcd-threshold', '-tcd', required=False, help='Tree cover density threshold above which pixels will be included in the aggregation.') parser.add_argument('--std-net-flux-aggreg', '-sagg', required=False, help='The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map') parser.add_argument('--mangroves', '-ma', action='store_true', help='Include mangrove removal rate and standard deviation tile creation step (before model extent).') parser.add_argument('--us-rates', '-us', action='store_true', help='Include US removal rate and standard deviation tile creation step (before model extent).') parser.add_argument('--no-upload', '-nu', action='store_true', help='Disables uploading of outputs to s3') parser.add_argument('--save-intermediates', '-si', action='store_true', help='Saves intermediate model outputs rather than deleting them to save storage') parser.add_argument('--log-note', '-ln', required=False, help='Note to include in log header about model run.') args = parser.parse_args() sensit_type = args.model_type stage_input = args.stages run_through = args.run_through run_date = args.run_date tile_id_list = args.tile_id_list carbon_pool_extent = args.carbon_pool_extent emitted_pools = args.emitted_pools_to_use thresh = args.tcd_threshold if thresh is not None: thresh = int(thresh) std_net_flux = args.std_net_flux_aggreg include_mangroves = args.mangroves include_us = args.us_rates no_upload = args.no_upload save_intermediates = args.save_intermediates log_note = args.log_note # Start time for script script_start = datetime.datetime.now() # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True uu.print_log("s3 credentials not found. 
Uploading to s3 disabled.") # Create the output log uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload, save_intermediates=save_intermediates, stage_input=stage_input, run_through=run_through, carbon_pool_extent=carbon_pool_extent, emitted_pools=emitted_pools, thresh=thresh, std_net_flux=std_net_flux, include_mangroves=include_mangroves, include_us=include_us, log_note=log_note) # Checks the validity of the model stage argument. If it is invalid, the script ends. if (stage_input not in model_stages): uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', model_stages) else: pass # Generates the list of stages to run actual_stages = uu.analysis_stages(model_stages, stage_input, run_through, sensit_type, include_mangroves = include_mangroves, include_us=include_us) uu.print_log("Analysis stages to run:", actual_stages) # Reports how much storage is being used by files uu.check_storage() # Checks whether the sensitivity analysis argument is valid uu.check_sensit_type(sensit_type) # Checks if the carbon pool type is specified if the stages to run include carbon pool generation. # Does this up front so the user knows before the run begins that information is missing. if ('carbon_pools' in actual_stages) & (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']): uu.exception_log(no_upload, "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.") # Checks if the correct c++ script has been compiled for the pool option selected. # Does this up front so that the user is prompted to compile the C++ before the script starts running, if necessary. if 'gross_emissions' in actual_stages: if emitted_pools == 'biomass_soil': # Some sensitivity analyses have specific gross emissions scripts. # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script. if sensit_type in ['no_shifting_ag', 'convert_to_grassland']: if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type)): uu.print_log("C++ for {} already compiled.".format(sensit_type)) else: uu.exception_log(no_upload, 'Must compile {} model C++...'.format(sensit_type)) else: if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst)): uu.print_log("C++ for generic emissions already compiled.") else: uu.exception_log(no_upload, 'Must compile generic emissions C++...') elif (emitted_pools == 'soil_only') & (sensit_type == 'std'): if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst)): uu.print_log("C++ for soil_only already compiled.") else: uu.exception_log(no_upload, 'Must compile soil_only C++...') else: uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid for gross emissions') # Checks whether the canopy cover argument is valid up front. if 'aggregate' in actual_stages: if thresh < 0 or thresh > 99: uu.exception_log(no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.') else: pass # If the tile_list argument is an s3 folder, the list of tiles in it is created if 's3://' in tile_id_list: tile_id_list = uu.tile_list_s3(tile_id_list, 'std') uu.print_log(tile_id_list) uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))), "\n") # Otherwise, check that the tile list argument is valid. 
"all" is the way to specify that all tiles should be processed else: tile_id_list = uu.tile_id_list_check(tile_id_list) # List of output directories and output file name patterns. # The directory list is only used for counting tiles in output folders at the end of the model output_dir_list = [ cn.model_extent_dir, cn.age_cat_IPCC_dir, cn.annual_gain_AGB_IPCC_defaults_dir, cn.annual_gain_BGB_IPCC_defaults_dir, cn.stdev_annual_gain_AGB_IPCC_defaults_dir, cn.removal_forest_type_dir, cn.annual_gain_AGC_all_types_dir, cn.annual_gain_BGC_all_types_dir, cn.annual_gain_AGC_BGC_all_types_dir, cn.stdev_annual_gain_AGC_all_types_dir, cn.gain_year_count_dir, cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_dir ] # Prepends the mangrove and US output directories if mangroves are included if 'annual_removals_mangrove' in actual_stages: output_dir_list = [cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir, cn.stdev_annual_gain_AGB_mangrove_dir] + output_dir_list if 'annual_removals_us' in actual_stages: output_dir_list = [cn.annual_gain_AGC_BGC_natrl_forest_US_dir, cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir] + output_dir_list # Adds the carbon directories depending on which carbon emitted_pools are being generated: 2000 and/or emissions year if 'carbon_pools' in actual_stages: if 'loss' in carbon_pool_extent: output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir] if '2000' in carbon_pool_extent: output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir, cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir] # Adds the biomass_soil output directories or the soil_only output directories depending on the model run if 'gross_emissions' in actual_stages: if emitted_pools == 'biomass_soil': output_dir_list = output_dir_list + [cn.gross_emis_commod_biomass_soil_dir, cn.gross_emis_shifting_ag_biomass_soil_dir, cn.gross_emis_forestry_biomass_soil_dir, cn.gross_emis_wildfire_biomass_soil_dir, cn.gross_emis_urban_biomass_soil_dir, cn.gross_emis_no_driver_biomass_soil_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_dir, cn.gross_emis_co2_only_all_drivers_biomass_soil_dir, cn.gross_emis_non_co2_all_drivers_biomass_soil_dir, cn.gross_emis_nodes_biomass_soil_dir] else: output_dir_list = output_dir_list + [cn.gross_emis_commod_soil_only_dir, cn.gross_emis_shifting_ag_soil_only_dir, cn.gross_emis_forestry_soil_only_dir, cn.gross_emis_wildfire_soil_only_dir, cn.gross_emis_urban_soil_only_dir, cn.gross_emis_no_driver_soil_only_dir, cn.gross_emis_all_gases_all_drivers_soil_only_dir, cn.gross_emis_co2_only_all_drivers_soil_only_dir, cn.gross_emis_non_co2_all_drivers_soil_only_dir, cn.gross_emis_nodes_soil_only_dir] output_dir_list = output_dir_list + [cn.net_flux_dir] if 'create_supplementary_outputs' in actual_stages: output_dir_list = output_dir_list + \ [cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir, cn.net_flux_per_pixel_full_extent_dir, cn.net_flux_forest_extent_dir, 
cn.net_flux_per_pixel_forest_extent_dir] # Creates tiles of annual AGB and BGB gain rate and AGB stdev for mangroves using the standard model # removal function if 'annual_removals_mangrove' in actual_stages: uu.print_log(":::::Creating tiles of annual removals for mangrove") start = datetime.datetime.now() mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_mangrove:", elapsed_time, "\n") # Creates tiles of annual AGC+BGC gain rate and AGC stdev for US-specific removals using the standard model # removal function if 'annual_removals_us' in actual_stages: uu.print_log(":::::Creating tiles of annual removals for US") start = datetime.datetime.now() mp_US_removal_rates(sensit_type, tile_id_list, run_date = run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_us:", elapsed_time, "\n") # Creates model extent tiles if 'model_extent' in actual_stages: uu.print_log(":::::Creating tiles of model extent") start = datetime.datetime.now() mp_model_extent(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for model_extent:", elapsed_time, "\n", "\n") # Creates age category tiles for natural forests if 'forest_age_category_IPCC' in actual_stages: uu.print_log(":::::Creating tiles of forest age categories for IPCC removal rates") start = datetime.datetime.now() mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for forest_age_category_IPCC:", elapsed_time, "\n", "\n") # Creates tiles of annual AGB and BGB gain rates using IPCC Table 4.9 defaults if 'annual_removals_IPCC' in actual_stages: uu.print_log(":::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults") start = datetime.datetime.now() mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_IPCC:", elapsed_time, "\n", "\n") # Creates tiles of annual AGC and BGC removal factors for the entire model, combining removal factors from all forest types if 'annual_removals_all_forest_types' in actual_stages: uu.print_log(":::::Creating tiles of annual aboveground and belowground removal rates for all forest types") start = datetime.datetime.now() mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types:", elapsed_time, "\n", "\n") # Creates tiles of the number of years of removals for all model pixels (across all forest types) if 'gain_year_count' in actual_stages: if not save_intermediates: uu.print_log(":::::Freeing up memory for gain year count creation by deleting unneeded tiles") tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) 
tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_mangrove))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_all_types))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating tiles of gain year count for all removal pixels") start = datetime.datetime.now() mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = run_date) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for gain_year_count:", elapsed_time, "\n", "\n") # Creates tiles of gross removals for all forest types (aboveground, belowground, and above+belowground) if 'gross_removals_all_forest_types' in actual_stages: uu.print_log(":::::Creating gross removals for all forest types combined (above + belowground) tiles'") start = datetime.datetime.now() mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for gross_removals_all_forest_types:", elapsed_time, "\n", "\n") # Creates carbon emitted_pools in loss year if 'carbon_pools' in actual_stages: if not save_intermediates: uu.print_log(":::::Freeing up memory for carbon pool creation by deleting unneeded tiles") tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_model_extent))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) 
tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGC_all_types))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types))) tiles_to_delete.extend(glob.glob('*growth_years*tif')) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain_year_count))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_BGCO2_all_types))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_BGCO2_all_types))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_mangrove))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_all_types))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating carbon pool tiles") start = datetime.datetime.now() mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date=run_date, no_upload=no_upload, save_intermediates=save_intermediates) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for create_carbon_pools:", elapsed_time, "\n", "\n") # Creates gross emissions tiles by driver, gas, and all emissions combined if 'gross_emissions' in actual_stages: if not save_intermediates: uu.print_log(":::::Freeing up memory for gross emissions creation by deleting unneeded tiles") tiles_to_delete = [] # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_AGC_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_deadwood_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_litter_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_total_C_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_elevation))) 
tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") uu.print_log(tiles_to_delete) for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating gross emissions tiles") start = datetime.datetime.now() mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date=run_date, no_upload=no_upload) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for gross_emissions:", elapsed_time, "\n", "\n") # Creates net flux tiles (gross emissions - gross removals) if 'net_flux' in actual_stages: if not save_intermediates: uu.print_log(":::::Freeing up memory for net flux creation by deleting unneeded tiles") tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_commod_biomass_soil))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_shifting_ag_biomass_soil))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_forestry_biomass_soil))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_wildfire_biomass_soil))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_urban_biomass_soil))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_no_driver_biomass_soil))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_nodes_biomass_soil))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_AGC_emis_year))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_emis_year))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_deadwood_emis_year_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_litter_emis_year_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_emis_year_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_total_C_emis_year))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_peat_mask))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_drivers))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_climate_zone))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_bor_tem_trop_processed))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_burn_year))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) 
uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating net flux tiles") start = datetime.datetime.now() mp_net_flux(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for net_flux:", elapsed_time, "\n", "\n") # Aggregates gross emissions, gross removals, and net flux to coarser resolution. # For sensitivity analyses, creates percent difference and sign change maps compared to standard model net flux. if 'aggregate' in actual_stages: # aux.xml files need to be deleted because otherwise they'll be included in the aggregation iteration. # They are created by using check_and_delete_if_empty_light() uu.print_log(":::::Deleting any aux.xml files") tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*aux.xml')) for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted {0} aux.xml files: {1}".formt(len(tiles_to_delete), tiles_to_delete), "\n") uu.print_log(":::::Creating 4x4 km aggregate maps") start = datetime.datetime.now() mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux=std_net_flux, run_date=run_date, no_upload=no_upload) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for aggregate:", elapsed_time, "\n", "\n") # Converts gross emissions, gross removals and net flux from per hectare rasters to per pixel rasters if 'create_supplementary_outputs' in actual_stages: if not save_intermediates: uu.print_log(":::::Deleting rewindowed tiles") tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*rewindow*tif')) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log(":::::Creating supplementary versions of main model outputs (forest extent, per pixel)") start = datetime.datetime.now() mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload) end = datetime.datetime.now() elapsed_time = end - start uu.check_storage() uu.print_log(":::::Processing time for supplementary output raster creation:", elapsed_time, "\n", "\n") # If no_upload flag is activated, tiles on s3 aren't counted if not no_upload: uu.print_log(":::::Counting tiles output to each folder") # Modifies output directory names to make them match those used during the model run. # The tiles in each of these directories and counted and logged. # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log("Modifying output directory and file name pattern based on sensitivity analysis") output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) # Changes the date in the output directories. This date was used during the model run. # This replaces the date in constants_and_names. if run_date: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) for output in output_dir_list: tile_count = uu.count_tiles_s3(output) uu.print_log("Total tiles in", output, ": ", tile_count) script_end = datetime.datetime.now() script_elapsed_time = script_end - script_start uu.print_log(":::::Processing time for entire run:", script_elapsed_time, "\n") # If no_upload flag is not activated, output is uploaded if not no_upload: uu.upload_log()
def mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date=None, no_upload=None): os.chdir(cn.docker_base_dir) folder = cn.docker_base_dir # If a full model run is specified, the correct set of tiles for the particular script is listed # If the tile_list argument is an s3 folder, the list of tiles in it is created if tile_id_list == 'all': # List of tiles to run in the model tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Files to download for this script download_dict = { cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year], cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year], cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000], cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000], cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000], cn.peat_mask_dir: [cn.pattern_peat_mask], cn.ifl_primary_processed_dir: [cn.pattern_ifl_primary], cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked], cn.drivers_processed_dir: [cn.pattern_drivers], cn.climate_zone_processed_dir: [cn.pattern_climate_zone], cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed], cn.burn_year_dir: [cn.pattern_burn_year] } # Special loss tiles for the Brazil and Mekong sensitivity analyses if sensit_type == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [ cn.pattern_Brazil_annual_loss_processed ] elif sensit_type == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [ cn.pattern_Mekong_loss_processed ] else: download_dict[cn.loss_dir] = [cn.pattern_loss] # Checks the validity of the emitted_pools argument if (emitted_pools not in ['soil_only', 'biomass_soil']): uu.exception_log( no_upload, 'Invalid pool input. Please choose soil_only or biomass_soil.') # Checks if the correct c++ script has been compiled for the pool option selected if emitted_pools == 'biomass_soil': # Output file directories for biomass+soil. Must be in same order as output pattern directories. output_dir_list = [ cn.gross_emis_commod_biomass_soil_dir, cn.gross_emis_shifting_ag_biomass_soil_dir, cn.gross_emis_forestry_biomass_soil_dir, cn.gross_emis_wildfire_biomass_soil_dir, cn.gross_emis_urban_biomass_soil_dir, cn.gross_emis_no_driver_biomass_soil_dir, cn.gross_emis_all_gases_all_drivers_biomass_soil_dir, cn.gross_emis_co2_only_all_drivers_biomass_soil_dir, cn.gross_emis_non_co2_all_drivers_biomass_soil_dir, cn.gross_emis_nodes_biomass_soil_dir ] output_pattern_list = [ cn.pattern_gross_emis_commod_biomass_soil, cn.pattern_gross_emis_shifting_ag_biomass_soil, cn.pattern_gross_emis_forestry_biomass_soil, cn.pattern_gross_emis_wildfire_biomass_soil, cn.pattern_gross_emis_urban_biomass_soil, cn.pattern_gross_emis_no_driver_biomass_soil, cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil, cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil, cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil, cn.pattern_gross_emis_nodes_biomass_soil ] # Some sensitivity analyses have specific gross emissions scripts. # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script. 
if sensit_type in ['no_shifting_ag', 'convert_to_grassland']: # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(sensit_type)): if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format( cn.c_emis_compile_dst, sensit_type)): uu.print_log( "C++ for {} already compiled.".format(sensit_type)) else: uu.exception_log( no_upload, 'Must compile {} model C++...'.format(sensit_type)) else: if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format( cn.c_emis_compile_dst)): uu.print_log("C++ for generic emissions already compiled.") else: uu.exception_log(no_upload, 'Must compile generic emissions C++...') elif (emitted_pools == 'soil_only') & (sensit_type == 'std'): if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format( cn.c_emis_compile_dst)): uu.print_log("C++ for soil_only already compiled.") # Output file directories for soil_only. Must be in same order as output pattern directories. output_dir_list = [ cn.gross_emis_commod_soil_only_dir, cn.gross_emis_shifting_ag_soil_only_dir, cn.gross_emis_forestry_soil_only_dir, cn.gross_emis_wildfire_soil_only_dir, cn.gross_emis_urban_soil_only_dir, cn.gross_emis_no_driver_soil_only_dir, cn.gross_emis_all_gases_all_drivers_soil_only_dir, cn.gross_emis_co2_only_all_drivers_soil_only_dir, cn.gross_emis_non_co2_all_drivers_soil_only_dir, cn.gross_emis_nodes_soil_only_dir ] output_pattern_list = [ cn.pattern_gross_emis_commod_soil_only, cn.pattern_gross_emis_shifting_ag_soil_only, cn.pattern_gross_emis_forestry_soil_only, cn.pattern_gross_emis_wildfire_soil_only, cn.pattern_gross_emis_urban_soil_only, cn.pattern_gross_emis_no_driver_soil_only, cn.pattern_gross_emis_all_gases_all_drivers_soil_only, cn.pattern_gross_emis_co2_only_all_drivers_soil_only, cn.pattern_gross_emis_non_co2_all_drivers_soil_only, cn.pattern_gross_emis_nodes_soil_only ] else: uu.exception_log(no_upload, 'Must compile soil_only C++...') else: uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid') # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) uu.print_log(output_dir_list) uu.print_log(output_pattern_list) # The C++ code expects certain tiles for every input 10x10. # However, not all Hansen tiles have all of these inputs. # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent tiles. # That way, the C++ script gets all the necessary input files. # If it doesn't get the necessary inputs, it skips that tile. 
uu.print_log("Making blank tiles for inputs that don't currently exist") # All of the inputs that need to have dummy tiles made in order to match the tile list of the carbon emitted_pools pattern_list = [ cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask, cn.pattern_ifl_primary, cn.pattern_drivers, cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year, cn.pattern_climate_zone, cn.pattern_soil_C_emis_year_2000 ] # textfile that stores the names of the blank tiles that are created for processing. # This will be iterated through to delete the tiles at the end of the script. uu.create_blank_tile_txt() for pattern in pattern_list: pool = multiprocessing.Pool( processes=80) # 60 = 100 GB peak; 80 = XXX GB peak pool.map( partial(uu.make_blank_tile, pattern=pattern, folder=folder, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # # For single processor use # for pattern in pattern_list: # for tile in tile_id_list: # uu.make_blank_tile(tile, pattern, folder, sensit_type) # Calculates gross emissions for each tile # count/4 uses about 390 GB on a r4.16xlarge spot machine. # processes=18 uses about 440 GB on an r4.16xlarge spot machine. if cn.count == 96: if sensit_type == 'biomass_swap': processes = 15 # 15 processors = XXX GB peak else: processes = 19 # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 716 GB peak else: processes = 9 uu.print_log('Gross emissions max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(calculate_gross_emissions.calc_emissions, emitted_pools=emitted_pools, sensit_type=sensit_type, folder=folder, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile in tile_id_list: # calculate_gross_emissions.calc_emissions(tile, emitted_pools, sensit_type, folder, no_upload) # Print the list of blank created tiles, delete the tiles, and delete their text file uu.list_and_delete_blank_tiles() for i in range(0, len(output_pattern_list)): pattern = output_pattern_list[i] uu.print_log("Adding metadata tags for pattern {}".format(pattern)) if cn.count == 96: processes = 75 # 45 processors = ~30 GB peak; 55 = XXX GB peak; 75 = XXX GB peak else: processes = 9 uu.print_log('Adding metadata tags max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(calculate_gross_emissions.add_metadata_tags, pattern=pattern, sensit_type=sensit_type), tile_id_list) pool.close() pool.join() # for tile_id in tile_id_list: # calculate_gross_emissions.add_metadata_tags(tile_id, pattern, sensit_type) # If no_upload flag is not activated, output is uploaded if not no_upload: for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date=None, no_upload=True): os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir, sensit_type=sensit_type) annual_removals_tile_id_list = uu.tile_list_s3( cn.annual_gain_AGC_all_types_dir, sensit_type=sensit_type) tile_id_list = list( set(gain_year_count_tile_id_list).intersection( annual_removals_tile_id_list)) uu.print_log( "Gross removals tile_id_list is combination of gain_year_count and annual_removals tiles:" ) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Files to download for this script. download_dict = { cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], cn.annual_gain_BGC_all_types_dir: [cn.pattern_annual_gain_BGC_all_types], cn.gain_year_count_dir: [cn.pattern_gain_year_count] } # List of output directories and output file name patterns output_dir_list = [ cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_dir ] output_pattern_list = [ cn.pattern_cumul_gain_AGCO2_all_types, cn.pattern_cumul_gain_BGCO2_all_types, cn.pattern_cumul_gain_AGCO2_BGCO2_all_types ] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # Calculates gross removals if cn.count == 96: if sensit_type == 'biomass_swap': processes = 18 else: processes = 22 # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak else: processes = 2 uu.print_log('Gross removals max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial( gross_removals_all_forest_types.gross_removals_all_forest_types, output_pattern_list=output_pattern_list, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload) # Checks the gross removals outputs for tiles with no data for output_pattern in output_pattern_list: if cn.count <= 2: # For local tests processes = 1 uu.print_log( "Checking for empty tiles of {0} pattern with {1} processors using light function..." 
.format(output_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map( partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() else: processes = 55 # 55 processors = 670 GB peak uu.print_log( "Checking for empty tiles of {0} pattern with {1} processors..." .format(output_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map( partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() # If no_upload flag is not activated, output is uploaded if not no_upload: for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
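When tile_id_list is 'all', the function above only processes tiles that have both gain-year-count and annual-removals inputs, since both are needed to compute gross removals. A toy illustration of that intersection step with made-up tile ids:

gain_year_count_tiles = ['00N_110E', '00N_120E', '10S_030E']
annual_removals_tiles = ['00N_120E', '10S_030E', '20N_080W']
# Only tiles present in both input sets are kept for processing.
tile_id_list = sorted(set(gain_year_count_tiles).intersection(annual_removals_tiles))
print(tile_id_list)  # ['00N_120E', '10S_030E']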
def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date = None, no_upload = None, save_intermediates = None): os.chdir(cn.docker_base_dir) if (sensit_type != 'std') & (carbon_pool_extent != 'loss'): uu.exception_log(no_upload, "Sensitivity analysis run must use 'loss' extent") # Checks the validity of the carbon_pool_extent argument if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']): uu.exception_log(no_upload, "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.") # If a full model run is specified, the correct set of tiles for the particular script is listed. # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant # because there must be loss pixels for emissions-year carbon pools to exist. if (tile_id_list == 'all') & (carbon_pool_extent == 'loss'): # Lists the tiles that have both model extent and loss pixels, both being necessary precursors for emissions model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type) loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type) uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:") tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list)) # For runs generating carbon pools in 2000, all model extent tiles are relevant. if (tile_id_list == 'all') & (carbon_pool_extent != 'loss'): tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type) uu.print_log(tile_id_list) uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n") output_dir_list = [] output_pattern_list = [] # Output files and patterns and files to download if carbon emitted_pools for 2000 are being generated if '2000' in carbon_pool_extent: # List of output directories and output file name patterns output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir, cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir] output_pattern_list = output_pattern_list + [cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000, cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000] # Files to download for this script download_dict = { cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.cont_eco_dir: [cn.pattern_cont_eco_processed], cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed], cn.precip_processed_dir: [cn.pattern_precip], cn.elevation_processed_dir: [cn.pattern_elevation], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], cn.gain_dir: [cn.pattern_gain], } # Adds the correct AGB tiles to the download dictionary depending on the model run if sensit_type == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] # Adds the correct loss tile to the download dictionary depending on the model run if sensit_type == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] elif sensit_type == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] # Output files and patterns and files to download if carbon emitted_pools for loss year are being 
generated if 'loss' in carbon_pool_extent: # List of output directories and output file name patterns output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir] output_pattern_list = output_pattern_list + [cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000, cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year] # Files to download for this script. This has the same items as the download_dict for 2000 pools plus # other tiles. download_dict = { cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000], cn.cont_eco_dir: [cn.pattern_cont_eco_processed], cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed], cn.precip_processed_dir: [cn.pattern_precip], cn.elevation_processed_dir: [cn.pattern_elevation], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], cn.gain_dir: [cn.pattern_gain], cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types] } # Adds the correct AGB tiles to the download dictionary depending on the model run if sensit_type == 'biomass_swap': download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed] else: download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked] # Adds the correct loss tile to the download dictionary depending on the model run if sensit_type == 'legal_Amazon_loss': download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed] elif sensit_type == 'Mekong_loss': download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed] else: download_dict[cn.loss_dir] = [cn.pattern_loss] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log("Changing output directory and file name pattern based on sensitivity analysis") output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) else: uu.print_log("Output directory list for standard model:", output_dir_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) if uu.check_aws_creds(): # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir] uu.log_subprocess_output_full(cmd) pd.options.mode.chained_assignment = None # Imports the table with the ecozone-continent codes and the carbon gain rates gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="mangrove gain, for model") # Removes rows with duplicate codes (N. and S. 
America for the same ecozone) gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first') mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified, cn.below_to_above_trop_dry_mang, cn.below_to_above_trop_wet_mang, cn.below_to_above_subtrop_mang) mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified, cn.deadwood_to_above_trop_dry_mang, cn.deadwood_to_above_trop_wet_mang, cn.deadwood_to_above_subtrop_mang) mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified, cn.litter_to_above_trop_dry_mang, cn.litter_to_above_trop_wet_mang, cn.litter_to_above_subtrop_mang) uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent)) if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 16 # 16 processors = XXX GB peak else: processes = 20 # 25 processors > 750 GB peak; 16 = 560 GB peak; # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 690 GB peak (stops at 600, then increases slowly); 21 > 750 GB peak else: # For 2000, or loss & 2000 processes = 15 # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak else: processes = 2 uu.print_log('AGC loss year max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(create_carbon_pools.create_AGC, sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) else: uu.upload_final_set(output_dir_list[0], output_pattern_list[0]) uu.upload_final_set(output_dir_list[6], output_pattern_list[6]) uu.check_storage() if not save_intermediates: uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles") tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types)) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent)) # Creates a single filename pattern to pass to the multiprocessor call if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 30 # 30 processors = XXX GB peak else: processes = 39 # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 690 GB peak; 39 = XXX GB peak else: # For 2000, or loss & 2000 processes = 30 # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak else: processes = 2 uu.print_log('BGC max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio, carbon_pool_extent=carbon_pool_extent, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # 
create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[1], output_pattern_list[1]) else: uu.upload_final_set(output_dir_list[1], output_pattern_list[1]) uu.upload_final_set(output_dir_list[7], output_pattern_list[7]) uu.check_storage() # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on. # Thus must delete AGC, BGC, and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine # for total C 2000 calculation. if '2000' in carbon_pool_extent: uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles") tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent)) if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 10 # 10 processors = XXX GB peak else: processes = 15 # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 685 GB peak (stops around 600, then increases very very slowly); 15 = 700 GB peak else: # For 2000, or loss & 2000 ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced. ### There wouldn't have been enough room for all deadwood and litter otherwise. ### For example, when deadwood and litter generation started getting up to around 50N, I deleted ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S. 
processes = 16 # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak else: processes = 2 uu.print_log('Deadwood and litter max processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio, mang_litter_AGB_ratio=mang_litter_AGB_ratio, carbon_pool_extent=carbon_pool_extent, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[2], output_pattern_list[2]) # deadwood uu.upload_final_set(output_dir_list[3], output_pattern_list[3]) # litter else: uu.upload_final_set(output_dir_list[2], output_pattern_list[2]) # deadwood uu.upload_final_set(output_dir_list[3], output_pattern_list[3]) # litter uu.upload_final_set(output_dir_list[8], output_pattern_list[8]) # deadwood uu.upload_final_set(output_dir_list[9], output_pattern_list[9]) # litter uu.check_storage() if not save_intermediates: uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles") tiles_to_delete = [] tiles_to_delete .extend(glob.glob('*{}*tif'.format(cn.pattern_elevation))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") for tile_to_delete in tiles_to_delete: os.remove(tile_to_delete) uu.print_log(":::::Deleted unneeded tiles") uu.check_storage() if 'loss' in carbon_pool_extent: uu.print_log("Creating tiles of soil carbon in loss extent") # If pools in 2000 weren't generated, soil carbon in emissions extent is 4. # If pools in 2000 were generated, soil carbon in emissions extent is 10. if '2000' not in carbon_pool_extent: pattern = output_pattern_list[4] else: pattern = output_pattern_list[10] if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 36 # 36 processors = XXX GB peak else: processes = 44 # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = 640 GB peak; 44 = XXX GB peak else: # For 2000, or loss & 2000 processes = 12 # 12 processors = XXX GB peak else: processes = 2 uu.print_log('Soil carbon loss year max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: # If pools in 2000 weren't generated, soil carbon in emissions extent is 4. # If pools in 2000 were generated, soil carbon in emissions extent is 10. 
if '2000' not in carbon_pool_extent: uu.upload_final_set(output_dir_list[4], output_pattern_list[4]) else: uu.upload_final_set(output_dir_list[10], output_pattern_list[10]) uu.check_storage() if '2000' in carbon_pool_extent: uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.") uu.check_storage() # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on. # Thus must delete BGC and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine # for total C 2000 calculation. if '2000' in carbon_pool_extent: # Files to download for total C 2000. Previously deleted to save space download_dict = { cn.BGC_2000_dir: [cn.pattern_BGC_2000], cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000] } for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) uu.print_log("Creating tiles of total carbon") if cn.count == 96: # More processors can be used for loss carbon pools than for 2000 carbon pools if carbon_pool_extent == 'loss': if sensit_type == 'biomass_swap': processes = 14 # 14 processors = XXX GB peak else: processes = 19 # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = 660 GB peak; 19 = XXX GB peak else: # For 2000, or loss & 2000 processes = 12 # 12 processors = XXX GB peak else: processes = 2 uu.print_log('Total carbon loss year max processors=', processes) pool = multiprocessing.Pool(processes) pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload) # If no_upload flag is not activated, output is uploaded if not no_upload: if carbon_pool_extent in ['loss', '2000']: uu.upload_final_set(output_dir_list[5], output_pattern_list[5]) else: uu.upload_final_set(output_dir_list[5], output_pattern_list[5]) uu.upload_final_set(output_dir_list[11], output_pattern_list[11]) uu.check_storage()
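The mangrove pool-ratio dictionaries used earlier in this function are built from a gain-rate spreadsheet that is first de-duplicated on the ecozone-continent code, so each code maps to a single row. A toy pandas illustration of that de-duplication step; apart from gainEcoCon, which is the field named in the scripts, the column names and values are invented:

import pandas as pd

# Two rows share the same ecozone-continent code, e.g. the N. and S. America entries for one ecozone.
gain_table = pd.DataFrame({'gainEcoCon': [101, 101, 102], 'AGB_gain_rate': [3.2, 3.2, 2.1]})
gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')
print(gain_table_simplified)  # one row remains per gainEcoCon code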
def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date=None, no_upload=None): os.chdir(cn.docker_base_dir) # If a full model run is specified, the correct set of tiles for the particular script is listed if tile_id_list == 'all': # List of tiles to run in the model tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type) uu.print_log(tile_id_list) uu.print_log( "There are {} tiles to process".format(str(len(tile_id_list))) + "\n") # Files to download for this script. download_dict = { cn.model_extent_dir: [cn.pattern_model_extent], cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove], cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove], cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe], cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked], cn.annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US], cn.annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_annual_gain_AGC_natrl_forest_young], cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC], cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults], cn.stdev_annual_gain_AGB_mangrove_dir: [cn.pattern_stdev_annual_gain_AGB_mangrove], cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe], cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked], cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US], cn.stdev_annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young], cn.stdev_annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults] } # List of output directories and output file name patterns output_dir_list = [ cn.removal_forest_type_dir, cn.annual_gain_AGC_all_types_dir, cn.annual_gain_BGC_all_types_dir, cn.annual_gain_AGC_BGC_all_types_dir, cn.stdev_annual_gain_AGC_all_types_dir ] output_pattern_list = [ cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types, cn.pattern_annual_gain_BGC_all_types, cn.pattern_annual_gain_AGC_BGC_all_types, cn.pattern_stdev_annual_gain_AGC_all_types ] # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found if uu.check_aws_creds(): for key, values in download_dict.items(): dir = key pattern = values[0] uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list) # If the model run isn't the standard one, the output directory and file names are changed if sensit_type != 'std': uu.print_log( "Changing output directory and file name pattern based on sensitivity analysis" ) output_dir_list = uu.alter_dirs(sensit_type, output_dir_list) output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list) # A date can optionally be provided by the full model script or a run of this script. # This replaces the date in constants_and_names. 
if run_date is not None: output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date) # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html if cn.count == 96: if sensit_type == 'biomass_swap': processes = 13 else: processes = 17 # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = >680 GB peak else: processes = 2 uu.print_log('Removal factor processors=', processes) pool = multiprocessing.Pool(processes) pool.map( partial(annual_gain_rate_AGC_BGC_all_forest_types. annual_gain_rate_AGC_BGC_all_forest_types, output_pattern_list=output_pattern_list, sensit_type=sensit_type, no_upload=no_upload), tile_id_list) pool.close() pool.join() # # For single processor use # for tile_id in tile_id_list: # annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, sensit_type, no_upload) # Checks the gross removals outputs for tiles with no data for output_pattern in output_pattern_list: if cn.count <= 2: # For local tests processes = 1 uu.print_log( "Checking for empty tiles of {0} pattern with {1} processors using light function..." .format(output_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map( partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() else: processes = 55 # 50 processors = XXX GB peak uu.print_log( "Checking for empty tiles of {0} pattern with {1} processors..." .format(output_pattern, processes)) pool = multiprocessing.Pool(processes) pool.map( partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list) pool.close() pool.join() # If no_upload flag is not activated, output is uploaded if not no_upload: for i in range(0, len(output_dir_list)): uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
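The post-processing loops above hand each output tile to uu.check_and_delete_if_empty(), which removes tiles containing no data so that empty outputs are not uploaded or counted. A minimal sketch of that idea, assuming rasterio and treating an all-zero band as empty; the real helper's definition of "empty" and its logging may differ:

import os
import numpy as np
import rasterio

def delete_if_empty(tile_path):
    # Read the single band and delete the file if it contains no nonzero values.
    with rasterio.open(tile_path) as src:
        band = src.read(1)
    if not np.any(band):
        os.remove(tile_path)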