Beispiel #1
0
        '-l',
        required=True,
        help=
        'List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.'
    )
    parser.add_argument('--run-date',
                        '-d',
                        required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list
    run_date = args.run_date

    # Disables upload to s3 if no AWS credentials are found in environment
    if not uu.check_aws_creds():
        no_upload = True
        uu.print_log("s3 credentials not found. Uploading to s3 disabled.")

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list,
                    sensit_type=sensit_type,
                    run_date=run_date)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)
    tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_annual_gain_rate_mangrove(sensit_type=sensit_type,
                                 tile_id_list=tile_id_list,
                                 run_date=run_date)
Beispiel #2
0
def _mangrove_code_to_rate_dict(table, rate_column):
    """Return a dict mapping continent-ecozone codes (as floats) to ``rate_column`` values.

    Keys come from the table's ``gainEcoCon`` column. An entry for code 0 with
    a rate of 0 is always added.
    """
    code_rate_dict = pd.Series(table[rate_column].values,
                               index=table['gainEcoCon']).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    code_rate_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    return {float(code): rate for code, rate in code_rate_dict.items()}


def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=None):
    """Build the mangrove removal-factor lookups and run the per-tile calculation in parallel.

    Reads the spreadsheet named by ``cn.gain_spreadsheet``, converts its
    continent-ecozone rates into three dictionaries (aboveground gain,
    belowground gain, aboveground gain standard deviation), maps
    ``annual_gain_rate_mangrove.annual_gain_rate`` over the tiles with a
    process pool, and finally uploads each output directory/pattern pair with
    ``uu.upload_final_set``.

    Args:
        sensit_type: Sensitivity analysis type; forwarded to the s3 download
            helper and to the per-tile function.
        tile_id_list: Tile ids to process, or the string ``'all'`` to use every
            tile listed in both the mangrove biomass and continent-ecozone
            s3 directories.
        run_date: Optional date; when not None it replaces the date in the
            output directories.
    """

    os.chdir(cn.docker_base_dir)
    # Turns off pandas' chained-assignment warning for the column assignments made below
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # Lists the tiles that have both mangrove biomass and FAO ecozone information because both of these are
        # necessary for calculating mangrove gain
        mangrove_biomass_tile_list = uu.tile_list_s3(
            cn.mangrove_biomass_2000_dir)
        ecozone_tile_list = uu.tile_list_s3(cn.cont_eco_dir)
        tile_id_list = list(
            set(mangrove_biomass_tile_list).intersection(ecozone_tile_list))

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(len(tile_id_list)) + "\n")

    # Files to download for this script
    download_dict = {
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
        cn.stdev_annual_gain_AGB_mangrove_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_mangrove,
        cn.pattern_annual_gain_BGB_mangrove,
        cn.pattern_stdev_annual_gain_AGB_mangrove
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list,
    # if AWS credentials are found
    if uu.check_aws_creds():
        for download_dir, patterns in download_dict.items():
            uu.s3_flexible_download(download_dir, patterns[0],
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    # NOTE(review): unlike the tile downloads above, this copy is attempted even when no AWS credentials
    # are found -- confirm whether it should be guarded as well.
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first')

    # Belowground:aboveground biomass ratios for the three mangrove types, where the keys correspond to
    # the "mangType" field in the gain rate spreadsheet.
    # If the assignment of mangTypes to ecozones changes, that column in the spreadsheet may need to change and the
    # keys in this dictionary would need to change accordingly.
    type_ratio_dict = {
        1: float(cn.below_to_above_trop_dry_mang),
        2: float(cn.below_to_above_trop_wet_mang),
        3: float(cn.below_to_above_subtrop_mang)
    }

    # Applies the belowground:aboveground biomass ratios for the three mangrove types to the annual aboveground
    # gain rates to get a column of belowground annual gain rates by mangrove type
    gain_table_simplified['BGB_AGB_ratio'] = gain_table_simplified[
        'mangType'].map(type_ratio_dict)
    gain_table_simplified['BGB_annual_rate'] = (
        gain_table_simplified['AGB_gain_tons_ha_yr'] *
        gain_table_simplified['BGB_AGB_ratio'])

    # Converts the continent-ecozone codes and corresponding gain rates to dictionaries for aboveground and
    # belowground gain rates
    gain_above_dict = _mangrove_code_to_rate_dict(gain_table_simplified,
                                                  'AGB_gain_tons_ha_yr')
    gain_below_dict = _mangrove_code_to_rate_dict(gain_table_simplified,
                                                  'BGB_annual_rate')

    ### To make the removal factor standard deviation dictionary

    # Imports the table with the ecozone-continent codes and the gain rate standard deviations
    stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                sheet_name="mangrove stdev, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts the continent-ecozone codes and corresponding aboveground gain rate standard deviations
    # to a dictionary
    stdev_dict = _mangrove_code_to_rate_dict(stdev_table_simplified,
                                             'AGB_gain_stdev_tons_ha_yr')

    # functools.partial is used to pass the extra (non-tile) arguments to the per-tile function.
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        processes = 20  #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak
    else:
        processes = 4
    uu.print_log('Mangrove annual gain rate max processors=', processes)

    # The context manager terminates the workers if map() raises, so a failed tile does not leave
    # orphaned processes behind; on success the pool is closed and joined as before.
    with multiprocessing.Pool(processes) as pool:
        pool.map(
            partial(annual_gain_rate_mangrove.annual_gain_rate,
                    sensit_type=sensit_type,
                    output_pattern_list=output_pattern_list,
                    gain_above_dict=gain_above_dict,
                    gain_below_dict=gain_below_dict,
                    stdev_dict=stdev_dict), tile_id_list)
        pool.close()
        pool.join()

    # For single processor use, call annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type,
    # output_pattern_list, gain_above_dict, gain_below_dict, stdev_dict) in a plain loop over tile_id_list.

    # Uploads each output directory with its matching file name pattern
    for output_dir, output_pattern in zip(output_dir_list,
                                          output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
def _build_cont_eco_age_dict(table, value_vars, age_code_dict):
    """Convert a wide continent-ecozone table into a {code (float): value} dictionary.

    Args:
        table: DataFrame with a ``gainEcoCon`` column plus the columns named
            in ``value_vars``.
        value_vars: Column names (one per forest age category) to melt into rows.
        age_code_dict: Maps each name in ``value_vars`` to the numeric age
            code that is added to ``gainEcoCon`` to form the combined key.

    Returns:
        Dictionary keyed by float code. It contains one entry per
        continent-ecozone-age combination, one zero-valued entry per
        continent-ecozone code alone, one zero-valued entry per age code alone,
        and a zero-valued entry for code 0.
    """
    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    table_simplified = table.drop_duplicates(subset='gainEcoCon', keep='first')

    # Converts table from wide to long, so each continent-ecozone-age category has its own row
    table_cont_eco_age = pd.melt(table_simplified,
                                 id_vars=['gainEcoCon'],
                                 value_vars=value_vars).dropna()

    # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
    # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
    # Assigns a value of 0 when there's no age category.
    table_con_eco_only = table_cont_eco_age.drop_duplicates(
        subset='gainEcoCon', keep='first')
    table_con_eco_only['value'] = 0
    table_con_eco_only['cont_eco_age'] = table_con_eco_only['gainEcoCon']

    # Creates a unique value for each continent-ecozone-age category
    table_cont_eco_age = table_cont_eco_age.replace(
        {"variable": age_code_dict})
    table_cont_eco_age['cont_eco_age'] = (table_cont_eco_age['gainEcoCon'] +
                                          table_cont_eco_age['variable'])

    # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
    table_all_combos = pd.concat([table_con_eco_only, table_cont_eco_age])

    # Converts the continent-ecozone-age codes and corresponding values to a dictionary
    code_dict = pd.Series(table_all_combos['value'].values,
                          index=table_all_combos['cont_eco_age']).to_dict()

    # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
    code_dict[0] = 0

    # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
    for age_code in age_code_dict.values():
        code_dict[age_code] = 0

    # Converts all the keys (continent-ecozone-age codes) to float type
    return {float(code): value for code, value in code_dict.items()}


def mp_annual_gain_rate_IPCC_defaults(sensit_type,
                                      tile_id_list,
                                      run_date=None,
                                      no_upload=None):
    """Build the IPCC default removal-factor lookups and run the per-tile calculation in parallel.

    Reads the gain-rate and standard-deviation sheets of the spreadsheet named
    by ``cn.gain_spreadsheet``, converts each into a dictionary keyed by
    continent-ecozone-age code, maps
    ``annual_gain_rate_IPCC_defaults.annual_gain_rate`` over the tiles with a
    process pool, and uploads the outputs unless ``no_upload`` is truthy.

    Args:
        sensit_type: Sensitivity analysis type. Values other than ``'std'``
            alter the output directories and patterns; ``'no_primary_gain'``
            additionally selects dedicated spreadsheet sheets.
        tile_id_list: Tile ids to process, or the string ``'all'`` to use the
            tiles listed for ``cn.model_extent_dir``.
        run_date: Optional date; when not None it replaces the date in the
            output directories.
        no_upload: When truthy, the final upload is skipped. The value is
            also forwarded to the per-tile function.
    """

    os.chdir(cn.docker_base_dir)
    # Turns off pandas' chained-assignment warning for the column assignments made while building the lookups
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(len(tile_id_list)) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_IPCC_defaults_dir,
        cn.annual_gain_BGB_IPCC_defaults_dir,
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_IPCC_defaults,
        cn.pattern_annual_gain_BGB_IPCC_defaults,
        cn.pattern_stdev_annual_gain_AGB_IPCC_defaults
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Everything that has to come from s3 is fetched only if AWS credentials are found
    if uu.check_aws_creds():

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for download_dir, patterns in download_dict.items():
            uu.s3_flexible_download(download_dir, patterns[0],
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

        # Table with IPCC Table 4.9 default gain rates
        cmd = [
            'aws', 's3', 'cp',
            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
            cn.docker_base_dir
        ]
        uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionary

    # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="natrl fores gain, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default rates for tile creation")

    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

    # Code for each age category so that each continent-ecozone-age combo can have its own unique value
    rate_age_dict = {
        'growth_secondary_less_20': 10000,
        'growth_secondary_greater_20': 20000,
        'growth_primary': 30000
    }

    gain_table_dict = _build_cont_eco_age_dict(
        gain_table,
        value_vars=[
            'growth_primary', 'growth_secondary_greater_20',
            'growth_secondary_less_20'
        ],
        age_code_dict=rate_age_dict)

    ### To make the removal factor standard deviation dictionary

    # Special standard deviation table for no_primary_gain sensitivity analysis
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the gain rate standard deviations
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default standard deviations for tile creation"
        )

    # All other analyses use the standard removal rate standard deviations
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rate standard deviations
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, for std model")

    # Code for each age category so that each continent-ecozone-age combo can have its own unique value
    stdev_age_dict = {
        'stdev_secondary_less_20': 10000,
        'stdev_secondary_greater_20': 20000,
        'stdev_primary': 30000
    }

    stdev_table_dict = _build_cont_eco_age_dict(
        stdev_table,
        value_vars=[
            'stdev_primary', 'stdev_secondary_greater_20',
            'stdev_secondary_less_20'
        ],
        age_code_dict=stdev_age_dict)

    # functools.partial is used to pass the extra (non-tile) arguments to the per-tile function.
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 24  # 24 processors = 590 GB peak
        else:
            processes = 30  # 30 processors = 725 GB peak
    else:
        processes = 2
    uu.print_log('Annual gain rate natural forest max processors=', processes)

    # The context manager terminates the workers if map() raises, so a failed tile does not leave
    # orphaned processes behind; on success the pool is closed and joined as before.
    with multiprocessing.Pool(processes) as pool:
        pool.map(
            partial(annual_gain_rate_IPCC_defaults.annual_gain_rate,
                    sensit_type=sensit_type,
                    gain_table_dict=gain_table_dict,
                    stdev_table_dict=stdev_table_dict,
                    output_pattern_list=output_pattern_list,
                    no_upload=no_upload), tile_id_list)
        pool.close()
        pool.join()

    # For single processor use, call annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, sensit_type,
    # gain_table_dict, stdev_table_dict, output_pattern_list, no_upload) in a plain loop over tile_id_list.

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:
        for output_dir, output_pattern in zip(output_dir_list,
                                              output_pattern_list):
            uu.upload_final_set(output_dir, output_pattern)
def _build_US_group_region_dicts(table, value_vars, age_code_dict):
    """Convert the wide US table into the two lookup dictionaries used per tile.

    Args:
        table: DataFrame with ``FIA_region_code`` and ``forest_group_code``
            columns plus the columns named in ``value_vars``.
        value_vars: Column names (one per age category) to melt into rows.
        age_code_dict: Maps each name in ``value_vars`` to its age code.

    Returns:
        A tuple ``(group_region_age_dict, group_region_dict)``. The first is
        keyed by ``age code * 10 + forest_group_code * 100 + FIA_region_code``;
        the second keeps only the rows whose ``age code * 10`` equals 10000
        and is keyed by ``forest_group_code * 100 + FIA_region_code``.
    """
    # Converts table from wide to long, so each region-group-age category has its own row
    table_by_age = pd.melt(table,
                           id_vars=['FIA_region_code', 'forest_group_code'],
                           value_vars=value_vars).dropna()

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these values are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    table_age = table_by_age.replace({"variable": age_code_dict})
    table_age['age_cat'] = table_age['variable'] * 10
    table_age['group_region_age_combined'] = (
        table_age['age_cat'] + table_age['forest_group_code'] * 100 +
        table_age['FIA_region_code'])

    # Converts the forest group-region-age codes and corresponding values to a dictionary,
    # where the key is the unique group-region-age code.
    group_region_age_dict = pd.Series(
        table_age['value'].values,
        index=table_age['group_region_age_combined']).to_dict()

    # Creates a unique value for each forest group-region category using just the rows with age_cat 10000
    # (the youngest age category: code 1000 * 10).
    # These are assigned to Hansen gain pixels, which automatically get the young forest value, regardless of the
    # forest age category raster.
    table_young = table_age.drop(
        table_age[table_age['age_cat'] != 10000].index)
    table_young['group_region_combined'] = (
        table_young['forest_group_code'] * 100 +
        table_young['FIA_region_code'])

    # Converts the forest group-region codes and corresponding values to a dictionary,
    # where the key is the unique group-region code (youngest age category).
    group_region_dict = pd.Series(
        table_young['value'].values,
        index=table_young['group_region_combined']).to_dict()

    return group_region_age_dict, group_region_dict


def mp_US_removal_rates(sensit_type, tile_id_list, run_date=None):
    """Build the US-specific removal-rate lookups and run the per-tile calculation in parallel.

    Reads the ``US_rates_AGC+BGC`` sheet of the spreadsheet named by
    ``cn.table_US_removal_rate``, converts the removal rates and their standard
    deviations into dictionaries keyed by forest group-region(-age) code, maps
    ``US_removal_rates.US_removal_rate_calc`` over the tiles with a process
    pool, and uploads each output directory/pattern pair with
    ``uu.upload_final_set``.

    Args:
        sensit_type: Sensitivity analysis type. Values other than ``'std'``
            alter the output directories and patterns.
        tile_id_list: Tile ids to process, or the string ``'all'`` to use the
            tiles listed for ``cn.FIA_regions_processed_dir``.
        run_date: Optional date; when not None it replaces the date in the
            output directories.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        tile_id_list = uu.tile_list_s3(cn.FIA_regions_processed_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(len(tile_id_list)) + "\n")

    # Files to download for this script
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed],
        cn.FIA_forest_group_processed_dir:
        [cn.pattern_FIA_forest_group_processed],
        cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir,
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGC_BGC_natrl_forest_US,
        cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list,
    # if AWS credentials are found
    if uu.check_aws_creds():
        for download_dir, patterns in download_dict.items():
            uu.s3_flexible_download(download_dir, patterns[0],
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Table with US-specific removal rates
    # NOTE(review): unlike the tile downloads above, this copy is attempted even when no AWS credentials
    # are found -- confirm whether it should be guarded as well.
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    # Imports the table with the region-group-age AGC+BGC removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_AGC+BGC")

    ### To make the removal factor dictionaries

    # In the forest age category raster, each category has this value
    age_dict = {
        'growth_young': 1000,
        'growth_middle': 2000,
        'growth_old': 3000
    }

    gain_table_group_region_age_dict, gain_table_group_region_dict = \
        _build_US_group_region_dicts(
            gain_table,
            value_vars=['growth_young', 'growth_middle', 'growth_old'],
            age_code_dict=age_dict)
    uu.print_log(gain_table_group_region_age_dict)
    uu.print_log(gain_table_group_region_dict)

    ### To make the removal factor standard deviation dictionaries

    # In the forest age category raster, each category has this value
    stdev_dict = {'SD_young': 1000, 'SD_middle': 2000, 'SD_old': 3000}

    stdev_table_group_region_age_dict, stdev_table_group_region_dict = \
        _build_US_group_region_dicts(
            gain_table,
            value_vars=['SD_young', 'SD_middle', 'SD_old'],
            age_code_dict=stdev_dict)
    uu.print_log(stdev_table_group_region_age_dict)
    uu.print_log(stdev_table_group_region_dict)

    if cn.count == 96:
        processes = 68  # 68 processors (only 16 tiles though) = 310 GB peak
    else:
        processes = 24
    uu.print_log('US natural forest AGC+BGC removal rate max processors=',
                 processes)

    # The context manager terminates the workers if map() raises, so a failed tile does not leave
    # orphaned processes behind; on success the pool is closed and joined as before.
    with multiprocessing.Pool(processes) as pool:
        pool.map(
            partial(
                US_removal_rates.US_removal_rate_calc,
                gain_table_group_region_age_dict=gain_table_group_region_age_dict,
                gain_table_group_region_dict=gain_table_group_region_dict,
                stdev_table_group_region_age_dict=stdev_table_group_region_age_dict,
                stdev_table_group_region_dict=stdev_table_group_region_dict,
                output_pattern_list=output_pattern_list), tile_id_list)
        pool.close()
        pool.join()

    # For single processor use, call US_removal_rates.US_removal_rate_calc(tile_id,
    # gain_table_group_region_age_dict, gain_table_group_region_dict, stdev_table_group_region_age_dict,
    # stdev_table_group_region_dict, output_pattern_list) in a plain loop over tile_id_list.

    # Uploads output tiles to s3
    for output_dir, output_pattern in zip(output_dir_list,
                                          output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
def mp_create_supplementary_outputs(sensit_type,
                                    tile_id_list,
                                    run_date=None,
                                    no_upload=None):
    """Creates the supplementary (derivative) versions of the main model outputs.

    For each input in the download dictionary (gross removals, gross emissions, net flux),
    three derivative tile sets are produced by create_supplementary_outputs.create_supplementary_outputs:
    per pixel full extent, per hectare forest extent, and per pixel forest extent.
    The two forest extent tile sets are then checked for empty tiles.

    :param sensit_type: model run type; anything other than 'std' changes the output directories
    :param tile_id_list: list of tile ids to process, or the string 'all'
    :param run_date: optional date that replaces the date in the output directories
    :param no_upload: if truthy, outputs are not uploaded to s3
    """

    os.chdir(cn.docker_base_dir)

    # tile_id_list itself is never reassigned, so the per-input loop below can still compare
    # the original argument against 'all'.
    default_tiles = tile_id_list

    # For a full model run, the net flux tiles define the default tile set
    if default_tiles == 'all':
        default_tiles = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(default_tiles)
    tile_count_msg = "There are {} tiles to process".format(str(len(default_tiles)))
    uu.print_log(tile_count_msg + "\n")

    # Inputs to process: directory -> [file name pattern]
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
            [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
            [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir:
            [cn.pattern_net_flux],
    }

    # Output directories and file name patterns.
    # They are grouped by input in the same order as download_dict, and within each group the order is:
    # per pixel full extent, per hectare forest extent, per pixel forest extent.
    output_dir_list = [
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir,
        cn.net_flux_per_pixel_full_extent_dir,
        cn.net_flux_forest_extent_dir,
        cn.net_flux_per_pixel_forest_extent_dir,
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent,
        cn.pattern_net_flux_per_pixel_full_extent,
        cn.pattern_net_flux_forest_extent,
        cn.pattern_net_flux_per_pixel_forest_extent,
    ]

    # Auxiliary inputs are only downloaded if AWS credentials are found
    if uu.check_aws_creds():

        auxiliary_inputs = (
            # Pixel area tiles-- necessary for calculating per pixel values
            (cn.pixel_area_dir, cn.pattern_pixel_area),
            # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for masking to forest extent
            (cn.tcd_dir, cn.pattern_tcd),
            (cn.gain_dir, cn.pattern_gain),
            (cn.mangrove_biomass_2000_dir, cn.pattern_mangrove_biomass_2000),
        )
        for aux_dir, aux_pattern in auxiliary_inputs:
            uu.s3_flexible_download(aux_dir, aux_pattern,
                                    cn.docker_base_dir, sensit_type,
                                    default_tiles)

    uu.print_log("Model outputs to process are:", download_dict)

    # Non-standard model runs write to different output directories
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # An optional run date replaces the date in constants_and_names
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Keyword found in an input pattern -> slice of output_pattern_list holding its three outputs.
    # This relies on the output patterns being grouped by input in the order described above.
    pattern_groups = (
        ("gross_removals", slice(0, 3)),
        ("gross_emis", slice(3, 6)),
        ("net_flux", slice(6, 9)),
    )

    # Processes one input tile set at a time
    for input_dir, pattern_entry in download_dict.items():

        input_pattern = pattern_entry[0]

        # For a full model run, each input gets its own tile list from s3;
        # otherwise the tiles given on the command line are used for every input.
        if tile_id_list == 'all':
            tiles_for_input = uu.tile_list_s3(input_dir, sensit_type)
        else:
            tiles_for_input = default_tiles

        uu.print_log(tiles_for_input)
        uu.print_log("There are {} tiles to process".format(
            str(len(tiles_for_input))) + "\n")

        # The input tiles themselves are only downloaded if AWS credentials are found
        if uu.check_aws_creds():

            uu.print_log("Downloading tiles from", input_dir)
            uu.s3_flexible_download(input_dir, input_pattern,
                                    cn.docker_base_dir, sensit_type,
                                    tiles_for_input)

        # Finds the group of output patterns that belongs to this input pattern.
        # The list stays empty (and the problem is reported) if no keyword matches.
        output_patterns = []
        for keyword, group in pattern_groups:
            if keyword in input_pattern:
                output_patterns = output_pattern_list[group]
                break
        else:
            uu.exception_log(
                no_upload,
                "No output patterns found for input pattern. Please check.")

        uu.print_log("Input pattern:", input_pattern)
        uu.print_log("Output patterns:", output_patterns)

        # Gross removals: 20 processors = >740 GB peak; 15 = 570 GB peak; 17 = 660 GB peak; 18 = 670 GB peak
        # Gross emissions: 17 processors = 660 GB peak; 18 = 710 GB peak
        processes = 18 if cn.count == 96 else 2
        uu.print_log(
            "Creating derivative outputs for {0} with {1} processors...".
            format(input_pattern, processes))
        pool = multiprocessing.Pool(processes)
        derivative_worker = partial(
            create_supplementary_outputs.create_supplementary_outputs,
            input_pattern=input_pattern,
            output_patterns=output_patterns,
            sensit_type=sensit_type,
            no_upload=no_upload)
        pool.map(derivative_worker, tiles_for_input)
        pool.close()
        pool.join()

        # For single processor debugging, create_supplementary_outputs.create_supplementary_outputs can instead be
        # called directly for each tile id: (tile_id, input_pattern, output_patterns, sensit_type, no_upload)

        # Checks the two forest extent outputs of each input tile for whether there is data in them.
        # Because the extent is restricted in the forest extent versions, some tiles with pixels in the
        # full extent version may not have pixels in the forest extent version.
        for output_pattern in output_patterns[1:3]:
            if cn.count > 2:
                processes = 55  # 50 processors = 560 GB peak for gross removals; 55 = XXX GB peak
                log_template = "Checking for empty tiles of {0} pattern with {1} processors..."
                empty_checker = uu.check_and_delete_if_empty
            else:  # For local tests
                processes = 1
                log_template = "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                empty_checker = uu.check_and_delete_if_empty_light

            uu.print_log(log_template.format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(empty_checker, output_pattern=output_pattern),
                     tiles_for_input)
            pool.close()
            pool.join()

    # Outputs are uploaded unless the no_upload flag is activated
    if not no_upload:

        for position, upload_dir in enumerate(output_dir_list):
            uu.upload_final_set(upload_dir, output_pattern_list[position])
Beispiel #6
0
def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date=None, no_upload=True):
    """Creates gain year count tiles for all forest types.

    Four intermediate tile sets are created (loss only, gain only, no change, loss and gain pixels)
    and then merged into a single gain year count tile per tile id.

    :param sensit_type: model run type; selects the loss input and, for some steps, the step function
    :param tile_id_list: list of tile ids to process, or the string 'all'
    :param run_date: optional date that replaces the date in the output directory
    :param no_upload: if truthy, outputs are not uploaded to s3
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # No point in making gain year count tiles for tiles that don't have annual removals
        tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script: directory -> [file name pattern]
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.model_extent_dir: [cn.pattern_model_extent]
    }

    # Adds the correct loss tile to the download dictionary depending on the model run
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    output_dir_list = [cn.gain_year_count_dir]
    output_pattern_list = [cn.pattern_gain_year_count]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for input_dir, input_patterns in download_dict.items():
            uu.s3_flexible_download(input_dir, input_patterns[0], cn.docker_base_dir, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # Processor count shared by the four intermediate steps.
    # Half the cores are used on machines other than the 96 core one, but never fewer than one processor
    # because multiprocessing.Pool rejects a process count of 0.
    if cn.count == 96:
        processes = 90
    else:
        processes = max(1, int(cn.count / 2))

    # Keyword arguments passed to every intermediate step function
    step_kwargs = {'sensit_type': sensit_type, 'no_upload': no_upload}

    # Creates gain year count tiles using only pixels that had only loss
    # count/3 maxes out at about 300 GB. On 96 cores: 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak
    uu.print_log('Gain year count loss only pixels max processors=', processes)
    _run_gain_year_count_step(
        gain_year_count_all_forest_types.create_gain_year_count_loss_only,
        tile_id_list, processes, **step_kwargs)

    # Creates gain year count tiles using only pixels that had only gain.
    # Exactly one branch runs: the maxgain variant must not be followed by the standard variant.
    # On 96 cores: 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak
    uu.print_log('Gain year count gain only pixels max processors=', processes)
    if sensit_type == 'maxgain':
        _run_gain_year_count_step(
            gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain,
            tile_id_list, processes, **step_kwargs)
    elif sensit_type == 'legal_Amazon_loss':
        uu.print_log("Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. Skipping this step.")
    else:
        _run_gain_year_count_step(
            gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard,
            tile_id_list, processes, **step_kwargs)

    # Creates gain year count tiles using only pixels that had neither loss nor gain pixels
    # On 96 cores: 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak
    uu.print_log('Gain year count no change pixels max processors=', processes)
    if sensit_type == 'legal_Amazon_loss':
        no_change_step = gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss
    else:
        no_change_step = gain_year_count_all_forest_types.create_gain_year_count_no_change_standard
    _run_gain_year_count_step(no_change_step, tile_id_list, processes, **step_kwargs)

    # Creates gain year count tiles using only pixels that had both loss and gain
    # On 96 cores: 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak
    uu.print_log('Gain year count loss & gain pixels max processors=', processes)
    if sensit_type == 'maxgain':
        loss_and_gain_step = gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain
    else:
        loss_and_gain_step = gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard
    _run_gain_year_count_step(loss_and_gain_step, tile_id_list, processes, **step_kwargs)

    # Combines the four above gain year count tiles for each Hansen tile into a single output tile
    if cn.count == 96:
        merge_processes = 84   # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak
    elif cn.count < 4:
        merge_processes = 1
    else:
        merge_processes = int(cn.count/4)
    uu.print_log('Gain year count gain merge all combos max processors=', merge_processes)
    _run_gain_year_count_step(
        gain_year_count_all_forest_types.create_gain_year_count_merge,
        tile_id_list, merge_processes,
        pattern=pattern, sensit_type=sensit_type, no_upload=no_upload)

    # For single processor debugging, each step function above can instead be called directly
    # in a plain loop over tile_id_list, in the same order as the pooled steps.

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_only")
        uu.upload_final_set(output_dir_list[0], "growth_years_gain_only")
        uu.upload_final_set(output_dir_list[0], "growth_years_no_change")
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_and_gain")

        # This is the final output used later in the model
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])


def _run_gain_year_count_step(step_function, tile_id_list, processes, **step_kwargs):
    """Maps one gain year count step function over all tile ids in its own worker pool.

    The pool is always closed and joined before returning, so the worker processes of one step
    are gone before the next step starts its own pool.

    :param step_function: function that takes a tile id as its first positional argument
    :param tile_id_list: tile ids to map over
    :param processes: number of worker processes for the pool
    :param step_kwargs: keyword arguments bound to step_function for every tile
    """
    pool = multiprocessing.Pool(processes)
    try:
        pool.map(partial(step_function, **step_kwargs), tile_id_list)
    finally:
        pool.close()
        pool.join()
def mp_aggregate_results_to_4_km(sensit_type,
                                 thresh,
                                 tile_id_list,
                                 std_net_flux=None,
                                 run_date=None,
                                 no_upload=None):
    """Aggregates model output tiles into single rasters at 0.04x0.04 degree resolution.

    For each model output in the download dictionary, the tiles are rewindowed, aggregated,
    mosaicked with gdalbuildvrt/gdalwarp, tagged with metadata and optionally uploaded.
    For some sensitivity analyses, maps comparing the sensitivity net flux with the standard
    model net flux are created afterwards if std_net_flux is provided.

    :param sensit_type: model run type ('std' or the name of a sensitivity analysis)
    :param thresh: tree cover density threshold; must be between 0 and 99
    :param tile_id_list: list of tile ids to process, or the string 'all'
    :param std_net_flux: optional s3 path of the aggregated standard model net flux raster,
        used for the comparison maps
    :param run_date: optional date that replaces the date in the output directory
    :param no_upload: if truthy, outputs are not uploaded to s3
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script: directory -> [file name pattern]
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # Checks whether the canopy cover argument is valid
    if thresh < 0 or thresh > 99:
        uu.exception_log(
            no_upload,
            'Invalid tcd. Please provide an integer between 0 and 99.')

    if uu.check_aws_creds():

        # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
        uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent
        uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                                sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                                cn.pattern_mangrove_biomass_2000,
                                cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Model outputs to process are:", download_dict)

    # List of output directories. Modified later for sensitivity analysis.
    # Output pattern is determined later.
    output_dir_list = [cn.output_aggreg_dir]

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Substrings that mark intermediate or auxiliary files, which must not be aggregated
    excluded_name_parts = ('hanson_2013', 'rewindow', '0_4deg', '.ovr')

    # Iterates through the types of tiles to be processed
    for input_dir, download_pattern in download_dict.items():

        download_pattern_name = download_pattern[0]

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
        if uu.check_aws_creds():

            uu.s3_flexible_download(input_dir, download_pattern_name,
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

        # Gets an actual tile id to use as a dummy in creating the actual tile pattern
        local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir,
                                                    download_pattern_name)
        sample_tile_id = uu.get_tile_id(local_tile_list[0])

        # Renames the tiles according to the sensitivity analysis before creating dummy tiles.
        # The renaming function requires a whole tile name, so this passes a dummy tile name whose
        # tile id prefix and file extension are stripped again by the slice below.
        output_pattern = uu.sensit_tile_rename(sensit_type, sample_tile_id,
                                               download_pattern_name)
        pattern = output_pattern[9:-4]

        # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis
        if sensit_type != 'std' and sensit_type not in pattern:
            uu.print_log(
                "{} not a sensitivity analysis output. Skipping aggregation..."
                .format(pattern))
            uu.print_log("")

            continue

        # Lists the tiles of the particular type that is being iterated through.
        # Excludes all intermediate files
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        tile_list = [
            tile_name for tile_name in tile_list
            if not any(part in tile_name for part in excluded_name_parts)
        ]

        uu.print_log("There are {0} tiles to process for pattern {1}".format(
            str(len(tile_list)), download_pattern) + "\n")
        uu.print_log("Processing:", input_dir, "; ", pattern)

        # Rewindows the 10x10 degree Hansen tiles from windows of 40000x1 pixels to square windows that match
        # the resolution of the output tiles. This allows the 30x30 m pixels in each window to be summed.
        # NOTE(review): the original comments describe 400x400 pixel windows and 0.1x0.1 degree output,
        # but the gdal calls below use 0.04 degrees -- confirm the window size in aggregate_results_to_4_km.rewindow.
        # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12  # 12 processors = XXX GB peak
            else:
                processes = 16  # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload),
            tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # For single processor debugging, aggregate_results_to_4_km.rewindow(tile, no_upload) can instead be
        # called directly for each tile in tile_list.

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
        # and sums those values in each window.
        # Each pixel in the aggregated raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha).
        # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 12  # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Conversion to per pixel and aggregate max processors=',
                     processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.aggregate,
                    thresh=thresh,
                    sensit_type=sensit_type,
                    no_upload=no_upload), tile_list)
        pool.close()
        pool.join()

        # For single processor debugging, aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)
        # can instead be called directly for each tile in tile_list.

        # Makes a vrt of all the aggregated tiles at 0.04x0.04 degree resolution.
        # The wildcard is expanded by the shell, which is why os.system is used here.
        out_vrt = "{}_0_4deg.vrt".format(pattern)
        os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format(
            out_vrt, pattern))

        # Creates the output name for the aggregated map
        out_pattern = uu.name_aggregated_output(download_pattern_name, thresh,
                                                sensit_type)
        uu.print_log(out_pattern)

        # Produces a single raster of all the aggregated tiles (0.04x0.04 degree resolution)
        cmd = [
            'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0',
            '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt,
            '{}.tif'.format(out_pattern)
        ]
        uu.log_subprocess_output_full(cmd)

        # Adds metadata tags to output rasters
        uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern),
                                       sensit_type)

        # Units are different for annual removal factor, so metadata has to reflect that.
        # The annual removal factor raster additionally gets a tag explaining the sign convention.
        is_annual_removal_factor = 'annual_removal_factor' in out_pattern
        if is_annual_removal_factor:
            units_tag = 'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees'
        else:
            units_tag = 'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees'

        metadata_tags = [
            units_tag,
            'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
            'extent=Global'
        ]
        if is_annual_removal_factor:
            metadata_tags.append('scale=negative values are removals')
        metadata_tags.append(
            'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'
            .format(thresh))

        # Each tag is passed to gdal_edit.py as its own "-mo <tag>" pair, followed by the raster name
        cmd = ['gdal_edit.py']
        for tag in metadata_tags:
            cmd += ['-mo', tag]
        cmd.append('{0}.tif'.format(out_pattern))
        uu.log_subprocess_output_full(cmd)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:

            uu.print_log("Tiles processed. Uploading to s3 now...")
            uu.upload_final_set(output_dir_list[0], out_pattern)

        # Cleans up the folder before starting on the next raster type
        for vrt in glob.glob('*vrt'):
            os.remove(vrt)

        for tile_name in tile_list:
            tile_id = uu.get_tile_id(tile_name)
            os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern))

    # Compares the net flux from the standard model and the sensitivity analysis in two ways.
    # This does not work for comparing the raw outputs of the biomass_swap and US_removals sensitivity models because their
    # extents are different from the standard model's extent (tropics and US tiles vs. global).
    # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to
    # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000.
    # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the
    # code below should work.
    if sensit_type in [
            'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss'
    ]:
        return

    if not std_net_flux:

        uu.print_log(
            "No standard aggregated flux results provided. Not creating comparison maps."
        )
        return

    uu.print_log(
        "Standard aggregated flux results provided. Creating comparison maps."
    )

    # Downloads the standard model aggregated net flux raster given by std_net_flux
    uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type)

    # Identifies the standard model net flux map
    std_aggreg_flux = os.path.split(std_net_flux)[1]

    # Identifies the sensitivity model net flux map.
    # Without it there is nothing to compare against, so the comparison maps are skipped
    # rather than failing later on an undefined raster name.
    sensit_flux_matches = glob.glob(
        'net_flux_Mt_CO2e_*{}*'.format(sensit_type))
    if not sensit_flux_matches:
        uu.print_log(
            'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.'
        )
        return
    sensit_aggreg_flux = sensit_flux_matches[0]

    uu.print_log("Standard model net flux:", std_aggreg_flux)
    uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux)

    uu.print_log(
        "Creating map of percent difference between standard and {} net flux"
        .format(sensit_type))
    aggregate_results_to_4_km.percent_diff(std_aggreg_flux,
                                           sensit_aggreg_flux,
                                           sensit_type, no_upload)

    uu.print_log(
        "Creating map of which pixels change sign and which stay the same between standard and {}"
        .format(sensit_type))
    aggregate_results_to_4_km.sign_change(std_aggreg_flux,
                                          sensit_aggreg_flux,
                                          sensit_type, no_upload)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.upload_final_set(output_dir_list[0],
                            cn.pattern_aggreg_sensit_perc_diff)
        uu.upload_final_set(output_dir_list[0],
                            cn.pattern_aggreg_sensit_sign_change)
Beispiel #8
0
def mp_model_extent(sensit_type, tile_id_list, run_date=None, no_upload=None):
    """Create model extent tiles for a set of tiles using multiprocessing.

    Downloads the required input tiles (when AWS credentials are available),
    runs ``model_extent.model_extent`` on every tile id in a process pool,
    removes empty output tiles, and optionally uploads the results.

    Args:
        sensit_type: Model run type. 'std' leaves the output directory and
            pattern unchanged; 'biomass_swap' and 'legal_Amazon_loss' select
            different input tile sets and processor counts.
        tile_id_list: Iterable of tile ids to process, or the string 'all'
            to have the list built from the input folders.
        run_date: Optional run date. When not None, it is passed to
            ``uu.replace_output_dir_date`` to change the output directory.
        no_upload: When falsy (including the default None), the final tiles
            are uploaded with ``uu.upload_final_set``. Also forwarded to
            ``model_extent.model_extent``.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model. Which biomass tiles to use depends on sensitivity analysis
        if sensit_type == 'biomass_swap':
            tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, sensit_type)
        elif sensit_type == 'legal_Amazon_loss':
            tile_id_list = uu.tile_list_s3(
                cn.Brazil_forest_extent_2000_processed_dir, sensit_type)
        else:
            tile_id_list = uu.create_combined_tile_list(
                cn.WHRC_biomass_2000_unmasked_dir,
                cn.mangrove_biomass_2000_dir, cn.gain_dir, cn.tcd_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script: {s3 directory: [file name pattern]}
    download_dict = {
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
        cn.gain_dir: [cn.pattern_gain],
        cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
    }

    # The forest extent input depends on the sensitivity analysis
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_forest_extent_2000_processed_dir] = [
            cn.pattern_Brazil_forest_extent_2000_processed
        ]
    else:
        download_dict[cn.tcd_dir] = [cn.pattern_tcd]

    # The biomass input depends on the sensitivity analysis
    if sensit_type == 'biomass_swap':
        download_dict[cn.JPL_processed_dir] = [
            cn.pattern_JPL_unmasked_processed
        ]
    else:
        download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [
            cn.pattern_WHRC_biomass_2000_unmasked
        ]

    # List of output directories and output file name patterns
    output_dir_list = [cn.model_extent_dir]
    output_pattern_list = [cn.pattern_model_extent]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        # Named source_dir rather than dir so the builtin dir() is not shadowed
        for source_dir, patterns in download_dict.items():
            uu.s3_flexible_download(source_dir, patterns[0],
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Single filename pattern used both for creating tiles and for the empty-tile check
    output_pattern = output_pattern_list[0]

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 38
        else:
            processes = 42  # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases);
            # 36 = 550 GB peak; 40 = 590 GB peak; 42 = XXX GB peak
    else:
        processes = 3
    uu.print_log('Model extent processors=', processes)

    # The with-block terminates the workers if pool.map raises, so a failed tile
    # does not leave orphaned processes. On success, close/join lets workers exit normally.
    with multiprocessing.Pool(processes) as pool:
        pool.map(
            partial(model_extent.model_extent,
                    pattern=output_pattern,
                    sensit_type=sensit_type,
                    no_upload=no_upload), tile_id_list)
        pool.close()
        pool.join()

    # For single processor use (debugging), call model_extent.model_extent(tile_id, pattern, sensit_type, no_upload)
    # in a plain loop over tile_id_list instead of using the pool above.

    # Chooses the empty-tile check and its processor count; only these and the log text differ between cases
    if cn.count <= 2:  # For local tests
        processes = 1
        empty_check = uu.check_and_delete_if_empty_light
        uu.print_log(
            "Checking for empty tiles of {0} pattern with {1} processors using light function..."
            .format(output_pattern, processes))
    else:
        processes = 58  # 50 processors = 620 GB peak; 55 = 640 GB; 58 = 650 GB (continues to increase very slowly several hundred tiles in)
        empty_check = uu.check_and_delete_if_empty
        uu.print_log(
            "Checking for empty tiles of {0} pattern with {1} processors...".
            format(output_pattern, processes))

    with multiprocessing.Pool(processes) as pool:
        pool.map(partial(empty_check, output_pattern=output_pattern),
                 tile_id_list)
        pool.close()
        pool.join()

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.upload_final_set(output_dir_list[0], output_pattern)
Beispiel #9
0
def main ():

    os.chdir(cn.docker_base_dir)

    # List of possible model stages to run (not including mangrove and planted forest stages)
    model_stages = ['all', 'model_extent', 'forest_age_category_IPCC', 'annual_removals_IPCC',
                    'annual_removals_all_forest_types', 'gain_year_count', 'gross_removals_all_forest_types',
                    'carbon_pools', 'gross_emissions',
                    'net_flux', 'aggregate', 'create_supplementary_outputs']


    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(description='Run the full carbon flux model')
    parser.add_argument('--model-type', '-t', required=True, help='{}'.format(cn.model_type_arg_help))
    parser.add_argument('--stages', '-s', required=True,
                        help='Stages for running the flux model. Options are {}'.format(model_stages))
    parser.add_argument('--run-through', '-r', action='store_true',
                        help='If activated, run named stage and all following stages. If not activated, run the selected stage only.')
    parser.add_argument('--run-date', '-d', required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    parser.add_argument('--tile-id-list', '-l', required=True,
                        help='List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.')
    parser.add_argument('--carbon-pool-extent', '-ce', required=False,
                        help='Time period for which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss')
    parser.add_argument('--emitted-pools-to-use', '-p', required=False,
                        help='Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.')
    parser.add_argument('--tcd-threshold', '-tcd', required=False,
                        help='Tree cover density threshold above which pixels will be included in the aggregation.')
    parser.add_argument('--std-net-flux-aggreg', '-sagg', required=False,
                        help='The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map')
    parser.add_argument('--mangroves', '-ma', action='store_true',
                        help='Include mangrove removal rate and standard deviation tile creation step (before model extent).')
    parser.add_argument('--us-rates', '-us', action='store_true',
                        help='Include US removal rate and standard deviation tile creation step (before model extent).')
    parser.add_argument('--no-upload', '-nu', action='store_true',
                       help='Disables uploading of outputs to s3')
    parser.add_argument('--save-intermediates', '-si', action='store_true',
                        help='Saves intermediate model outputs rather than deleting them to save storage')
    parser.add_argument('--log-note', '-ln', required=False,
                        help='Note to include in log header about model run.')
    args = parser.parse_args()

    sensit_type = args.model_type
    stage_input = args.stages
    run_through = args.run_through
    run_date = args.run_date
    tile_id_list = args.tile_id_list
    carbon_pool_extent = args.carbon_pool_extent
    emitted_pools = args.emitted_pools_to_use
    thresh = args.tcd_threshold
    if thresh is not None:
        thresh = int(thresh)
    std_net_flux = args.std_net_flux_aggreg
    include_mangroves = args.mangroves
    include_us = args.us_rates
    no_upload = args.no_upload
    save_intermediates = args.save_intermediates
    log_note = args.log_note

    # Start time for script
    script_start = datetime.datetime.now()

    # Disables upload to s3 if no AWS credentials are found in environment
    if not uu.check_aws_creds():
        no_upload = True
        uu.print_log("s3 credentials not found. Uploading to s3 disabled.")

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date, no_upload=no_upload,
                    save_intermediates=save_intermediates,
                    stage_input=stage_input, run_through=run_through, carbon_pool_extent=carbon_pool_extent,
                    emitted_pools=emitted_pools, thresh=thresh, std_net_flux=std_net_flux,
                    include_mangroves=include_mangroves, include_us=include_us, log_note=log_note)


    # Checks the validity of the model stage arguments. If either one is invalid, the script ends.
    if (stage_input not in model_stages):
        uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', model_stages)
    else:
        pass

    # Generates the list of stages to run
    actual_stages = uu.analysis_stages(model_stages, stage_input, run_through, sensit_type,
                                       include_mangroves = include_mangroves, include_us=include_us)
    uu.print_log("Analysis stages to run:", actual_stages)

    # Reports how much storage is being used with files
    uu.check_storage()

    # Checks whether the sensitivity analysis argument is valid
    uu.check_sensit_type(sensit_type)

    # Checks if the carbon pool type is specified if the stages to run includes carbon pool generation.
    # Does this up front so the user knows before the run begins that information is missing.
    if ('carbon_pools' in actual_stages) & (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']):
        uu.exception_log(no_upload, "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.")

    # Checks if the correct c++ script has been compiled for the pool option selected.
    # Does this up front so that the user is prompted to compile the C++ before the script starts running, if necessary.
    if 'gross_emissions' in actual_stages:

        if emitted_pools == 'biomass_soil':
            # Some sensitivity analyses have specific gross emissions scripts.
            # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
            if sensit_type in ['no_shifting_ag', 'convert_to_grassland']:
                if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst, sensit_type)):
                    uu.print_log("C++ for {} already compiled.".format(sensit_type))
                else:
                    uu.exception_log(no_upload, 'Must compile standard {} model C++...'.format(sensit_type))
            else:
                if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(cn.c_emis_compile_dst)):
                    uu.print_log("C++ for generic emissions already compiled.")
                else:
                    uu.exception_log(no_upload, 'Must compile generic emissions C++...')

        elif (emitted_pools == 'soil_only') & (sensit_type == 'std'):
            if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log(no_upload, 'Must compile soil_only C++...')

        else:
            uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid for gross emissions')

    # Checks whether the canopy cover argument is valid up front.
    if 'aggregate' in actual_stages:
        if thresh < 0 or thresh > 99:
            uu.exception_log(no_upload, 'Invalid tcd. Please provide an integer between 0 and 99.')
        else:
            pass

    # If the tile_list argument is an s3 folder, the list of tiles in it is created
    if 's3://' in tile_id_list:
        tile_id_list = uu.tile_list_s3(tile_id_list, 'std')
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))), "\n")
    # Otherwise, check that the tile list argument is valid. "all" is the way to specify that all tiles should be processed
    else:
        tile_id_list = uu.tile_id_list_check(tile_id_list)


    # List of output directories and output file name patterns.
    # The directory list is only used for counting tiles in output folders at the end of the model
    output_dir_list = [
        cn.model_extent_dir,
        cn.age_cat_IPCC_dir,
        cn.annual_gain_AGB_IPCC_defaults_dir, cn.annual_gain_BGB_IPCC_defaults_dir, cn.stdev_annual_gain_AGB_IPCC_defaults_dir,
        cn.removal_forest_type_dir,
        cn.annual_gain_AGC_all_types_dir, cn.annual_gain_BGC_all_types_dir,
        cn.annual_gain_AGC_BGC_all_types_dir, cn.stdev_annual_gain_AGC_all_types_dir,
        cn.gain_year_count_dir,
        cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir
    ]

    # Prepends the mangrove and US output directories if mangroves are included
    if 'annual_removals_mangrove' in actual_stages:

        output_dir_list = [cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
                           cn.stdev_annual_gain_AGB_mangrove_dir] + output_dir_list

    if 'annual_removals_us' in actual_stages:

        output_dir_list = [cn.annual_gain_AGC_BGC_natrl_forest_US_dir,
                           cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir] + output_dir_list

    # Adds the carbon directories depending on which carbon emitted_pools are being generated: 2000 and/or emissions year
    if 'carbon_pools' in actual_stages:
        if 'loss' in carbon_pool_extent:
            output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir,
                                                 cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir,
                                                 cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir]

        if '2000' in carbon_pool_extent:
            output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir,
                                                 cn.deadwood_2000_dir, cn.litter_2000_dir,
                                                 cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir]

    # Adds the biomass_soil output directories or the soil_only output directories depending on the model run
    if 'gross_emissions' in actual_stages:
        if emitted_pools == 'biomass_soil':
            output_dir_list = output_dir_list + [cn.gross_emis_commod_biomass_soil_dir,
                               cn.gross_emis_shifting_ag_biomass_soil_dir,
                               cn.gross_emis_forestry_biomass_soil_dir,
                               cn.gross_emis_wildfire_biomass_soil_dir,
                               cn.gross_emis_urban_biomass_soil_dir,
                               cn.gross_emis_no_driver_biomass_soil_dir,
                               cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
                               cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
                               cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
                               cn.gross_emis_nodes_biomass_soil_dir]

        else:
            output_dir_list = output_dir_list + [cn.gross_emis_commod_soil_only_dir,
                                   cn.gross_emis_shifting_ag_soil_only_dir,
                                   cn.gross_emis_forestry_soil_only_dir,
                                   cn.gross_emis_wildfire_soil_only_dir,
                                   cn.gross_emis_urban_soil_only_dir,
                                   cn.gross_emis_no_driver_soil_only_dir,
                                   cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                                   cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                                   cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                                   cn.gross_emis_nodes_soil_only_dir]

    output_dir_list = output_dir_list + [cn.net_flux_dir]

    if 'create_supplementary_outputs' in actual_stages:
        output_dir_list = output_dir_list + \
                        [cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir,
                        cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir,
                        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir,
                        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir,
                        cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir,
                        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir,
                        cn.net_flux_per_pixel_full_extent_dir,
                        cn.net_flux_forest_extent_dir,
                        cn.net_flux_per_pixel_forest_extent_dir]


    # Creates tiles of annual AGB and BGB gain rate and AGB stdev for mangroves using the standard model
    # removal function
    if 'annual_removals_mangrove' in actual_stages:

        uu.print_log(":::::Creating tiles of annual removals for mangrove")
        start = datetime.datetime.now()

        mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date = run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_mangrove:", elapsed_time, "\n")


    # Creates tiles of annual AGC+BGC gain rate and AGC stdev for US-specific removals using the standard model
    # removal function
    if 'annual_removals_us' in actual_stages:

        uu.print_log(":::::Creating tiles of annual removals for US")
        start = datetime.datetime.now()

        mp_US_removal_rates(sensit_type, tile_id_list, run_date = run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_us:", elapsed_time, "\n")


    # Creates model extent tiles
    if 'model_extent' in actual_stages:

        uu.print_log(":::::Creating tiles of model extent")
        start = datetime.datetime.now()

        mp_model_extent(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for model_extent:", elapsed_time, "\n", "\n")


    # Creates age category tiles for natural forests
    if 'forest_age_category_IPCC' in actual_stages:

        uu.print_log(":::::Creating tiles of forest age categories for IPCC removal rates")
        start = datetime.datetime.now()

        mp_forest_age_category_IPCC(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for forest_age_category_IPCC:", elapsed_time, "\n", "\n")


    # Creates tiles of annual AGB and BGB gain rates using IPCC Table 4.9 defaults
    if 'annual_removals_IPCC' in actual_stages:

        uu.print_log(":::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults")
        start = datetime.datetime.now()

        mp_annual_gain_rate_IPCC_defaults(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_IPCC:", elapsed_time, "\n", "\n")


    # Creates tiles of annual AGC and BGC removal factors for the entire model, combining removal factors from all forest types
    if 'annual_removals_all_forest_types' in actual_stages:
        uu.print_log(":::::Creating tiles of annual aboveground and belowground removal rates for all forest types")
        start = datetime.datetime.now()

        mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types:", elapsed_time, "\n", "\n")


    # Creates tiles of the number of years of removals for all model pixels (across all forest types)
    if 'gain_year_count' in actual_stages:

        if not save_intermediates:

            uu.print_log(":::::Freeing up memory for gain year count creation by deleting unneeded tiles")
            tiles_to_delete = []
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_US)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_natrl_forest_young)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_mangrove)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_natrl_forest_young)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_IPCC_defaults)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_all_types)))
            uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

            for tile_to_delete in tiles_to_delete:
                os.remove(tile_to_delete)
            uu.print_log(":::::Deleted unneeded tiles")

        uu.check_storage()

        uu.print_log(":::::Creating tiles of gain year count for all removal pixels")
        start = datetime.datetime.now()

        mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for gain_year_count:", elapsed_time, "\n", "\n")


    # Creates tiles of gross removals for all forest types (aboveground, belowground, and above+belowground)
    if 'gross_removals_all_forest_types' in actual_stages:

        uu.print_log(":::::Creating gross removals for all forest types combined (above + belowground) tiles'")
        start = datetime.datetime.now()

        mp_gross_removals_all_forest_types(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for gross_removals_all_forest_types:", elapsed_time, "\n", "\n")


    # Creates carbon emitted_pools in loss year
    if 'carbon_pools' in actual_stages:

        if not save_intermediates:

            uu.print_log(":::::Freeing up memory for carbon pool creation by deleting unneeded tiles")
            tiles_to_delete = []
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_model_extent)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_US)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_natrl_forest_young)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGC_all_types)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types)))
            tiles_to_delete.extend(glob.glob('*growth_years*tif'))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain_year_count)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_BGCO2_all_types)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_BGCO2_all_types)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_mangrove)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_natrl_forest_young)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_IPCC_defaults)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_all_types)))
            uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

            for tile_to_delete in tiles_to_delete:
                os.remove(tile_to_delete)
            uu.print_log(":::::Deleted unneeded tiles")

        uu.check_storage()

        uu.print_log(":::::Creating carbon pool tiles")
        start = datetime.datetime.now()

        mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date=run_date, no_upload=no_upload,
                               save_intermediates=save_intermediates)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for create_carbon_pools:", elapsed_time, "\n", "\n")


    # Creates gross emissions tiles by driver, gas, and all emissions combined
    if 'gross_emissions' in actual_stages:

        if not save_intermediates:

            uu.print_log(":::::Freeing up memory for gross emissions creation by deleting unneeded tiles")
            tiles_to_delete = []
            # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_AGC_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_deadwood_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_litter_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_total_C_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_elevation)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
            uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

            uu.print_log(tiles_to_delete)

            for tile_to_delete in tiles_to_delete:
                os.remove(tile_to_delete)
            uu.print_log(":::::Deleted unneeded tiles")

        uu.check_storage()

        uu.print_log(":::::Creating gross emissions tiles")
        start = datetime.datetime.now()

        mp_calculate_gross_emissions(sensit_type, tile_id_list, emitted_pools, run_date=run_date, no_upload=no_upload)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for gross_emissions:", elapsed_time, "\n", "\n")


    # Creates net flux tiles (gross emissions - gross removals)
    if 'net_flux' in actual_stages:

        if not save_intermediates:

            uu.print_log(":::::Freeing up memory for net flux creation by deleting unneeded tiles")
            tiles_to_delete = []
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_commod_biomass_soil)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_shifting_ag_biomass_soil)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_forestry_biomass_soil)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_wildfire_biomass_soil)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_urban_biomass_soil)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_no_driver_biomass_soil)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gross_emis_nodes_biomass_soil)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_AGC_emis_year)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_emis_year)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_deadwood_emis_year_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_litter_emis_year_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_emis_year_2000)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_total_C_emis_year)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_peat_mask)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_drivers)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_climate_zone)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_bor_tem_trop_processed)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_burn_year)))
            tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000)))
            uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

            for tile_to_delete in tiles_to_delete:
                os.remove(tile_to_delete)
            uu.print_log(":::::Deleted unneeded tiles")

        uu.check_storage()

        uu.print_log(":::::Creating net flux tiles")
        start = datetime.datetime.now()

        mp_net_flux(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for net_flux:", elapsed_time, "\n", "\n")


    # Aggregates gross emissions, gross removals, and net flux to coarser resolution.
    # For sensitivity analyses, creates percent difference and sign change maps compared to standard model net flux.
    if 'aggregate' in actual_stages:

        # aux.xml files need to be deleted because otherwise they'll be included in the aggregation iteration.
        # They are created by using check_and_delete_if_empty_light()
        uu.print_log(":::::Deleting any aux.xml files")
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*aux.xml'))

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted {0} aux.xml files: {1}".formt(len(tiles_to_delete), tiles_to_delete), "\n")


        uu.print_log(":::::Creating 4x4 km aggregate maps")
        start = datetime.datetime.now()

        mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux=std_net_flux,
                                     run_date=run_date, no_upload=no_upload)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for aggregate:", elapsed_time, "\n", "\n")


    # Converts gross emissions, gross removals and net flux from per hectare rasters to per pixel rasters
    if 'create_supplementary_outputs' in actual_stages:

        if not save_intermediates:

            uu.print_log(":::::Deleting rewindowed tiles")
            tiles_to_delete = []
            tiles_to_delete.extend(glob.glob('*rewindow*tif'))
            uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

            for tile_to_delete in tiles_to_delete:
                os.remove(tile_to_delete)
            uu.print_log(":::::Deleted unneeded tiles")

        uu.check_storage()

        uu.print_log(":::::Creating supplementary versions of main model outputs (forest extent, per pixel)")
        start = datetime.datetime.now()

        mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date=run_date, no_upload=no_upload)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for supplementary output raster creation:", elapsed_time, "\n", "\n")


    # If no_upload flag is activated, tiles on s3 aren't counted
    if not no_upload:

        uu.print_log(":::::Counting tiles output to each folder")

        # Modifies output directory names to make them match those used during the model run.
        # The tiles in each of these directories and counted and logged.
        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log("Modifying output directory and file name pattern based on sensitivity analysis")
            output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

        # Changes the date in the output directories. This date was used during the model run.
        # This replaces the date in constants_and_names.
        if run_date:
            output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

        for output in output_dir_list:

            tile_count = uu.count_tiles_s3(output)
            uu.print_log("Total tiles in", output, ": ", tile_count)


    script_end = datetime.datetime.now()
    script_elapsed_time = script_end - script_start
    uu.print_log(":::::Processing time for entire run:", script_elapsed_time, "\n")

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.upload_log()
def mp_calculate_gross_emissions(sensit_type,
                                 tile_id_list,
                                 emitted_pools,
                                 run_date=None,
                                 no_upload=None):
    """Run the compiled C++ gross emissions model over a set of tiles.

    Validates the pool option, checks that the matching C++ executable exists,
    downloads the input tiles (only when AWS credentials are found), creates blank
    placeholder tiles for missing inputs, runs the emissions calculation and the
    metadata tagging in multiprocessing pools, and finally uploads the outputs
    unless uploading is disabled.

    Args:
        sensit_type: Model run type. 'std' is the standard model; any other value
            is treated as a sensitivity analysis and alters the output directory
            and file name patterns.
        tile_id_list: List of tile ids to process, or 'all' to use every tile
            listed for cn.AGC_emis_year_dir on s3.
        emitted_pools: 'biomass_soil' or 'soil_only'. 'soil_only' is only accepted
            together with sensit_type 'std'.
        run_date: Optional date that replaces the date in the output directories.
        no_upload: If truthy, outputs are not uploaded. Also forwarded to
            uu.exception_log and calculate_gross_emissions.calc_emissions.

    Returns:
        None.

    Note:
        Invalid options and missing executables are reported through
        uu.exception_log, which presumably raises -- TODO confirm. The code that
        follows those calls relies on it not returning (e.g. the output lists are
        unbound when the soil_only executable is missing).
    """

    os.chdir(cn.docker_base_dir)

    folder = cn.docker_base_dir

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year],
        cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year],
        cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000],
        cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000],
        cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000],
        cn.peat_mask_dir: [cn.pattern_peat_mask],
        cn.ifl_primary_processed_dir: [cn.pattern_ifl_primary],
        cn.planted_forest_type_unmasked_dir:
        [cn.pattern_planted_forest_type_unmasked],
        cn.drivers_processed_dir: [cn.pattern_drivers],
        cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        cn.burn_year_dir: [cn.pattern_burn_year]
    }

    # Special loss tiles for the Brazil and Mekong sensitivity analyses
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [
            cn.pattern_Brazil_annual_loss_processed
        ]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [
            cn.pattern_Mekong_loss_processed
        ]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Checks the validity of the emitted_pools argument
    if emitted_pools not in ('soil_only', 'biomass_soil'):
        uu.exception_log(
            no_upload,
            'Invalid pool input. Please choose soil_only or biomass_soil.')

    # Sets the outputs for the selected pool option and checks that the matching C++ has been compiled
    if emitted_pools == 'biomass_soil':

        # Output file directories for biomass+soil. Must be in same order as output pattern directories.
        output_dir_list = [
            cn.gross_emis_commod_biomass_soil_dir,
            cn.gross_emis_shifting_ag_biomass_soil_dir,
            cn.gross_emis_forestry_biomass_soil_dir,
            cn.gross_emis_wildfire_biomass_soil_dir,
            cn.gross_emis_urban_biomass_soil_dir,
            cn.gross_emis_no_driver_biomass_soil_dir,
            cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
            cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
            cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
            cn.gross_emis_nodes_biomass_soil_dir
        ]

        output_pattern_list = [
            cn.pattern_gross_emis_commod_biomass_soil,
            cn.pattern_gross_emis_shifting_ag_biomass_soil,
            cn.pattern_gross_emis_forestry_biomass_soil,
            cn.pattern_gross_emis_wildfire_biomass_soil,
            cn.pattern_gross_emis_urban_biomass_soil,
            cn.pattern_gross_emis_no_driver_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil,
            cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil,
            cn.pattern_gross_emis_nodes_biomass_soil
        ]

        # Some sensitivity analyses have specific gross emissions scripts.
        # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
        if sensit_type in ('no_shifting_ag', 'convert_to_grassland'):
            if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(
                    cn.c_emis_compile_dst, sensit_type)):
                uu.print_log(
                    "C++ for {} already compiled.".format(sensit_type))
            else:
                uu.exception_log(
                    no_upload,
                    'Must compile {} model C++...'.format(sensit_type))
        else:
            if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(
                    cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log(no_upload,
                                 'Must compile generic emissions C++...')

    # Logical `and` (not bitwise `&`) because both operands are plain comparisons
    elif emitted_pools == 'soil_only' and sensit_type == 'std':
        if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(
                cn.c_emis_compile_dst)):
            uu.print_log("C++ for soil_only already compiled.")

            # Output file directories for soil_only. Must be in same order as output pattern directories.
            output_dir_list = [
                cn.gross_emis_commod_soil_only_dir,
                cn.gross_emis_shifting_ag_soil_only_dir,
                cn.gross_emis_forestry_soil_only_dir,
                cn.gross_emis_wildfire_soil_only_dir,
                cn.gross_emis_urban_soil_only_dir,
                cn.gross_emis_no_driver_soil_only_dir,
                cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                cn.gross_emis_nodes_soil_only_dir
            ]

            output_pattern_list = [
                cn.pattern_gross_emis_commod_soil_only,
                cn.pattern_gross_emis_shifting_ag_soil_only,
                cn.pattern_gross_emis_forestry_soil_only,
                cn.pattern_gross_emis_wildfire_soil_only,
                cn.pattern_gross_emis_urban_soil_only,
                cn.pattern_gross_emis_no_driver_soil_only,
                cn.pattern_gross_emis_all_gases_all_drivers_soil_only,
                cn.pattern_gross_emis_co2_only_all_drivers_soil_only,
                cn.pattern_gross_emis_non_co2_all_drivers_soil_only,
                cn.pattern_gross_emis_nodes_soil_only
            ]

        else:
            uu.exception_log(no_upload, 'Must compile soil_only C++...')

    else:
        uu.exception_log(no_upload,
                         'Pool and/or sensitivity analysis option not valid')

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        # Each value is a one-element list holding the file name pattern for that directory
        for source_dir, source_patterns in download_dict.items():
            uu.s3_flexible_download(source_dir, source_patterns[0],
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    uu.print_log(output_dir_list)
    uu.print_log(output_pattern_list)

    # The C++ code expects certain tiles for every input 10x10.
    # However, not all Hansen tiles have all of these inputs.
    # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent tiles.
    # That way, the C++ script gets all the necessary input files.
    # If it doesn't get the necessary inputs, it skips that tile.
    uu.print_log("Making blank tiles for inputs that don't currently exist")
    # All of the inputs that need to have dummy tiles made in order to match the tile list of the carbon emitted_pools
    pattern_list = [
        cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask,
        cn.pattern_ifl_primary, cn.pattern_drivers,
        cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year,
        cn.pattern_climate_zone, cn.pattern_soil_C_emis_year_2000
    ]

    # textfile that stores the names of the blank tiles that are created for processing.
    # This will be iterated through to delete the tiles at the end of the script.
    uu.create_blank_tile_txt()

    for pattern in pattern_list:
        pool = multiprocessing.Pool(
            processes=80)  # 60 = 100 GB peak; 80 =  XXX GB peak
        pool.map(
            partial(uu.make_blank_tile,
                    pattern=pattern,
                    folder=folder,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

    # Calculates gross emissions for each tile
    # count/4 uses about 390 GB on a r4.16xlarge spot machine.
    # processes=18 uses about 440 GB on an r4.16xlarge spot machine.
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 15  # 15 processors = XXX GB peak
        else:
            processes = 19  # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 716 GB peak
    else:
        processes = 9
    uu.print_log('Gross emissions max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(calculate_gross_emissions.calc_emissions,
                emitted_pools=emitted_pools,
                sensit_type=sensit_type,
                folder=folder,
                no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # Print the list of blank created tiles, delete the tiles, and delete their text file
    uu.list_and_delete_blank_tiles()

    # The process count does not depend on the pattern, so it is chosen once before the loop
    # 45 processors = ~30 GB peak; 55 = XXX GB peak; 75 = XXX GB peak
    metadata_processes = 75 if cn.count == 96 else 9

    for pattern in output_pattern_list:

        uu.print_log("Adding metadata tags for pattern {}".format(pattern))
        uu.print_log('Adding metadata tags max processors=',
                     metadata_processes)
        pool = multiprocessing.Pool(metadata_processes)
        pool.map(
            partial(calculate_gross_emissions.add_metadata_tags,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        # The two lists are built in parallel, so each directory pairs with its pattern
        for output_dir, output_pattern in zip(output_dir_list,
                                              output_pattern_list):
            uu.upload_final_set(output_dir, output_pattern)
Beispiel #11
0
def mp_gross_removals_all_forest_types(sensit_type,
                                       tile_id_list,
                                       run_date=None,
                                       no_upload=True):
    """Create gross removals tiles for all forest types, tile by tile, in parallel.

    Downloads the annual removal rate and gain year count inputs (only when AWS
    credentials are found), runs
    gross_removals_all_forest_types.gross_removals_all_forest_types on each tile in a
    multiprocessing pool, checks each output pattern for empty tiles, and uploads the
    outputs unless uploading is disabled.

    Args:
        sensit_type: Model run type. 'std' is the standard model; any other value
            alters the output directory and file name patterns.
        tile_id_list: List of tile ids to process, or 'all' to use the tiles present
            in both the gain year count and annual removals directories on s3.
        run_date: Optional date that replaces the date in the output directories.
        no_upload: If truthy (the default), outputs are not uploaded.

    Returns:
        None.
    """

    os.chdir(cn.docker_base_dir)

    # A full model run uses only tiles that have both a gain year count and an annual removal rate
    if tile_id_list == 'all':
        year_count_tiles = uu.tile_list_s3(cn.gain_year_count_dir,
                                           sensit_type=sensit_type)
        annual_rate_tiles = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir,
                                            sensit_type=sensit_type)
        shared_tiles = set(year_count_tiles).intersection(annual_rate_tiles)
        tile_id_list = list(shared_tiles)
        uu.print_log(
            "Gross removals tile_id_list is combination of gain_year_count and annual_removals tiles:"
        )

    uu.print_log(tile_id_list)
    tile_total = str(len(tile_id_list))
    uu.print_log("There are {} tiles to process".format(tile_total) + "\n")

    # Input directories mapped to a one-element list holding their file name pattern
    download_dict = {
        cn.annual_gain_AGC_all_types_dir: [
            cn.pattern_annual_gain_AGC_all_types,
        ],
        cn.annual_gain_BGC_all_types_dir: [
            cn.pattern_annual_gain_BGC_all_types,
        ],
        cn.gain_year_count_dir: [
            cn.pattern_gain_year_count,
        ],
    }

    # Output directories and their file name patterns, kept in matching order
    output_dir_list = [
        cn.cumul_gain_AGCO2_all_types_dir,
        cn.cumul_gain_BGCO2_all_types_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir,
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_all_types,
        cn.pattern_cumul_gain_BGCO2_all_types,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types,
    ]

    # Inputs are only fetched when AWS credentials are found
    if uu.check_aws_creds():
        for source_dir, source_patterns in download_dict.items():
            uu.s3_flexible_download(source_dir, source_patterns[0],
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

    # Non-standard runs get their own output directories and file name patterns
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # An optional run date replaces the date in constants_and_names
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Picks the pool size for the gross removals calculation
    # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak
    if cn.count != 96:
        processes = 2
    elif sensit_type == 'biomass_swap':
        processes = 18
    else:
        processes = 22
    uu.print_log('Gross removals max processors=', processes)

    removals_worker = partial(
        gross_removals_all_forest_types.gross_removals_all_forest_types,
        output_pattern_list=output_pattern_list,
        sensit_type=sensit_type,
        no_upload=no_upload)
    pool = multiprocessing.Pool(processes)
    pool.map(removals_worker, tile_id_list)
    pool.close()
    pool.join()

    # Checks the gross removals outputs for tiles with no data.
    # Local tests (cn.count <= 2) use the light check with a single processor.
    use_light_check = cn.count <= 2
    if use_light_check:
        processes = 1
        log_template = "Checking for empty tiles of {0} pattern with {1} processors using light function..."
    else:
        processes = 55  # 55 processors = 670 GB peak
        log_template = "Checking for empty tiles of {0} pattern with {1} processors..."

    for output_pattern in output_pattern_list:
        uu.print_log(log_template.format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        if use_light_check:
            empty_check = uu.check_and_delete_if_empty_light
        else:
            empty_check = uu.check_and_delete_if_empty
        pool.map(partial(empty_check, output_pattern=output_pattern),
                 tile_id_list)
        pool.close()
        pool.join()

    # Outputs are uploaded unless the no_upload flag is activated
    if not no_upload:
        for position, output_dir in enumerate(output_dir_list):
            uu.upload_final_set(output_dir, output_pattern_list[position])
Beispiel #12
0
def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date = None, no_upload = None,
                           save_intermediates = None):

    os.chdir(cn.docker_base_dir)

    if (sensit_type != 'std') & (carbon_pool_extent != 'loss'):
        uu.exception_log(no_upload, "Sensitivity analysis run must use 'loss' extent")

    # Checks the validity of the carbon_pool_extent argument
    if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']):
        uu.exception_log(no_upload, "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.")


    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant
    # because there must be loss pixels for emissions-year carbon pools to exist.
    if (tile_id_list == 'all') & (carbon_pool_extent == 'loss'):
        # Lists the tiles that have both model extent and loss pixels, both being necessary precursors for emissions
        model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)
        loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type)
        uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:")
        tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list))

    # For runs generating carbon pools in 2000, all model extent tiles are relevant.
    if (tile_id_list == 'all') & (carbon_pool_extent != 'loss'):
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)


    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    output_dir_list = []
    output_pattern_list = []

    # Output files and patterns and files to download if carbon emitted_pools for 2000 are being generated
    if '2000' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir,
                           cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000,
                               cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000]

        # Files to download for this script
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Output files and patterns and files to download if carbon emitted_pools for loss year are being generated
    if 'loss' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir,
                           cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000,
                               cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year]

        # Files to download for this script. This has the same items as the download_dict for 2000 pools plus
        # other tiles.
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
            cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types]
       }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]


    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)
    else:
        uu.print_log("Output directory list for standard model:", output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    if uu.check_aws_creds():

        # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
        cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
        uu.log_subprocess_output_full(cmd)

    pd.options.mode.chained_assignment = None

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

    mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                         cn.below_to_above_trop_dry_mang,
                                                                                         cn.below_to_above_trop_wet_mang,
                                                                                         cn.below_to_above_subtrop_mang)

    mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                              cn.deadwood_to_above_trop_dry_mang,
                                                                                              cn.deadwood_to_above_trop_wet_mang,
                                                                                              cn.deadwood_to_above_subtrop_mang)

    mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                            cn.litter_to_above_trop_dry_mang,
                                                                                            cn.litter_to_above_trop_wet_mang,
                                                                                            cn.litter_to_above_subtrop_mang)

    uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 16  # 16 processors = XXX GB peak
            else:
                processes = 20  # 25 processors > 750 GB peak; 16 = 560 GB peak;
                # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 690 GB peak (stops at 600, then increases slowly); 21 > 750 GB peak
        else: # For 2000, or loss & 2000
            processes = 15  # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak
    else:
        processes = 2
    uu.print_log('AGC loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_AGC,
                     sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        if carbon_pool_extent in ['loss', '2000']:
            uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
        else:
            uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
            uu.upload_final_set(output_dir_list[6], output_pattern_list[6])

    uu.check_storage()

    if not save_intermediates:

        uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles")
        tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()


    uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent))
    # Creates a single filename pattern to pass to the multiprocessor call
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 30  # 30 processors = XXX GB peak
            else:
                processes = 39  # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 690 GB peak; 39 = XXX GB peak
        else: # For 2000, or loss & 2000
            processes = 30  # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak
    else:
        processes = 2
    uu.print_log('BGC max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                     carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_upload)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        if carbon_pool_extent in ['loss', '2000']:
            uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
        else:
            uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
            uu.upload_final_set(output_dir_list[7], output_pattern_list[7])

    uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on.
    # Thus must delete AGC, BGC, and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine
    # for total C 2000 calculation.
    if '2000' in carbon_pool_extent:
        uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles")
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000)))

        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()


    uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 15  # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 685 GB peak (stops around 600, then increases very very slowly); 15 = 700 GB peak
        else: # For 2000, or loss & 2000
            ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced.
            ### There wouldn't have been enough room for all deadwood and litter otherwise.
            ### For example, when deadwood and litter generation started getting up to around 50N, I deleted
            ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S.
            processes = 16  # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Deadwood and litter max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                carbon_pool_extent=carbon_pool_extent,
                sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type, no_upload)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        if carbon_pool_extent in ['loss', '2000']:
            uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
            uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
        else:
            uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
            uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
            uu.upload_final_set(output_dir_list[8], output_pattern_list[8])  # deadwood
            uu.upload_final_set(output_dir_list[9], output_pattern_list[9])  # litter

    uu.check_storage()

    if not save_intermediates:

        uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles")
        tiles_to_delete = []
        tiles_to_delete .extend(glob.glob('*{}*tif'.format(cn.pattern_elevation)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()


    if 'loss' in carbon_pool_extent:

        uu.print_log("Creating tiles of soil carbon in loss extent")

        # If pools in 2000 weren't generated, soil carbon in emissions extent is 4.
        # If pools in 2000 were generated, soil carbon in emissions extent is 10.
        if '2000' not in carbon_pool_extent:
            pattern = output_pattern_list[4]
        else:
            pattern = output_pattern_list[10]

        if cn.count == 96:
            # More processors can be used for loss carbon pools than for 2000 carbon pools
            if carbon_pool_extent == 'loss':
                if sensit_type == 'biomass_swap':
                    processes = 36  # 36 processors = XXX GB peak
                else:
                    processes = 44  # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = 640 GB peak; 44 = XXX GB peak
            else: # For 2000, or loss & 2000
                processes = 12  # 12 processors = XXX GB peak
        else:
            processes = 2
        uu.print_log('Soil carbon loss year max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:

            # If pools in 2000 weren't generated, soil carbon in emissions extent is 4.
            # If pools in 2000 were generated, soil carbon in emissions extent is 10.
            if '2000' not in carbon_pool_extent:
                uu.upload_final_set(output_dir_list[4], output_pattern_list[4])
            else:
                uu.upload_final_set(output_dir_list[10], output_pattern_list[10])

        uu.check_storage()

    if '2000' in carbon_pool_extent:
        uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.")
        uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on.
    # Thus must delete BGC and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine
    # for total C 2000 calculation.
    if '2000' in carbon_pool_extent:

        # Files to download for total C 2000. Previously deleted to save space
        download_dict = {
            cn.BGC_2000_dir: [cn.pattern_BGC_2000],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000]
        }

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    uu.print_log("Creating tiles of total carbon")
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 14  # 14 processors = XXX GB peak
            else:
                processes = 19  # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = 660 GB peak; 19 = XXX GB peak
        else: # For 2000, or loss & 2000
            processes = 12  # 12 processors = XXX GB peak
    else:
        processes = 2
    uu.print_log('Total carbon loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        if carbon_pool_extent in ['loss', '2000']:
            uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
        else:
            uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
            uu.upload_final_set(output_dir_list[11], output_pattern_list[11])

        uu.check_storage()
Beispiel #13
0
def _map_tiles_in_pool(worker, tile_id_list, processes):
    """Apply ``worker`` to every tile id using a pool of ``processes`` workers.

    On success the pool is closed and joined, exactly as a plain
    ``pool.map(); pool.close(); pool.join()`` sequence would do. If the map
    call raises, the workers are terminated before the exception is re-raised
    so that a failed run does not leave worker processes behind.

    :param worker: picklable callable that takes a tile id as its only positional argument
    :param tile_id_list: iterable of tile ids to process
    :param processes: number of worker processes to start
    """
    pool = multiprocessing.Pool(processes)
    try:
        pool.map(worker, tile_id_list)
    except BaseException:
        # Includes KeyboardInterrupt: stop the workers rather than waiting on them
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # join() is only valid after close() or terminate(); one of them has always run by now
        pool.join()


def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type,
                                                 tile_id_list,
                                                 run_date=None,
                                                 no_upload=None):
    """Create removal factor tiles for all forest types, using multiple processors.

    Downloads the input tiles (when AWS credentials are found), runs
    annual_gain_rate_AGC_BGC_all_forest_types on every tile, checks each output
    pattern for empty tiles, and uploads the outputs unless uploading is disabled.
    The working directory is changed to cn.docker_base_dir.

    :param sensit_type: sensitivity analysis type; 'std' is the standard model run
    :param tile_id_list: list of tile ids to process, or 'all' to use every tile listed
        for cn.model_extent_dir
    :param run_date: optional date that replaces the date in the output directories
    :param no_upload: if truthy, outputs are not uploaded
    :return: None
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script. Only the first pattern listed for each directory is downloaded.
    download_dict = {
        cn.model_extent_dir: [cn.pattern_model_extent],
        cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
        cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove],
        cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US],
        cn.annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_annual_gain_AGC_natrl_forest_young],
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults],
        cn.stdev_annual_gain_AGB_mangrove_dir:
        [cn.pattern_stdev_annual_gain_AGB_mangrove],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US],
        cn.stdev_annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young],
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults]
    }

    # List of output directories and output file name patterns.
    # The two lists are parallel: entry i of one belongs with entry i of the other.
    output_dir_list = [
        cn.removal_forest_type_dir, cn.annual_gain_AGC_all_types_dir,
        cn.annual_gain_BGC_all_types_dir, cn.annual_gain_AGC_BGC_all_types_dir,
        cn.stdev_annual_gain_AGC_all_types_dir
    ]
    output_pattern_list = [
        cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types,
        cn.pattern_annual_gain_BGC_all_types,
        cn.pattern_annual_gain_AGC_BGC_all_types,
        cn.pattern_stdev_annual_gain_AGC_all_types
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for s3_dir, patterns in download_dict.items():
            uu.s3_flexible_download(s3_dir, patterns[0], cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 13
        else:
            processes = 17  # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = >680 GB peak
    else:
        processes = 2
    uu.print_log('Removal factor processors=', processes)
    _map_tiles_in_pool(
        partial(annual_gain_rate_AGC_BGC_all_forest_types.
                annual_gain_rate_AGC_BGC_all_forest_types,
                output_pattern_list=output_pattern_list,
                sensit_type=sensit_type,
                no_upload=no_upload), tile_id_list, processes)

    # # For single processor use
    # for tile_id in tile_id_list:
    #     annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(
    #         tile_id, output_pattern_list=output_pattern_list, sensit_type=sensit_type, no_upload=no_upload)

    # Checks the gross removals outputs for tiles with no data.
    # Which checker and how many processors to use depends only on cn.count, so it is decided once for all patterns.
    if cn.count <= 2:  # For local tests
        processes = 1
        check_function = uu.check_and_delete_if_empty_light
        check_message = "Checking for empty tiles of {0} pattern with {1} processors using light function..."
    else:
        processes = 55  # 50 processors = XXX GB peak
        check_function = uu.check_and_delete_if_empty
        check_message = "Checking for empty tiles of {0} pattern with {1} processors..."

    for output_pattern in output_pattern_list:
        uu.print_log(check_message.format(output_pattern, processes))
        _map_tiles_in_pool(
            partial(check_function, output_pattern=output_pattern),
            tile_id_list, processes)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        for output_dir, output_pattern in zip(output_dir_list,
                                              output_pattern_list):
            uu.upload_final_set(output_dir, output_pattern)