def mp_continent_ecozone_tiles(tile_id_list, run_date=None):
    """Create continent-ecozone tiles from the FAO ecozone shapefile.

    Downloads and unzips the continent-ecozone shapefile, rasterizes it into
    tiles in parallel, and uploads the raw and processed tile sets to s3.

    Args:
        tile_id_list: list of tile ids to process, or 'all' to use the
            combined WHRC/mangrove biomass tile list.
        run_date: optional date string that replaces the date in the output
            s3 directories.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(
            cn.pattern_WHRC_biomass_2000_non_mang_non_planted,
            cn.mangrove_biomass_2000_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # If the continent-ecozone shapefile hasn't already been downloaded, it will be downloaded and unzipped
    uu.s3_file_download(cn.cont_eco_s3_zip, cn.docker_base_dir, 'std')

    # Unzips ecozone shapefile
    cmd = ['unzip', cn.cont_eco_zip]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    # Waits for unzip to exit so the shapefile is fully extracted (and the
    # child process reaped) before tile creation starts
    process.wait()

    # List of output directories and output file name patterns
    output_dir_list = [cn.cont_eco_raw_dir, cn.cont_eco_dir]
    output_pattern_list = [
        cn.pattern_cont_eco_raw, cn.pattern_cont_eco_processed
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # For multiprocessor use
    processes = int(cn.count / 4)
    uu.print_log('Continent-ecozone tile creation max processors=', processes)
    # BUG FIX: the pool was previously used without ever being created
    # (NameError on `pool`); it is now created, closed, and joined properly.
    pool = multiprocessing.Pool(processes)
    pool.map(continent_ecozone_tiles.create_continent_ecozone_tiles,
             tile_id_list)
    pool.close()
    pool.join()

    # Uploads the continent-ecozone tile to s3 before the codes are expanded to pixels in 1024x1024 windows that don't have codes.
    # These are not used for the model. They are for reference and completeness.
    for output_dir, output_pattern in zip(output_dir_list,
                                          output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
# Ejemplo n.º 2  (non-code separator left over from the original paste;
# 0              commented out so the module stays syntactically valid)
def mp_create_supplementary_outputs(sensit_type, tile_id_list, run_date=None):
    """Create derivative versions of the three main model outputs.

    For gross removals, gross emissions, and net flux, this creates per pixel
    full extent, per hectare forest extent, and per pixel forest extent tiles,
    deletes empty forest-extent tiles, and uploads everything to s3.

    Args:
        sensit_type: sensitivity analysis type ('std' for the standard model).
        tile_id_list: list of tile ids to process, or 'all' to process every
            tile in the net flux directory.
        run_date: optional date string that replaces the date in the output
            s3 directories.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # A new name is used so that tile_id_list stays as the command line argument.
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list_outer = uu.tile_list_s3(cn.net_flux_dir, sensit_type)
    else:
        # BUG FIX: tile_id_list_outer was previously undefined when an
        # explicit tile list was supplied, causing a NameError below.
        tile_id_list_outer = tile_id_list

    uu.print_log(tile_id_list_outer)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list_outer))) +
        "\n")

    # Files to download for this script
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # List of output directories and output file name patterns.
    # Outputs must be in the same order as the download dictionary above, and then follow the same order for all outputs.
    # Currently, it's: per pixel full extent, per hectare forest extent, per pixel forest extent.
    output_dir_list = [
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_forest_extent_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent_dir, cn.
        gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_forest_extent_dir, cn.
        gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent_dir,
        cn.net_flux_per_pixel_full_extent_dir, cn.net_flux_forest_extent_dir,
        cn.net_flux_per_pixel_forest_extent_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_full_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_forest_extent,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel_forest_extent,
        cn.
        pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_full_extent,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_forest_extent,
        cn.
        pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_forest_extent,
        cn.pattern_net_flux_per_pixel_full_extent,
        cn.pattern_net_flux_forest_extent,
        cn.pattern_net_flux_per_pixel_forest_extent
    ]

    # Pixel area tiles-- necessary for calculating per pixel values
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, sensit_type,
                            tile_id_list_outer)
    # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for masking to forest extent
    uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                            sensit_type, tile_id_list_outer)
    uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain, cn.docker_base_dir,
                            sensit_type, tile_id_list_outer)
    uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                            cn.pattern_mangrove_biomass_2000,
                            cn.docker_base_dir, sensit_type,
                            tile_id_list_outer)

    uu.print_log("Model outputs to process are:", download_dict)

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through input tile sets
    for input_dir, values in download_dict.items():

        # Sets the pattern for the input being processed
        input_pattern = values[0]

        # If a full model run is specified, the correct set of tiles for the particular script is listed.
        # A new list is named so that tile_id_list stays as the command line argument.
        if tile_id_list == 'all':
            # List of tiles to run in the model
            tile_id_list_input = uu.tile_list_s3(input_dir, sensit_type)
        else:
            tile_id_list_input = tile_id_list

        uu.print_log(tile_id_list_input)
        uu.print_log("There are {} tiles to process".format(
            str(len(tile_id_list_input))) + "\n")

        uu.print_log("Downloading tiles from", input_dir)
        uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir,
                                sensit_type, tile_id_list_input)

        # Matches the output patterns with the input pattern.
        # This requires that the output patterns be grouped by input pattern and be in the order described in
        # the comment above.
        if "gross_removals" in input_pattern:
            output_patterns = output_pattern_list[0:3]
        elif "gross_emis" in input_pattern:
            output_patterns = output_pattern_list[3:6]
        elif "net_flux" in input_pattern:
            output_patterns = output_pattern_list[6:9]
        else:
            uu.exception_log(
                "No output patterns found for input pattern. Please check.")

        uu.print_log("Input pattern:", input_pattern)
        uu.print_log("Output patterns:", output_patterns)

        # Gross removals: 20 processors = >740 GB peak; 15 = 570 GB peak; 17 = 660 GB peak; 18 = 670 GB peak
        # Gross emissions: 17 processors = 660 GB peak; 18 = 710 GB peak
        if cn.count == 96:
            processes = 18
        else:
            processes = 2
        uu.print_log(
            "Creating derivative outputs for {0} with {1} processors...".
            format(input_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(create_supplementary_outputs.create_supplementary_outputs,
                    input_pattern=input_pattern,
                    output_patterns=output_patterns,
                    sensit_type=sensit_type), tile_id_list_input)
        pool.close()
        pool.join()

        # Checks the two forest extent output tiles created from each input tile for whether there is data in them.
        # Because the extent is restricted in the forest extent pixels, some tiles with pixels in the full extent
        # version may not have pixels in the forest extent version.
        for output_pattern in output_patterns[1:3]:
            if cn.count <= 2:  # For local tests
                processes = 1
                uu.print_log(
                    "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                    .format(output_pattern, processes))
                pool = multiprocessing.Pool(processes)
                pool.map(
                    partial(uu.check_and_delete_if_empty_light,
                            output_pattern=output_pattern), tile_id_list_input)
                pool.close()
                pool.join()
            else:
                processes = 55  # 50 processors = 560 GB peak for gross removals; 55 = XXX GB peak
                uu.print_log(
                    "Checking for empty tiles of {0} pattern with {1} processors..."
                    .format(output_pattern, processes))
                pool = multiprocessing.Pool(processes)
                pool.map(
                    partial(uu.check_and_delete_if_empty,
                            output_pattern=output_pattern), tile_id_list_input)
                pool.close()
                pool.join()

    # Uploads output tiles to s3
    for output_dir, output_pattern in zip(output_dir_list,
                                          output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
# Ejemplo n.º 3  (non-code separator left over from the original paste;
# 0              commented out so the module stays syntactically valid)
def _make_removal_factor_dict(table, value_vars, age_code_dict):
    """Convert a wide removal-factor spreadsheet table into a lookup dictionary.

    Shared by the removal-rate and standard-deviation pipelines, which were
    previously duplicated copy-paste blocks.

    Args:
        table: DataFrame with a 'gainEcoCon' column (continent-ecozone code)
            and one column per age category listed in value_vars.
        value_vars: column names (one per forest age category) to melt into
            long format.
        age_code_dict: {age category column name: numeric age code} used to
            build the unique continent-ecozone-age code (gainEcoCon + code).

    Returns:
        dict mapping float(continent-ecozone-age code) -> factor value.
        Continent-ecozone codes with no age category, code 0 (not in a
        continent), and bare age codes (age but no continent-ecozone) all
        map to 0.
    """

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    simplified = table.drop_duplicates(subset='gainEcoCon', keep='first')

    # Converts table from wide to long, so each continent-ecozone-age category has its own row
    long_table = pd.melt(simplified,
                         id_vars=['gainEcoCon'],
                         value_vars=value_vars)
    long_table = long_table.dropna()

    # Continent-ecozone combinations without an age category are assigned a
    # factor of 0; used whenever there is a continent-ecozone pixel without
    # a forest age pixel.
    con_eco_only = long_table.drop_duplicates(subset='gainEcoCon',
                                              keep='first').copy()
    con_eco_only['value'] = 0
    con_eco_only['cont_eco_age'] = con_eco_only['gainEcoCon']

    # Creates a unique value for each continent-ecozone-age category
    long_table = long_table.replace({"variable": age_code_dict})
    long_table['cont_eco_age'] = long_table['gainEcoCon'] + long_table[
        'variable']

    # Merges the continent-ecozone-only rows with the continent-ecozone-age rows
    all_combos = pd.concat([con_eco_only, long_table])

    # Converts the continent-ecozone-age codes and corresponding values to a dictionary
    factor_dict = pd.Series(all_combos.value.values,
                            index=all_combos.cont_eco_age).to_dict()

    # Code 0 = pixel not in a continent
    factor_dict[0] = 0

    # Bare age codes: pixels that have forest age but no continent-ecozone
    for age_code in age_code_dict.values():
        factor_dict[age_code] = 0

    # Converts all the keys to float type to match raster pixel values
    return {float(code): value for code, value in factor_dict.items()}


def mp_annual_gain_rate_IPCC_defaults(sensit_type,
                                      tile_id_list,
                                      run_date=None):
    """Create tiles of annual AGB and BGB removal rates and their standard
    deviations using IPCC Table 4.9 default rates.

    Args:
        sensit_type: sensitivity analysis type ('std' for the standard model).
        tile_id_list: list of tile ids to process, or 'all' to process every
            tile in the model extent.
        run_date: optional date string that replaces the date in the output
            s3 directories.
    """

    os.chdir(cn.docker_base_dir)
    # Suppresses pandas chained-assignment warnings for the table work below
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_IPCC_defaults_dir,
        cn.annual_gain_BGB_IPCC_defaults_dir,
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_IPCC_defaults,
        cn.pattern_annual_gain_BGB_IPCC_defaults,
        cn.pattern_stdev_annual_gain_AGB_IPCC_defaults
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    # (renamed from `dir`/`pattern` to avoid shadowing the builtin `dir`)
    for input_dir, patterns in download_dict.items():
        uu.s3_flexible_download(input_dir, patterns[0], cn.docker_base_dir,
                                sensit_type, tile_id_list)

    # Table with IPCC Table 4.9 default gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionary

    # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="natrl fores gain, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default rates for tile creation")

    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    rate_age_dict = {
        'growth_secondary_less_20': 10000,
        'growth_secondary_greater_20': 20000,
        'growth_primary': 30000
    }

    gain_table_dict = _make_removal_factor_dict(
        gain_table, [
            'growth_primary', 'growth_secondary_greater_20',
            'growth_secondary_less_20'
        ], rate_age_dict)

    ### To make the removal factor standard deviation dictionary

    # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default standard deviations for tile creation"
        )

    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rate standard deviations
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, for std model")

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    stdev_age_dict = {
        'stdev_secondary_less_20': 10000,
        'stdev_secondary_greater_20': 20000,
        'stdev_primary': 30000
    }

    stdev_table_dict = _make_removal_factor_dict(
        stdev_table, [
            'stdev_primary', 'stdev_secondary_greater_20',
            'stdev_secondary_less_20'
        ], stdev_age_dict)

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 24  # 24 processors = 590 GB peak
        else:
            processes = 30  # 30 processors = 725 GB peak
    else:
        processes = 2
    uu.print_log('Annual gain rate natural forest max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_IPCC_defaults.annual_gain_rate,
                sensit_type=sensit_type,
                gain_table_dict=gain_table_dict,
                stdev_table_dict=stdev_table_dict,
                output_pattern_list=output_pattern_list), tile_id_list)
    pool.close()
    pool.join()

    # Uploads output tiles to s3
    for output_dir, output_pattern in zip(output_dir_list,
                                          output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
def mp_peatland_processing(tile_id_list, run_date=None, sensit_type='std'):
    """Create peat mask tiles from SoilGrids250, CIFOR, and Jukka peat data.

    Args:
        tile_id_list: list of tile ids to process, or 'all' to process every
            tile in the pixel area directory.
        run_date: optional date string that replaces the date in the output
            s3 directories.
        sensit_type: sensitivity analysis type for the s3 downloads.
            BUG FIX: this was previously referenced in the function body but
            never defined (NameError); it is now a keyword argument defaulting
            to the standard model run, so existing callers are unaffected.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.peat_mask_dir]
    output_pattern_list = [cn.pattern_peat_mask]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Download SoilGrids250 most probable soil class rasters.
    # There are 459 tiles and it takes about 20 minutes to download them
    cmd = [
        'wget', '--recursive', '--no-parent', '-nH', '--cut-dirs=7',
        '--accept', '*.geotiff', '{}'.format(cn.soilgrids250_peat_url)
    ]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Making SoilGrids250 most likely soil class vrt...")
    check_call('gdalbuildvrt most_likely_soil_class.vrt *{}*'.format(
        cn.pattern_soilgrids_most_likely_class),
               shell=True)
    uu.print_log("Done making SoilGrids250 most likely soil class vrt")

    # Downloads peat layers
    uu.s3_file_download(
        os.path.join(cn.peat_unprocessed_dir, cn.cifor_peat_file),
        cn.docker_base_dir, sensit_type)
    uu.s3_file_download(
        os.path.join(cn.peat_unprocessed_dir, cn.jukka_peat_zip),
        cn.docker_base_dir, sensit_type)

    # Unzips the Jukka peat shapefile (IDN and MYS)
    cmd = ['unzip', '-o', '-j', cn.jukka_peat_zip]
    uu.log_subprocess_output_full(cmd)

    jukka_tif = 'jukka_peat.tif'

    # Converts the Jukka peat shapefile to a raster
    uu.print_log('Rasterizing jukka peat...')
    cmd = [
        'gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=LZW', '-tr',
        '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-tap', '-ot',
        'Byte', '-a_nodata', '0', cn.jukka_peat_shp, jukka_tif
    ]
    uu.log_subprocess_output_full(cmd)
    uu.print_log('   Jukka peat rasterized')

    # For multiprocessor use
    # count-10 maxes out at about 100 GB on an r5d.16xlarge
    processes = cn.count - 5
    uu.print_log('Peatland preprocessing max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(peatland_processing.create_peat_mask_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Checks the output tiles for whether there is data in them before upload
    output_pattern = output_pattern_list[0]
    processes = 50  # 50 processors = XXX GB peak
    uu.print_log(
        "Checking for empty tiles of {0} pattern with {1} processors...".
        format(output_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.check_and_delete_if_empty, output_pattern=output_pattern),
        tile_id_list)
    pool.close()
    pool.join()

    uu.print_log("Uploading output files")
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
def mp_calculate_gross_emissions(sensit_type,
                                 tile_id_list,
                                 emitted_pools,
                                 run_date=None):

    os.chdir(cn.docker_base_dir)

    folder = cn.docker_base_dir

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    # If the tile_list argument is an s3 folder, the list of tiles in it is created
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year],
        cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year],
        cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000],
        cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000],
        cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000],
        cn.peat_mask_dir: [cn.pattern_peat_mask],
        cn.ifl_primary_processed_dir: [cn.pattern_ifl_primary],
        cn.planted_forest_type_unmasked_dir:
        [cn.pattern_planted_forest_type_unmasked],
        cn.drivers_processed_dir: [cn.pattern_drivers],
        cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        cn.burn_year_dir: [cn.pattern_burn_year]
    }

    # Special loss tiles for the Brazil and Mekong sensitivity analyses
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [
            cn.pattern_Brazil_annual_loss_processed
        ]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [
            cn.pattern_Mekong_loss_processed
        ]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Checks the validity of the emitted_pools argument
    if (emitted_pools not in ['soil_only', 'biomass_soil']):
        uu.exception_log(
            'Invalid pool input. Please choose soil_only or biomass_soil.')

    # Checks if the correct c++ script has been compiled for the pool option selected
    if emitted_pools == 'biomass_soil':

        # Output file directories for biomass+soil. Must be in same order as output pattern directories.
        output_dir_list = [
            cn.gross_emis_commod_biomass_soil_dir,
            cn.gross_emis_shifting_ag_biomass_soil_dir,
            cn.gross_emis_forestry_biomass_soil_dir,
            cn.gross_emis_wildfire_biomass_soil_dir,
            cn.gross_emis_urban_biomass_soil_dir,
            cn.gross_emis_no_driver_biomass_soil_dir,
            cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
            cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
            cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
            cn.gross_emis_nodes_biomass_soil_dir
        ]

        output_pattern_list = [
            cn.pattern_gross_emis_commod_biomass_soil,
            cn.pattern_gross_emis_shifting_ag_biomass_soil,
            cn.pattern_gross_emis_forestry_biomass_soil,
            cn.pattern_gross_emis_wildfire_biomass_soil,
            cn.pattern_gross_emis_urban_biomass_soil,
            cn.pattern_gross_emis_no_driver_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil,
            cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil,
            cn.pattern_gross_emis_nodes_biomass_soil
        ]

        # Some sensitivity analyses have specific gross emissions scripts.
        # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
        if sensit_type in ['no_shifting_ag', 'convert_to_grassland']:
            # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(sensit_type)):
            if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(
                    cn.c_emis_compile_dst, sensit_type)):
                uu.print_log(
                    "C++ for {} already compiled.".format(sensit_type))
            else:
                uu.exception_log(
                    'Must compile {} model C++...'.format(sensit_type))
        else:
            if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(
                    cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log('Must compile generic emissions C++...')

    elif (emitted_pools == 'soil_only') & (sensit_type == 'std'):
        if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(
                cn.c_emis_compile_dst)):
            uu.print_log("C++ for soil_only already compiled.")

            # Output file directories for soil_only. Must be in same order as output pattern directories.
            output_dir_list = [
                cn.gross_emis_commod_soil_only_dir,
                cn.gross_emis_shifting_ag_soil_only_dir,
                cn.gross_emis_forestry_soil_only_dir,
                cn.gross_emis_wildfire_soil_only_dir,
                cn.gross_emis_urban_soil_only_dir,
                cn.gross_emis_no_driver_soil_only_dir,
                cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                cn.gross_emis_nodes_soil_only_dir
            ]

            output_pattern_list = [
                cn.pattern_gross_emis_commod_soil_only,
                cn.pattern_gross_emis_shifting_ag_soil_only,
                cn.pattern_gross_emis_forestry_soil_only,
                cn.pattern_gross_emis_wildfire_soil_only,
                cn.pattern_gross_emis_urban_soil_only,
                cn.pattern_gross_emis_no_driver_soil_only,
                cn.pattern_gross_emis_all_gases_all_drivers_soil_only,
                cn.pattern_gross_emis_co2_only_all_drivers_soil_only,
                cn.pattern_gross_emis_non_co2_all_drivers_soil_only,
                cn.pattern_gross_emis_nodes_soil_only
            ]

        else:
            uu.exception_log('Must compile soil_only C++...')

    else:
        uu.exception_log('Pool and/or sensitivity analysis option not valid')

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, folder, sensit_type,
                                tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)
        uu.print_log(output_dir_list)
        uu.print_log(output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # The C++ code expects certain tiles for every input 10x10.
    # However, not all Hansen tiles have all of these inputs.
    # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent tiles.
    # That way, the C++ script gets all the necessary input files.
    # If it doesn't get the necessary inputs, it skips that tile.
    uu.print_log("Making blank tiles for inputs that don't currently exist")
    # All of the inputs that need to have dummy tiles made in order to match the tile list of the carbon emitted_pools
    pattern_list = [
        cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask,
        cn.pattern_ifl_primary, cn.pattern_drivers,
        cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year,
        cn.pattern_climate_zone, cn.pattern_soil_C_emis_year_2000
    ]

    # textfile that stores the names of the blank tiles that are created for processing.
    # This will be iterated through to delete the tiles at the end of the script.
    uu.create_blank_tile_txt()

    for pattern in pattern_list:
        pool = multiprocessing.Pool(processes=60)  # 60 = 100 GB peak
        pool.map(
            partial(uu.make_blank_tile,
                    pattern=pattern,
                    folder=folder,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

    # # For single processor use
    # for pattern in pattern_list:
    #     for tile in tile_id_list:
    #         uu.make_blank_tile(tile, pattern, folder, sensit_type)

    # Calculates gross emissions for each tile
    # count/4 uses about 390 GB on a r4.16xlarge spot machine.
    # processes=18 uses about 440 GB on an r4.16xlarge spot machine.
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 15  # 15 processors = XXX GB peak
        else:
            processes = 19  # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 714 GB peak
    else:
        processes = 9
    uu.print_log('Gross emissions max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(calculate_gross_emissions.calc_emissions,
                emitted_pools=emitted_pools,
                sensit_type=sensit_type,
                folder=folder), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #       calculate_gross_emissions.calc_emissions(tile, emitted_pools, sensit_type, folder)

    # Print the list of blank created tiles, delete the tiles, and delete their text file
    uu.list_and_delete_blank_tiles()

    for i in range(0, len(output_pattern_list)):
        pattern = output_pattern_list[i]

        uu.print_log("Adding metadata tags for pattern {}".format(pattern))

        if cn.count == 96:
            processes = 45  # 45 processors = XXX GB peak
        else:
            processes = 9
        uu.print_log('Adding metadata tags max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(calculate_gross_emissions.add_metadata_tags,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # for tile_id in tile_id_list:
        #     calculate_gross_emissions.add_metadata_tags(tile_id, pattern, sensit_type)

    # Uploads emissions to appropriate directory for the carbon emitted_pools chosen
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Ejemplo n.º 6
0
def mp_gain_year_count_all_forest_types(sensit_type, tile_id_list, run_date = None, no_upload = True):
    """Creates gain year count tiles for all forest types.

    Gain year counts are computed separately for four mutually exclusive pixel
    classes (loss only, gain only, neither loss nor gain, loss and gain) and
    then merged into a single output tile per Hansen tile.

    Args:
        sensit_type: sensitivity analysis name, or 'std' for the standard model.
        tile_id_list: list of tile ids to process, or 'all' to derive the list
            from the annual AGC removals tiles on s3.
        run_date: optional date string that replaces the date in the output
            directories from constants_and_names.
        no_upload: if True, output tiles are not uploaded to s3.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # No point in making gain year count tiles for tiles that don't have annual removals
        tile_id_list = uu.tile_list_s3(cn.annual_gain_AGC_all_types_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script. 'true'/'false' says whether the input directory and pattern should be
    # changed for a sensitivity analysis. This does not need to change based on what run is being done;
    # this assignment should be true for all sensitivity analyses and the standard model.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.model_extent_dir: [cn.pattern_model_extent]
    }

    # Adds the correct loss tile to the download dictionary depending on the model run
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    output_dir_list = [cn.gain_year_count_dir]
    output_pattern_list = [cn.pattern_gain_year_count]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        # 'directory' instead of 'dir' so the builtin isn't shadowed
        for directory, values in download_dict.items():
            pattern = values[0]
            uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # Creates gain year count tiles using only pixels that had only loss.
    # Each pool below is closed and joined before the next step starts so that
    # worker processes from earlier steps are not leaked.
    if cn.count == 96:
        processes = 90   # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count loss only pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_only,
                     sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # Creates gain year count tiles using only pixels that had only gain
    if cn.count == 96:
        processes = 90   # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count gain only pixels max processors=', processes)
    # BUG FIX: these branches were previously a bare 'if' followed by a separate
    # 'if/else', so a 'maxgain' run executed BOTH the maxgain step and the
    # standard gain-only step. 'elif' makes the three branches mutually exclusive.
    if sensit_type == 'maxgain':
        pool = multiprocessing.Pool(processes)
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
        pool.close()
        pool.join()
    elif sensit_type == 'legal_Amazon_loss':
        uu.print_log("Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. Skipping this step.")
    else:
        pool = multiprocessing.Pool(processes)
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
        pool.close()
        pool.join()

    # Creates gain year count tiles using only pixels that had neither loss nor gain pixels
    if cn.count == 96:
        processes = 90   # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count no change pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'legal_Amazon_loss':
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_legal_Amazon_loss,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    else:
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_no_change_standard,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # Creates gain year count tiles using only pixels that had both loss and gain
    if cn.count == 96:
        processes = 90   # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Gain year count loss & gain pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'maxgain':
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    else:
        pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard,
                         sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # Combines the four above gain year count tiles for each Hansen tile into a single output tile
    if cn.count == 96:
        processes = 84   # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak
    elif cn.count < 4:
        processes = 1
    else:
        processes = int(cn.count/4)
    uu.print_log('Gain year count gain merge all combos max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(gain_year_count_all_forest_types.create_gain_year_count_merge,
                     pattern=pattern, sensit_type=sensit_type, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_only")
        uu.upload_final_set(output_dir_list[0], "growth_years_gain_only")
        uu.upload_final_set(output_dir_list[0], "growth_years_no_change")
        uu.upload_final_set(output_dir_list[0], "growth_years_loss_and_gain")

        # This is the final output used later in the model
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
Ejemplo n.º 7
0
def mp_burn_year(tile_id_list, run_date = None, no_upload = None):
    """Creates annual burned-area tiles aligned with the Hansen tree cover loss grid.

    Pipeline: (1) raw MODIS MCD64A1 hdfs are assumed to already be on the machine
    (downloaded manually via sftp; see the note below) and are uploaded to s3;
    (2) hdfs are stacked into per-year MODIS hv tile tifs; (3) each year is
    mosaicked into a global vrt, reprojected to WGS84, and clipped to 10x10 degree
    Hansen tiles; (4) the per-year tiles are combined with the loss tiles to keep
    burn years that coincide with (or precede by one year) tree cover loss.

    Args:
        tile_id_list: list of tile ids to process, or 'all' to use the pixel area tile list.
        run_date: optional date string that replaces the date in the output directory.
        no_upload: if truthy, outputs are not uploaded to s3.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.burn_year_dir]
    output_pattern_list = [cn.pattern_burn_year]

    # A date can optionally be provided.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Hard-coded list of every MODIS horizontal-vertical (hXXvYY) grid cell that
    # contains land, i.e. the cells for which burned area data can exist.
    global_grid_hv = ["h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10", "h01v11", "h02v06",
                      "h02v08", "h02v09", "h02v10", "h02v11", "h03v06", "h03v07", "h03v09", "h03v10", "h03v11",
                      "h04v09", "h04v10", "h04v11", "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03",
                      "h07v05", "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07", "h08v08",
                      "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05", "h09v06", "h09v07", "h09v08",
                      "h09v09", "h10v02", "h10v03", "h10v04", "h10v05", "h10v06", "h10v07", "h10v08", "h10v09",
                      "h10v10", "h10v11", "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08",
                      "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04", "h12v05", "h12v07",
                      "h12v08", "h12v09", "h12v10", "h12v11", "h12v12", "h12v13", "h13v02", "h13v03", "h13v04",
                      "h13v08", "h13v09", "h13v10", "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03",
                      "h14v04", "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05", "h15v07",
                      "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08", "h16v09", "h17v02", "h17v03",
                      "h17v04", "h17v05", "h17v06", "h17v07", "h17v08", "h17v10", "h17v12", "h17v13", "h18v02",
                      "h18v03", "h18v04", "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03",
                      "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10", "h19v11", "h19v12",
                      "h20v02", "h20v03", "h20v04", "h20v05", "h20v06", "h20v07", "h20v08", "h20v09", "h20v10",
                      "h20v11", "h20v12", "h20v13", "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07",
                      "h21v08", "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04", "h22v05",
                      "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11", "h22v13", "h23v02", "h23v03",
                      "h23v04", "h23v05", "h23v06", "h23v07", "h23v08", "h23v09", "h23v10", "h23v11", "h24v02",
                      "h24v03", "h24v04", "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04",
                      "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03", "h26v04", "h26v05",
                      "h26v06", "h26v07", "h26v08", "h27v03", "h27v04", "h27v05", "h27v06", "h27v07", "h27v08",
                      "h27v09", "h27v10", "h27v11", "h27v12", "h28v03", "h28v04", "h28v05", "h28v06", "h28v07",
                      "h28v08", "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05", "h29v06",
                      "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12", "h29v13", "h30v06", "h30v07",
                      "h30v08", "h30v09", "h30v10", "h30v11", "h30v12", "h30v13", "h31v06", "h31v07", "h31v08",
                      "h31v09", "h31v10", "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10",
                      "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11", "h34v07", "h34v08",
                      "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"]


    # Step 1: download hdf files for relevant year(s) from sftp site.
    # This only needs to be done for the most recent year of data.

    '''
    Downloading the hdf files from the sftp burned area site is done outside the script in the sftp shell on the command line.
    This will download all the 2020 hdfs to the spot machine. It will take a few minutes before the first
    hdf is downloaded but then it should go quickly.
    Change 2020 to other year for future years of downloads. 
    https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.3.pdf, page 24, section 4.1.3

    sftp [email protected]
    [For password] burnt
    cd data/MODIS/C6/MCD64A1/HDF
    ls [to check that it's the folder with all the tile folders]
    get h??v??/MCD64A1.A2020*
    bye    //exits the stfp shell
    '''

    # Uploads the latest year of raw burn area hdfs to s3.
    # All hdfs go in this folder
    cmd = ['aws', 's3', 'cp', '{0}/burn_date/'.format(cn.docker_app), cn.burn_year_hdf_raw_dir, '--recursive', '--exclude', '*', '--include', '*hdf']
    uu.log_subprocess_output_full(cmd)


    # Step 2:
    # Makes burned area rasters for each year for each MODIS horizontal-vertical tile.
    # This only needs to be done for the most recent year of data (set in stach_ba_hv).
    uu.print_log("Stacking hdf into MODIS burned area tifs by year and MODIS hv tile...")

    # NOTE(review): assumes the machine has well over 10 cores; on a small machine
    # count - 10 would be <= 0 and Pool() would raise — confirm intended hardware.
    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count - 10)
    pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv)
    pool.close()
    pool.join()

    # # For single processor use
    # for hv_tile in global_grid_hv:
    #     stack_ba_hv.stack_ba_hv(hv_tile)


    # Step 3:
    # Creates a 10x10 degree wgs 84 tile of .00025 res burned year.
    # Downloads all MODIS hv tiles from s3,
    # makes a mosaic for each year, and warps to Hansen extent.
    # Range is inclusive at lower end and exclusive at upper end (e.g., 2001, 2021 goes from 2001 to 2020).
    # This only needs to be done for the most recent year of data.
    # NOTE: The first time I ran this for the 2020 TCL update, I got an error about uploading the log to s3
    # after most of the tiles were processed. I didn't know why it happened, so I reran the step and it went fine.
    # NOTE(review): range(2020, 2021) processes only 2020; this constant must be
    # updated by hand for each annual data update.
    for year in range(2020, 2021):

        uu.print_log("Processing", year)

        # Downloads all hv tifs for this year
        include = '{0}_*.tif'.format(year)
        year_tifs_folder = "{}_year_tifs".format(year)
        utilities.makedir(year_tifs_folder)

        uu.print_log("Downloading MODIS burn date files from s3...")

        cmd = ['aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir, year_tifs_folder]
        cmd += ['--recursive', '--exclude', "*", '--include', include]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Creating vrt of MODIS files...")

        vrt_name = "global_vrt_{}.vrt".format(year)

        # Builds list of vrt files
        with open('vrt_files.txt', 'w') as vrt_files:
            vrt_tifs = glob.glob(year_tifs_folder + "/*.tif")
            for tif in vrt_tifs:
                vrt_files.write(tif + "\n")

        # Creates vrt with wgs84 MODIS tiles.
        cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Reprojecting vrt...")

        # Builds new vrt and virtually project it
        # This reprojection could be done as part of the clip_year_tiles function but Sam had it out here like this and
        # so I'm leaving it like that.
        vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)
        cmd = ['gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr', '.00025', '.00025', '-overwrite',
               vrt_name, vrt_wgs84]
        uu.log_subprocess_output_full(cmd)

        # Creates a list of lists, with year and tile id to send to multi processor
        tile_year_list = []
        for tile_id in tile_id_list:
            tile_year_list.append([tile_id, year])

        # Given a list of tiles and years ['00N_000E', 2017] and a VRT of burn data,
        # the global vrt has pixels representing burned or not. This process clips the global VRT
        # and changes the pixel value to represent the year the pixel was burned. Each tile has value of
        # year burned and NoData.
        count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=count-5)
        pool.map(partial(clip_year_tiles.clip_year_tiles, no_upload=no_upload), tile_year_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_year in tile_year_list:
        #     clip_year_tiles.clip_year_tiles(tile_year, no_upload)

        uu.print_log("Processing for {} done. Moving to next year.".format(year))

    # Step 4:
    # Creates a single Hansen tile covering all years that represents where burning coincided with tree cover loss
    # or preceded TCL by one year.
    # This needs to be done on all years each time burned area is updated.

    # Downloads the loss tiles
    uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss)

    uu.print_log("Extracting burn year data that coincides with tree cover loss...")

    # Downloads the 10x10 deg burn year tiles (1 for each year in which there was burned area), stack and evaluate
    # to return burn year values on hansen loss pixels within 1 year of loss date
    if cn.count == 96:
        processes = 5
        # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory)
    else:
        processes = 1
    pool = multiprocessing.Pool(processes)
    pool.map(partial(hansen_burnyear_final.hansen_burnyear, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     hansen_burnyear_final.hansen_burnyear(tile_id, no_upload)


    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
Ejemplo n.º 8
0
def mp_prep_other_inputs(tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)
    sensit_type='std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             set3=cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # List of output directories and output file name patterns
    output_dir_list = [cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
                       cn.drivers_processed_dir, cn.ifl_primary_processed_dir,
                       cn.annual_gain_AGC_natrl_forest_young_dir,
                       cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
                       cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.FIA_forest_group_processed_dir,
                       cn.age_cat_natrl_forest_US_dir,
                       cn.FIA_regions_processed_dir]
    output_pattern_list = [cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
                           cn.pattern_drivers, cn.pattern_ifl_primary,
                           cn.pattern_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_FIA_forest_group_processed,
                           cn.pattern_age_cat_natrl_forest_US,
                           cn.pattern_FIA_regions_processed]


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':

        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)


    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest
    uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.drivers_raw_dir, '{}.zip'.format(cn.pattern_drivers_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type)
    # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster.
    # Thus, using wget instead.
    cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)
    cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    uu.log_subprocess_output_full(cmd)

    uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Unzipping pre-2000 plantations...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping drivers...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_drivers_raw)]
    uu.log_subprocess_output_full(cmd)


    # Creates tree cover loss driver tiles
    source_raster = '{}.tif'.format(cn.pattern_drivers_raw)
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates young natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates young natural forest removal rate standard deviation tiles
    source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates pre-2000 oil palm plantation tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 100 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list)
    pool.close()
    pool.join()


    # Creates climate zone tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_climate_zone_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 60  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest standard deviation of removal rate tiles
    source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 32  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    uu.print_log("Creating vrt of humid tropial primary forest...")
    primary_vrt = 'primary_2001.vrt'
    os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    uu.print_log("  Humid tropical primary forest vrt created")

    # Creates primary forest tiles
    source_raster = primary_vrt
    out_pattern = 'primary_2001'
    dt = 'Byte'
    if cn.count == 96:
        processes = 45  # 45 processors = 650 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a combined IFL/primary forest raster
    # Uses very little memory since it's just file renaming
    if cn.count == 96:
        processes = 60  # 60 processors = 10 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list)
    pool.close()
    pool.join()


    # Creates forest age category tiles for US forests
    source_raster = cn.name_age_cat_natrl_forest_US_raw
    out_pattern = cn.pattern_age_cat_natrl_forest_US
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates forest groups for US forests
    source_raster = cn.name_FIA_forest_group_raw
    out_pattern = cn.pattern_FIA_forest_group_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 32 processors = 25 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates FIA regions for US forests
    source_raster = cn.name_FIA_regions_raw
    out_pattern = cn.pattern_FIA_regions_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    for output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData)
        # should be. These NaN values show up as values when the check_and_delete_if_empty function runs, making the tiles not
        # deleted even if they have no data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't
        # have this problem. So I'm forcing the young forest rates to and stdev to have their emptiness checked by the gdalinfo version.
        if output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        if cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        elif cn.count <= 2: # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        uu.print_log('\n')


    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
# Ejemplo n.º 9
# 0
def mp_model_extent(sensit_type, tile_id_list, run_date=None):
    """Creates tiles of the extent of the forest carbon removals model.

    Downloads the biomass, mangrove, Hansen gain, tree cover density, and
    pre-2000 plantation inputs (the exact set depends on the sensitivity
    analysis), maps model_extent.model_extent over the tile list, deletes
    empty output tiles, and uploads the results to s3.

    Args:
        sensit_type: sensitivity analysis type ('std', 'biomass_swap',
            'legal_Amazon_loss', ...). Controls which input sources are used
            and how output directories/patterns are named.
        tile_id_list: list of tile ids to process, or 'all' to derive the
            list from the relevant input s3 directories.
        run_date: optional date string substituted into the output s3
            directories. Defaults to None (use the dates in
            constants_and_names).
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model. Which biomass tiles to use depends on sensitivity analysis
        if sensit_type == 'biomass_swap':
            tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, sensit_type)
        elif sensit_type == 'legal_Amazon_loss':
            tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir, sensit_type)
        else:
            tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             cn.gain_dir, cn.tcd_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # Files to download for this script. Biomass and forest-extent sources
    # vary by sensitivity analysis and are added below.
    download_dict = {
                    cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
                    cn.gain_dir: [cn.pattern_gain],
                    cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
    }

    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_forest_extent_2000_processed_dir] = [cn.pattern_Brazil_forest_extent_2000_processed]
    else:
        download_dict[cn.tcd_dir] = [cn.pattern_tcd]

    if sensit_type == 'biomass_swap':
        download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
    else:
        download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

    # List of output directories and output file name patterns
    output_dir_list = [cn.model_extent_dir]
    output_pattern_list = [cn.pattern_model_extent]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    # NOTE: loop variable renamed from `dir`, which shadowed the builtin.
    for directory, values in download_dict.items():
        pattern = values[0]
        uu.s3_flexible_download(directory, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 38
        else:
            processes = 42 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases);
            # 36 = 550 GB peak; 40 = 590 GB peak; 42 = XXX GB peak
    else:
        processes = 3
    uu.print_log('Removal model forest extent processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(model_extent.model_extent, pattern=pattern, sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     model_extent.model_extent(tile_id, pattern, sensit_type)

    # Deletes any empty output tiles so they aren't uploaded
    output_pattern = output_pattern_list[0]
    if cn.count <= 2:  # For local tests
        processes = 1
        uu.print_log(
            "Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()
    else:
        # Scale the pool to the machine instead of always using 50 processes,
        # which oversubscribed instances with fewer than 96 processors. This
        # matches the pattern used by the other mp_* scripts in this project.
        if cn.count == 96:
            processes = 50  # 50 processors = XXX GB peak
        else:
            processes = int(cn.count / 2)
        uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()


    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
# Ejemplo n.º 10
# 0
def mp_prep_other_inputs(tile_id_list, run_date=None, no_upload=None):
    """Creates the processed tree cover loss driver tiles.

    Historically this script also prepared climate zone, pre-2000 plantation,
    IFL/primary forest, and young/European/US natural forest removal rate
    inputs; those steps are currently disabled and only the driver tiles are
    produced. The disabled implementations are available in version control
    history.

    Args:
        tile_id_list: list of tile ids to process, or 'all' to derive the list
            from the biomass, mangrove, and gain input s3 directories.
        run_date: optional date string substituted into the output s3
            directories. Defaults to None (use the dates in
            constants_and_names). This was previously a required positional
            argument; the default keeps existing positional callers working
            and matches the other mp_* scripts in this project.
        no_upload: passed through to uu.mp_warp_to_Hansen; if truthy, warped
            tiles are not uploaded to s3 by that helper.
    """

    os.chdir(cn.docker_base_dir)

    # This script only runs as the standard model (kept as a variable so the
    # sensitivity-analysis plumbing below stays in place).
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        ### BUG: THIS SHOULD ALSO INCLUDE cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir IN ITS LIST
        tile_id_list = uu.create_combined_tile_list(
            cn.WHRC_biomass_2000_unmasked_dir,
            cn.mangrove_biomass_2000_dir,
            set3=cn.gain_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")
    '''
    Before processing the driver, it needs to be reprojected from Goode Homolosine to WGS84. 
    gdal_warp is producing a weird output, so I did it in ArcMap for the 2020 update, 
    with the output cell size being 0.01 x 0.01 degree and the method being nearest.
    
    arcpy.ProjectRaster_management(in_raster="C:/GIS/Drivers of loss/2020_drivers__tif__from_Forrest_Follett_20210323/FinalClassification_2020_v2__from_Jimmy_MacCarthy_20210323.tif", 
    out_raster="C:/GIS/Drivers of loss/2020_drivers__tif__from_Forrest_Follett_20210323/Final_Classification_2020__reproj_nearest_0-005_0-005_deg__20210323.tif", 
    out_coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]", 
    resampling_type="NEAREST", cell_size="0.005 0.005", geographic_transform="", 
    Registration_Point="", 
    in_coor_system="PROJCS['WGS_1984_Goode_Homolosine',GEOGCS['GCS_unknown',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Goode_Homolosine'],PARAMETER['False_Easting',0.0],PARAMETER['False_Northing',0.0],PARAMETER['Central_Meridian',0.0],PARAMETER['Option',1.0],UNIT['Meter',1.0]]", 
    vertical="NO_VERTICAL")
    '''

    # List of output directories and output file name patterns.
    # Only the drivers output is currently active; the commented entries are
    # the outputs of the disabled preparation steps.
    output_dir_list = [
        # cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
        cn.drivers_processed_dir
        # cn.ifl_primary_processed_dir,
        # cn.annual_gain_AGC_natrl_forest_young_dir,
        # cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
        # cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
        # cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
        # cn.FIA_forest_group_processed_dir,
        # cn.age_cat_natrl_forest_US_dir,
        # cn.FIA_regions_processed_dir
    ]
    output_pattern_list = [
        # cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
        cn.pattern_drivers
        # cn.pattern_ifl_primary,
        # cn.pattern_annual_gain_AGC_natrl_forest_young,
        # cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
        # cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
        # cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
        # cn.pattern_FIA_forest_group_processed,
        # cn.pattern_age_cat_natrl_forest_US,
        # cn.pattern_FIA_regions_processed
    ]

    # If the model run isn't the standard one, the output directory and file names are changed.
    # (Never taken while sensit_type is hard-coded to 'std' above.)
    if sensit_type != 'std':

        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads the raw tree cover loss driver map. (Downloads for the
    # disabled preparation steps were removed; see version control history.)
    uu.s3_file_download(
        os.path.join(cn.drivers_raw_dir, cn.pattern_drivers_raw),
        cn.docker_base_dir, sensit_type)

    # Creates tree cover loss driver tiles.
    # The raw driver tile should have NoData for unassigned drivers as opposed to 0 for unassigned drivers.
    # For the 2020 driver update, I reclassified the 0 values as NoData in ArcMap. I also unprojected the global drivers
    # map to WGS84 because running the homolosine projection that Jimmy provided was giving incorrect processed results.
    source_raster = cn.pattern_drivers_raw
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 87  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = 100 GB peak; 87 = 125 GB peak
    else:
        processes = int(cn.count / 2)
    uu.print_log(
        "Creating tree cover loss driver tiles with {} processors...".format(
            processes))
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt,
                no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # Deletes any empty output tiles for each active pattern so they aren't uploaded
    for output_pattern in [
            cn.pattern_drivers
            # ,cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    ]:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData)
        # should be. These NaN values show up as values when the check_and_delete_if_empty function runs, making the tiles not
        # deleted even if they have no data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't
        # have this problem. So I'm forcing the young forest rates to and stdev to have their emptiness checked by the gdalinfo version.
        if output_pattern in [
                cn.pattern_annual_gain_AGC_natrl_forest_young,
                cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
        ]:
            processes = int(cn.count / 2)
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty_light,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        # elif (not a second independent if): patterns already checked by the
        # light function above must not also be checked by the rasterio-based
        # function, which would wrongly keep their empty tiles. This is a
        # no-op for the only currently-active pattern (cn.pattern_drivers).
        elif cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        elif cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty_light,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = int(cn.count / 2)
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        uu.print_log('\n')

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def mp_aggregate_results_to_4_km(sensit_type,
                                 thresh,
                                 tile_id_list,
                                 std_net_flux=None,
                                 run_date=None,
                                 no_upload=None):
    """Aggregates per-hectare model output tiles into coarse (0.04x0.04 degree) per-pixel rasters.

    For each model output type (annual removal factor, gross removals, gross emissions,
    net flux), this rewindows the 10x10 degree tiles, converts per-hectare values to
    per-pixel values, sums them within aggregation windows, mosaics the aggregated tiles
    into one global raster, and adds metadata tags. If a standard-model net flux raster
    is supplied and the run is a comparable sensitivity analysis, comparison maps
    (percent difference and sign change) are also created.

    Args:
        sensit_type: model run type: 'std' or a sensitivity analysis name.
        thresh: tree cover density threshold (integer 0-99); only pixels above it are aggregated.
        tile_id_list: list of tile ids to process, or 'all' for the full model extent.
        std_net_flux: s3 path of the standard model's aggregated net flux map (optional).
        run_date: optional date string that replaces the date in the output s3 directories.
        no_upload: if truthy, outputs are not uploaded to s3.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # Checks whether the canopy cover argument is valid
    if thresh < 0 or thresh > 99:
        uu.exception_log(
            no_upload,
            'Invalid tcd. Please provide an integer between 0 and 99.')

    # Supporting rasters are only downloaded if AWS credentials are found
    if uu.check_aws_creds():

        # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
        uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent
        uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                                sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                                cn.pattern_mangrove_biomass_2000,
                                cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Model outputs to process are:", download_dict)

    # List of output directories. Modified later for sensitivity analysis.
    # Output pattern is determined later.
    output_dir_list = [cn.output_aggreg_dir]

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through the types of tiles to be processed
    # (renamed from `dir` to avoid shadowing the builtin)
    for input_dir, download_pattern in list(download_dict.items()):

        download_pattern_name = download_pattern[0]

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
        if uu.check_aws_creds():

            uu.s3_flexible_download(input_dir, download_pattern_name,
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

        # Gets an actual tile id to use as a dummy in creating the actual tile pattern
        local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir,
                                                    download_pattern_name)
        sample_tile_id = uu.get_tile_id(local_tile_list[0])

        # Renames the tiles according to the sensitivity analysis before creating dummy tiles.
        # The renaming function requires a whole tile name, so this passes a dummy tile name that is then stripped a few
        # lines later.
        tile_id = sample_tile_id  # a dummy tile id (but it has to be a real tile id). It is removed later.
        output_pattern = uu.sensit_tile_rename(sensit_type, tile_id,
                                               download_pattern_name)
        # Strips the 9-character tile id prefix (e.g. "00N_000E_") and the ".tif" suffix
        pattern = output_pattern[9:-4]

        # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis
        # (was `&` on booleans; `and` is the idiomatic boolean operator)
        if (sensit_type != 'std') and (sensit_type not in pattern):
            uu.print_log(
                "{} not a sensitivity analysis output. Skipping aggregation..."
                .format(pattern))
            uu.print_log("")

            continue

        # Lists the tiles of the particular type that is being iterated through.
        # Excludes all intermediate files
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [i for i in tile_list if not ('hanson_2013' in i)]
        tile_list = [i for i in tile_list if not ('rewindow' in i)]
        tile_list = [i for i in tile_list if not ('0_4deg' in i)]
        tile_list = [i for i in tile_list if not ('.ovr' in i)]

        # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif']  # test tiles

        # Was formatting `download_pattern` (a one-element list); the pattern string is intended
        uu.print_log("There are {0} tiles to process for pattern {1}".format(
            str(len(tile_list)), download_pattern_name) + "\n")
        uu.print_log("Processing:", input_dir, "; ", pattern)

        # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 400x400 pixels,
        # which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed.
        # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12  # 12 processors = XXX GB peak
            else:
                processes = 16  # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload),
            tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.rewindow(til, no_upload)

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
        # and sums those values in each 400x400 pixel window.
        # The sum for each 400x400 pixel window is stored in a 2D array, which is then converted back into a raster at
        # the aggregated resolution. Each pixel in that raster is the sum of the 30m pixels converted to
        # value/pixel (instead of value/ha). The aggregated tile is output.
        # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 12  # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Conversion to per pixel and aggregate max processors=',
                     processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.aggregate,
                    thresh=thresh,
                    sensit_type=sensit_type,
                    no_upload=no_upload), tile_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)

        # Makes a vrt of all the output aggregated tiles
        out_vrt = "{}_0_4deg.vrt".format(pattern)
        os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format(
            out_vrt, pattern))

        # Creates the output name for the aggregated map
        out_pattern = uu.name_aggregated_output(download_pattern_name, thresh,
                                                sensit_type)
        uu.print_log(out_pattern)

        # Produces a single raster of all the 10x10 tiles (0.04 degree resolution)
        cmd = [
            'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0',
            '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt,
            '{}.tif'.format(out_pattern)
        ]
        uu.log_subprocess_output_full(cmd)

        # Adds metadata tags to output rasters
        uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern),
                                       sensit_type)

        # Units are different for annual removal factor, so metadata has to reflect that
        if 'annual_removal_factor' in out_pattern:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'scale=negative values are removals', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        else:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:

            uu.print_log("Tiles processed. Uploading to s3 now...")
            uu.upload_final_set(output_dir_list[0], out_pattern)

        # Cleans up the folder before starting on the next raster type
        vrtList = glob.glob('*vrt')
        for vrt in vrtList:
            os.remove(vrt)

        for tile_name in tile_list:
            tile_id = uu.get_tile_id(tile_name)
            # os.remove('{0}_{1}.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern))

    # Compares the net flux from the standard model and the sensitivity analysis in two ways.
    # This does not work for comparing the raw outputs of the biomass_swap and US_removals sensitivity models because their
    # extents are different from the standard model's extent (tropics and US tiles vs. global).
    # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to
    # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000.
    # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the
    # code below should work.
    if sensit_type not in [
            'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss'
    ]:

        if std_net_flux:

            uu.print_log(
                "Standard aggregated flux results provided. Creating comparison maps."
            )

            # Copies the standard model aggregation outputs to s3. Only net flux is used, though.
            uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type)

            # Identifies the standard model net flux map
            std_aggreg_flux = os.path.split(std_net_flux)[1]

            sensit_aggreg_flux = None
            try:
                # Identifies the sensitivity model net flux map.
                # glob raises IndexError via [0] when no matching raster exists on the machine.
                sensit_aggreg_flux = glob.glob(
                    'net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0]

                uu.print_log("Standard model net flux:", std_aggreg_flux)
                uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux)

            except IndexError:
                # Previously a bare except that fell through and hit a NameError on
                # sensit_aggreg_flux below; now the comparison is skipped cleanly.
                uu.print_log(
                    'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.'
                )

            if sensit_aggreg_flux is not None:

                uu.print_log(
                    "Creating map of percent difference between standard and {} net flux"
                    .format(sensit_type))
                aggregate_results_to_4_km.percent_diff(std_aggreg_flux,
                                                       sensit_aggreg_flux,
                                                       sensit_type, no_upload)

                uu.print_log(
                    "Creating map of which pixels change sign and which stay the same between standard and {}"
                    .format(sensit_type))
                aggregate_results_to_4_km.sign_change(std_aggreg_flux,
                                                      sensit_aggreg_flux,
                                                      sensit_type, no_upload)

                # If no_upload flag is not activated, output is uploaded
                if not no_upload:

                    uu.upload_final_set(output_dir_list[0],
                                        cn.pattern_aggreg_sensit_perc_diff)
                    uu.upload_final_set(output_dir_list[0],
                                        cn.pattern_aggreg_sensit_sign_change)

        else:

            uu.print_log(
                "No standard aggregated flux results provided. Not creating comparison maps."
            )
# Ejemplo n.º 12 (scraper artifact: example separator and vote count, commented out to keep the file valid Python)
def mp_net_flux(sensit_type, tile_id_list, run_date=None):
    """Creates tiles of net greenhouse gas flux (gross emissions minus gross removals).

    Args:
        sensit_type: model run type: 'std' or a sensitivity analysis name.
        tile_id_list: list of tile ids to process, or 'all' for the full model extent.
        run_date: optional date string that replaces the date in the output s3 directories.
    """

    os.chdir(cn.docker_base_dir)

    # 'all' means every tile that has either gross emissions or gross removals
    if tile_id_list == 'all':
        tile_id_list = uu.create_combined_tile_list(
            cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
            cn.cumul_gain_AGCO2_BGCO2_all_types_dir,
            sensit_type=sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Inputs: gross removals and gross emissions tiles
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil]
    }

    # Single output: the net flux tiles
    output_dir_list = [cn.net_flux_dir]
    output_pattern_list = [cn.pattern_net_flux]

    # Downloads individual tiles or whole directories, depending on how many
    # tiles are in tile_id_list
    for input_dir, pattern_list in download_dict.items():
        uu.s3_flexible_download(input_dir, pattern_list[0],
                                cn.docker_base_dir, sensit_type, tile_id_list)

    # Sensitivity analyses write to their own directories and file name patterns
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # An explicit run date overrides the date baked into constants_and_names
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Only one output pattern, so it is passed directly to the worker
    pattern = output_pattern_list[0]

    # Worker count tuned empirically for the 96-core machine (memory-bound);
    # a conservative default is used elsewhere
    if cn.count != 96:
        processes = 9
    elif sensit_type == 'biomass_swap':
        processes = 32  # 32 processors = XXX GB peak
    else:
        processes = 40  # 38 = 690 GB peak; 40 = 715 GB peak
    uu.print_log('Net flux max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(net_flux.net_calc, pattern=pattern, sensit_type=sensit_type),
        tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     net_flux.net_calc(tile_id, output_pattern_list[0], sensit_type)

    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
# Ejemplo n.º 13 (scraper artifact: example separator and vote count, commented out to keep the file valid Python)
def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type,
                                                 tile_id_list,
                                                 run_date=None):
    """Combines the forest-type-specific removal factor tiles into all-forest-type tiles.

    Produces removal forest type, annual AGC/BGC/AGC+BGC removal rates for all forest
    types combined, and the standard deviation of the AGC removal rate.

    Args:
        sensit_type: model run type: 'std' or a sensitivity analysis name.
        tile_id_list: list of tile ids to process, or 'all' for the full model extent.
        run_date: optional date string that replaces the date in the output s3 directories.
    """

    os.chdir(cn.docker_base_dir)

    # 'all' means every tile in the model extent
    if tile_id_list == 'all':
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Inputs: model extent, the per-forest-type removal rates, IPCC age categories,
    # and the standard deviations of each removal rate source.
    download_dict = {
        cn.model_extent_dir: [cn.pattern_model_extent],
        cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
        cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove],
        cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US],
        cn.annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_annual_gain_AGC_natrl_forest_young],
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults],
        cn.stdev_annual_gain_AGB_mangrove_dir:
        [cn.pattern_stdev_annual_gain_AGB_mangrove],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US],
        cn.stdev_annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young],
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults]
    }

    # Outputs: one directory/pattern pair per combined-tile product
    output_dir_list = [
        cn.removal_forest_type_dir, cn.annual_gain_AGC_all_types_dir,
        cn.annual_gain_BGC_all_types_dir, cn.annual_gain_AGC_BGC_all_types_dir,
        cn.stdev_annual_gain_AGC_all_types_dir
    ]
    output_pattern_list = [
        cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types,
        cn.pattern_annual_gain_BGC_all_types,
        cn.pattern_annual_gain_AGC_BGC_all_types,
        cn.pattern_stdev_annual_gain_AGC_all_types
    ]

    # Downloads individual tiles or whole directories, depending on how many
    # tiles are in tile_id_list
    for input_dir, pattern_list in download_dict.items():
        uu.s3_flexible_download(input_dir, pattern_list[0],
                                cn.docker_base_dir, sensit_type, tile_id_list)

    # Sensitivity analyses write to their own directories and file name patterns
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # An explicit run date overrides the date baked into constants_and_names
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Worker count tuned empirically for the 96-core machine (memory-bound);
    # a conservative default is used elsewhere.
    # Multiple-argument multiprocessing via functools.partial, as in
    # http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count != 96:
        processes = 2
    elif sensit_type == 'biomass_swap':
        processes = 13
    else:
        processes = 17  # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = XXX GB peak
    uu.print_log('Removal factor processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_AGC_BGC_all_forest_types.
                annual_gain_rate_AGC_BGC_all_forest_types,
                output_pattern_list=output_pattern_list,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, sensit_type)

    # Uploads each output set to its s3 directory
    for out_dir, out_pattern in zip(output_dir_list, output_pattern_list):
        uu.upload_final_set(out_dir, out_pattern)
# Ejemplo n.º 14 (scraper artifact: example separator and vote count, commented out to keep the file valid Python)
def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date = None):

    os.chdir(cn.docker_base_dir)

    if (sensit_type != 'std') & (carbon_pool_extent != 'loss'):
        uu.exception_log("Sensitivity analysis run must use 'loss' extent")

    # Checks the validity of the carbon_pool_extent argument
    if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']):
        uu.exception_log("Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.")


    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant.
    if (tile_id_list == 'all') & (carbon_pool_extent == 'loss'):
        # Lists the tiles that have both model extent and loss pixels
        model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)
        loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type)
        uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:")
        tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list))

    # For runs generating carbon pools in 2000, all model extent tiles are relevant.
    if (tile_id_list == 'all') & (carbon_pool_extent != 'loss'):
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)


    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    output_dir_list = []
    output_pattern_list = []

    # Output files and patterns and files to download if carbon emitted_pools for 2000 are being generated
    if '2000' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir,
                           cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000,
                               cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000]

        # Files to download for this script
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Output files and patterns and files to download if carbon emitted_pools for loss year are being generated
    if 'loss' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir,
                           cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000,
                               cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year]

        # Files to download for this script. This has the same items as the download_dict for 2000 pools plus
        # other tiles.
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
            cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types]
       }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]


    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)
    else:
        uu.print_log("Output directory list for standard model:", output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    pd.options.mode.chained_assignment = None

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

    mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                         cn.below_to_above_trop_dry_mang,
                                                                                         cn.below_to_above_trop_wet_mang,
                                                                                         cn.below_to_above_subtrop_mang)

    mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                              cn.deadwood_to_above_trop_dry_mang,
                                                                                              cn.deadwood_to_above_trop_wet_mang,
                                                                                              cn.deadwood_to_above_subtrop_mang)

    mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                            cn.litter_to_above_trop_dry_mang,
                                                                                            cn.litter_to_above_trop_wet_mang,
                                                                                            cn.litter_to_above_subtrop_mang)

    uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 16  # 16 processors = XXX GB peak
            else:
                processes = 20  # 25 processors > 750 GB peak; 16 = 560 GB peak;
                # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 670 GB peak; 21 > 750 GB peak
        else: # For 2000, or loss & 2000
            processes = 15  # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak
    else:
        processes = 2
    uu.print_log('AGC loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_AGC,
                     sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
    else:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
        uu.upload_final_set(output_dir_list[6], output_pattern_list[6])
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles")
    tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent))
    # Creates a single filename pattern to pass to the multiprocessor call
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 30  # 30 processors = XXX GB peak
            else:
                processes = 38  # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 700 GB peak
        else: # For 2000, or loss & 2000
            processes = 30  # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak
    else:
        processes = 2
    uu.print_log('BGC max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                     carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
    else:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
        uu.upload_final_set(output_dir_list[7], output_pattern_list[7])
    uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on.
    # Thus must delete AGC, BGC, and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine
    # for total C 2000 calculation.
    if '2000' in carbon_pool_extent:
        uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles")
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000)))

        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()


    uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 14  # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 650 GB peak; 15 = 700 GB peak
        else: # For 2000, or loss & 2000
            ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced.
            ### There wouldn't have been enough room for all deadwood and litter otherwise.
            ### For example, when deadwood and litter generation started getting up to around 50N, I deleted
            ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S.
            processes = 16  # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Deadwood and litter max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                carbon_pool_extent=carbon_pool_extent,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
    else:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
        uu.upload_final_set(output_dir_list[8], output_pattern_list[8])  # deadwood
        uu.upload_final_set(output_dir_list[9], output_pattern_list[9])  # litter
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles")
    tiles_to_delete = []
    tiles_to_delete .extend(glob.glob('*{}*tif'.format(cn.pattern_elevation)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    if 'loss' in carbon_pool_extent:

        uu.print_log("Creating tiles of soil carbon in loss extent")

        # If pools in 2000 weren't generated, soil carbon in emissions extent is 4.
        # If pools in 2000 were generated, soil carbon in emissions extent is 10.
        if '2000' not in carbon_pool_extent:
            pattern = output_pattern_list[4]
        else:
            pattern = output_pattern_list[10]

        if cn.count == 96:
            # More processors can be used for loss carbon pools than for 2000 carbon pools
            if carbon_pool_extent == 'loss':
                if sensit_type == 'biomass_swap':
                    processes = 36  # 36 processors = XXX GB peak
                else:
                    processes = 42  # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = XXX GB peak
            else: # For 2000, or loss & 2000
                processes = 12  # 12 processors = XXX GB peak
        else:
            processes = 2
        uu.print_log('Soil carbon loss year max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern,
                         sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type)

        # If pools in 2000 weren't generated, soil carbon in emissions extent is 4.
        # If pools in 2000 were generated, soil carbon in emissions extent is 10.
        if '2000' not in carbon_pool_extent:
            uu.upload_final_set(output_dir_list[4], output_pattern_list[4])
        else:
            uu.upload_final_set(output_dir_list[10], output_pattern_list[10])

        uu.check_storage()

    if '2000' in carbon_pool_extent:
        uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.")
        uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while having AGC and BGC 2000 on.
    # Thus must delete BGC and soil C 2000 for creation of deadwood and litter, then copy them back to spot machine
    # for total C 2000 calculation.
    if '2000' in carbon_pool_extent:

        # Files to download for total C 2000. Previously deleted to save space
        download_dict = {
            cn.BGC_2000_dir: [cn.pattern_BGC_2000],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000]
        }

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    uu.print_log("Creating tiles of total carbon")
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 14  # 14 processors = XXX GB peak
            else:
                processes = 18  # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = XXX GB peak
        else: # For 2000, or loss & 2000
            processes = 12  # 12 processors = XXX GB peak
    else:
        processes = 2
    uu.print_log('Total carbon loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
    else:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
        uu.upload_final_set(output_dir_list[11], output_pattern_list[11])
    uu.check_storage()
# Ejemplo n.º 15  (scraper artifact: separator between concatenated code examples)
# 0
def mp_gross_removals_all_forest_types(sensit_type,
                                       tile_id_list,
                                       run_date=None,
                                       no_upload=True):
    """Creates tiles of gross removals (cumulative CO2 gain) for all forest types.

    Multiplies the annual aboveground and belowground removal factor tiles by
    the gain year count tiles, checks the outputs for empty tiles, and
    optionally uploads the results to s3.

    Args:
        sensit_type: sensitivity analysis type ('std' for the standard model).
        tile_id_list: list of tile ids to process, or 'all' to derive the list
            from the input tiles available on s3.
        run_date: optional YYYYMMDD string that replaces the date in the output
            s3 directories from constants_and_names.
        no_upload: if True (default), output tiles are not uploaded to s3.

    Returns:
        None. Output tiles are written to the working directory (and uploaded
        to s3 unless no_upload is True).
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # Gross removals can only be calculated where both inputs exist, so the tile
    # list is the intersection of the gain year count and annual removals tiles.
    if tile_id_list == 'all':
        # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)
        gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir,
                                                       sensit_type=sensit_type)
        annual_removals_tile_id_list = uu.tile_list_s3(
            cn.annual_gain_AGC_all_types_dir, sensit_type=sensit_type)
        tile_id_list = list(
            set(gain_year_count_tile_id_list).intersection(
                annual_removals_tile_id_list))
        uu.print_log(
            "Gross removals tile_id_list is combination of gain_year_count and annual_removals tiles:"
        )

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.annual_gain_BGC_all_types_dir:
        [cn.pattern_annual_gain_BGC_all_types],
        cn.gain_year_count_dir: [cn.pattern_gain_year_count]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_all_types,
        cn.pattern_cumul_gain_BGCO2_all_types,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        # 'directory' rather than 'dir' so the builtin dir() is not shadowed
        for directory, patterns in download_dict.items():
            uu.s3_flexible_download(directory, patterns[0], cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Calculates gross removals. Processor counts are tuned to the memory of
    # the r5d.24xlarge (96 processor) machine; anything else gets a safe default.
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 18
        else:
            processes = 22  # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak
    else:
        processes = 2
    uu.print_log('Gross removals max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            gross_removals_all_forest_types.gross_removals_all_forest_types,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type,
            no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload)

    # Checks the gross removals outputs for tiles with no data
    for output_pattern in output_pattern_list:
        if cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty_light,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = 55  # 55 processors = 670 GB peak
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        for output_dir, output_pattern in zip(output_dir_list,
                                              output_pattern_list):
            uu.upload_final_set(output_dir, output_pattern)
def _US_group_region_dicts(rate_table, value_vars, variable_codes):
    """Builds the two lookup dictionaries used to assign US removal rates/stdevs.

    Converts the wide FIA table (one column per age category) to long form,
    encodes each region-group-age combination as a unique numeric key, and
    returns two dictionaries:

      1. Keyed by group-region-age code (age_cat + forest_group_code*100 +
         FIA_region_code), value is the rate (or stdev). Although these are
         applied to all standard gain model pixels at first, they are not
         ultimately used for pixels that have Hansen gain.
      2. Keyed by group-region code (forest_group_code*100 + FIA_region_code)
         using only the youngest age category's value. These are assigned to
         Hansen gain pixels, which automatically get the young forest rate,
         regardless of the forest age category raster.

    Args:
        rate_table: dataframe with FIA_region_code, forest_group_code, and one
            column per age category (e.g. growth_young or SD_young, ...).
        value_vars: the three age-category column names to melt into long form.
        variable_codes: mapping from those column names to the values the
            forest age category raster uses (1000/2000/3000).

    Returns:
        (group_region_age_dict, group_region_dict) tuple of dicts.
    """

    # Converts the table from wide to long, so each region-group-age category
    # has its own row; rows with no value are dropped.
    long_table = pd.melt(rate_table,
                         id_vars=['FIA_region_code', 'forest_group_code'],
                         value_vars=value_vars)
    long_table = long_table.dropna()

    # Creates a unique value for each forest group-region-age category in the table.
    long_table = long_table.replace({"variable": variable_codes})
    long_table['age_cat'] = long_table['variable'] * 10
    long_table['group_region_age_combined'] = long_table['age_cat'] + \
                                              long_table['forest_group_code'] * 100 + \
                                              long_table['FIA_region_code']
    # Key is the unique group-region-age code; value is the rate/stdev.
    group_region_age_dict = pd.Series(
        long_table.value.values,
        index=long_table.group_region_age_combined).to_dict()

    # Keeps only the youngest age category (encoded as 10000) and creates a
    # unique value for each forest group-region category.
    young_table = long_table.drop(
        long_table[long_table.age_cat != 10000].index)
    young_table['group_region_combined'] = young_table['forest_group_code'] * 100 + \
                                           young_table['FIA_region_code']
    # Key is the unique group-region code (youngest age category); value is the rate/stdev.
    group_region_dict = pd.Series(
        young_table.value.values,
        index=young_table.group_region_combined).to_dict()

    return group_region_age_dict, group_region_dict


def mp_US_removal_rates(sensit_type, tile_id_list, run_date):
    """Creates tiles of US-specific removal rates and their standard deviations.

    Downloads the FIA region, forest group, and age category tiles, reads the
    US-specific removal rate spreadsheet into lookup dictionaries, and applies
    them per tile with multiprocessing.

    Args:
        sensit_type: sensitivity analysis type ('std' for the standard model).
        tile_id_list: list of tile ids to process, or 'all' to use every tile
            with FIA region data.
        run_date: optional YYYYMMDD string (or None) that replaces the date in
            the output s3 directories from constants_and_names.

    Returns:
        None. Output tiles are written locally and uploaded to s3.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        tile_id_list = uu.tile_list_s3(cn.FIA_regions_processed_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed],
        cn.FIA_forest_group_processed_dir:
        [cn.pattern_FIA_forest_group_processed],
        cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir,
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGC_BGC_natrl_forest_US,
        cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        # 'directory' rather than 'dir' so the builtin dir() is not shadowed
        for directory, patterns in download_dict.items():
            uu.s3_flexible_download(directory, patterns[0], cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Table with US-specific removal rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    # Imports the table with the region-group-age AGC+BGC removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_AGC+BGC")

    # Removal factor dictionaries (group-region-age and young-only group-region)
    gain_table_group_region_age_dict, gain_table_group_region_dict = \
        _US_group_region_dicts(
            gain_table,
            value_vars=['growth_young', 'growth_middle', 'growth_old'],
            variable_codes={
                'growth_young': 1000,
                'growth_middle': 2000,
                'growth_old': 3000
            })
    uu.print_log(gain_table_group_region_age_dict)
    uu.print_log(gain_table_group_region_dict)

    # Removal factor standard deviation dictionaries, built the same way from
    # the SD_* columns of the same table
    stdev_table_group_region_age_dict, stdev_table_group_region_dict = \
        _US_group_region_dicts(
            gain_table,
            value_vars=['SD_young', 'SD_middle', 'SD_old'],
            variable_codes={
                'SD_young': 1000,
                'SD_middle': 2000,
                'SD_old': 3000
            })
    uu.print_log(stdev_table_group_region_age_dict)
    uu.print_log(stdev_table_group_region_dict)

    if cn.count == 96:
        processes = 68  # 68 processors (only 16 tiles though) = 310 GB peak
    else:
        processes = 24
    uu.print_log('US natural forest AGC+BGC removal rate max processors=',
                 processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            US_removal_rates.US_removal_rate_calc,
            gain_table_group_region_age_dict=gain_table_group_region_age_dict,
            gain_table_group_region_dict=gain_table_group_region_dict,
            stdev_table_group_region_age_dict=stdev_table_group_region_age_dict,
            stdev_table_group_region_dict=stdev_table_group_region_dict,
            output_pattern_list=output_pattern_list), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id,
    #       gain_table_group_region_age_dict,
    #       gain_table_group_region_dict,
    #       stdev_table_group_region_age_dict,
    #       stdev_table_group_region_dict,
    #       output_pattern_list)

    # Uploads output tiles to s3
    for output_dir, output_pattern in zip(output_dir_list,
                                          output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
# Ejemplo n.º 17  (scraper artifact: separator between concatenated code examples)
# 0
def main():

    os.chdir(cn.docker_base_dir)

    # List of possible model stages to run (not including mangrove and planted forest stages)
    model_stages = [
        'all', 'model_extent', 'forest_age_category_IPCC',
        'annual_removals_IPCC', 'annual_removals_all_forest_types',
        'gain_year_count', 'gross_removals_all_forest_types', 'carbon_pools',
        'gross_emissions', 'net_flux', 'aggregate'
    ]

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description='Run the full carbon flux model')
    parser.add_argument('--model-type',
                        '-t',
                        required=True,
                        help='{}'.format(cn.model_type_arg_help))
    parser.add_argument(
        '--stages',
        '-s',
        required=True,
        help='Stages for running the flux model. Options are {}'.format(
            model_stages))
    parser.add_argument(
        '--run-through',
        '-r',
        required=True,
        help=
        'Options: true or false. true: run named stage and following stages. false: run only named stage.'
    )
    parser.add_argument('--run-date',
                        '-d',
                        required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    parser.add_argument(
        '--tile-id-list',
        '-l',
        required=True,
        help=
        'List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.'
    )
    parser.add_argument(
        '--carbon-pool-extent',
        '-ce',
        required=False,
        help=
        'Time period for which carbon emitted_pools should be calculated: loss, 2000, loss,2000, or 2000,loss'
    )
    parser.add_argument(
        '--emitted-pools-to-use',
        '-p',
        required=False,
        help=
        'Options are soil_only or biomass_soil. Former only considers emissions from soil. Latter considers emissions from biomass and soil.'
    )
    parser.add_argument(
        '--tcd-threshold',
        '-tcd',
        required=False,
        help=
        'Tree cover density threshold above which pixels will be included in the aggregation.'
    )
    parser.add_argument(
        '--std-net-flux-aggreg',
        '-sagg',
        required=False,
        help=
        'The s3 standard model net flux aggregated tif, for comparison with the sensitivity analysis map'
    )
    parser.add_argument(
        '--mangroves',
        '-ma',
        required=False,
        help=
        'Include mangrove removal rate and standard deviation tile creation step (before model extent). true or false.'
    )
    parser.add_argument(
        '--us-rates',
        '-us',
        required=False,
        help=
        'Include US removal rate and standard deviation tile creation step (before model extent). true or false.'
    )
    parser.add_argument(
        '--per-pixel-results',
        '-ppr',
        required=False,
        help=
        'Include per pixel result calculations for gross emissions (all gases, all pools), gross removals, and net flux. true or false.'
    )
    parser.add_argument('--log-note',
                        '-ln',
                        required=False,
                        help='Note to include in log header about model run.')
    args = parser.parse_args()

    sensit_type = args.model_type
    stage_input = args.stages
    run_through = args.run_through
    run_date = args.run_date
    tile_id_list = args.tile_id_list
    carbon_pool_extent = args.carbon_pool_extent
    emitted_pools = args.emitted_pools_to_use
    thresh = args.tcd_threshold
    if thresh is not None:
        thresh = int(thresh)
    std_net_flux = args.std_net_flux_aggreg
    include_mangroves = args.mangroves
    include_us = args.us_rates
    include_per_pixel = args.per_pixel_results
    log_note = args.log_note

    # Start time for script
    script_start = datetime.datetime.now()

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list,
                    sensit_type=sensit_type,
                    run_date=run_date,
                    stage_input=stage_input,
                    run_through=run_through,
                    carbon_pool_extent=carbon_pool_extent,
                    emitted_pools=emitted_pools,
                    thresh=thresh,
                    std_net_flux=std_net_flux,
                    include_mangroves=include_mangroves,
                    include_us=include_us,
                    include_per_pixel=include_per_pixel,
                    log_note=log_note)

    # Checks the validity of the model stage arguments. If either one is invalid, the script ends.
    if (stage_input not in model_stages):
        uu.exception_log(
            'Invalid stage selection. Please provide a stage from',
            model_stages)
    else:
        pass
    if (run_through not in ['true', 'false']):
        uu.exception_log(
            'Invalid run through option. Please enter true or false.')
    else:
        pass

    # Generates the list of stages to run
    actual_stages = uu.analysis_stages(model_stages,
                                       stage_input,
                                       run_through,
                                       include_mangroves=include_mangroves,
                                       include_us=include_us,
                                       include_per_pixel=include_per_pixel)
    uu.print_log("Analysis stages to run:", actual_stages)

    # Reports how much storage is being used with files
    uu.check_storage()

    # Checks whether the sensitivity analysis argument is valid
    uu.check_sensit_type(sensit_type)

    # Checks if the carbon pool type is specified if the stages to run includes carbon pool generation.
    # Does this up front so the user knows before the run begins that information is missing.
    if ('carbon_pools' in actual_stages) & (carbon_pool_extent not in [
            'loss', '2000', 'loss,2000', '2000,loss'
    ]):
        uu.exception_log(
            "Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss."
        )

    # Checks if the correct c++ script has been compiled for the pool option selected.
    # Does this up front so that the user is prompted to compile the C++ before the script starts running, if necessary.
    if 'gross_emissions' in actual_stages:

        if emitted_pools == 'biomass_soil':
            # Some sensitivity analyses have specific gross emissions scripts.
            # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
            if sensit_type in ['no_shifting_ag', 'convert_to_grassland']:
                if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(
                        cn.c_emis_compile_dst, sensit_type)):
                    uu.print_log(
                        "C++ for {} already compiled.".format(sensit_type))
                else:
                    uu.exception_log(
                        'Must compile standard {} model C++...'.format(
                            sensit_type))
            else:
                if os.path.exists(
                        '{0}/calc_gross_emissions_generic.exe'.format(
                            cn.c_emis_compile_dst)):
                    uu.print_log("C++ for generic emissions already compiled.")
                else:
                    uu.exception_log('Must compile generic emissions C++...')

        elif (emitted_pools == 'soil_only') & (sensit_type == 'std'):
            if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(
                    cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log('Must compile soil_only C++...')

        else:
            uu.exception_log(
                'Pool and/or sensitivity analysis option not valid for gross emissions'
            )

    # Checks whether the canopy cover argument is valid up front.
    if 'aggregate' in actual_stages:
        if thresh < 0 or thresh > 99:
            uu.exception_log(
                'Invalid tcd. Please provide an integer between 0 and 99.')
        else:
            pass

    # If the tile_list argument is an s3 folder, the list of tiles in it is created
    if 's3://' in tile_id_list:
        tile_id_list = uu.tile_list_s3(tile_id_list, 'std')
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))),
            "\n")
    # Otherwise, check that the tile list argument is valid. "all" is the way to specify that all tiles should be processed
    else:
        tile_id_list = uu.tile_id_list_check(tile_id_list)

    # List of output directories and output file name patterns.
    # The directory list is only used for counting tiles in output folders at the end of the model
    output_dir_list = [
        cn.model_extent_dir, cn.age_cat_IPCC_dir,
        cn.annual_gain_AGB_IPCC_defaults_dir,
        cn.annual_gain_BGB_IPCC_defaults_dir,
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir, cn.removal_forest_type_dir,
        cn.annual_gain_AGC_all_types_dir, cn.annual_gain_BGC_all_types_dir,
        cn.annual_gain_AGC_BGC_all_types_dir,
        cn.stdev_annual_gain_AGC_all_types_dir, cn.gain_year_count_dir,
        cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir
    ]

    # Prepends the mangrove and US output directories if mangroves are included
    if 'annual_removals_mangrove' in actual_stages:

        output_dir_list = [
            cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
            cn.stdev_annual_gain_AGB_mangrove_dir
        ] + output_dir_list

    if 'annual_removals_us' in actual_stages:

        output_dir_list = [
            cn.annual_gain_AGC_BGC_natrl_forest_US_dir,
            cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir
        ] + output_dir_list

    # Adds the carbon directories depending on which carbon emitted_pools are being generated: 2000 and/or emissions year
    if 'carbon_pools' in actual_stages:
        if 'loss' in carbon_pool_extent:
            output_dir_list = output_dir_list + [
                cn.AGC_emis_year_dir, cn.BGC_emis_year_dir,
                cn.deadwood_emis_year_2000_dir, cn.litter_emis_year_2000_dir,
                cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir
            ]

        if '2000' in carbon_pool_extent:
            output_dir_list = output_dir_list + [
                cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir,
                cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir,
                cn.total_C_2000_dir
            ]

    # Adds the biomass_soil output directories or the soil_only output directories depending on the model run
    if 'gross_emissions' in actual_stages:
        if emitted_pools == 'biomass_soil':
            output_dir_list = output_dir_list + [
                cn.gross_emis_commod_biomass_soil_dir,
                cn.gross_emis_shifting_ag_biomass_soil_dir,
                cn.gross_emis_forestry_biomass_soil_dir,
                cn.gross_emis_wildfire_biomass_soil_dir,
                cn.gross_emis_urban_biomass_soil_dir,
                cn.gross_emis_no_driver_biomass_soil_dir,
                cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
                cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
                cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
                cn.gross_emis_nodes_biomass_soil_dir
            ]

        else:
            output_dir_list = output_dir_list + [
                cn.gross_emis_commod_soil_only_dir,
                cn.gross_emis_shifting_ag_soil_only_dir,
                cn.gross_emis_forestry_soil_only_dir,
                cn.gross_emis_wildfire_soil_only_dir,
                cn.gross_emis_urban_soil_only_dir,
                cn.gross_emis_no_driver_soil_only_dir,
                cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                cn.gross_emis_nodes_soil_only_dir
            ]

    output_dir_list = output_dir_list + [
        cn.net_flux_dir, cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_dir,
        cn.net_flux_per_pixel_dir
    ]

    # Output patterns aren't actually used in the script-- here just for reference.
    output_pattern_list = [
        cn.pattern_model_extent, cn.pattern_age_cat_IPCC,
        cn.pattern_annual_gain_AGB_IPCC_defaults,
        cn.pattern_annual_gain_BGB_IPCC_defaults,
        cn.pattern_stdev_annual_gain_AGB_IPCC_defaults,
        cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types,
        cn.pattern_annual_gain_BGC_all_types,
        cn.pattern_annual_gain_AGC_BGC_all_types,
        cn.pattern_stdev_annual_gain_AGC_all_types, cn.pattern_gain_year_count,
        cn.pattern_cumul_gain_AGCO2_all_types,
        cn.pattern_cumul_gain_BGCO2_all_types,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types
    ]

    # Prepends the mangrove and US output pattern if mangroves are included
    if 'annual_removals_mangrove' in actual_stages:

        output_pattern_list = [
            cn.pattern_annual_gain_AGB_mangrove,
            cn.pattern_annual_gain_BGB_mangrove,
            cn.pattern_stdev_annual_gain_AGB_mangrove
        ] + output_pattern_list

    if 'annual_removals_us' in actual_stages:

        output_pattern_list = [
            cn.pattern_annual_gain_AGC_BGC_natrl_forest_US,
            cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US
        ] + output_pattern_list

    # Adds the soil carbon patterns depending on which carbon emitted_pools are being generated: 2000 and/or emissions year
    if 'carbon_pools' in actual_stages:
        if 'loss' in carbon_pool_extent:
            output_pattern_list = output_pattern_list + [
                cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year,
                cn.pattern_deadwood_emis_year_2000,
                cn.pattern_litter_emis_year_2000,
                cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year
            ]

        if '2000' in carbon_pool_extent:
            output_pattern_list = output_pattern_list + [
                cn.pattern_AGC_2000, cn.pattern_BGC_2000,
                cn.pattern_deadwood_2000, cn.pattern_litter_2000,
                cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000
            ]

    # Adds the biomass_soil output patterns or the soil_only output directories depending on the model run
    if 'gross_emissions' in actual_stages:
        if emitted_pools == 'biomass_soil':
            output_pattern_list = output_pattern_list + [
                cn.pattern_gross_emis_commod_biomass_soil,
                cn.pattern_gross_emis_shifting_ag_biomass_soil,
                cn.pattern_gross_emis_forestry_biomass_soil,
                cn.pattern_gross_emis_wildfire_biomass_soil,
                cn.pattern_gross_emis_urban_biomass_soil,
                cn.pattern_gross_emis_no_driver_biomass_soil,
                cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil,
                cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil,
                cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil
            ]

        else:
            output_pattern_list = output_pattern_list + [
                cn.pattern_gross_emis_commod_soil_only,
                cn.pattern_gross_emis_shifting_ag_soil_only,
                cn.pattern_gross_emis_forestry_soil_only,
                cn.pattern_gross_emis_wildfire_soil_only,
                cn.pattern_gross_emis_urban_soil_only,
                cn.pattern_gross_emis_no_driver_soil_only,
                cn.pattern_gross_emis_all_gases_all_drivers_soil_only,
                cn.pattern_gross_emis_co2_only_all_drivers_soil_only,
                cn.pattern_gross_emis_non_co2_all_drivers_soil_only,
                cn.pattern_gross_emis_nodes_soil_only
            ]

    output_pattern_list = output_pattern_list + [
        cn.pattern_net_flux,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel,
        cn.pattern_net_flux_per_pixel
    ]

    # Creates tiles of annual AGB and BGB gain rate and AGB stdev for mangroves using the standard model
    # removal function
    if 'annual_removals_mangrove' in actual_stages:

        uu.print_log(":::::Creating tiles of annual removals for mangrove")
        start = datetime.datetime.now()

        mp_annual_gain_rate_mangrove(sensit_type,
                                     tile_id_list,
                                     run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_mangrove:",
                     elapsed_time, "\n")

    # Creates tiles of annual AGC+BGC gain rate and AGC stdev for US-specific removals using the standard model
    # removal function
    if 'annual_removals_us' in actual_stages:

        uu.print_log(":::::Creating tiles of annual removals for US")
        start = datetime.datetime.now()

        mp_US_removal_rates(sensit_type, tile_id_list, run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_us:",
                     elapsed_time, "\n")

    # Creates model extent tiles
    if 'model_extent' in actual_stages:

        uu.print_log(":::::Creating tiles of model extent")
        start = datetime.datetime.now()

        mp_model_extent(sensit_type, tile_id_list, run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for model_extent:", elapsed_time,
                     "\n", "\n")

    # Creates age category tiles for natural forests
    if 'forest_age_category_IPCC' in actual_stages:

        uu.print_log(
            ":::::Creating tiles of forest age categories for IPCC removal rates"
        )
        start = datetime.datetime.now()

        mp_forest_age_category_IPCC(sensit_type,
                                    tile_id_list,
                                    run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for forest_age_category_IPCC:",
                     elapsed_time, "\n", "\n")

    # Creates tiles of annual AGB and BGB gain rates using IPCC Table 4.9 defaults
    if 'annual_removals_IPCC' in actual_stages:

        uu.print_log(
            ":::::Creating tiles of annual aboveground and belowground removal rates using IPCC defaults"
        )
        start = datetime.datetime.now()

        mp_annual_gain_rate_IPCC_defaults(sensit_type,
                                          tile_id_list,
                                          run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for annual_gain_rate_IPCC:",
                     elapsed_time, "\n", "\n")

    # Creates tiles of annual AGC and BGC removal factors for the entire model, combining removal factors from all forest types
    if 'annual_removals_all_forest_types' in actual_stages:
        uu.print_log(
            ":::::Creating tiles of annual aboveground and belowground removal rates for all forest types"
        )
        start = datetime.datetime.now()

        mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type,
                                                     tile_id_list,
                                                     run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(
            ":::::Processing time for annual_gain_rate_AGC_BGC_all_forest_types:",
            elapsed_time, "\n", "\n")

    # Creates tiles of the number of years of removals for all model pixels (across all forest types)
    if 'gain_year_count' in actual_stages:

        uu.print_log(
            ":::::Freeing up memory for gain year count creation by deleting unneeded tiles"
        )
        tiles_to_delete = []
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_natrl_forest_US)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_natrl_forest_young)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_BGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_ifl_primary)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_planted_forest_type_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_natrl_forest_young)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_all_types)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()

        uu.print_log(
            ":::::Creating tiles of gain year count for all removal pixels")
        start = datetime.datetime.now()

        mp_gain_year_count_all_forest_types(sensit_type,
                                            tile_id_list,
                                            run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for gain_year_count:", elapsed_time,
                     "\n", "\n")

    # Creates tiles of gross removals for all forest types (aboveground, belowground, and above+belowground)
    if 'gross_removals_all_forest_types' in actual_stages:

        uu.print_log(
            ":::::Creating gross removals for all forest types combined (above + belowground) tiles'"
        )
        start = datetime.datetime.now()

        mp_gross_removals_all_forest_types(sensit_type,
                                           tile_id_list,
                                           run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(
            ":::::Processing time for gross_removals_all_forest_types:",
            elapsed_time, "\n", "\n")

    # Creates carbon emitted_pools in loss year
    if 'carbon_pools' in actual_stages:

        uu.print_log(
            ":::::Freeing up memory for carbon pool creation by deleting unneeded tiles"
        )
        tiles_to_delete = []
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_model_extent)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_natrl_forest_US)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_natrl_forest_young)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_BGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGC_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_annual_gain_AGC_BGC_all_types)))
        tiles_to_delete.extend(glob.glob('*growth_years*tif'))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_gain_year_count)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_BGCO2_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_cumul_gain_AGCO2_BGCO2_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_ifl_primary)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_planted_forest_type_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGB_mangrove)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_natrl_forest_young)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGB_IPCC_defaults)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_stdev_annual_gain_AGC_all_types)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()

        uu.print_log(":::::Creating carbon pool tiles")
        start = datetime.datetime.now()

        mp_create_carbon_pools(sensit_type,
                               tile_id_list,
                               carbon_pool_extent,
                               run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for create_carbon_pools:",
                     elapsed_time, "\n", "\n")

    # Creates gross emissions tiles by driver, gas, and all emissions combined
    if 'gross_emissions' in actual_stages:

        uu.print_log(
            ":::::Freeing up memory for gross emissions creation by deleting unneeded tiles"
        )
        tiles_to_delete = []
        # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(
            cn.pattern_AGC_2000)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(
            cn.pattern_BGC_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_deadwood_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_litter_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_total_C_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_elevation)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        uu.print_log(tiles_to_delete)

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()

        uu.print_log(":::::Creating gross emissions tiles")
        start = datetime.datetime.now()

        mp_calculate_gross_emissions(sensit_type,
                                     tile_id_list,
                                     emitted_pools,
                                     run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for gross_emissions:", elapsed_time,
                     "\n", "\n")

    # Creates net flux tiles (gross emissions - gross removals)
    if 'net_flux' in actual_stages:

        uu.print_log(
            ":::::Freeing up memory for net flux creation by deleting unneeded tiles"
        )
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_commod_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_shifting_ag_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_forestry_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_wildfire_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_urban_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_no_driver_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_gross_emis_nodes_biomass_soil)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_AGC_emis_year)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_BGC_emis_year)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_deadwood_emis_year_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_litter_emis_year_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_soil_C_emis_year_2000)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_total_C_emis_year)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_peat_mask)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_ifl_primary)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(
                cn.pattern_planted_forest_type_unmasked)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_drivers)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_climate_zone)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_bor_tem_trop_processed)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_burn_year)))
        tiles_to_delete.extend(
            glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000)))
        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()

        uu.print_log(":::::Creating net flux tiles")
        start = datetime.datetime.now()

        mp_net_flux(sensit_type, tile_id_list, run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for net_flux:", elapsed_time, "\n",
                     "\n")

    # Aggregates gross emissions, gross removals, and net flux to coarser resolution.
    # For sensitivity analyses, creates percent difference and sign change maps compared to standard model net flux.
    if 'aggregate' in actual_stages:

        uu.print_log(":::::Creating 4x4 km aggregate maps")
        start = datetime.datetime.now()

        mp_aggregate_results_to_4_km(sensit_type,
                                     thresh,
                                     tile_id_list,
                                     std_net_flux=std_net_flux,
                                     run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for aggregate:", elapsed_time, "\n",
                     "\n")

    # Converts gross emissions, gross removals and net flux from per hectare rasters to per pixel rasters
    if 'per_pixel_results' in actual_stages:

        uu.print_log(":::::Creating per pixel versions of main model outputs")
        start = datetime.datetime.now()

        mp_output_per_pixel(sensit_type, tile_id_list, run_date=run_date)

        end = datetime.datetime.now()
        elapsed_time = end - start
        uu.check_storage()
        uu.print_log(":::::Processing time for per pixel raster creation:",
                     elapsed_time, "\n", "\n")

    uu.print_log(":::::Counting tiles output to each folder")

    # Modifies output directory names to make them match those used during the model run.
    # The tiles in each of these directories and counted and logged.
    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Modifying output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # Changes the date in the output directories. This date was used during the model run.
    # This replaces the date in constants_and_names.
    if run_date:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    for output in output_dir_list:

        tile_count = uu.count_tiles_s3(output)
        uu.print_log("Total tiles in", output, ": ", tile_count)

    script_end = datetime.datetime.now()
    script_elapsed_time = script_end - script_start
    uu.print_log(":::::Processing time for entire run:", script_elapsed_time,
                 "\n")
def mp_create_inputs_for_C_pools(tile_id_list, run_date=None):
    """Creates the three input tile sets used for building the carbon emitted_pools
    (boreal/temperate/tropical from FAO ecozones, elevation from srtm, precipitation)
    and uploads them to s3.

    :param tile_id_list: list of tile ids to process, or 'all' to run every tile in the model extent
    :param run_date: optional date string that replaces the date in the output s3 directories
    :return: None. Output rasters are uploaded to s3.
    """

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.bor_tem_trop_processed_dir, cn.elevation_processed_dir,
        cn.precip_processed_dir
    ]
    output_pattern_list = [
        cn.pattern_bor_tem_trop_processed, cn.pattern_elevation,
        cn.pattern_precip
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads two of the raw input files for creating carbon emitted_pools.
    # (input_dir is used instead of input to avoid shadowing the builtin.)
    input_files = [cn.fao_ecozone_raw_dir, cn.precip_raw_dir]

    for input_dir in input_files:
        uu.s3_file_download(input_dir, cn.docker_base_dir, sensit_type)

    uu.print_log(
        "Unzipping boreal/temperate/tropical file (from FAO ecozones)")
    cmd = [
        'unzip', '{}'.format(cn.pattern_fao_ecozone_raw), '-d',
        cn.docker_base_dir
    ]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.print_log("Copying elevation (srtm) files")
    uu.s3_folder_download(cn.srtm_raw_dir, './srtm', sensit_type)

    uu.print_log("Making elevation (srtm) vrt")
    check_call(
        'gdalbuildvrt srtm.vrt srtm/*.tif', shell=True
    )  # I don't know how to convert this to output to the pipe, so just leaving as is

    # Worked with count/3 on an r4.16xlarge (140 out of 480 GB used). I think it should be fine with count/2 but didn't try it.
    processes = int(cn.count / 2)
    uu.print_log('Inputs for C emitted_pools max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(create_inputs_for_C_pools.create_input_files, tile_id_list)
    # Releases the worker processes once the map is done, matching the other
    # multiprocessing functions in this file
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_inputs_for_C_pools.create_input_files(tile_id)

    uu.print_log("Uploading output files")
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
# --- Ejemplo 19: scraped-example separator converted to a comment so the file parses ---
def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=None):
    """Creates annual aboveground and belowground biomass gain rate tiles for mangroves,
    plus the standard deviation tiles for the aboveground rates, using the IPCC Wetland
    Supplement default rates, and uploads the outputs to s3.

    :param sensit_type: sensitivity analysis type ('std' for the standard model)
    :param tile_id_list: list of tile ids to process, or 'all' to run every tile that has
        both mangrove biomass and continent-ecozone information
    :param run_date: optional date string that replaces the date in the output s3 directories
    :return: None. Output rasters are uploaded to s3.
    """

    os.chdir(cn.docker_base_dir)
    # Kept for safety although column assignments below are made on explicit copies
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # Lists the tiles that have both mangrove biomass and FAO ecozone information because both of these are necessary for
        # calculating mangrove gain
        mangrove_biomass_tile_list = uu.tile_list_s3(
            cn.mangrove_biomass_2000_dir)
        ecozone_tile_list = uu.tile_list_s3(cn.cont_eco_dir)
        tile_id_list = list(
            set(mangrove_biomass_tile_list).intersection(ecozone_tile_list))

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Input tile sets and their file name patterns
    download_dict = {
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
        cn.stdev_annual_gain_AGB_mangrove_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_mangrove,
        cn.pattern_annual_gain_BGB_mangrove,
        cn.pattern_stdev_annual_gain_AGB_mangrove
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel(cn.gain_spreadsheet,
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone).
    # The explicit copy makes the later column assignments operate on an independent
    # frame rather than a view, so no chained-assignment warning can be triggered.
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first').copy()

    # Creates belowground:aboveground biomass ratio dictionary for the three mangrove types, where the keys correspond to
    # the "mangType" field in the gain rate spreadsheet.
    # If the assignment of mangTypes to ecozones changes, that column in the spreadsheet may need to change and the
    # keys in this dictionary would need to change accordingly.
    type_ratio_dict = {
        '1': cn.below_to_above_trop_dry_mang,
        '2': cn.below_to_above_trop_wet_mang,
        '3': cn.below_to_above_subtrop_mang
    }
    type_ratio_dict_final = {
        int(k): float(v)
        for k, v in type_ratio_dict.items()
    }

    # Applies the belowground:aboveground biomass ratios for the three mangrove types to the annual aboveground gain rates to get
    # a column of belowground annual gain rates by mangrove type
    gain_table_simplified['BGB_AGB_ratio'] = gain_table_simplified[
        'mangType'].map(type_ratio_dict_final)
    gain_table_simplified[
        'BGB_annual_rate'] = gain_table_simplified.AGB_gain_tons_ha_yr * gain_table_simplified.BGB_AGB_ratio

    # Converts the continent-ecozone codes and corresponding gain rates to dictionaries for aboveground and belowground gain rates
    gain_above_dict = pd.Series(
        gain_table_simplified.AGB_gain_tons_ha_yr.values,
        index=gain_table_simplified.gainEcoCon).to_dict()
    gain_below_dict = pd.Series(
        gain_table_simplified.BGB_annual_rate.values,
        index=gain_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    gain_above_dict[0] = 0
    gain_below_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    gain_above_dict = {
        float(key): value
        for key, value in gain_above_dict.items()
    }
    gain_below_dict = {
        float(key): value
        for key, value in gain_below_dict.items()
    }

    ### To make the removal factor standard deviation dictionary

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    stdev_table = pd.read_excel(cn.gain_spreadsheet,
                                sheet_name="mangrove stdev, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts the continent-ecozone codes and corresponding gain rate standard deviations to dictionaries for aboveground and belowground gain rate stdevs
    stdev_dict = pd.Series(
        stdev_table_simplified.AGB_gain_stdev_tons_ha_yr.values,
        index=stdev_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    stdev_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    stdev_dict = {float(key): value for key, value in stdev_dict.items()}

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    # Ran with 18 processors on r4.16xlarge (430 GB memory peak)
    if cn.count == 96:
        processes = 20  #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak
    else:
        processes = 4
    uu.print_log('Mangrove annual gain rate max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_mangrove.annual_gain_rate,
                sensit_type=sensit_type,
                output_pattern_list=output_pattern_list,
                gain_above_dict=gain_above_dict,
                gain_below_dict=gain_below_dict,
                stdev_dict=stdev_dict), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #
    #     annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list,
    #           gain_above_dict, gain_below_dict, stdev_dict)

    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
# --- Ejemplo 20: scraped-example separator converted to a comment so the file parses ---
def mp_output_per_pixel(sensit_type, tile_id_list, run_date=None):
    """Converts the per hectare versions of the three main model outputs (gross removals,
    gross emissions, net flux) to per pixel rasters, tags them with metadata, and uploads
    them to s3.

    :param sensit_type: sensitivity analysis type ('std' for the standard model)
    :param tile_id_list: list of tile ids to process, or 'all' to run every tile in each
        input tile set
    :param run_date: optional date string that replaces the date in the output s3 directories
    :return: None. Output rasters are uploaded to s3.
    """

    os.chdir(cn.docker_base_dir)

    # Pixel area tiles-- necessary for calculating values per pixel
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, 'std', tile_id_list)

    # Files to download for this script. Unusually, this script needs the output pattern in the dictionary as well!
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [
            cn.pattern_cumul_gain_AGCO2_BGCO2_all_types,
            cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel
        ],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel
        ],
        cn.net_flux_dir: [cn.pattern_net_flux, cn.pattern_net_flux_per_pixel]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_dir,
        cn.net_flux_per_pixel_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel,
        cn.pattern_net_flux_per_pixel
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Records whether a full model run was requested BEFORE entering the loop.
    # tile_id_list is overwritten with an actual tile list inside the loop, so checking
    # the 'all' sentinel there would only ever fire on the first iteration and later
    # input tile sets would reuse the first set's tile list.
    run_all_tiles = tile_id_list == 'all'

    # Iterates through input tile sets
    for key, values in download_dict.items():

        # Sets the directory and pattern for the input being processed
        input_dir = key
        input_pattern = values[0]

        # If a full model run is specified, the tile list for this particular input tile set is used
        if run_all_tiles:
            tile_id_list = uu.tile_list_s3(input_dir, sensit_type)

        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        uu.print_log("Downloading tiles from", input_dir)
        uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir,
                                sensit_type, tile_id_list)

        # The pattern of the output files
        output_pattern = values[1]

        # 20 processors = 430 GB peak for cumul gain; 30 = 640 GB peak for cumul gain;
        # 32 = 680 GB peak for cumul gain; 33 = 710 GB peak for cumul gain, gross emis, net flux
        if cn.count == 96:
            processes = 20
        else:
            processes = 2
        uu.print_log("Creating {0} with {1} processors...".format(
            output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(output_per_pixel.output_per_pixel,
                    input_pattern=input_pattern,
                    output_pattern=output_pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     output_per_pixel.output_per_pixel(tile_id, input_pattern, output_pattern, sensit_type)

        # Metadata tags written onto each output raster
        metadata_list = [
            'units=Mg CO2e/pixel over model duration (2001-20{})'.format(
                cn.loss_years), 'extent=Model extent',
            'pixel_areas=Pixel areas depend on the latitude at which the pixel is found',
            'scale=If this is for net flux, negative values are net sinks and positive values are net sources'
        ]
        if cn.count == 96:
            processes = 45  # 45 processors = XXX GB peak
        else:
            processes = 9
        uu.print_log('Adding metadata tags max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(uu.add_metadata_tags,
                    output_pattern=output_pattern,
                    sensit_type=sensit_type,
                    metadata_list=metadata_list), tile_id_list)
        pool.close()
        pool.join()

        # for tile_id in tile_id_list:
        #     uu.add_metadata_tags(tile_id, output_pattern, sensit_type, metadata_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])