Example #1
0
def mp_annual_gain_rate_AGC_BGC_all_forest_types(sensit_type,
                                                 tile_id_list,
                                                 run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.model_extent_dir: [cn.pattern_model_extent],
        cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
        cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove],
        cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US],
        cn.annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_annual_gain_AGC_natrl_forest_young],
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults],
        cn.stdev_annual_gain_AGB_mangrove_dir:
        [cn.pattern_stdev_annual_gain_AGB_mangrove],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe],
        cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked],
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir:
        [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US],
        cn.stdev_annual_gain_AGC_natrl_forest_young_dir:
        [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young],
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.removal_forest_type_dir, cn.annual_gain_AGC_all_types_dir,
        cn.annual_gain_BGC_all_types_dir, cn.annual_gain_AGC_BGC_all_types_dir,
        cn.stdev_annual_gain_AGC_all_types_dir
    ]
    output_pattern_list = [
        cn.pattern_removal_forest_type, cn.pattern_annual_gain_AGC_all_types,
        cn.pattern_annual_gain_BGC_all_types,
        cn.pattern_annual_gain_AGC_BGC_all_types,
        cn.pattern_stdev_annual_gain_AGC_all_types
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
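    # A minimal, generic sketch of that pattern, assuming hypothetical names (process_tile and the
    # literal argument values below are illustrations, not part of this codebase):
    #
    # from functools import partial
    # from multiprocessing import Pool
    #
    # def process_tile(tile_id, output_pattern_list, sensit_type):
    #     print(tile_id, output_pattern_list, sensit_type)
    #
    # if __name__ == '__main__':
    #     with Pool(2) as pool:
    #         # partial() binds the keyword arguments; pool.map supplies each tile_id positionally
    #         pool.map(partial(process_tile,
    #                          output_pattern_list=['example_pattern'],
    #                          sensit_type='std'),
    #                  ['00N_000E', '00N_010E'])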
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 13
        else:
            processes = 17  # 30 processors > 740 GB peak; 18 = >740 GB peak; 16 = 660 GB peak; 17 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Removal factor processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_AGC_BGC_all_forest_types.
                annual_gain_rate_AGC_BGC_all_forest_types,
                output_pattern_list=output_pattern_list,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     annual_gain_rate_AGC_BGC_all_forest_types.annual_gain_rate_AGC_BGC_all_forest_types(tile_id, sensit_type)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Example #2
0
def mp_net_flux(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(
            cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
            cn.cumul_gain_AGCO2_BGCO2_all_types_dir,
            sensit_type=sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil]
    }

    # List of output directories and output file name patterns
    output_dir_list = [cn.net_flux_dir]
    output_pattern_list = [cn.pattern_net_flux]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 32  # 32 processors = XXX GB peak
        else:
            processes = 40  # 38 = 690 GB peak; 40 = 715 GB peak
    else:
        processes = 9
    uu.print_log('Net flux max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(net_flux.net_calc, pattern=pattern, sensit_type=sensit_type),
        tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     net_flux.net_calc(tile_id, output_pattern_list[0], sensit_type)

    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
Example #3
0
def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date=None):

    os.chdir(cn.docker_base_dir)

    if (sensit_type != 'std') and (carbon_pool_extent != 'loss'):
        uu.exception_log("Sensitivity analysis run must use 'loss' extent")

    # Checks the validity of the carbon_pool_extent argument
    if carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']:
        uu.exception_log("Invalid carbon_pool_extent input. Please choose 'loss', '2000', 'loss,2000', or '2000,loss'.")


    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant.
    if (tile_id_list == 'all') and (carbon_pool_extent == 'loss'):
        # Lists the tiles that have both model extent and loss pixels
        model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)
        loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type)
        uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:")
        tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list))

    # For runs generating carbon pools in 2000, all model extent tiles are relevant.
    if (tile_id_list == 'all') and (carbon_pool_extent != 'loss'):
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)


    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    output_dir_list = []
    output_pattern_list = []

    # Output files and patterns, and files to download, if carbon pools for 2000 are being generated
    if '2000' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir,
                           cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000,
                               cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000]

        # Files to download for this script
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Output files and patterns, and files to download, if carbon pools for the loss year are being generated
    if 'loss' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir,
                           cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000,
                               cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year]
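
        # When both '2000' and 'loss' are requested, the output lists hold the 2000 pools at indices 0-5
        # (AGC, BGC, deadwood, litter, soil C, total C) and the emissions-year pools at indices 6-11 in the
        # same order; with a single extent, only indices 0-5 are used. The hard-coded indices in the
        # upload calls below rely on this ordering.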

        # Files to download for this script. This has the same items as the download_dict for 2000 pools plus
        # other tiles.
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
            cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]


    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)
    else:
        uu.print_log("Output directory list for standard model:", output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    pd.options.mode.chained_assignment = None

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

    mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                         cn.below_to_above_trop_dry_mang,
                                                                                         cn.below_to_above_trop_wet_mang,
                                                                                         cn.below_to_above_subtrop_mang)

    mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                              cn.deadwood_to_above_trop_dry_mang,
                                                                                              cn.deadwood_to_above_trop_wet_mang,
                                                                                              cn.deadwood_to_above_subtrop_mang)

    mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                            cn.litter_to_above_trop_dry_mang,
                                                                                            cn.litter_to_above_trop_wet_mang,
                                                                                            cn.litter_to_above_subtrop_mang)
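
    # mangrove_pool_ratio_dict is defined in create_carbon_pools.py and is not shown here. As a rough,
    # assumed sketch of the kind of lookup it returns (hypothetical field handling, not the actual
    # implementation), it maps each continent-ecozone code to a pool:AGB ratio chosen by mangrove type:
    #
    # def pool_ratio_dict_sketch(gain_table, ratio_dry, ratio_wet, ratio_subtrop):
    #     type_to_ratio = {1: ratio_dry, 2: ratio_wet, 3: ratio_subtrop}
    #     return {float(row.gainEcoCon): float(type_to_ratio[row.mangType])
    #             for row in gain_table.itertuples()}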

    uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 16  # 16 processors = XXX GB peak
            else:
                processes = 20  # 25 processors > 750 GB peak; 16 = 560 GB peak;
                # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 670 GB peak; 21 > 750 GB peak
        else: # For 2000, or loss & 2000
            processes = 15  # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak
    else:
        processes = 2
    uu.print_log('AGC loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_AGC,
                     sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
    else:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
        uu.upload_final_set(output_dir_list[6], output_pattern_list[6])
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles")
    tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent))
    # Creates a single filename pattern to pass to the multiprocessor call
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 30  # 30 processors = XXX GB peak
            else:
                processes = 38  # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 700 GB peak
        else: # For 2000, or loss & 2000
            processes = 30  # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak
    else:
        processes = 2
    uu.print_log('BGC max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                     carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
    else:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
        uu.upload_final_set(output_dir_list[7], output_pattern_list[7])
    uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while the AGC and BGC 2000 tiles are still on disk.
    # Thus BGC 2000, soil C 2000, and other inputs that are no longer needed are deleted before deadwood and litter creation,
    # then copied back to the spot machine for the total C 2000 calculation.
    if '2000' in carbon_pool_extent:
        uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles")
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000)))

        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()


    uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 14  # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 650 GB peak; 15 = 700 GB peak
        else: # For 2000, or loss & 2000
            ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced.
            ### There wouldn't have been enough room for all deadwood and litter otherwise.
            ### For example, when deadwood and litter generation started getting up to around 50N, I deleted
            ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S.
            processes = 16  # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Deadwood and litter max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                carbon_pool_extent=carbon_pool_extent,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
    else:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
        uu.upload_final_set(output_dir_list[8], output_pattern_list[8])  # deadwood
        uu.upload_final_set(output_dir_list[9], output_pattern_list[9])  # litter
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles")
    tiles_to_delete = []
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_elevation)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    if 'loss' in carbon_pool_extent:

        uu.print_log("Creating tiles of soil carbon in loss extent")

        # If pools in 2000 weren't generated, soil carbon in the emissions extent is at index 4 in the output lists.
        # If pools in 2000 were generated, it is at index 10.
        if '2000' not in carbon_pool_extent:
            pattern = output_pattern_list[4]
        else:
            pattern = output_pattern_list[10]

        if cn.count == 96:
            # More processors can be used for loss carbon pools than for 2000 carbon pools
            if carbon_pool_extent == 'loss':
                if sensit_type == 'biomass_swap':
                    processes = 36  # 36 processors = XXX GB peak
                else:
                    processes = 42  # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = XXX GB peak
            else: # For 2000, or loss & 2000
                processes = 12  # 12 processors = XXX GB peak
        else:
            processes = 2
        uu.print_log('Soil carbon loss year max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern,
                         sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type)

        # If pools in 2000 weren't generated, soil carbon in the emissions extent is at index 4 in the output lists.
        # If pools in 2000 were generated, it is at index 10.
        if '2000' not in carbon_pool_extent:
            uu.upload_final_set(output_dir_list[4], output_pattern_list[4])
        else:
            uu.upload_final_set(output_dir_list[10], output_pattern_list[10])

        uu.check_storage()

    if '2000' in carbon_pool_extent:
        uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.")
        uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while the AGC and BGC 2000 tiles are still on disk,
    # so BGC and soil C 2000 were deleted above. They are downloaded again here so that total C 2000
    # can be calculated.
    if '2000' in carbon_pool_extent:

        # Files to download for total C 2000. Previously deleted to save space
        download_dict = {
            cn.BGC_2000_dir: [cn.pattern_BGC_2000],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000]
        }

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    uu.print_log("Creating tiles of total carbon")
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 14  # 14 processors = XXX GB peak
            else:
                processes = 18  # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = XXX GB peak
        else: # For 2000, or loss & 2000
            processes = 12  # 12 processors = XXX GB peak
    else:
        processes = 2
    uu.print_log('Total carbon loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
    else:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
        uu.upload_final_set(output_dir_list[11], output_pattern_list[11])
    uu.check_storage()
Example #4
0
def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # Lists the tiles that have both mangrove biomass and FAO ecozone information because both of these are necessary for
        # calculating mangrove gain
        mangrove_biomass_tile_list = uu.tile_list_s3(
            cn.mangrove_biomass_2000_dir)
        ecozone_tile_list = uu.tile_list_s3(cn.cont_eco_dir)
        tile_id_list = list(
            set(mangrove_biomass_tile_list).intersection(ecozone_tile_list))

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    download_dict = {
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
        cn.stdev_annual_gain_AGB_mangrove_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_mangrove,
        cn.pattern_annual_gain_BGB_mangrove,
        cn.pattern_stdev_annual_gain_AGB_mangrove
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first')

    # Creates belowground:aboveground biomass ratio dictionary for the three mangrove types, where the keys correspond to
    # the "mangType" field in the gain rate spreadsheet.
    # If the assignment of mangTypes to ecozones changes, that column in the spreadsheet may need to change and the
    # keys in this dictionary would need to change accordingly.
    type_ratio_dict = {
        '1': cn.below_to_above_trop_dry_mang,
        '2': cn.below_to_above_trop_wet_mang,
        '3': cn.below_to_above_subtrop_mang
    }
    type_ratio_dict_final = {
        int(k): float(v)
        for k, v in list(type_ratio_dict.items())
    }

    # Applies the belowground:aboveground biomass ratios for the three mangrove types to the annual aboveground gain rates to get
    # a column of belowground annual gain rates by mangrove type
    gain_table_simplified['BGB_AGB_ratio'] = gain_table_simplified[
        'mangType'].map(type_ratio_dict_final)
    gain_table_simplified[
        'BGB_annual_rate'] = gain_table_simplified.AGB_gain_tons_ha_yr * gain_table_simplified.BGB_AGB_ratio
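
    # For illustration only (hypothetical numbers, not values from the spreadsheet): a row with
    # AGB_gain_tons_ha_yr = 2.0 and BGB_AGB_ratio = 0.5 would get BGB_annual_rate = 1.0.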

    # Converts the continent-ecozone codes and corresponding gain rates to dictionaries for aboveground and belowground gain rates
    gain_above_dict = pd.Series(
        gain_table_simplified.AGB_gain_tons_ha_yr.values,
        index=gain_table_simplified.gainEcoCon).to_dict()
    gain_below_dict = pd.Series(
        gain_table_simplified.BGB_annual_rate.values,
        index=gain_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    gain_above_dict[0] = 0
    gain_below_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    gain_above_dict = {
        float(key): value
        for key, value in gain_above_dict.items()
    }
    gain_below_dict = {
        float(key): value
        for key, value in gain_below_dict.items()
    }
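
    # For illustration only (hypothetical keys and values), the resulting dictionaries map
    # continent-ecozone codes to annual gain rates, e.g.:
    # gain_above_dict = {10401.0: 2.5, 20402.0: 1.8, ..., 0.0: 0}
    # gain_below_dict = {10401.0: 1.2, 20402.0: 0.9, ..., 0.0: 0}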

    ### To make the removal factor standard deviation dictionary

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                sheet_name="mangrove stdev, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts the continent-ecozone codes and corresponding gain rate standard deviations to dictionaries for aboveground and belowground gain rate stdevs
    stdev_dict = pd.Series(
        stdev_table_simplified.AGB_gain_stdev_tons_ha_yr.values,
        index=stdev_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    stdev_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    stdev_dict = {float(key): value for key, value in stdev_dict.items()}

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    # Ran with 18 processors on r4.16xlarge (430 GB memory peak)
    if cn.count == 96:
        processes = 20  #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak
    else:
        processes = 4
    uu.print_log('Mangrove annual gain rate max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_mangrove.annual_gain_rate,
                sensit_type=sensit_type,
                output_pattern_list=output_pattern_list,
                gain_above_dict=gain_above_dict,
                gain_below_dict=gain_below_dict,
                stdev_dict=stdev_dict), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #
    #     annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list,
    #           gain_above_dict, gain_below_dict, stdev_dict)

    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Example #5
0
def mp_create_soil_C(tile_id_list, no_upload=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(
            cn.WHRC_biomass_2000_unmasked_dir,
            cn.mangrove_biomass_2000_dir,
            set3=cn.gain_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.soil_C_full_extent_2000_non_mang_dir,
        cn.soil_C_full_extent_2000_dir, cn.stdev_soil_C_full_extent_2000_dir
    ]
    output_pattern_list = [
        cn.pattern_soil_C_full_extent_2000_non_mang,
        cn.pattern_soil_C_full_extent_2000, cn.pattern_stdev_soil_C_full_extent
    ]

    # ### Soil carbon density
    #
    # uu.print_log("Downloading mangrove soil C rasters")
    # uu.s3_file_download(os.path.join(cn.mangrove_soil_C_dir, cn.name_mangrove_soil_C), cn.docker_base_dir, sensit_type)
    #
    # # For downloading all tiles in the input folders.
    # input_files = [cn.mangrove_biomass_2000_dir]
    #
    # for input in input_files:
    #     uu.s3_folder_download(input, cn.docker_base_dir, sensit_type)
    #
    # # Download raw mineral soil C density tiles.
    # # First tries to download index.html.tmp from every folder, then goes back and downloads all the tifs in each folder
    # # Based on https://stackoverflow.com/questions/273743/using-wget-to-recursively-fetch-a-directory-with-arbitrary-files-in-it
    # # There are 12951 tiles and it takes about 3 hours to download them!
    # cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
    #                '--accept', '*.tif', '{}'.format(cn.mineral_soil_C_url)]
    # uu.log_subprocess_output_full(cmd)
    #
    # uu.print_log("Unzipping mangrove soil C rasters...")
    # cmd = ['unzip', '-j', cn.name_mangrove_soil_C, '-d', cn.docker_base_dir]
    # uu.log_subprocess_output_full(cmd)
    #
    # # Mangrove soil receives precedence over mineral soil
    # uu.print_log("Making mangrove soil C vrt...")
    # check_call('gdalbuildvrt mangrove_soil_C.vrt *{}*.tif'.format(cn.pattern_mangrove_soil_C_raw), shell=True)
    # uu.print_log("Done making mangrove soil C vrt")
    #
    # uu.print_log("Making mangrove soil C tiles...")
    #
    # if cn.count == 96:
    #     processes = 32   # 32 processors = 570 GB peak
    # else:
    #     processes = int(cn.count/3)
    # uu.print_log('Mangrove soil C max processors=', processes)
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(create_soil_C.create_mangrove_soil_C, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # # For single processor use
    # # for tile_id in tile_id_list:
    # #
    # #     create_soil_C.create_mangrove_soil_C(tile_id, no_upload)
    #
    # uu.print_log('Done making mangrove soil C tiles', '\n')

    uu.print_log("Making mineral soil C vrt...")
    check_call('gdalbuildvrt mineral_soil_C.vrt *{}*'.format(
        cn.pattern_mineral_soil_C_raw),
               shell=True)
    uu.print_log("Done making mineral soil C vrt")

    # Creates mineral soil C density tiles
    source_raster = 'mineral_soil_C.vrt'
    out_pattern = cn.pattern_soil_C_full_extent_2000_non_mang
    dt = 'Int16'
    if cn.count == 96:
        processes = 50  # 32 processors = 100 GB peak; 50 = XXX GB peak
    else:
        processes = int(cn.count / 2)
    uu.print_log(
        "Creating mineral soil C density tiles with {} processors...".format(
            processes))
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt,
                no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()
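
    # uu.mp_warp_to_Hansen is defined in universal_util and is not shown here. Conceptually it warps the
    # source VRT to each 10x10 degree, 0.00025 degree resolution Hansen tile. A rough, assumed equivalent
    # for one tile (xmin/ymin/xmax/ymax would come from the tile_id) is:
    #
    # cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=LZW',
    #        '-tr', '0.00025', '0.00025', '-tap', '-te',
    #        str(xmin), str(ymin), str(xmax), str(ymax),
    #        '-dstnodata', '0', '-ot', dt, '-overwrite',
    #        source_raster, '{0}_{1}.tif'.format(tile_id, out_pattern)]
    # uu.log_subprocess_output_full(cmd)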

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_soil_C.create_mineral_soil_C(tile_id)

    uu.print_log("Done making non-mangrove soil C tiles", "\n")

    output_pattern = cn.pattern_soil_C_full_extent_2000_non_mang
    processes = 50  # 50 processors = 550 GB peak
    uu.print_log(
        "Checking for empty tiles of {0} pattern with {1} processors...".
        format(output_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.check_and_delete_if_empty, output_pattern=output_pattern),
        tile_id_list)
    pool.close()
    pool.join()

    # If no_upload flag is not activated, output is uploaded to s3
    if not no_upload:

        uu.print_log("Uploading non-mangrove soil C density tiles")
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
Example #6
0
# Removes rows with duplicate codes (N. and S. America for the same ecozone)
gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                   keep='first')

# Converts the continent-ecozone codes and young forest gain rates to a dictionary
gain_table_dict = pd.Series(
    gain_table_simplified.growth_secondary_less_20.values,
    index=gain_table_simplified.gainEcoCon).to_dict()

# Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
gain_table_dict[0] = 0

# This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
# It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
num_of_processes = 20  # This works on an r4.16xlarge spot machine (not m4)
pool = Pool(num_of_processes)
pool.map(
    partial(forest_age_category_natrl_forest.forest_age_category,
            gain_table_dict=gain_table_dict), biomass_tile_list)
pool.close()
pool.join()

# # For single processor use
# for tile in biomass_tile_list:
#
#     forest_age_category_natrl_forest.forest_age_category(tile, gain_table_dict)

print "Tiles processed. Uploading to s3 now..."
uu.upload_final_set(cn.age_cat_natrl_forest_dir,
                    cn.pattern_age_cat_natrl_forest)
Example #7
0
def mp_burn_year(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.burn_year_dir]
    output_pattern_list = [cn.pattern_burn_year]

    # Step 1:
    # Downloads the latest year of raw burn area hdfs to the spot machine.
    # This step requires using osgeo/gdal:ubuntu-full-X.X.X Docker image because the small image doesn't have an
    # hdf driver in gdal.
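    # A quick, optional sanity check (a sketch, assuming the GDAL Python bindings shipped with the
    # osgeo/gdal images are available) that the HDF4 driver is present:
    #
    # from osgeo import gdal
    # if gdal.GetDriverByName('HDF4') is None:
    #     uu.exception_log("HDF4 driver missing from this GDAL build; use the ubuntu-full image")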
    file_name = "*.hdf"
    raw_source = '{0}/20{1}'.format(cn.burn_area_raw_ftp, cn.loss_years)
    cmd = [
        'wget', '-r', '--ftp-user=user', '--ftp-password=burnt_data',
        '--accept', file_name
    ]
    cmd += ['--no-directories', '--no-parent', raw_source]
    uu.log_subprocess_output_full(cmd)

    # Uploads the latest year of raw burn area hdfs to s3
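    # The --exclude '*' --include '*hdf' filters are applied in order, so only the hdf files just downloaded are copied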
    cmd = [
        'aws', 's3', 'cp', '.', cn.burn_year_hdf_raw_dir, '--recursive',
        '--exclude', '*', '--include', '*hdf'
    ]
    uu.log_subprocess_output_full(cmd)

    global_grid_hv = [
        "h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10",
        "h01v11", "h02v06", "h02v08", "h02v09", "h02v10", "h02v11", "h03v06",
        "h03v07", "h03v09", "h03v10", "h03v11", "h04v09", "h04v10", "h04v11",
        "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03", "h07v05",
        "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07",
        "h08v08", "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05",
        "h09v06", "h09v07", "h09v08", "h09v09", "h10v02", "h10v03", "h10v04",
        "h10v05", "h10v06", "h10v07", "h10v08", "h10v09", "h10v10", "h10v11",
        "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08",
        "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04",
        "h12v05", "h12v07", "h12v08", "h12v09", "h12v10", "h12v11", "h12v12",
        "h12v13", "h13v02", "h13v03", "h13v04", "h13v08", "h13v09", "h13v10",
        "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03", "h14v04",
        "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05",
        "h15v07", "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08",
        "h16v09", "h17v02", "h17v03", "h17v04", "h17v05", "h17v06", "h17v07",
        "h17v08", "h17v10", "h17v12", "h17v13", "h18v02", "h18v03", "h18v04",
        "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03",
        "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10",
        "h19v11", "h19v12", "h20v02", "h20v03", "h20v04", "h20v05", "h20v06",
        "h20v07", "h20v08", "h20v09", "h20v10", "h20v11", "h20v12", "h20v13",
        "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07", "h21v08",
        "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04",
        "h22v05", "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11",
        "h22v13", "h23v02", "h23v03", "h23v04", "h23v05", "h23v06", "h23v07",
        "h23v08", "h23v09", "h23v10", "h23v11", "h24v02", "h24v03", "h24v04",
        "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04",
        "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03",
        "h26v04", "h26v05", "h26v06", "h26v07", "h26v08", "h27v03", "h27v04",
        "h27v05", "h27v06", "h27v07", "h27v08", "h27v09", "h27v10", "h27v11",
        "h27v12", "h28v03", "h28v04", "h28v05", "h28v06", "h28v07", "h28v08",
        "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05",
        "h29v06", "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12",
        "h29v13", "h30v06", "h30v07", "h30v08", "h30v09", "h30v10", "h30v11",
        "h30v12", "h30v13", "h31v06", "h31v07", "h31v08", "h31v09", "h31v10",
        "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10",
        "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11",
        "h34v07", "h34v08", "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"
    ]

    # Step 2:
    # Makes burned area rasters for each year for each MODIS horizontal-vertical tile
    uu.print_log(
        "Stacking hdf into MODIS burned area tifs by year and MODIS hv tile..."
    )

    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count - 10)
    pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv)
    pool.close()
    pool.join()

    # # For single processor use
    # for hv_tile in global_grid_hv:
    #     stack_ba_hv.stack_ba_hv(hv_tile)

    # Step 3:
    # Creates a 10x10 degree wgs 84 tile of .00025 res burned year.
    # Downloads all MODIS hv tiles from s3,
    # makes a mosaic for each year, and warps to Hansen extent.
    # Range is inclusive at lower end and exclusive at upper end (e.g., 2001, 2020 goes from 2001 to 2019)
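    # With range(2019, 2020) as written, only the 2019 burned area is processed.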
    for year in range(2019, 2020):

        uu.print_log("Processing", year)

        # Downloads all hv tifs for this year
        include = '{0}_*.tif'.format(year)
        year_tifs_folder = "{}_year_tifs".format(year)
        utilities.makedir(year_tifs_folder)

        uu.print_log("Downloading MODIS burn date files from s3...")

        cmd = [
            'aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir,
            year_tifs_folder
        ]
        cmd += ['--recursive', '--exclude', "*", '--include', include]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Creating vrt of MODIS files...")

        vrt_name = "global_vrt_{}.vrt".format(year)

        # Builds a text file listing the tifs to include in the vrt
        with open('vrt_files.txt', 'w') as vrt_files:
            vrt_tifs = glob.glob(year_tifs_folder + "/*.tif")
            for tif in vrt_tifs:
                vrt_files.write(tif + "\n")

        # Creates vrt with wgs84 MODIS tiles.
        cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Reprojecting vrt...")

        # Builds a new vrt and virtually reprojects it.
        # This reprojection could be done as part of the clip_year_tiles function, but it is kept here as a
        # separate step.
        vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)
        cmd = [
            'gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr',
            '.00025', '.00025', '-overwrite', vrt_name, vrt_wgs84
        ]
        uu.log_subprocess_output_full(cmd)

        # Creates a list of lists, with year and tile id to send to multi processor
        tile_year_list = []
        for tile_id in tile_id_list:
            tile_year_list.append([tile_id, year])

        # Given a list of [tile_id, year] pairs (e.g., ['00N_000E', 2017]) and the global VRT of burn data
        # (pixels representing burned or not burned), this process clips the global VRT to each tile and
        # changes the pixel value to the year in which the pixel burned. Each output tile has values of
        # burn year and NoData.
        count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=count - 5)
        pool.map(clip_year_tiles.clip_year_tiles, tile_year_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_year in tile_year_list:
        #     clip_year_tiles.clip_year_tiles(tile_year)

        uu.print_log(
            "Processing for {} done. Moving to next year.".format(year))

    # Step 4:
    # Creates a single Hansen tile covering all years that represents where burning coincided with tree cover loss

    # Downloads the loss tiles
    uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss)

    uu.print_log(
        "Extracting burn year data that coincides with tree cover loss...")

    # Downloads the 10x10 deg burn year tiles (one for each year in which there was burned area), stacks them,
    # and evaluates them to return the burn year for Hansen loss pixels that burned within 1 year of the loss date
    if cn.count == 96:
        processes = 5
        # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory)
    else:
        processes = 1
    pool = multiprocessing.Pool(processes)
    pool.map(hansen_burnyear_final.hansen_burnyear, tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     hansen_burnyear_final.hansen_burnyear(tile_id)

    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
def mp_create_soil_C(tile_id_list, no_upload=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             set3=cn.gain_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # List of output directories and output file name patterns
    output_dir_list = [cn.soil_C_full_extent_2000_non_mang_dir, cn.soil_C_full_extent_2000_dir,
                       cn.stdev_soil_C_full_extent_2000_dir]
    output_pattern_list = [cn.pattern_soil_C_full_extent_2000_non_mang, cn.pattern_soil_C_full_extent_2000,
                           cn.pattern_stdev_soil_C_full_extent]


    ### Soil carbon density

    uu.print_log("Downloading mangrove soil C rasters")
    uu.s3_file_download(os.path.join(cn.mangrove_soil_C_dir, cn.name_mangrove_soil_C), cn.docker_base_dir, sensit_type)

    # For downloading all tiles in the input folders.
    input_files = [cn.mangrove_biomass_2000_dir]

    for input in input_files:
        uu.s3_folder_download(input, cn.docker_base_dir, sensit_type)

    # Download raw mineral soil C density tiles.
    # First tries to download index.html.tmp from every folder, then goes back and downloads all the tifs in each folder
    # Based on https://stackoverflow.com/questions/273743/using-wget-to-recursively-fetch-a-directory-with-arbitrary-files-in-it
    # There are 12951 tiles and it takes about 3 hours to download them!
    cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
                   '--accept', '*.tif', '{}'.format(cn.mineral_soil_C_url)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping mangrove soil C rasters...")
    cmd = ['unzip', '-j', cn.name_mangrove_soil_C, '-d', cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    # Mangrove soil receives precedence over mineral soil
    uu.print_log("Making mangrove soil C vrt...")
    check_call('gdalbuildvrt mangrove_soil_C.vrt *{}*.tif'.format(cn.pattern_mangrove_soil_C_raw), shell=True)
    uu.print_log("Done making mangrove soil C vrt")

    uu.print_log("Making mangrove soil C tiles...")

    if cn.count == 96:
        processes = 32   # 32 processors = 570 GB peak
    else:
        processes = int(cn.count/3)
    uu.print_log('Mangrove soil C max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_soil_C.create_mangrove_soil_C, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    # #     create_soil_C.create_mangrove_soil_C(tile_id, no_upload)

    uu.print_log('Done making mangrove soil C tiles', '\n')

    uu.print_log("Making mineral soil C vrt...")
    check_call('gdalbuildvrt mineral_soil_C.vrt *{}*'.format(cn.pattern_mineral_soil_C_raw), shell=True)
    uu.print_log("Done making mineral soil C vrt")

    # Creates mineral soil C density tiles
    source_raster = 'mineral_soil_C.vrt'
    out_pattern = cn.pattern_soil_C_full_extent_2000_non_mang
    dt = 'Int16'
    if cn.count == 96:
        processes = 80  # 32 processors = 100 GB peak; 50 = 160 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating mineral soil C density tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt,
                     no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_soil_C.create_mineral_soil_C(tile_id)

    uu.print_log("Done making non-mangrove soil C tiles", "\n")

    output_pattern = cn.pattern_soil_C_full_extent_2000_non_mang
    processes = 60 # 50 processors = ~450 GB peak; 60 = XXX GB peak
    uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
    pool.close()
    pool.join()

    # If no_upload flag is not activated, output is uploaded to s3
    if not no_upload:

        uu.print_log("Uploading non-mangrove soil C density tiles")
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])


    uu.print_log("Making combined (mangrove & non-mangrove) soil C tiles...")

    if cn.count == 96:
        processes = 45   # 45 processors = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log('Combined soil C max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_soil_C.create_combined_soil_C, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_list:
    #
    #     create_soil_C.create_combined_soil_C(tile_id, no_upload)

    uu.print_log("Done making combined soil C tiles")

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.print_log("Uploading combined soil C density tiles")
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])


    # Need to delete the soil C density rasters because they have the same pattern as the standard deviation rasters
    uu.print_log("Deleting raw soil C density rasters")
    c_stocks = glob.glob('*{}*'.format(cn.pattern_soil_C_full_extent_2000))
    for c_stock in c_stocks:
        os.remove(c_stock)


    ### Soil carbon density uncertainty

    # Separate directories for the 5% CI and 95% CI
    dir_CI05 = '{0}{1}'.format(cn.docker_base_dir, 'CI05/')
    dir_CI95 = '{0}{1}'.format(cn.docker_base_dir, 'CI95/')
    vrt_CI05 = 'mineral_soil_C_CI05.vrt'
    vrt_CI95 = 'mineral_soil_C_CI95.vrt'
    soil_C_stdev_global = 'soil_C_stdev.tif'

    # Download raw mineral soil C density 5% CI tiles
    # First tries to download index.html.tmp from every folder, then goes back and downloads all the tifs in each folder
    # Based on https://stackoverflow.com/questions/273743/using-wget-to-recursively-fetch-a-directory-with-arbitrary-files-in-it
    # Like soil C density rasters, there are 12951 tifs and they take about 3 hours to download.
    os.mkdir(dir_CI05)

    cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
                   '--directory-prefix={}'.format(dir_CI05),
                   '--accept', '*.tif', '{}'.format(cn.CI5_mineral_soil_C_url)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Making mineral soil C 5% CI vrt...")

    check_call('gdalbuildvrt {0} {1}*{2}*'.format(vrt_CI05, dir_CI05, cn.pattern_uncert_mineral_soil_C_raw), shell=True)
    uu.print_log("Done making mineral soil C CI05 vrt")

    # Download raw mineral soil C density 95% CI tiles
    # Like soil C density rasters, there are 12951 tifs and they take about 3 hours to download.
    os.mkdir(dir_CI95)

    cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
                   '--directory-prefix={}'.format(dir_CI95),
                   '--accept', '*.tif', '{}'.format(cn.CI95_mineral_soil_C_url)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Making mineral soil C 95% CI vrt...")

    check_call('gdalbuildvrt {0} {1}*{2}*'.format(vrt_CI95, dir_CI95, cn.pattern_uncert_mineral_soil_C_raw), shell=True)
    uu.print_log("Done making mineral soil C CI95 vrt")


    uu.print_log("Creating raster of standard deviations in soil C at native SoilGrids250 resolution. This may take a while...")
    # global tif with an approximation of the soil C standard deviation (based on the 5% and 95% CIs)

    # This takes about 20 minutes. It doesn't show any progress until the last moment, when it quickly counts
    # up to 100.
    calc = '--calc=(A-B)/3'
    out_filearg = '--outfile={}'.format(soil_C_stdev_global)
    cmd = ['gdal_calc.py', '-A', vrt_CI95, '-B', vrt_CI05, calc, out_filearg,
           '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type=Float32']
    uu.log_subprocess_output_full(cmd)

    uu.print_log("{} created.".format(soil_C_stdev_global))


    # Creates soil carbon 2000 density standard deviation tiles
    out_pattern = cn.pattern_stdev_soil_C_full_extent
    dt = 'Float32'
    source_raster = soil_C_stdev_global
    if cn.count == 96:
        processes = 56  # 32 processors = 290 GB peak; 56 = XXX GB peak
    else:
        processes = 2
    uu.print_log("Creating mineral soil C stock stdev tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt,
                     no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()


    output_pattern = cn.pattern_stdev_soil_C_full_extent
    processes = 50 # 50 processors = 550 GB peak
    uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
    pool.close()
    pool.join()


    # Checks the soil C standard deviation outputs for tiles with no data
    for output_pattern in output_pattern_list:
        if cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(
                output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = 55  # 50 processors = XXX GB peak
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()


    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.print_log("Uploading soil C density standard deviation tiles")
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])
# # For copying individual tiles to spot machine for testing
# for tile in tile_list:
#
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.annual_gain_AGB_planted_forest_non_mangrove_dir, tile, cn.pattern_annual_gain_AGB_planted_forest_non_mangrove), '.')      # annual AGB gain rate tiles
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.annual_gain_BGB_planted_forest_non_mangrove_dir, tile, cn.pattern_annual_gain_BGB_planted_forest_non_mangrove), '.')      # annual BGB gain rate tiles
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.gain_year_count_planted_forest_non_mangrove_dir, tile, cn.pattern_gain_year_count_planted_forest_non_mangrove), '.')      # number of years with gain tiles

count = multiprocessing.cpu_count()
pool = multiprocessing.Pool(int(count / 3))
# Calculates cumulative aboveground carbon gain in non-mangrove planted forests
pool.map(cumulative_gain_planted_forest.cumulative_gain_AGC, tile_list)

# Calculates cumulative belowground carbon gain in non-mangrove planted forests
pool.map(cumulative_gain_planted_forest.cumulative_gain_BGC, tile_list)
pool.close()
pool.join()

# # For single processor use
# for tile in tile_list:
#
#     cumulative_gain_planted_forest.cumulative_gain_AGC(tile)
#
# for tile in tile_list:
#
#     cumulative_gain_planted_forest.cumulative_gain_BGC(tile)

uu.upload_final_set(cn.cumul_gain_AGC_planted_forest_non_mangrove_dir,
                    cn.pattern_cumul_gain_AGC_planted_forest_non_mangrove)
uu.upload_final_set(cn.cumul_gain_BGC_planted_forest_non_mangrove_dir,
                    cn.pattern_cumul_gain_BGC_planted_forest_non_mangrove)
def main():

    no_upload = False

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # Files to download for this script.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults]
    }

    # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
    tile_id_list = uu.tile_list_s3(cn.annual_gain_AGB_IPCC_defaults_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
    # tile_id_list = ['50N_130W'] # test tiles

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.US_annual_gain_AGB_natrl_forest_dir,
        cn.US_annual_gain_BGB_natrl_forest_dir
    ]
    output_pattern_list = [
        cn.pattern_US_annual_gain_AGB_natrl_forest,
        cn.pattern_US_annual_gain_BGB_natrl_forest
    ]

    # By definition, this script is for US-specific removals
    sensit_type = 'US_removals'

    # Counts how many processed FIA region tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_regions_tile_count = uu.count_tiles_s3(cn.FIA_regions_processed_dir)

    # Only creates FIA region tiles if they don't already exist on s3.
    if FIA_regions_tile_count == 16:
        uu.print_log("FIA region tiles already created. Copying to s3 now...")
        uu.s3_flexible_download(cn.FIA_regions_processed_dir,
                                cn.pattern_FIA_regions_processed,
                                cn.docker_base_dir, 'std', 'all')

    else:
        uu.print_log(
            "FIA region tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw),
            cn.docker_base_dir, 'std')

        cmd = ['unzip', '-o', '-j', cn.name_FIA_regions_raw]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # Converts the region shapefile to Hansen tiles
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(US_removal_rates.prep_FIA_regions, tile_id_list)

    # List of FIA region tiles on the spot machine. Only this list is used for the rest of the script.
    US_tile_list = uu.tile_list_spot_machine(
        cn.docker_base_dir, '{}.tif'.format(cn.pattern_FIA_regions_processed))
    US_tile_id_list = [i[0:8] for i in US_tile_list]
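    # For example (hypothetical file name), '50N_130W_FIA_regions_processed.tif'[0:8] yields the tile id '50N_130W'.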
    # US_tile_id_list = ['50N_130W']    # For testing
    uu.print_log(US_tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(US_tile_id_list))) +
        "\n")

    # Counts how many processed forest age category tiles there are on s3 already. 16 tiles cover the continental US.
    US_age_tile_count = uu.count_tiles_s3(cn.US_forest_age_cat_processed_dir)

    # Only creates FIA forest age category tiles if they don't already exist on s3.
    if US_age_tile_count == 16:
        uu.print_log(
            "Forest age category tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.US_forest_age_cat_processed_dir,
                                cn.pattern_US_forest_age_cat_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "Southern forest age category tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.US_forest_age_cat_raw_dir,
                         cn.name_US_forest_age_cat_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest age category raster to Hansen tiles
        source_raster = cn.name_US_forest_age_cat_raw
        out_pattern = cn.pattern_US_forest_age_cat_processed
        dt = 'Int16'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), US_tile_id_list)

        uu.upload_final_set(cn.US_forest_age_cat_processed_dir,
                            cn.pattern_US_forest_age_cat_processed)

    # Counts how many processed FIA forest group tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_forest_group_tile_count = uu.count_tiles_s3(
        cn.FIA_forest_group_processed_dir)

    # Only creates FIA forest group tiles if they don't already exist on s3.
    if FIA_forest_group_tile_count == 16:
        uu.print_log(
            "FIA forest group tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.FIA_forest_group_processed_dir,
                                cn.pattern_FIA_forest_group_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "FIA forest group tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_forest_group_raw_dir,
                         cn.name_FIA_forest_group_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest group raster to Hansen forest group tiles
        source_raster = cn.name_FIA_forest_group_raw
        out_pattern = cn.pattern_FIA_forest_group_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), US_tile_id_list)

        uu.upload_final_set(cn.FIA_forest_group_processed_dir,
                            cn.pattern_FIA_forest_group_processed)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                US_tile_id_list)

    # Table with US-specific removal rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Imports the table with the region-group-age AGB removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_for_model")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    age_dict = {
        'growth_young': 1000,
        'growth_middle': 2000,
        'growth_old': 3000
    }

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    gain_table_group_region_age = gain_table_group_region_by_age.replace(
        {"variable": age_dict})
    gain_table_group_region_age[
        'age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                              gain_table_group_region_age['forest_group_code']*100 + \
                                              gain_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    gain_table_group_region_age_dict = pd.Series(
        gain_table_group_region_age.value.values,
        index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)
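    # Worked example with hypothetical values: FIA_region_code = 3, forest_group_code = 5, and variable = 'growth_young'
    # (mapped to 1000 by age_dict) gives age_cat = 1000 * 10 = 10000 and
    # group_region_age_combined = 10000 + 5*100 + 3 = 10503, which keys that combination's AGB removal rate.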

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[
            gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \
                                                       gain_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    gain_table_group_region_dict = pd.Series(
        gain_table_group_region.value.values,
        index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    # count/2 on an m4.16xlarge maxes out at about 230 GB of memory (processing 16 tiles at once), so it's okay on that instance type
    pool = multiprocessing.Pool(int(cn.count / 2))
    pool.map(
        partial(
            US_removal_rates.US_removal_rate_calc,
            gain_table_group_region_age_dict=gain_table_group_region_age_dict,
            gain_table_group_region_dict=gain_table_group_region_dict,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type), US_tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in US_tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_group_region_dict,
    #                                           output_pattern_list, sensit_type)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def mp_gain_year_count_all_forest_types(sensit_type,
                                        tile_id_list,
                                        run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script. 'true'/'false' says whether the input directory and pattern should be
    # changed for a sensitivity analysis. This does not need to change based on what run is being done;
    # this assignment should be true for all sensitivity analyses and the standard model.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.model_extent_dir: [cn.pattern_model_extent]
    }

    # Adds the correct loss tile to the download dictionary depending on the model run
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [
            cn.pattern_Brazil_annual_loss_processed
        ]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [
            cn.pattern_Mekong_loss_processed
        ]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    output_dir_list = [cn.gain_year_count_dir]
    output_pattern_list = [cn.pattern_gain_year_count]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # Creates gain year count tiles using only pixels that had only loss
    # count/3 maxes out at about 300 GB
    if cn.count == 96:
        processes = 90  # 66 = 310 GB peak; 75 = 380 GB peak; 90 = 480 GB peak
    else:
        processes = int(cn.count / 2)
    uu.print_log('Gain year count loss only pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            gain_year_count_all_forest_types.create_gain_year_count_loss_only,
            sensit_type=sensit_type), tile_id_list)

    if cn.count == 96:
        processes = 90  # 66 = 330 GB peak; 75 = 380 GB peak; 90 = 530 GB peak
    else:
        processes = int(cn.count / 2)
    uu.print_log('Gain year count gain only pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'maxgain':
        # Creates gain year count tiles using only pixels that had only gain
        pool.map(
            partial(gain_year_count_all_forest_types.
                    create_gain_year_count_gain_only_maxgain,
                    sensit_type=sensit_type), tile_id_list)
    elif sensit_type == 'legal_Amazon_loss':
        uu.print_log(
            "Gain-only pixels do not apply to legal_Amazon_loss sensitivity analysis. Skipping this step."
        )
    else:
        # Creates gain year count tiles using only pixels that had only gain
        pool.map(
            partial(gain_year_count_all_forest_types.
                    create_gain_year_count_gain_only_standard,
                    sensit_type=sensit_type), tile_id_list)

    # Creates gain year count tiles using only pixels that had neither loss nor gain
    if cn.count == 96:
        processes = 90  # 66 = 360 GB peak; 88 = 430 GB peak; 90 = 510 GB peak
    else:
        processes = int(cn.count / 2)
    uu.print_log('Gain year count no change pixels max processors=', processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'legal_Amazon_loss':
        pool.map(
            partial(gain_year_count_all_forest_types.
                    create_gain_year_count_no_change_legal_Amazon_loss,
                    sensit_type=sensit_type), tile_id_list)
    else:
        pool.map(
            partial(gain_year_count_all_forest_types.
                    create_gain_year_count_no_change_standard,
                    sensit_type=sensit_type), tile_id_list)

    if cn.count == 96:
        processes = 90  # 66 = 370 GB peak; 88 = 430 GB peak; 90 = 550 GB peak
    else:
        processes = int(cn.count / 2)
    uu.print_log('Gain year count loss & gain pixels max processors=',
                 processes)
    pool = multiprocessing.Pool(processes)
    if sensit_type == 'maxgain':
        # Creates gain year count tiles using only pixels that had both loss and gain
        pool.map(
            partial(gain_year_count_all_forest_types.
                    create_gain_year_count_loss_and_gain_maxgain,
                    sensit_type=sensit_type), tile_id_list)
    else:
        # Creates gain year count tiles using only pixels that had both loss and gain
        pool.map(
            partial(gain_year_count_all_forest_types.
                    create_gain_year_count_loss_and_gain_standard,
                    sensit_type=sensit_type), tile_id_list)

    # Combines the four above gain year count tiles for each Hansen tile into a single output tile
    if cn.count == 96:
        processes = 84  # 28 processors = 220 GB peak; 62 = 470 GB peak; 78 = 600 GB peak; 80 = 620 GB peak; 84 = XXX GB peak
    elif cn.count < 4:
        processes = 1
    else:
        processes = int(cn.count / 4)
    uu.print_log('Gain year count gain merge all combos max processors=',
                 processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(gain_year_count_all_forest_types.create_gain_year_count_merge,
                pattern=pattern,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     gain_year_count_all_forest_types.create_gain_year_count_loss_only(tile_id)
    #
    # for tile_id in tile_id_list:
    #     if sensit_type == 'maxgain':
    #         gain_year_count_all_forest_types.create_gain_year_count_gain_only_maxgain(tile_id)
    #     else:
    #         gain_year_count_all_forest_types.create_gain_year_count_gain_only_standard(tile_id)
    #
    # for tile_id in tile_id_list:
    #     gain_year_count_all_forest_types.create_gain_year_count_no_change_standard(tile_id)
    #
    # for tile_id in tile_id_list:
    #     if sensit_type == 'maxgain':
    #         gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_maxgain(tile_id)
    #     else:
    #         gain_year_count_all_forest_types.create_gain_year_count_loss_and_gain_standard(tile_id)
    #
    # for tile_id in tile_id_list:
    #     gain_year_count_all_forest_types.create_gain_year_count_merge(tile_id, pattern, sensit_type)

    # Intermediate output tiles for checking outputs
    uu.upload_final_set(output_dir_list[0], "growth_years_loss_only")
    uu.upload_final_set(output_dir_list[0], "growth_years_gain_only")
    uu.upload_final_set(output_dir_list[0], "growth_years_no_change")
    uu.upload_final_set(output_dir_list[0], "growth_years_loss_and_gain")

    # This is the final output used later in the model
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
pool.join()

# # For single processor use
# for tile in tile_list:
#     gain_year_count_natrl_forest.create_gain_year_count_loss_only(tile)
#
# for tile in tile_list:
#     gain_year_count_natrl_forest.create_gain_year_count_gain_only(tile)
#
# for tile in tile_list:
#     gain_year_count_natrl_forest.create_gain_year_count_no_change(tile)
#
# for tile in tile_list:
#     gain_year_count_natrl_forest.create_gain_year_count_loss_and_gain(tile)
#
# for tile in tile_list:
#     gain_year_count_natrl_forest.create_gain_year_count_merge(tile)

# Intermediate output tiles for checking outputs
uu.upload_final_set(cn.gain_year_count_natrl_forest_dir,
                    "growth_years_loss_only")
uu.upload_final_set(cn.gain_year_count_natrl_forest_dir,
                    "growth_years_gain_only")
uu.upload_final_set(cn.gain_year_count_natrl_forest_dir,
                    "growth_years_no_change")
uu.upload_final_set(cn.gain_year_count_natrl_forest_dir,
                    "growth_years_loss_and_gain")

# This is the final output used later in the model
uu.upload_final_set(cn.gain_year_count_natrl_forest_dir,
                    cn.pattern_gain_year_count_natrl_forest)
def mp_burn_year(tile_id_list, run_date = None, no_upload = None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.burn_year_dir]
    output_pattern_list = [cn.pattern_burn_year]

    # A date can optionally be provided.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    global_grid_hv = ["h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10", "h01v11", "h02v06",
                      "h02v08", "h02v09", "h02v10", "h02v11", "h03v06", "h03v07", "h03v09", "h03v10", "h03v11",
                      "h04v09", "h04v10", "h04v11", "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03",
                      "h07v05", "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07", "h08v08",
                      "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05", "h09v06", "h09v07", "h09v08",
                      "h09v09", "h10v02", "h10v03", "h10v04", "h10v05", "h10v06", "h10v07", "h10v08", "h10v09",
                      "h10v10", "h10v11", "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08",
                      "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04", "h12v05", "h12v07",
                      "h12v08", "h12v09", "h12v10", "h12v11", "h12v12", "h12v13", "h13v02", "h13v03", "h13v04",
                      "h13v08", "h13v09", "h13v10", "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03",
                      "h14v04", "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05", "h15v07",
                      "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08", "h16v09", "h17v02", "h17v03",
                      "h17v04", "h17v05", "h17v06", "h17v07", "h17v08", "h17v10", "h17v12", "h17v13", "h18v02",
                      "h18v03", "h18v04", "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03",
                      "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10", "h19v11", "h19v12",
                      "h20v02", "h20v03", "h20v04", "h20v05", "h20v06", "h20v07", "h20v08", "h20v09", "h20v10",
                      "h20v11", "h20v12", "h20v13", "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07",
                      "h21v08", "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04", "h22v05",
                      "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11", "h22v13", "h23v02", "h23v03",
                      "h23v04", "h23v05", "h23v06", "h23v07", "h23v08", "h23v09", "h23v10", "h23v11", "h24v02",
                      "h24v03", "h24v04", "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04",
                      "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03", "h26v04", "h26v05",
                      "h26v06", "h26v07", "h26v08", "h27v03", "h27v04", "h27v05", "h27v06", "h27v07", "h27v08",
                      "h27v09", "h27v10", "h27v11", "h27v12", "h28v03", "h28v04", "h28v05", "h28v06", "h28v07",
                      "h28v08", "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05", "h29v06",
                      "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12", "h29v13", "h30v06", "h30v07",
                      "h30v08", "h30v09", "h30v10", "h30v11", "h30v12", "h30v13", "h31v06", "h31v07", "h31v08",
                      "h31v09", "h31v10", "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10",
                      "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11", "h34v07", "h34v08",
                      "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"]


    # Step 1: download hdf files for relevant year(s) from sftp site.
    # This only needs to be done for the most recent year of data.

    '''
    Downloading the hdf files from the sftp burned area site is done outside the script in the sftp shell on the command line.
    This will download all the 2020 hdfs to the spot machine. It will take a few minutes before the first
    hdf is downloaded but then it should go quickly.
    Change 2020 to the relevant year for future downloads.
    https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.3.pdf, page 24, section 4.1.3

    sftp [email protected]
    [For password] burnt
    cd data/MODIS/C6/MCD64A1/HDF
    ls [to check that it's the folder with all the tile folders]
    get h??v??/MCD64A1.A2020*
    bye    //exits the sftp shell
    '''

    # Uploads the latest year of raw burn area hdfs to s3.
    # All hdfs go in this folder
    cmd = ['aws', 's3', 'cp', '{0}/burn_date/'.format(cn.docker_app), cn.burn_year_hdf_raw_dir, '--recursive', '--exclude', '*', '--include', '*hdf']
    uu.log_subprocess_output_full(cmd)


    # Step 2:
    # Makes burned area rasters for each year for each MODIS horizontal-vertical tile.
    # This only needs to be done for the most recent year of data (set in stack_ba_hv).
    uu.print_log("Stacking hdf into MODIS burned area tifs by year and MODIS hv tile...")

    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count - 10)
    pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv)
    pool.close()
    pool.join()

    # # For single processor use
    # for hv_tile in global_grid_hv:
    #     stack_ba_hv.stack_ba_hv(hv_tile)


    # Step 3:
    # Creates a 10x10 degree wgs 84 tile of .00025 res burned year.
    # Downloads all MODIS hv tiles from s3,
    # makes a mosaic for each year, and warps to Hansen extent.
    # Range is inclusive at lower end and exclusive at upper end (e.g., 2001, 2021 goes from 2001 to 2020).
    # This only needs to be done for the most recent year of data.
    # NOTE: The first time I ran this for the 2020 TCL update, I got an error about uploading the log to s3
    # after most of the tiles were processed. I didn't know why it happened, so I reran the step and it went fine.
    for year in range(2020, 2021):

        uu.print_log("Processing", year)

        # Downloads all hv tifs for this year
        include = '{0}_*.tif'.format(year)
        year_tifs_folder = "{}_year_tifs".format(year)
        utilities.makedir(year_tifs_folder)

        uu.print_log("Downloading MODIS burn date files from s3...")

        cmd = ['aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir, year_tifs_folder]
        cmd += ['--recursive', '--exclude', "*", '--include', include]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Creating vrt of MODIS files...")

        vrt_name = "global_vrt_{}.vrt".format(year)

        # Builds list of vrt files
        with open('vrt_files.txt', 'w') as vrt_files:
            vrt_tifs = glob.glob(year_tifs_folder + "/*.tif")
            for tif in vrt_tifs:
                vrt_files.write(tif + "\n")

        # Creates vrt with wgs84 MODIS tiles.
        cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Reprojecting vrt...")

        # Builds a new vrt and virtually reprojects it.
        # This reprojection could be done as part of the clip_year_tiles function, but Sam had it out here like this,
        # so I'm leaving it that way.
        vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)
        cmd = ['gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr', '.00025', '.00025', '-overwrite',
               vrt_name, vrt_wgs84]
        uu.log_subprocess_output_full(cmd)

        # Creates a list of lists, with year and tile id to send to multi processor
        tile_year_list = []
        for tile_id in tile_id_list:
            tile_year_list.append([tile_id, year])
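        # e.g., for a hypothetical tile_id_list of ['00N_000E', '00N_010E'] and year 2020,
        # tile_year_list = [['00N_000E', 2020], ['00N_010E', 2020]]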

        # Given a list of [tile_id, year] pairs (e.g., ['00N_000E', 2017]) and the global burn VRT
        # (whose pixels represent burned or not burned), this process clips the global VRT to each tile
        # and changes the pixel value to the year the pixel burned. Each output tile contains the burn
        # year and NoData.
        count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=count-5)
        pool.map(partial(clip_year_tiles.clip_year_tiles, no_upload=no_upload), tile_year_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_year in tile_year_list:
        #     clip_year_tiles.clip_year_tiles(tile_year, no_upload)

        uu.print_log("Processing for {} done. Moving to next year.".format(year))

    # Step 4:
    # Creates a single Hansen tile covering all years that represents where burning coincided with tree cover loss
    # or preceded TCL by one year.
    # This needs to be done on all years each time burned area is updated.

    # Downloads the loss tiles
    uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss)

    uu.print_log("Extracting burn year data that coincides with tree cover loss...")

    # Downloads the 10x10 deg burn year tiles (1 for each year in which there was burned area), stack and evaluate
    # to return burn year values on hansen loss pixels within 1 year of loss date
    if cn.count == 96:
        processes = 5
        # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory)
    else:
        processes = 1
    pool = multiprocessing.Pool(processes)
    pool.map(partial(hansen_burnyear_final.hansen_burnyear, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     hansen_burnyear_final.hansen_burnyear(tile_id, no_upload)


    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
def mp_aggregate_results_to_4_km(sensit_type,
                                 thresh,
                                 tile_id_list,
                                 std_net_flux=None,
                                 run_date=None,
                                 no_upload=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # Checks whether the canopy cover argument is valid
    if thresh < 0 or thresh > 99:
        uu.exception_log(
            no_upload,
            'Invalid tcd. Please provide an integer between 0 and 99.')

    if uu.check_aws_creds():

        # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
        uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent
        uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                                sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                                cn.pattern_mangrove_biomass_2000,
                                cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Model outputs to process are:", download_dict)

    # List of output directories. Modified later for sensitivity analysis.
    # Output pattern is determined later.
    output_dir_list = [cn.output_aggreg_dir]

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through the types of tiles to be processed
    for dir, download_pattern in list(download_dict.items()):

        download_pattern_name = download_pattern[0]

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
        if uu.check_aws_creds():

            uu.s3_flexible_download(dir, download_pattern_name,
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

        # Gets an actual tile id to use as a dummy in creating the actual tile pattern
        local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir,
                                                    download_pattern_name)
        sample_tile_id = uu.get_tile_id(local_tile_list[0])

        # Renames the tiles according to the sensitivity analysis before creating dummy tiles.
        # The renaming function requires a whole tile name, so this passes a dummy tile name that is then stripped a few
        # lines later.
        tile_id = sample_tile_id  # a dummy tile id (but it has to be a real tile id). It is removed later.
        output_pattern = uu.sensit_tile_rename(sensit_type, tile_id,
                                               download_pattern_name)
        pattern = output_pattern[9:-4]
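        # e.g., for a hypothetical tile name '00N_000E_net_flux.tif', [9:-4] strips the 9-character '00N_000E_'
        # prefix and the '.tif' extension, leaving just the pattern ('net_flux').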

        # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis
        if (sensit_type != 'std') & (sensit_type not in pattern):
            uu.print_log(
                "{} not a sensitivity analysis output. Skipping aggregation..."
                .format(pattern))
            uu.print_log("")

            continue

        # Lists the tiles of the particular type that is being iterated through.
        # Excludes all intermediate files
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [i for i in tile_list if not ('hanson_2013' in i)]
        tile_list = [i for i in tile_list if not ('rewindow' in i)]
        tile_list = [i for i in tile_list if not ('0_4deg' in i)]
        tile_list = [i for i in tile_list if not ('.ovr' in i)]

        # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif']  # test tiles

        uu.print_log("There are {0} tiles to process for pattern {1}".format(
            str(len(tile_list)), download_pattern) + "\n")
        uu.print_log("Processing:", dir, "; ", pattern)

        # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 400x400 pixels,
        # which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed.
        # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12  # 12 processors = XXX GB peak
            else:
                processes = 16  # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload),
            tile_list)
        # Added these in response to 'errno 12: Cannot allocate memory' errors.
        # This fix was mentioned here: https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.rewindow(tile, no_upload)
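        # A minimal sketch (assumption, not the actual rewindow implementation) of what this step amounts to:
        # retiling a strip-blocked GeoTIFF into 400x400-pixel internal blocks so 400x400 windows can be read
        # efficiently. The file names here are hypothetical.
        #
        # cmd = ['gdal_translate', '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=400', '-co', 'BLOCKYSIZE=400',
        #        '-co', 'COMPRESS=LZW', '00N_000E_net_flux.tif', '00N_000E_net_flux_rewindow.tif']
        # uu.log_subprocess_output_full(cmd)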

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
        # and sums those values in each 400x400 pixel window.
        # The sum for each 400x400 pixel window is stored in a 2D array, which is then converted back into a raster at
        # 0.1x0.1 degree resolution (approximately 10 km in the tropics).
        # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha).
        # The 0.1x0.1 degree tile is output.
        # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 12  # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Conversion to per pixel and aggregate max processors=',
                     processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.aggregate,
                    thresh=thresh,
                    sensit_type=sensit_type,
                    no_upload=no_upload), tile_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)
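        # A minimal sketch (assumption, not the actual aggregate implementation) of converting per-hectare values
        # to per-pixel values and summing 400x400-pixel windows with numpy. File names are hypothetical and the
        # pixel area raster is assumed to be in m2 (10000 m2 = 1 ha).
        #
        # import numpy as np
        # import rasterio
        # with rasterio.open('00N_000E_net_flux_rewindow.tif') as src, \
        #      rasterio.open('00N_000E_pixel_area.tif') as area:
        #     per_pixel = src.read(1) * area.read(1) / 10000
        # h, w = per_pixel.shape
        # window_sums = per_pixel.reshape(h // 400, 400, w // 400, 400).sum(axis=(1, 3))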

        # Makes a vrt of all the output 10x10 degree tiles (0.04 degree resolution)
        out_vrt = "{}_0_4deg.vrt".format(pattern)
        os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format(
            out_vrt, pattern))

        # Creates the output name for the aggregated 0.04 degree map
        out_pattern = uu.name_aggregated_output(download_pattern_name, thresh,
                                                sensit_type)
        uu.print_log(out_pattern)

        # Produces a single raster of all the 10x10 degree tiles (0.04 degree resolution)
        cmd = [
            'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0',
            '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt,
            '{}.tif'.format(out_pattern)
        ]
        uu.log_subprocess_output_full(cmd)

        # Adds metadata tags to output rasters
        uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern),
                                       sensit_type)

        # Units are different for annual removal factor, so metadata has to reflect that
        if 'annual_removal_factor' in out_pattern:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'scale=negative values are removals', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation)'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        else:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation)'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:

            uu.print_log("Tiles processed. Uploading to s3 now...")
            uu.upload_final_set(output_dir_list[0], out_pattern)

        # Cleans up the folder before starting on the next raster type
        vrtList = glob.glob('*vrt')
        for vrt in vrtList:
            os.remove(vrt)

        for tile_name in tile_list:
            tile_id = uu.get_tile_id(tile_name)
            # os.remove('{0}_{1}.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern))

    # Compares the net flux from the standard model and the sensitivity analysis in two ways.
    # This does not work for comparing the raw outputs of the biomass_swap and US_removals sensitivity models because their
    # extents are different from the standard model's extent (tropics and US tiles vs. global).
    # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to
    # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000.
    # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the
    # code below should work.
    if sensit_type not in [
            'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss'
    ]:

        if std_net_flux:

            uu.print_log(
                "Standard aggregated flux results provided. Creating comparison maps."
            )

            # Downloads the standard model aggregation output from s3. Only net flux is used, though.
            uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type)

            # Identifies the standard model net flux map
            std_aggreg_flux = os.path.split(std_net_flux)[1]

            try:
                # Identifies the sensitivity model net flux map
                sensit_aggreg_flux = glob.glob(
                    'net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0]

                uu.print_log("Standard model net flux:", std_aggreg_flux)
                uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux)

            except:
                uu.print_log(
                    'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.'
                )

            uu.print_log(
                "Creating map of percent difference between standard and {} net flux"
                .format(sensit_type))
            aggregate_results_to_4_km.percent_diff(std_aggreg_flux,
                                                   sensit_aggreg_flux,
                                                   sensit_type, no_upload)

            uu.print_log(
                "Creating map of which pixels change sign and which stay the same between standard and {}"
                .format(sensit_type))
            aggregate_results_to_4_km.sign_change(std_aggreg_flux,
                                                  sensit_aggreg_flux,
                                                  sensit_type, no_upload)

            # If no_upload flag is not activated, output is uploaded
            if not no_upload:

                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_perc_diff)
                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_sign_change)

        else:

            uu.print_log(
                "No standard aggregated flux results provided. Not creating comparison maps."
            )
Example #15
0
def mp_gross_removals_all_forest_types(sensit_type,
                                       tile_id_list,
                                       run_date=None,
                                       no_upload=True):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        # tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)
        gain_year_count_tile_id_list = uu.tile_list_s3(cn.gain_year_count_dir,
                                                       sensit_type=sensit_type)
        annual_removals_tile_id_list = uu.tile_list_s3(
            cn.annual_gain_AGC_all_types_dir, sensit_type=sensit_type)
        tile_id_list = list(
            set(gain_year_count_tile_id_list).intersection(
                annual_removals_tile_id_list))
        uu.print_log(
            "Gross removals tile_id_list is combination of gain_year_count and annual_removals tiles:"
        )

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.annual_gain_BGC_all_types_dir:
        [cn.pattern_annual_gain_BGC_all_types],
        cn.gain_year_count_dir: [cn.pattern_gain_year_count]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.cumul_gain_AGCO2_all_types_dir, cn.cumul_gain_BGCO2_all_types_dir,
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_all_types,
        cn.pattern_cumul_gain_BGCO2_all_types,
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Calculates gross removals
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 18
        else:
            processes = 22  # 50 processors > 740 GB peak; 25 = >740 GB peak; 15 = 490 GB peak; 20 = 590 GB peak; 22 = 710 GB peak
    else:
        processes = 2
    uu.print_log('Gross removals max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            gross_removals_all_forest_types.gross_removals_all_forest_types,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type,
            no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     gross_removals_all_forest_types.gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, no_upload)

    # Checks the gross removals outputs for tiles with no data
    for output_pattern in output_pattern_list:
        if cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty_light,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = 55  # 55 processors = 670 GB peak
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        for i in range(0, len(output_dir_list)):
            uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
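
Each of these mp_* scripts parallelizes the same way: the per-tile worker takes the tile ID as its first positional argument, every other argument is bound with functools.partial, and the list of tile IDs is handed to multiprocessing.Pool.map. A minimal, self-contained sketch of that idiom follows; process_tile and its arguments are hypothetical stand-ins, not part of the model code.

import multiprocessing
from functools import partial

def process_tile(tile_id, output_pattern, sensit_type):
    # Stand-in for a per-tile worker such as gross_removals_all_forest_types:
    # the tile ID arrives via pool.map, everything else is pre-bound by partial
    return '{0}_{1}_{2}.tif'.format(tile_id, output_pattern, sensit_type)

if __name__ == '__main__':
    tile_ids = ['00N_000E', '00N_010E', '10S_060W']
    worker = partial(process_tile, output_pattern='example_pattern', sensit_type='std')
    pool = multiprocessing.Pool(2)
    results = pool.map(worker, tile_ids)
    pool.close()
    pool.join()
    print(results)
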
def mp_create_inputs_for_C_pools(tile_id_list, run_date=None, no_upload=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.bor_tem_trop_processed_dir, cn.elevation_processed_dir,
        cn.precip_processed_dir
    ]
    output_pattern_list = [
        cn.pattern_bor_tem_trop_processed, cn.pattern_elevation,
        cn.pattern_precip
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads two of the raw input files for creating carbon pools
    input_files = [cn.fao_ecozone_raw_dir, cn.precip_raw_dir]

    for input_file in input_files:
        uu.s3_file_download(input_file, cn.docker_base_dir, sensit_type)

    uu.print_log(
        "Unzipping boreal/temperate/tropical file (from FAO ecozones)")
    cmd = [
        'unzip', '{}'.format(cn.pattern_fao_ecozone_raw), '-d',
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Copying elevation (srtm) files")
    uu.s3_folder_download(cn.srtm_raw_dir, './srtm', sensit_type)

    uu.print_log("Making elevation (srtm) vrt")
    check_call(
        'gdalbuildvrt srtm.vrt srtm/*.tif', shell=True
    )  # Output isn't piped to the log like other subprocess calls; a piped alternative is sketched after this function

    # Worked with count/3 on an r4.16xlarge (140 out of 480 GB used). I think it should be fine with count/2 but didn't try it.
    processes = int(cn.count / 2)
    uu.print_log('Inputs for C pools max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(create_inputs_for_C_pools.create_input_files,
                no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_inputs_for_C_pools.create_input_files(tile_id, no_upload)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.print_log("Uploading output files")
        for i in range(0, len(output_dir_list)):
            uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
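
The gdalbuildvrt call in the function above runs through check_call with shell=True, so its output never reaches the log. A hedged alternative, assuming uu.log_subprocess_output reads and logs a pipe the way the commented-out wget example in mp_prep_other_inputs below uses it, is to expand the wildcard in Python and stream the command's output:

import glob
from subprocess import Popen, PIPE, STDOUT

# The list form of Popen does no shell globbing, so the wildcard is expanded in Python first
srtm_tifs = sorted(glob.glob('srtm/*.tif'))
cmd = ['gdalbuildvrt', 'srtm.vrt'] + srtm_tifs
process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
with process.stdout:
    uu.log_subprocess_output(process.stdout)
process.wait()
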
def mp_calculate_gross_emissions(sensit_type,
                                 tile_id_list,
                                 emitted_pools,
                                 run_date=None,
                                 no_upload=None):

    os.chdir(cn.docker_base_dir)

    folder = cn.docker_base_dir

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    # If the tile_list argument is an s3 folder, the list of tiles in it is created
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.AGC_emis_year_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year],
        cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year],
        cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000],
        cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000],
        cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000],
        cn.peat_mask_dir: [cn.pattern_peat_mask],
        cn.ifl_primary_processed_dir: [cn.pattern_ifl_primary],
        cn.planted_forest_type_unmasked_dir:
        [cn.pattern_planted_forest_type_unmasked],
        cn.drivers_processed_dir: [cn.pattern_drivers],
        cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        cn.burn_year_dir: [cn.pattern_burn_year]
    }

    # Special loss tiles for the Brazil and Mekong sensitivity analyses
    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_annual_loss_processed_dir] = [
            cn.pattern_Brazil_annual_loss_processed
        ]
    elif sensit_type == 'Mekong_loss':
        download_dict[cn.Mekong_loss_processed_dir] = [
            cn.pattern_Mekong_loss_processed
        ]
    else:
        download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Checks the validity of the emitted_pools argument
    if emitted_pools not in ['soil_only', 'biomass_soil']:
        uu.exception_log(
            no_upload,
            'Invalid pool input. Please choose soil_only or biomass_soil.')

    # Checks whether the correct C++ program has been compiled for the pool option selected
    if emitted_pools == 'biomass_soil':

        # Output file directories for biomass+soil. Must be in same order as output pattern directories.
        output_dir_list = [
            cn.gross_emis_commod_biomass_soil_dir,
            cn.gross_emis_shifting_ag_biomass_soil_dir,
            cn.gross_emis_forestry_biomass_soil_dir,
            cn.gross_emis_wildfire_biomass_soil_dir,
            cn.gross_emis_urban_biomass_soil_dir,
            cn.gross_emis_no_driver_biomass_soil_dir,
            cn.gross_emis_all_gases_all_drivers_biomass_soil_dir,
            cn.gross_emis_co2_only_all_drivers_biomass_soil_dir,
            cn.gross_emis_non_co2_all_drivers_biomass_soil_dir,
            cn.gross_emis_nodes_biomass_soil_dir
        ]

        output_pattern_list = [
            cn.pattern_gross_emis_commod_biomass_soil,
            cn.pattern_gross_emis_shifting_ag_biomass_soil,
            cn.pattern_gross_emis_forestry_biomass_soil,
            cn.pattern_gross_emis_wildfire_biomass_soil,
            cn.pattern_gross_emis_urban_biomass_soil,
            cn.pattern_gross_emis_no_driver_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil,
            cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil,
            cn.pattern_gross_emis_nodes_biomass_soil
        ]

        # Some sensitivity analyses have specific gross emissions scripts.
        # The rest of the sensitivity analyses and the standard model can all use the same, generic gross emissions script.
        if sensit_type in ['no_shifting_ag', 'convert_to_grassland']:
            # if os.path.exists('../carbon-budget/emissions/cpp_util/calc_gross_emissions_{}.exe'.format(sensit_type)):
            if os.path.exists('{0}/calc_gross_emissions_{1}.exe'.format(
                    cn.c_emis_compile_dst, sensit_type)):
                uu.print_log(
                    "C++ for {} already compiled.".format(sensit_type))
            else:
                uu.exception_log(
                    no_upload,
                    'Must compile {} model C++...'.format(sensit_type))
        else:
            if os.path.exists('{0}/calc_gross_emissions_generic.exe'.format(
                    cn.c_emis_compile_dst)):
                uu.print_log("C++ for generic emissions already compiled.")
            else:
                uu.exception_log(no_upload,
                                 'Must compile generic emissions C++...')

    elif emitted_pools == 'soil_only' and sensit_type == 'std':
        if os.path.exists('{0}/calc_gross_emissions_soil_only.exe'.format(
                cn.c_emis_compile_dst)):
            uu.print_log("C++ for soil_only already compiled.")

            # Output file directories for soil_only. Must be in same order as output pattern directories.
            output_dir_list = [
                cn.gross_emis_commod_soil_only_dir,
                cn.gross_emis_shifting_ag_soil_only_dir,
                cn.gross_emis_forestry_soil_only_dir,
                cn.gross_emis_wildfire_soil_only_dir,
                cn.gross_emis_urban_soil_only_dir,
                cn.gross_emis_no_driver_soil_only_dir,
                cn.gross_emis_all_gases_all_drivers_soil_only_dir,
                cn.gross_emis_co2_only_all_drivers_soil_only_dir,
                cn.gross_emis_non_co2_all_drivers_soil_only_dir,
                cn.gross_emis_nodes_soil_only_dir
            ]

            output_pattern_list = [
                cn.pattern_gross_emis_commod_soil_only,
                cn.pattern_gross_emis_shifting_ag_soil_only,
                cn.pattern_gross_emis_forestry_soil_only,
                cn.pattern_gross_emis_wildfire_soil_only,
                cn.pattern_gross_emis_urban_soil_only,
                cn.pattern_gross_emis_no_driver_soil_only,
                cn.pattern_gross_emis_all_gases_all_drivers_soil_only,
                cn.pattern_gross_emis_co2_only_all_drivers_soil_only,
                cn.pattern_gross_emis_non_co2_all_drivers_soil_only,
                cn.pattern_gross_emis_nodes_soil_only
            ]

        else:
            uu.exception_log(no_upload, 'Must compile soil_only C++...')

    else:
        uu.exception_log(no_upload,
                         'Pool and/or sensitivity analysis option not valid')

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    uu.print_log(output_dir_list)
    uu.print_log(output_pattern_list)

    # The C++ code expects certain tiles for every input 10x10.
    # However, not all Hansen tiles have all of these inputs.
    # This function creates "dummy" tiles for all Hansen tiles that currently have non-existent tiles.
    # That way, the C++ script gets all the necessary input files.
    # If it doesn't get the necessary inputs, it skips that tile.
    uu.print_log("Making blank tiles for inputs that don't currently exist")
    # All of the inputs that need dummy tiles made so that they match the tile list of the carbon pools
    pattern_list = [
        cn.pattern_planted_forest_type_unmasked, cn.pattern_peat_mask,
        cn.pattern_ifl_primary, cn.pattern_drivers,
        cn.pattern_bor_tem_trop_processed, cn.pattern_burn_year,
        cn.pattern_climate_zone, cn.pattern_soil_C_emis_year_2000
    ]

    # Creates a text file that stores the names of the blank tiles made for processing.
    # This will be iterated through to delete the tiles at the end of the script.
    uu.create_blank_tile_txt()

    for pattern in pattern_list:
        pool = multiprocessing.Pool(
            processes=80)  # 60 = 100 GB peak; 80 = XXX GB peak
        pool.map(
            partial(uu.make_blank_tile,
                    pattern=pattern,
                    folder=folder,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

    # # For single processor use
    # for pattern in pattern_list:
    #     for tile in tile_id_list:
    #         uu.make_blank_tile(tile, pattern, folder, sensit_type)

    # Calculates gross emissions for each tile
    # count/4 uses about 390 GB on a r4.16xlarge spot machine.
    # processes=18 uses about 440 GB on an r4.16xlarge spot machine.
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 15  # 15 processors = XXX GB peak
        else:
            processes = 19  # 17 = 650 GB peak; 18 = 677 GB peak; 19 = 716 GB peak
    else:
        processes = 9
    uu.print_log('Gross emissions max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(calculate_gross_emissions.calc_emissions,
                emitted_pools=emitted_pools,
                sensit_type=sensit_type,
                folder=folder,
                no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #       calculate_gross_emissions.calc_emissions(tile, emitted_pools, sensit_type, folder, no_upload)

    # Print the list of blank created tiles, delete the tiles, and delete their text file
    uu.list_and_delete_blank_tiles()

    for i in range(0, len(output_pattern_list)):
        pattern = output_pattern_list[i]

        uu.print_log("Adding metadata tags for pattern {}".format(pattern))

        if cn.count == 96:
            processes = 75  # 45 processors = ~30 GB peak; 55 = XXX GB peak; 75 = XXX GB peak
        else:
            processes = 9
        uu.print_log('Adding metadata tags max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(calculate_gross_emissions.add_metadata_tags,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # for tile_id in tile_id_list:
        #     calculate_gross_emissions.add_metadata_tags(tile_id, pattern, sensit_type)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        for i in range(0, len(output_dir_list)):
            uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
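
uu.make_blank_tile in the function above fills in missing inputs so the C++ executable sees a complete set of rasters for every tile. A rough sketch of how such a dummy tile could be written with rasterio, assuming the tile ID names the upper-left corner of a 10x10 degree tile; the real helper and the full-resolution tile size may differ, and a small grid is used here so the example runs quickly.

import numpy as np
import rasterio
from rasterio.transform import from_origin

def make_blank_tile_sketch(tile_id, pattern, folder, size=400):
    # Parse the upper-left corner from a tile ID such as '00N_110E' (assumed convention)
    lat, lon = tile_id.split('_')
    ymax = int(lat[:-1]) * (1 if lat[-1] == 'N' else -1)
    xmin = int(lon[:-1]) * (1 if lon[-1] == 'E' else -1)
    transform = from_origin(xmin, ymax, 10.0 / size, 10.0 / size)
    profile = {'driver': 'GTiff', 'height': size, 'width': size, 'count': 1,
               'dtype': 'uint8', 'crs': 'EPSG:4326', 'transform': transform, 'nodata': 0}
    out_name = '{0}/{1}_{2}.tif'.format(folder, tile_id, pattern)
    # Write a single all-zero band so downstream code finds a file with the expected footprint
    with rasterio.open(out_name, 'w', **profile) as dst:
        dst.write(np.zeros((size, size), dtype='uint8'), 1)
    return out_name
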
Example #18
0
def mp_prep_other_inputs(tile_id_list, run_date, no_upload=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        ### BUG: THIS SHOULD ALSO INCLUDE cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir IN ITS LIST
        tile_id_list = uu.create_combined_tile_list(
            cn.WHRC_biomass_2000_unmasked_dir,
            cn.mangrove_biomass_2000_dir,
            set3=cn.gain_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")
    '''
    Before processing the driver, it needs to be reprojected from Goode Homolosine to WGS84. 
    gdal_warp was producing a weird output, so I did it in ArcMap for the 2020 update,
    with the output cell size being 0.005 x 0.005 degree and the method being nearest.
    
    arcpy.ProjectRaster_management(in_raster="C:/GIS/Drivers of loss/2020_drivers__tif__from_Forrest_Follett_20210323/FinalClassification_2020_v2__from_Jimmy_MacCarthy_20210323.tif", 
    out_raster="C:/GIS/Drivers of loss/2020_drivers__tif__from_Forrest_Follett_20210323/Final_Classification_2020__reproj_nearest_0-005_0-005_deg__20210323.tif", 
    out_coor_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]", 
    resampling_type="NEAREST", cell_size="0.005 0.005", geographic_transform="", 
    Registration_Point="", 
    in_coor_system="PROJCS['WGS_1984_Goode_Homolosine',GEOGCS['GCS_unknown',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Goode_Homolosine'],PARAMETER['False_Easting',0.0],PARAMETER['False_Northing',0.0],PARAMETER['Central_Meridian',0.0],PARAMETER['Option',1.0],UNIT['Meter',1.0]]", 
    vertical="NO_VERTICAL")
    '''

    # List of output directories and output file name patterns
    output_dir_list = [
        # cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
        cn.drivers_processed_dir
        # cn.ifl_primary_processed_dir,
        # cn.annual_gain_AGC_natrl_forest_young_dir,
        # cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
        # cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
        # cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
        # cn.FIA_forest_group_processed_dir,
        # cn.age_cat_natrl_forest_US_dir,
        # cn.FIA_regions_processed_dir
    ]
    output_pattern_list = [
        # cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
        cn.pattern_drivers
        # cn.pattern_ifl_primary,
        # cn.pattern_annual_gain_AGC_natrl_forest_young,
        # cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
        # cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
        # cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
        # cn.pattern_FIA_forest_group_processed,
        # cn.pattern_age_cat_natrl_forest_US,
        # cn.pattern_FIA_regions_processed
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':

        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest
    # uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(
        os.path.join(cn.drivers_raw_dir, cn.pattern_drivers_raw),
        cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type)
    # uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type)
    # # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster.
    # # Thus, using wget instead.
    # cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    # process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    # with process.stdout:
    #     uu.log_subprocess_output(process.stdout)
    # uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)
    # cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    # uu.log_subprocess_output_full(cmd)
    #
    # uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)
    #
    # uu.print_log("Unzipping pre-2000 plantations...")
    # cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    # uu.log_subprocess_output_full(cmd)

    # Creates tree cover loss driver tiles.
    # The raw driver tile should have NoData for unassigned drivers as opposed to 0 for unassigned drivers.
    # For the 2020 driver update, I reclassified the 0 values as NoData in ArcMap. I also reprojected the global drivers
    # map to WGS84 because processing the Goode Homolosine projection that Jimmy provided was giving incorrect results.
    source_raster = cn.pattern_drivers_raw
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 87  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = 100 GB peak; 87 = 125 GB peak
    else:
        processes = int(cn.count / 2)
    uu.print_log(
        "Creating tree cover loss driver tiles with {} processors...".format(
            processes))
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt,
                no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # Creates young natural forest removal rate tiles
    # source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw
    # out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young
    # dt = 'float32'
    # if cn.count == 96:
    #     processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates young natural forest removal rate standard deviation tiles
    # source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw
    # out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    # dt = 'float32'
    # if cn.count == 96:
    #     processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    #
    # # Creates pre-2000 oil palm plantation tiles
    # if cn.count == 96:
    #     processes = 80  # 45 processors = 100 GB peak; 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list)
    # pool.close()
    # pool.join()
    #
    #
    # # Creates climate zone tiles
    # if cn.count == 96:
    #     processes = 80  # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(prep_other_inputs.create_climate_zone_tiles, tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates European natural forest removal rate tiles
    # source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    # out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe
    # dt = 'float32'
    # if cn.count == 96:
    #     processes = 60  # 32 processors = 60 GB peak; 60 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates European natural forest standard deviation of removal rate tiles
    # source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    # out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe
    # dt = 'float32'
    # if cn.count == 96:
    #     processes = 32  # 32 processors = 60 GB peak; 60 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    #
    # # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    # uu.print_log("Creating vrt of humid tropial primary forest...")
    # primary_vrt = 'primary_2001.vrt'
    # os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    # uu.print_log("  Humid tropical primary forest vrt created")
    #
    # # Creates primary forest tiles
    # source_raster = primary_vrt
    # out_pattern = 'primary_2001'
    # dt = 'Byte'
    # if cn.count == 96:
    #     processes = 45  # 45 processors = 650 GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    #
    # # Creates a combined IFL/primary forest raster
    # # Uses very little memory since it's just file renaming
    # if cn.count == 96:
    #     processes = 60  # 60 processors = 10 GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list)
    # pool.close()
    # pool.join()
    #
    #
    # # Creates forest age category tiles for US forests
    # source_raster = cn.name_age_cat_natrl_forest_US_raw
    # out_pattern = cn.pattern_age_cat_natrl_forest_US
    # dt = 'Byte'
    # if cn.count == 96:
    #     processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates forest groups for US forests
    # source_raster = cn.name_FIA_forest_group_raw
    # out_pattern = cn.pattern_FIA_forest_group_processed
    # dt = 'Byte'
    # if cn.count == 96:
    #     processes = 80  # 32 processors = 25 GB peak; 80 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # Creates FIA regions for US forests
    # source_raster = cn.name_FIA_regions_raw
    # out_pattern = cn.pattern_FIA_regions_processed
    # dt = 'Byte'
    # if cn.count == 96:
    #     processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)
    # pool.close()
    # pool.join()
    #
    #
    for output_pattern in [
            cn.pattern_drivers
            # ,cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    ]:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData)
        # should be. These NaN values register as data when the check_and_delete_if_empty function runs, so the tiles aren't
        # deleted even though they have no real data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't
        # have this problem. So the young forest rate and stdev tiles have their emptiness checked by the gdalinfo version instead.
        if output_pattern in [
                cn.pattern_annual_gain_AGC_natrl_forest_young,
                cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
        ]:
            processes = int(cn.count / 2)
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty_light,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        if cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        elif cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors using light function..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty_light,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = int(cn.count / 2)
            uu.print_log(
                "Checking for empty tiles of {0} pattern with {1} processors..."
                .format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(
                partial(uu.check_and_delete_if_empty,
                        output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        uu.print_log('\n')

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
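
The driver processing in mp_prep_other_inputs relies on uu.mp_warp_to_Hansen to cut the reprojected global driver raster into 10x10 degree tiles. A hedged sketch of the kind of gdalwarp call such a helper could issue for one tile, assuming the upper-left-corner tile naming used above and a 0.00025-degree output grid; the real helper's arguments and behavior may differ.

import subprocess

def warp_to_hansen_sketch(tile_id, source_raster, out_pattern, dt='Byte'):
    # Bounding box of the tile, assuming the ID names its upper-left corner
    lat, lon = tile_id.split('_')
    ymax = int(lat[:-1]) * (1 if lat[-1] == 'N' else -1)
    xmin = int(lon[:-1]) * (1 if lon[-1] == 'E' else -1)
    out_tile = '{0}_{1}.tif'.format(tile_id, out_pattern)
    # Clip and resample the global raster to this tile's extent and data type
    cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-tr', '0.00025', '0.00025',
           '-te', str(xmin), str(ymax - 10), str(xmin + 10), str(ymax),
           '-ot', dt, '-overwrite', '-co', 'COMPRESS=LZW',
           source_raster, out_tile]
    subprocess.check_call(cmd)
    return out_tile
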
pool.join()

# # For single processor use
# for tile in tile_list:
#     gain_year_count_planted_forest.create_gain_year_count_loss_only(tile)
#
# for tile in tile_list:
#     gain_year_count_planted_forest.create_gain_year_count_gain_only(tile)
#
# for tile in tile_list:
#     gain_year_count_planted_forest.create_gain_year_count_no_change(tile)
#
# for tile in tile_list:
#     gain_year_count_planted_forest.create_gain_year_count_loss_and_gain(tile)
#
# for tile in tile_list:
#     gain_year_count_planted_forest.create_gain_year_count_merge(tile)

# Intermediate output tiles for checking outputs
uu.upload_final_set(cn.gain_year_count_planted_forest_non_mangrove_dir,
                    "growth_years_loss_only")
uu.upload_final_set(cn.gain_year_count_planted_forest_non_mangrove_dir,
                    "growth_years_gain_only")
uu.upload_final_set(cn.gain_year_count_planted_forest_non_mangrove_dir,
                    "growth_years_no_change")
uu.upload_final_set(cn.gain_year_count_planted_forest_non_mangrove_dir,
                    "growth_years_loss_and_gain")

# This is the final output used later in the model
uu.upload_final_set(cn.gain_year_count_planted_forest_non_mangrove_dir,
                    cn.pattern_gain_year_count_planted_forest_non_mangrove)
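
uu.upload_final_set, used at the end of each script above, pushes every finished tile matching an output pattern to its s3 output directory. A minimal sketch of that upload step with the AWS CLI; this is an assumption about how the helper works, shown only to illustrate the idea.

import glob
import subprocess

def upload_final_set_sketch(s3_dir, pattern):
    # Copy each local tile whose name contains the output pattern up to the s3 output directory
    for local_tile in sorted(glob.glob('*{}*.tif'.format(pattern))):
        subprocess.check_call(['aws', 's3', 'cp', local_tile, s3_dir])
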
Example #20
0
def mp_model_extent(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model. Which biomass tiles to use depends on sensitivity analysis
        if sensit_type == 'biomass_swap':
            tile_id_list = uu.tile_list_s3(cn.JPL_processed_dir, sensit_type)
        elif sensit_type == 'legal_Amazon_loss':
            tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir, sensit_type)
        else:
            tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             cn.gain_dir, cn.tcd_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # Files to download for this script.
    download_dict = {
                    cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
                    cn.gain_dir: [cn.pattern_gain],
                    cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
    }

    if sensit_type == 'legal_Amazon_loss':
        download_dict[cn.Brazil_forest_extent_2000_processed_dir] = [cn.pattern_Brazil_forest_extent_2000_processed]
    else:
        download_dict[cn.tcd_dir] = [cn.pattern_tcd]

    if sensit_type == 'biomass_swap':
        download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
    else:
        download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

    # List of output directories and output file name patterns
    output_dir_list = [cn.model_extent_dir]
    output_pattern_list = [cn.pattern_model_extent]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Creates a single filename pattern to pass to the multiprocessor call
    pattern = output_pattern_list[0]

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 38
        else:
            processes = 42 # 30 processors = 480 GB peak (sporadic decreases followed by sustained increases);
            # 36 = 550 GB peak; 40 = 590 GB peak; 42 = XXX GB peak
    else:
        processes = 3
    uu.print_log('Removal model forest extent processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(model_extent.model_extent, pattern=pattern, sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     model_extent.model_extent(tile_id, pattern, sensit_type)

    output_pattern = output_pattern_list[0]
    if cn.count <= 2:  # For local tests
        processes = 1
        uu.print_log(
            "Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()
    else:
        processes = 50  # 50 processors = XXX GB peak
        uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()


    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
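
The empty-tile check above comes in two versions. According to the comments in mp_prep_other_inputs, the light version relies on gdalinfo rather than rasterio masks, which sidesteps the NaN issue in the young forest rasters. A hedged sketch of a gdalinfo-based check; the real uu.check_and_delete_if_empty_light may behave differently.

import os
import subprocess

def check_and_delete_if_empty_light_sketch(tile_name):
    # -stats makes gdalinfo compute and print per-band statistics
    info = subprocess.run(['gdalinfo', '-stats', tile_name],
                          capture_output=True, text=True).stdout
    maxima = [float(line.split('=')[1]) for line in info.splitlines()
              if 'STATISTICS_MAXIMUM' in line]
    # No statistics (all NoData) or an all-zero maximum both mean the tile carries no data
    if not maxima or max(maxima) == 0:
        os.remove(tile_name)
        return True
    return False
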
Example #21
0
def mp_output_per_pixel(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # Pixel area tiles-- necessary for calculating values per pixel
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, 'std', tile_id_list)

    # Files to download for this script. Unusually, this script needs the output pattern in the dictionary as well!
    download_dict = {
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [
            cn.pattern_cumul_gain_AGCO2_BGCO2_all_types,
            cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel
        ],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil,
            cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel
        ],
        cn.net_flux_dir: [cn.pattern_net_flux, cn.pattern_net_flux_per_pixel]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.cumul_gain_AGCO2_BGCO2_all_types_per_pixel_dir,
        cn.gross_emis_all_gases_all_drivers_biomass_soil_per_pixel_dir,
        cn.net_flux_per_pixel_dir
    ]
    output_pattern_list = [
        cn.pattern_cumul_gain_AGCO2_BGCO2_all_types_per_pixel,
        cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil_per_pixel,
        cn.pattern_net_flux_per_pixel
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through input tile sets
    for key, values in download_dict.items():

        # Sets the directory and pattern for the input being processed
        input_dir = key
        input_pattern = values[0]

        # If a full model run is specified, the correct set of tiles for the particular script is listed
        if tile_id_list == 'all':
            # List of tiles to run in the model
            tile_id_list = uu.tile_list_s3(input_dir, sensit_type)

        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        uu.print_log("Downloading tiles from", input_dir)
        uu.s3_flexible_download(input_dir, input_pattern, cn.docker_base_dir,
                                sensit_type, tile_id_list)

        # The pattern of the output files
        output_pattern = values[1]

        # 20 processors = 430 GB peak for cumul gain; 30 = 640 GB peak for cumul gain;
        # 32 = 680 GB peak for cumul gain; 33 = 710 GB peak for cumul gain, gross emis, net flux
        if cn.count == 96:
            processes = 20
        else:
            processes = 2
        uu.print_log("Creating {0} with {1} processors...".format(
            output_pattern, processes))
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(output_per_pixel.output_per_pixel,
                    input_pattern=input_pattern,
                    output_pattern=output_pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     output_per_pixel.output_per_pixel(tile_id, input_pattern, output_pattern, sensit_type)

        metadata_list = [
            'units=Mg CO2e/pixel over model duration (2001-20{})'.format(
                cn.loss_years), 'extent=Model extent',
            'pixel_areas=Pixel areas depend on the latitude at which the pixel is found',
            'scale=If this is for net flux, negative values are net sinks and positive values are net sources'
        ]
        if cn.count == 96:
            processes = 45  # 45 processors = XXX GB peak
        else:
            processes = 9
        uu.print_log('Adding metadata tags max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(uu.add_metadata_tags,
                    output_pattern=output_pattern,
                    sensit_type=sensit_type,
                    metadata_list=metadata_list), tile_id_list)
        pool.close()
        pool.join()

        # for tile_id in tile_id_list:
        #     uu.add_metadata_tags(tile_id, output_pattern, sensit_type, metadata_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
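
output_per_pixel.output_per_pixel converts per-hectare densities into per-pixel totals using the pixel area tiles downloaded at the top of this function. A hedged sketch of that conversion with rasterio, assuming the pixel-area tiles store square meters per pixel and the density tiles are Mg CO2e/ha; the real worker may differ.

import rasterio

def per_hectare_to_per_pixel_sketch(value_tile, pixel_area_tile, out_tile):
    with rasterio.open(value_tile) as value_src, rasterio.open(pixel_area_tile) as area_src:
        profile = value_src.profile
        values = value_src.read(1)
        areas = area_src.read(1)
    # Mg CO2e/ha * m2/pixel / 10,000 m2/ha = Mg CO2e/pixel
    per_pixel = values * areas / 10000.0
    with rasterio.open(out_tile, 'w', **profile) as dst:
        dst.write(per_pixel.astype(profile['dtype']), 1)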