def create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type):

    uu.print_log("Loss and gain pixel processing using standard function:",
                 tile_id)

    # Names of the loss, gain, and model extent tiles
    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    # start time
    start = datetime.datetime.now()

    if os.path.exists(loss):
        uu.print_log(
            "Loss tile found for {}. Using it in loss and gain pixel gain year count."
            .format(tile_id))
        loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C>0)*((A-1)+floor(({}+1-A)/2)))'.format(
            cn.loss_years)
        loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(
            tile_id)
        loss_and_gain_outfilearg = '--outfile={}'.format(
            loss_and_gain_outfilename)
        cmd = [
            'gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent,
            loss_and_gain_calc, loss_and_gain_outfilearg, '--NoDataValue=0',
            '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'
        ]
        uu.log_subprocess_output_full(cmd)
    else:
        uu.print_log(
            "No loss tile found for {}. Skipping loss and gain pixel gain year count."
            .format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain')
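
# A minimal sketch of the gain-year arithmetic in the calc above: pixels with both loss and gain
# get (A-1) full years of growth before the loss year plus half of the remaining years, assuming
# regrowth occupies roughly half of the post-loss period. Purely illustrative; the 20-year default
# below stands in for cn.loss_years.
import math

def example_gain_years_loss_and_gain(loss_year, loss_years=20):
    """Gain years for a pixel with loss in year loss_year (1..loss_years) and gain."""
    return (loss_year - 1) + math.floor((loss_years + 1 - loss_year) / 2)

# A pixel lost in year 5 of a 20-year model period: 4 + floor(16/2) = 12 gain years.
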
def create_gain_year_count_no_change_legal_Amazon_loss(tile_id, sensit_type,
                                                       no_upload):

    uu.print_log(
        "Gain year count for pixels without loss for legal_Amazon_loss:",
        tile_id)

    # Names of the loss, gain, and model extent tiles
    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    # start time
    start = datetime.datetime.now()

    # For unclear reasons, gdal_calc doesn't register the 0 (NoData) pixels in the loss tile, so I have to convert it
    # to a vrt so that the 0 pixels are recognized.
    # This was the case with PRODES loss in model v.1.1.2.
    loss_vrt = '{}_loss.vrt'.format(tile_id)
    os.system('gdalbuildvrt -vrtnodata None {0} {1}'.format(loss_vrt, loss))

    no_change_calc = '--calc=(A==0)*(B>0)*{}'.format(cn.loss_years)
    no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
    no_change_outfilearg = '--outfile={}'.format(no_change_outfilename)
    cmd = [
        'gdal_calc.py', '-A', loss_vrt, '-B', model_extent, no_change_calc,
        no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co',
        'COMPRESS=LZW', '--type', 'Byte', '--quiet'
    ]
    uu.log_subprocess_output_full(cmd)

    os.remove(loss_vrt)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change', no_upload)
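
# A minimal sketch (not a flux-model helper) for checking the -vrtnodata None workaround: the raw
# loss tile may report 0 as its NoData value, while the VRT built above should report no NoData
# value at all, which is what lets gdal_calc see the 0 pixels.
from osgeo import gdal

def example_report_nodata(raster_path):
    """Print the NoData value of band 1 (None means no NoData value is set)."""
    ds = gdal.Open(raster_path)
    print(raster_path, 'NoData:', ds.GetRasterBand(1).GetNoDataValue())
    ds = None  # close the dataset

# e.g., example_report_nodata(loss) vs. example_report_nodata(loss_vrt)
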
def create_mangrove_soil_C(tile_id, no_upload):

    # Start time
    start = datetime.datetime.now()

    # Checks if mangrove biomass exists. If not, it won't create a mangrove soil C tile.
    if os.path.exists('{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)):

        uu.print_log("Mangrove aboveground biomass tile found for", tile_id)

        uu.print_log("Getting extent of", tile_id)
        xmin, ymin, xmax, ymax = uu.coords(tile_id)

        uu.print_log("Clipping mangrove soil C from mangrove soil vrt for", tile_id)
        uu.warp_to_Hansen('mangrove_soil_C.vrt', '{0}_mangrove_full_extent.tif'.format(tile_id), xmin, ymin, xmax, ymax, 'Int16')

        mangrove_soil = '{0}_mangrove_full_extent.tif'.format(tile_id)
        mangrove_biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)
        outname = '{0}_mangrove_masked_to_mangrove.tif'.format(tile_id)
        out = '--outfile={}'.format(outname)
        calc = '--calc=A*(B>0)'
        datatype = '--type={}'.format('Int16')

        uu.print_log("Masking mangrove soil to mangrove biomass for", tile_id)
        cmd = ['gdal_calc.py', '-A', mangrove_soil, '-B', mangrove_biomass,
               calc, out, '--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--overwrite', datatype, '--quiet']
        uu.log_subprocess_output_full(cmd)

    else:

        uu.print_log("No mangrove aboveground biomass tile for", tile_id)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'mangrove_masked_to_mangrove', no_upload)
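
# A minimal numpy sketch of the masking expression above: A*(B>0) keeps soil C only where mangrove
# biomass is positive and sets everything else to 0, which --NoDataValue=0 then turns into NoData.
# Purely illustrative; the real work is done by gdal_calc on full tiles.
import numpy as np

def example_mask_soil_to_biomass(soil_c, biomass):
    soil_c = np.asarray(soil_c)
    biomass = np.asarray(biomass)
    return soil_c * (biomass > 0)

# example_mask_soil_to_biomass([120, 80, 60], [5, 0, 12]) -> array([120, 0, 60])
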
def create_gain_year_count_loss_only(tile_id, sensit_type, no_upload):

    uu.print_log("Gain year count for loss only pixels:", tile_id)

    # start time
    start = datetime.datetime.now()

    # Names of the loss, gain, and model extent tiles
    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    if os.path.exists(loss):
        uu.print_log(
            "Loss tile found for {}. Using it in loss only pixel gain year count."
            .format(tile_id))
        loss_calc = '--calc=(A>0)*(B==0)*(C>0)*(A-1)'
        loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id)
        loss_outfilearg = '--outfile={}'.format(loss_outfilename)
        cmd = [
            'gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent,
            loss_calc, loss_outfilearg, '--NoDataValue=0', '--overwrite',
            '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'
        ]
        uu.log_subprocess_output_full(cmd)
    else:
        uu.print_log(
            "No loss tile found for {}. Skipping loss only pixel gain year count."
            .format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_only', no_upload)
def create_gain_year_count_no_change_standard(tile_id, sensit_type, no_upload):

    uu.print_log("Gain year count for pixels with neither loss nor gain:", tile_id)

    # Names of the loss, gain, and model extent tiles
    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    # start time
    start = datetime.datetime.now()

    if os.path.exists(loss):
        uu.print_log("Loss tile found for {}. Using it in no change pixel gain year count.".format(tile_id))
        no_change_calc = '--calc=(A==0)*(B==0)*(C>0)*{}'.format(cn.loss_years)
        no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
        no_change_outfilearg = '--outfile={}'.format(no_change_outfilename)
        cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, no_change_calc,
               no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']
        uu.log_subprocess_output_full(cmd)
    else:
        uu.print_log("No loss tile found for {}. Not using it for no change pixel gain year count.".format(tile_id))
        no_change_calc = '--calc=(A==0)*(B>0)*{}'.format(cn.loss_years)
        no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
        no_change_outfilearg = '--outfile={}'.format(no_change_outfilename)
        cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, no_change_calc,
               no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']
        uu.log_subprocess_output_full(cmd)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change', no_upload)
def output_per_pixel(tile_id, input_pattern, output_pattern, sensit_type):

    uu.print_log("Calculating per pixel values for", tile_id)

    # Start time
    start = datetime.datetime.now()

    # Names of the input model tile, the pixel area tile, and the output per-pixel tile
    input_model_tile = '{0}_{1}.tif'.format(tile_id, input_pattern)
    area_tile = 'hanson_2013_area_{}.tif'.format(tile_id)
    output_model_tile = '{0}_{1}.tif'.format(tile_id, output_pattern)

    uu.print_log("Converting {} from Mg CO2/ha to Mg CO2/pixel...".format(
        input_model_tile))
    # Equation argument for converting emissions from per hectare to per pixel.
    # First, multiplies the per hectare emissions by the area of the pixel in m2, then divides by the number of m2 in a hectare.
    calc = '--calc=A*B/{}'.format(cn.m2_per_ha)
    out = '--outfile={}'.format(output_model_tile)
    cmd = [
        'gdal_calc.py', '-A', input_model_tile, '-B', area_tile, calc, out,
        '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--overwrite', '--quiet'
    ]
    uu.log_subprocess_output_full(cmd)

    uu.print_log(
        "  Per pixel values calculated for {}".format(output_model_tile))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_pattern)
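
# A minimal sketch of the per-hectare to per-pixel conversion above, assuming cn.m2_per_ha is
# 10000 (as its name implies). The pixel area raster supplies B in m2.
def example_per_ha_to_per_pixel(value_per_ha, pixel_area_m2, m2_per_ha=10000):
    return value_per_ha * pixel_area_m2 / m2_per_ha

# A roughly 770 m2 pixel (a 0.00025 degree pixel near the equator) holding 50 Mg CO2/ha:
# example_per_ha_to_per_pixel(50, 770) -> 3.85 Mg CO2 in that pixel.
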
def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type):

    # start time
    start = datetime.datetime.now()
    date = datetime.datetime.now()
    date_formatted = date.strftime("%Y_%m_%d")

    uu.print_log(sensit_aggreg_flux)
    uu.print_log(std_aggreg_flux)

    # This produces errors about dividing by 0. As far as I can tell, those are fine. It's just trying to divide NoData
    # pixels by NoData pixels, and it doesn't affect the output.
    # For model v1.2.0, this kept producing incorrect values for the biomass_swap analysis. I don't know why. I ended
    # up just using raster calculator in ArcMap to create the percent diff raster for biomass_swap. It worked
    # fine for all the other analyses, though (including legal_Amazon_loss).
    # Maybe that divide by 0 is throwing off other values now.
    perc_diff_calc = '--calc=(A-B)/absolute(B)*100'
    perc_diff_outfilename = '{0}_{1}_{2}.tif'.format(
        cn.pattern_aggreg_sensit_perc_diff, sensit_type, date_formatted)
    perc_diff_outfilearg = '--outfile={}'.format(perc_diff_outfilename)
    # cmd = ['gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux, perc_diff_calc, perc_diff_outfilearg,
    #        '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--quiet']
    cmd = [
        'gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux,
        perc_diff_calc, perc_diff_outfilearg, '--overwrite', '--co',
        'COMPRESS=LZW', '--quiet'
    ]
    uu.log_subprocess_output_full(cmd)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux)
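
# A minimal numpy sketch of the percent-difference expression above. (A-B)/absolute(B)*100 divides
# by zero wherever the standard output is 0 or NoData, which is the source of the warnings noted
# in the comment; masking those pixels explicitly shows the remaining values are unaffected.
import numpy as np

def example_percent_diff(sensit, std):
    sensit = np.asarray(sensit, dtype='float64')
    std = np.asarray(std, dtype='float64')
    with np.errstate(divide='ignore', invalid='ignore'):
        perc = (sensit - std) / np.abs(std) * 100
    return np.where(std == 0, np.nan, perc)  # undefined where the standard value is 0

# example_percent_diff([110, 0, 95], [100, 0, 100]) -> array([10., nan, -5.])
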
def mp_mangrove_processing(tile_id_list, run_date=None, no_upload=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads zipped raw mangrove files
    uu.s3_file_download(
        os.path.join(cn.mangrove_biomass_raw_dir,
                     cn.mangrove_biomass_raw_file), cn.docker_base_dir, 'std')

    # Unzips mangrove images into a flat structure (all tifs into main folder using -j argument)
    # NOTE: Unzipping some tifs (e.g., Australia, Indonesia) takes a very long time, so don't worry if the script appears to stop on that.
    cmd = ['unzip', '-o', '-j', cn.mangrove_biomass_raw_file]
    uu.log_subprocess_output_full(cmd)

    # Creates vrt for the Saatchi biomass rasters
    mangrove_vrt = 'mangrove_biomass.vrt'
    os.system('gdalbuildvrt {} *.tif'.format(mangrove_vrt))

    # Converts the mangrove AGB vrt into Hansen tiles
    source_raster = mangrove_vrt
    out_pattern = cn.pattern_mangrove_biomass_2000
    dt = 'float32'
    processes = int(cn.count / 4)
    uu.print_log('Mangrove preprocessing max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt,
                no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use, for testing purposes
    # for tile_id in tile_id_list:
    #
    #     mangrove_processing.create_mangrove_tiles(tile_id, source_raster, out_pattern, no_upload)

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.mangrove_biomass_2000_dir
    pattern = cn.pattern_mangrove_biomass_2000
    processes = int(cn.count - 5)
    uu.print_log('Mangrove check for data max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern),
        tile_id_list)
    pool.close()
    pool.join()
def add_metadata_tags(tile_id, pattern, sensit_type):

    # Adds metadata tags to output rasters
    uu.add_universal_metadata_tags('{0}_{1}.tif'.format(tile_id, pattern), sensit_type)

    cmd = ['gdal_edit.py', '-mo',
           'units=Mg CO2e/ha over model duration (2001-20{})'.format(cn.loss_years),
           '-mo', 'source=many data sources',
           '-mo', 'extent=Tree cover loss pixels within model extent (and tree cover loss driver, if applicable)',
           '{0}_{1}.tif'.format(tile_id, pattern)]
    uu.log_subprocess_output_full(cmd)
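
# A minimal sketch (not a flux-model utility) for reading the metadata tags back after gdal_edit.py
# runs, to confirm they were written. Uses only the standard GDAL Python bindings.
from osgeo import gdal

def example_print_metadata(tif_path):
    ds = gdal.Open(tif_path)
    for key, value in ds.GetMetadata().items():
        print('{0}={1}'.format(key, value))
    ds = None  # close the dataset

# e.g., example_print_metadata('{0}_{1}.tif'.format(tile_id, pattern))
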
def calc_emissions(tile_id, emitted_pools, sensit_type, folder, no_upload):

    uu.print_log("Calculating gross emissions for", tile_id, "using",
                 sensit_type, "model type...")

    start = datetime.datetime.now()

    # Runs the correct c++ script given the emitted_pools (biomass+soil or soil_only) and model type selected.
    # soil_only, no_shifting_ag, and convert_to_grassland have special gross emissions C++ scripts.
    # The other sensitivity analyses and the standard model all use the same gross emissions C++ script.
    if (emitted_pools == 'soil_only') & (sensit_type == 'std'):
        cmd = [
            '{0}/calc_gross_emissions_soil_only.exe'.format(
                cn.c_emis_compile_dst), tile_id, sensit_type, folder
        ]

    elif (emitted_pools == 'biomass_soil') & (
            sensit_type in ['convert_to_grassland', 'no_shifting_ag']):
        cmd = [
            '{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst,
                                                      sensit_type), tile_id,
            sensit_type, folder
        ]

    # This C++ script has an extra argument that names the input carbon emitted_pools and output emissions correctly
    elif (emitted_pools == 'biomass_soil') & (
            sensit_type not in ['no_shifting_ag', 'convert_to_grassland']):
        cmd = [
            '{0}/calc_gross_emissions_generic.exe'.format(
                cn.c_emis_compile_dst), tile_id, sensit_type, folder
        ]

    else:
        uu.exception_log(no_upload,
                         'Pool and/or sensitivity analysis option not valid')

    uu.log_subprocess_output_full(cmd)

    # Identifies which pattern to use for counting tile completion
    pattern = cn.pattern_gross_emis_commod_biomass_soil
    if (emitted_pools == 'biomass_soil') & (sensit_type == 'std'):
        pattern = pattern

    elif (emitted_pools == 'biomass_soil') & (sensit_type != 'std'):
        pattern = pattern + "_" + sensit_type

    elif emitted_pools == 'soil_only':
        pattern = pattern.replace('biomass_soil', 'soil_only')

    else:
        uu.exception_log(no_upload, 'Pool option not valid')

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, pattern, no_upload)
def stack_ba_hv(hv_tile):

    for year in range(2019,
                      2020):  # End year is not included in burn year product

        # Download hdf files from s3 into folders by h and v
        output_dir = utilities.makedir('{0}/{1}/raw/'.format(hv_tile, year))
        utilities.download_df(year, hv_tile, output_dir)

        # convert hdf to array
        hdf_files = glob.glob(output_dir + "*hdf")

        if len(hdf_files) > 0:
            array_list = []
            for hdf in hdf_files:
                uu.print_log("converting hdf to array")
                array = utilities.hdf_to_array(hdf)
                array_list.append(array)

            # stack arrays, get 1 raster for the year and tile
            stacked_year_array = utilities.stack_arrays(array_list)
            max_stacked_year_array = stacked_year_array.max(0)

            # convert stacked month arrays to 1 raster for the year
            template_hdf = hdf_files[0]

            year_folder = utilities.makedir('{0}/{1}/stacked/'.format(
                hv_tile, year))

            stacked_year_raster = utilities.array_to_raster(
                hv_tile, year, max_stacked_year_array, template_hdf,
                year_folder)

            # upload to s3
            cmd = [
                'aws', 's3', 'cp', stacked_year_raster,
                cn.burn_year_stacked_hv_tif_dir
            ]
            uu.log_subprocess_output_full(cmd)

            # remove files
            shutil.rmtree(output_dir)

        else:
            pass
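
# A minimal numpy sketch of the stacking step above: utilities.stack_arrays(...).max(0) keeps,
# for each pixel, the largest value (the latest burn day of year) across the monthly arrays.
import numpy as np

def example_max_across_months(monthly_arrays):
    return np.stack(monthly_arrays).max(axis=0)

# example_max_across_months([np.array([[0, 32]]), np.array([[15, 0]])]) -> array([[15, 32]])
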
def mp_continent_ecozone_tiles(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(
            cn.pattern_WHRC_biomass_2000_non_mang_non_planted,
            cn.mangrove_biomass_2000_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # if the continent-ecozone shapefile hasn't already been downloaded, it will be downloaded and unzipped
    uu.s3_file_download(cn.cont_eco_s3_zip, cn.docker_base_dir, 'std')

    # Unzips ecozone shapefile
    cmd = ['unzip', cn.cont_eco_zip]
    uu.log_subprocess_output_full(cmd)

    # List of output directories and output file name patterns
    output_dir_list = [cn.cont_eco_raw_dir, cn.cont_eco_dir]
    output_pattern_list = [
        cn.pattern_cont_eco_raw, cn.pattern_cont_eco_processed
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # For multiprocessor use
    processes = int(cn.count / 4)
    uu.print_log('Continent-ecozone tile creation max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(continent_ecozone_tiles.create_continent_ecozone_tiles,
             tile_id_list)
    pool.close()
    pool.join()

    # Uploads the continent-ecozone tiles to s3 before the codes are expanded to pixels in 1024x1024 windows that don't have codes.
    # The unexpanded tiles are not used for the model; they are kept for reference and completeness.
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def clip_year_tiles(tile_year_list, no_upload):

    # Start time
    start = datetime.datetime.now()

    tile_id = tile_year_list[0].strip('.tif')
    year = tile_year_list[1]

    vrt_name = "global_vrt_{}_wgs84.vrt".format(year)

    # Gets coordinates of hansen tile
    uu.print_log("Getting coordinates of", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # Clips vrt to tile extent
    uu.print_log("Clipping burn year vrt to {0} for {1}".format(tile_id, year))

    clipped_raster = "ba_clipped_{0}_{1}.tif".format(year, tile_id)
    cmd = [
        'gdal_translate', '-ot', 'Byte', '-co', 'COMPRESS=LZW', '-a_nodata',
        '0'
    ]
    cmd += [vrt_name, clipped_raster, '-tr', '.00025', '.00025']
    cmd += ['-projwin', str(xmin), str(ymax), str(xmax), str(ymin)]
    uu.log_subprocess_output_full(cmd)

    # Calculates year tile values to be equal to year. ex: 17*1
    calc = '--calc={}*(A>0)'.format(int(year) - 2000)
    recoded_output = "ba_{0}_{1}.tif".format(year, tile_id)
    outfile = '--outfile={}'.format(recoded_output)

    cmd = [
        'gdal_calc.py', '-A', clipped_raster, calc, outfile, '--NoDataValue=0',
        '--co', 'COMPRESS=LZW', '--quiet'
    ]
    uu.log_subprocess_output_full(cmd)

    # Only copies to s3 if the tile has data.
    # No tiles for 2000 have data because the burn year is coded as 0, which is NoData.
    uu.print_log("Checking if {} contains any data...".format(tile_id))
    empty = uu.check_for_data(recoded_output)

    if empty:
        uu.print_log("  No data found. Not copying {}.".format(tile_id))

    else:
        uu.print_log(
            "  Data found in {}. Copying tile to s3...".format(tile_id))
        cmd = [
            'aws', 's3', 'cp', recoded_output,
            cn.burn_year_warped_to_Hansen_dir
        ]
        uu.log_subprocess_output_full(cmd)
        uu.print_log("    Tile copied to", cn.burn_year_warped_to_Hansen_dir)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, "ba_{}".format(year), no_upload)
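
# A minimal sketch of the recode step above: burned pixels (A > 0) become a two-digit year value
# (e.g., 2017 -> 17) and unburned pixels stay 0, which --NoDataValue=0 then treats as NoData.
def example_recode_burn_year(burn_value, year):
    return (int(year) - 2000) * (1 if burn_value > 0 else 0)

# example_recode_burn_year(123, 2017) -> 17; example_recode_burn_year(0, 2017) -> 0
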
def mp_create_soil_C(tile_id_list):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(
            cn.WHRC_biomass_2000_unmasked_dir, cn.mangrove_biomass_2000_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.soil_C_full_extent_2000_dir, cn.stdev_soil_C_full_extent_2000_dir
    ]
    output_pattern_list = [
        cn.pattern_soil_C_full_extent_2000, cn.pattern_stdev_soil_C_full_extent
    ]

    ### Soil carbon density

    # uu.print_log("Downloading mangrove soil C rasters")
    # uu.s3_file_download(os.path.join(cn.mangrove_soil_C_dir, cn.name_mangrove_soil_C), cn.docker_base_dir, sensit_type)
    #
    # # For downloading all tiles in the input folders.
    # input_files = [cn.mangrove_biomass_2000_dir]
    #
    # for input in input_files:
    #     uu.s3_folder_download(input, cn.docker_base_dir, sensit_type)
    #
    # # Download raw mineral soil C density tiles.
    # # First tries to download index.html.tmp from every folder, then goes back and downloads all the tifs in each folder
    # # Based on https://stackoverflow.com/questions/273743/using-wget-to-recursively-fetch-a-directory-with-arbitrary-files-in-it
    # # There are 12951 tiles and it takes about 3 hours to download them!
    # cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
    #                '--accept', '*.tif', '{}'.format(cn.mineral_soil_C_url)]
    # uu.log_subprocess_output_full(cmd)
    #
    # uu.print_log("Unzipping mangrove soil C rasters...")
    # cmd = ['unzip', '-j', cn.name_mangrove_soil_C, '-d', cn.docker_base_dir]
    # uu.log_subprocess_output_full(cmd)
    #
    # # Mangrove soil receives precedence over mineral soil
    # uu.print_log("Making mangrove soil C vrt...")
    # check_call('gdalbuildvrt mangrove_soil_C.vrt *{}*.tif'.format(cn.pattern_mangrove_soil_C_raw), shell=True)
    # uu.print_log("Done making mangrove soil C vrt")
    #
    # uu.print_log("Making mangrove soil C tiles...")
    #
    # if cn.count == 96:
    #     processes = 32   # 32 processors = 570 GB peak
    # else:
    #     processes = int(cn.count/3)
    # uu.print_log('Mangrove soil C max processors=', processes)
    # pool = multiprocessing.Pool(processes)
    # pool.map(create_soil_C.create_mangrove_soil_C, tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # # For single processor use
    # # for tile_id in tile_id_list:
    # #
    # #     create_soil_C.create_mangrove_soil_C(tile_id)
    #
    # uu.print_log('Done making mangrove soil C tiles', '\n')
    #
    # uu.print_log("Making mineral soil C vrt...")
    # check_call('gdalbuildvrt mineral_soil_C.vrt *{}*'.format(cn.pattern_mineral_soil_C_raw), shell=True)
    # uu.print_log("Done making mineral soil C vrt")
    #
    # # Creates mineral soil C density tiles
    # source_raster = 'mineral_soil_C.vrt'
    # out_pattern = 'mineral_soil'
    # dt = 'Int16'
    # if cn.count == 96:
    #     processes = 50  # 32 processors = 100 GB peak; 50 = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log("Creating mineral soil C density tiles with {} processors...".format(processes))
    # pool = multiprocessing.Pool(processes)
    # pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # # For single processor use
    # # for tile_id in tile_id_list:
    # #
    # #     create_soil_C.create_mineral_soil_C(tile_id)
    #
    # uu.print_log("Done making mineral soil C tiles", "\n")
    #
    #
    # uu.print_log("Making combined (mangrove & non-mangrove) soil C tiles...")
    #
    # if cn.count == 96:
    #     processes = 45   # 45 processors = XXX GB peak
    # else:
    #     processes = int(cn.count/2)
    # uu.print_log('Combined soil C max processors=', processes)
    # pool = multiprocessing.Pool(processes)
    # pool.map(create_soil_C.create_combined_soil_C, tile_id_list)
    # pool.close()
    # pool.join()
    #
    # # # For single processor use
    # # for tile in tile_list:
    # #
    # #     create_soil_C.create_combined_soil_C(tile_id)
    #
    # uu.print_log("Done making combined soil C tiles")
    #
    # uu.print_log("Uploading soil C density tiles")
    # uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
    #
    # # Need to delete soil c density rasters because they have the same pattern as the standard deviation rasters
    # uu.print_log("Deleting raw soil C density rasters")
    # c_stocks = glob.glob('*{}*'.format(cn.pattern_soil_C_full_extent_2000))
    # for c_stock in c_stocks:
    #     os.remove(c_stock)

    ### Soil carbon density uncertainty

    # Separate directories for the 5% CI and 95% CI
    dir_CI05 = '{0}{1}'.format(cn.docker_base_dir, 'CI05/')
    dir_CI95 = '{0}{1}'.format(cn.docker_base_dir, 'CI95/')
    vrt_CI05 = 'mineral_soil_C_CI05.vrt'
    vrt_CI95 = 'mineral_soil_C_CI95.vrt'
    soil_C_stdev_global = 'soil_C_stdev.tif'

    # # Download raw mineral soil C density 5% CI tiles
    # # First tries to download index.html.tmp from every folder, then goes back and downloads all the tifs in each folder
    # # Based on https://stackoverflow.com/questions/273743/using-wget-to-recursively-fetch-a-directory-with-arbitrary-files-in-it
    # # Like soil C density rasters, there are 12951 tifs and they take about 3 hours to download.
    # os.mkdir(dir_CI05)
    #
    # cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
    #                '--directory-prefix={}'.format(dir_CI05),
    #                '--accept', '*.tif', '{}'.format(cn.CI5_mineral_soil_C_url)]
    # uu.log_subprocess_output_full(cmd)
    #
    # uu.print_log("Making mineral soil C 5% CI vrt...")

    # check_call('gdalbuildvrt {0} {1}*{2}*'.format(vrt_CI05, dir_CI05, cn.pattern_uncert_mineral_soil_C_raw), shell=True)
    # uu.print_log("Done making mineral soil C CI05 vrt")
    #
    # # Download raw mineral soil C density 5% CI tiles
    # # Like soil C density rasters, there are 12951 tifs and they take about 3 hours to download.
    # os.mkdir(dir_CI95)
    #
    # cmd = ['wget', '--recursive', '-nH', '--cut-dirs=6', '--no-parent', '--reject', 'index.html*',
    #                '--directory-prefix={}'.format(dir_CI95),
    #                '--accept', '*.tif', '{}'.format(cn.CI95_mineral_soil_C_url)]
    # uu.log_subprocess_output_full(cmd)
    #
    # uu.print_log("Making mineral soil C 95% CI vrt...")

    # check_call('gdalbuildvrt {0} {1}*{2}*'.format(vrt_CI95, dir_CI95, cn.pattern_uncert_mineral_soil_C_raw), shell=True)
    # uu.print_log("Done making mineral soil C CI95 vrt")

    uu.print_log(
        "Creating raster of standard deviations in soil C at native SoilGrids250 resolution. This may take a while..."
    )
    # Global tif with an approximation of the soil C standard deviation (based on the 5% and 95% CIs)

    # This takes about 20 minutes. It doesn't show any progress until the last moment, when it quickly counts
    # up to 100.
    calc = '--calc=(A-B)/3'
    out_filearg = '--outfile={}'.format(soil_C_stdev_global)
    cmd = [
        'gdal_calc.py', '-A', vrt_CI95, '-B', vrt_CI05, calc, out_filearg,
        '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW',
        '--type=Float32'
    ]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("{} created.".format(soil_C_stdev_global))

    # Creates soil carbon 2000 density standard deviation tiles
    out_pattern = cn.pattern_stdev_soil_C_full_extent
    dt = 'Float32'
    source_raster = soil_C_stdev_global
    if cn.count == 96:
        processes = 56  # 32 processors = 290 GB peak; 56 = XXX GB peak
    else:
        processes = 2
    uu.print_log(
        "Creating mineral soil C stock stdev tiles with {} processors...".
        format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt), tile_id_list)
    pool.close()
    pool.join()

    output_pattern = cn.pattern_stdev_soil_C_full_extent
    processes = 50  # 50 processors = 550 GB peak
    uu.print_log(
        "Checking for empty tiles of {0} pattern with {1} processors...".
        format(output_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.check_and_delete_if_empty, output_pattern=output_pattern),
        tile_id_list)
    pool.close()
    pool.join()

    uu.print_log("Uploading soil C density standard deviation tiles")
    uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
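
# A minimal sketch of the (A-B)/3 standard-deviation approximation used above. For a normal
# distribution, the 5%-95% confidence interval spans about 2 * 1.645 = 3.29 standard deviations,
# so dividing the interval width by 3 gives a slightly larger (more conservative) estimate of sigma.
def example_stdev_from_ci(ci95, ci05):
    return (ci95 - ci05) / 3

# CI05 = 40 and CI95 = 70 Mg C/ha gives an approximate standard deviation of 10 Mg C/ha
# (the normal-theory value would be 30 / 3.29, about 9.1).
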
def mp_US_removal_rates(sensit_type, tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        tile_id_list = uu.tile_list_s3(cn.FIA_regions_processed_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed],
        cn.FIA_forest_group_processed_dir:
        [cn.pattern_FIA_forest_group_processed],
        cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGC_BGC_natrl_forest_US_dir,
        cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGC_BGC_natrl_forest_US,
        cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US
    ]

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Table with US-specific removal rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the region-group-age AGC+BGC removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_AGC+BGC")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    age_dict = {
        'growth_young': 1000,
        'growth_middle': 2000,
        'growth_old': 3000
    }

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    gain_table_group_region_age = gain_table_group_region_by_age.replace(
        {"variable": age_dict})
    gain_table_group_region_age[
        'age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                              gain_table_group_region_age['forest_group_code']*100 + \
                                              gain_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    gain_table_group_region_age_dict = pd.Series(
        gain_table_group_region_age.value.values,
        index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[
            gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \
                                                       gain_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    gain_table_group_region_dict = pd.Series(
        gain_table_group_region.value.values,
        index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    ### To make the removal factor standard deviation dictionaries

    # Converts gain table from wide to long, so each region-group-age category has its own row
    stdev_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['SD_young', 'SD_middle', 'SD_old'])
    stdev_table_group_region_by_age = stdev_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    stdev_dict = {'SD_young': 1000, 'SD_middle': 2000, 'SD_old': 3000}

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    stdev_table_group_region_age = stdev_table_group_region_by_age.replace(
        {"variable": stdev_dict})
    stdev_table_group_region_age[
        'age_cat'] = stdev_table_group_region_age['variable'] * 10
    stdev_table_group_region_age['group_region_age_combined'] = stdev_table_group_region_age['age_cat'] + \
                                                               stdev_table_group_region_age['forest_group_code'] * 100 + \
                                                               stdev_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    stdev_table_group_region_age_dict = pd.Series(
        stdev_table_group_region_age.value.values,
        index=stdev_table_group_region_age.group_region_age_combined).to_dict(
        )
    uu.print_log(stdev_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    stdev_table_group_region = stdev_table_group_region_age.drop(
        stdev_table_group_region_age[
            stdev_table_group_region_age.age_cat != 10000].index)
    stdev_table_group_region['group_region_combined'] = stdev_table_group_region['forest_group_code'] * 100 + \
                                                       stdev_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    stdev_table_group_region_dict = pd.Series(
        stdev_table_group_region.value.values,
        index=stdev_table_group_region.group_region_combined).to_dict()
    uu.print_log(stdev_table_group_region_dict)

    if cn.count == 96:
        processes = 68  # 68 processors (only 16 tiles though) = 310 GB peak
    else:
        processes = 24
    uu.print_log('US natural forest AGC+BGC removal rate max processors=',
                 processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            US_removal_rates.US_removal_rate_calc,
            gain_table_group_region_age_dict=gain_table_group_region_age_dict,
            gain_table_group_region_dict=gain_table_group_region_dict,
            stdev_table_group_region_age_dict=stdev_table_group_region_age_dict,
            stdev_table_group_region_dict=stdev_table_group_region_dict,
            output_pattern_list=output_pattern_list), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id,
    #       gain_table_group_region_age_dict,
    #       gain_table_group_region_dict,
    #       stdev_table_group_region_age_dict,
    #       stdev_table_group_region_dict,
    #       output_pattern_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
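
# A minimal sketch of the group-region-age code built above: the age-category raster value
# (1000/2000/3000) times 10, plus the forest group code times 100, plus the FIA region code,
# packed into a single integer key for the removal-rate dictionary.
def example_group_region_age_code(age_cat_raster_value, forest_group_code, fia_region_code):
    return age_cat_raster_value * 10 + forest_group_code * 100 + fia_region_code

# A young stand (raster value 1000) in forest group 3, FIA region 5: 10000 + 300 + 5 = 10305.
# example_group_region_age_code(1000, 3, 5) -> 10305
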
def mp_burn_year(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.burn_year_dir]
    output_pattern_list = [cn.pattern_burn_year]

    # Step 1:
    # Downloads the latest year of raw burn area hdfs to the spot machine.
    # This step requires using osgeo/gdal:ubuntu-full-X.X.X Docker image because the small image doesn't have an
    # hdf driver in gdal.
    file_name = "*.hdf"
    raw_source = '{0}/20{1}'.format(cn.burn_area_raw_ftp, cn.loss_years)
    cmd = [
        'wget', '-r', '--ftp-user=user', '--ftp-password=burnt_data',
        '--accept', file_name
    ]
    cmd += ['--no-directories', '--no-parent', raw_source]
    uu.log_subprocess_output_full(cmd)

    # Uploads the latest year of raw burn area hdfs to s3
    cmd = [
        'aws', 's3', 'cp', '.', cn.burn_year_hdf_raw_dir, '--recursive',
        '--exclude', '*', '--include', '*hdf'
    ]
    uu.log_subprocess_output_full(cmd)

    global_grid_hv = [
        "h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10",
        "h01v11", "h02v06", "h02v08", "h02v09", "h02v10", "h02v11", "h03v06",
        "h03v07", "h03v09", "h03v10", "h03v11", "h04v09", "h04v10", "h04v11",
        "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03", "h07v05",
        "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07",
        "h08v08", "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05",
        "h09v06", "h09v07", "h09v08", "h09v09", "h10v02", "h10v03", "h10v04",
        "h10v05", "h10v06", "h10v07", "h10v08", "h10v09", "h10v10", "h10v11",
        "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08",
        "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04",
        "h12v05", "h12v07", "h12v08", "h12v09", "h12v10", "h12v11", "h12v12",
        "h12v13", "h13v02", "h13v03", "h13v04", "h13v08", "h13v09", "h13v10",
        "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03", "h14v04",
        "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05",
        "h15v07", "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08",
        "h16v09", "h17v02", "h17v03", "h17v04", "h17v05", "h17v06", "h17v07",
        "h17v08", "h17v10", "h17v12", "h17v13", "h18v02", "h18v03", "h18v04",
        "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03",
        "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10",
        "h19v11", "h19v12", "h20v02", "h20v03", "h20v04", "h20v05", "h20v06",
        "h20v07", "h20v08", "h20v09", "h20v10", "h20v11", "h20v12", "h20v13",
        "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07", "h21v08",
        "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04",
        "h22v05", "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11",
        "h22v13", "h23v02", "h23v03", "h23v04", "h23v05", "h23v06", "h23v07",
        "h23v08", "h23v09", "h23v10", "h23v11", "h24v02", "h24v03", "h24v04",
        "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04",
        "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03",
        "h26v04", "h26v05", "h26v06", "h26v07", "h26v08", "h27v03", "h27v04",
        "h27v05", "h27v06", "h27v07", "h27v08", "h27v09", "h27v10", "h27v11",
        "h27v12", "h28v03", "h28v04", "h28v05", "h28v06", "h28v07", "h28v08",
        "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05",
        "h29v06", "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12",
        "h29v13", "h30v06", "h30v07", "h30v08", "h30v09", "h30v10", "h30v11",
        "h30v12", "h30v13", "h31v06", "h31v07", "h31v08", "h31v09", "h31v10",
        "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10",
        "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11",
        "h34v07", "h34v08", "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"
    ]

    # Step 2:
    # Makes burned area rasters for each year for each MODIS horizontal-vertical tile
    uu.print_log(
        "Stacking hdf into MODIS burned area tifs by year and MODIS hv tile..."
    )

    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count - 10)
    pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv)
    pool.close()
    pool.join()

    # # For single processor use
    # for hv_tile in global_grid_hv:
    #     stack_ba_hv.stack_ba_hv(hv_tile)

    # Step 3:
    # Creates 10x10 degree WGS84 tiles of burn year at 0.00025 degree resolution.
    # Downloads all MODIS hv tiles from s3, makes a mosaic for each year, and warps it to the Hansen extent.
    # The range is inclusive at the lower end and exclusive at the upper end (e.g., 2001, 2020 covers 2001 to 2019).
    for year in range(2019, 2020):

        uu.print_log("Processing", year)

        # Downloads all hv tifs for this year
        include = '{0}_*.tif'.format(year)
        year_tifs_folder = "{}_year_tifs".format(year)
        utilities.makedir(year_tifs_folder)

        uu.print_log("Downloading MODIS burn date files from s3...")

        cmd = [
            'aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir,
            year_tifs_folder
        ]
        cmd += ['--recursive', '--exclude', "*", '--include', include]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Creating vrt of MODIS files...")

        vrt_name = "global_vrt_{}.vrt".format(year)

        # Builds list of vrt files
        with open('vrt_files.txt', 'w') as vrt_files:
            vrt_tifs = glob.glob(year_tifs_folder + "/*.tif")
            for tif in vrt_tifs:
                vrt_files.write(tif + "\n")

        # Creates vrt with wgs84 MODIS tiles.
        cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Reprojecting vrt...")

        # Builds a new vrt and virtually reprojects it.
        # This reprojection could be done as part of the clip_year_tiles function, but Sam had it out here like this,
        # so I'm leaving it that way.
        vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)
        cmd = [
            'gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr',
            '.00025', '.00025', '-overwrite', vrt_name, vrt_wgs84
        ]
        uu.log_subprocess_output_full(cmd)

        # Creates a list of lists, with year and tile id to send to multi processor
        tile_year_list = []
        for tile_id in tile_id_list:
            tile_year_list.append([tile_id, year])

        # Given a list of [tile_id, year] pairs (e.g., ['00N_000E', 2017]) and the global burn VRT
        # (whose pixels indicate burned or not burned), this step clips the global VRT to each tile
        # and recodes the pixel values to the year the pixel burned. Each output tile contains the
        # burn year and NoData.
        count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=count - 5)
        pool.map(clip_year_tiles.clip_year_tiles, tile_year_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_year in tile_year_list:
        #     clip_year_tiles.clip_year_tiles(tile_year)

        uu.print_log(
            "Processing for {} done. Moving to next year.".format(year))

    # Step 4:
    # Creates a single Hansen tile covering all years that represents where burning coincided with tree cover loss

    # Downloads the loss tiles
    uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss)

    uu.print_log(
        "Extracting burn year data that coincides with tree cover loss...")

    # Downloads the 10x10 deg burn year tiles (1 for each year in which there was burned area), stacks them, and
    # evaluates them to return burn year values on Hansen loss pixels within 1 year of the loss date
    if cn.count == 96:
        processes = 5
        # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory)
    else:
        processes = 1
    pool = multiprocessing.Pool(processes)
    pool.map(hansen_burnyear_final.hansen_burnyear, tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     hansen_burnyear_final.hansen_burnyear(tile_id)

    # Uploads output tiles to s3
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
def mp_annual_gain_rate_IPCC_defaults(sensit_type,
                                      tile_id_list,
                                      run_date=None):

    os.chdir(cn.docker_base_dir)
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script.
    download_dict = {
        cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_IPCC_defaults_dir,
        cn.annual_gain_BGB_IPCC_defaults_dir,
        cn.stdev_annual_gain_AGB_IPCC_defaults_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_IPCC_defaults,
        cn.pattern_annual_gain_BGB_IPCC_defaults,
        cn.pattern_stdev_annual_gain_AGB_IPCC_defaults
    ]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type,
                                                output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

    # Table with IPCC Table 4.9 default gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="natrl fores gain, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default rates for tile creation")

    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first')

    # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
    gain_table_cont_eco_age = pd.melt(gain_table_simplified,
                                      id_vars=['gainEcoCon'],
                                      value_vars=[
                                          'growth_primary',
                                          'growth_secondary_greater_20',
                                          'growth_secondary_less_20'
                                      ])
    gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

    # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
    # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
    # Assigns removal rate of 0 when there's no age category.
    gain_table_con_eco_only = gain_table_cont_eco_age
    gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(
        subset='gainEcoCon', keep='first')
    gain_table_con_eco_only['value'] = 0
    gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only[
        'gainEcoCon']

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    rate_age_dict = {
        'growth_secondary_less_20': 10000,
        'growth_secondary_greater_20': 20000,
        'growth_primary': 30000
    }

    # Creates a unique value for each continent-ecozone-age category
    gain_table_cont_eco_age = gain_table_cont_eco_age.replace(
        {"variable": rate_age_dict})
    gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age[
        'gainEcoCon'] + gain_table_cont_eco_age['variable']

    # Merges the table of just continent-ecozone codes and the table of  continent-ecozone-age codes
    gain_table_all_combos = pd.concat(
        [gain_table_con_eco_only, gain_table_cont_eco_age])

    # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
    gain_table_dict = pd.Series(
        gain_table_all_combos.value.values,
        index=gain_table_all_combos.cont_eco_age).to_dict()

    # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
    gain_table_dict[0] = 0

    # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
    for key, value in rate_age_dict.items():

        gain_table_dict[value] = 0

    # Converts all the keys (continent-ecozone-age codes) to float type
    gain_table_dict = {
        float(key): value
        for key, value in gain_table_dict.items()
    }

    ### To make the removal factor standard deviation dictionary

    # Special removal rate table for no_primary_gain sensitivity analysis: primary forests and IFLs have removal rate of 0
    if sensit_type == 'no_primary_gain':
        # Imports the table with the ecozone-continent codes and the carbon gain rates
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, no_prim_gain")
        uu.print_log(
            "Using no_primary_gain IPCC default standard deviations for tile creation"
        )

    # All other analyses use the standard removal rates
    else:
        # Imports the table with the ecozone-continent codes and the biomass gain rate standard deviations
        stdev_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores stdv, for std model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
    stdev_table_cont_eco_age = pd.melt(stdev_table_simplified,
                                       id_vars=['gainEcoCon'],
                                       value_vars=[
                                           'stdev_primary',
                                           'stdev_secondary_greater_20',
                                           'stdev_secondary_less_20'
                                       ])
    stdev_table_cont_eco_age = stdev_table_cont_eco_age.dropna()

    # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
    # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
    # Assigns removal rate of 0 when there's no age category.
    stdev_table_con_eco_only = stdev_table_cont_eco_age
    stdev_table_con_eco_only = stdev_table_con_eco_only.drop_duplicates(
        subset='gainEcoCon', keep='first')
    stdev_table_con_eco_only['value'] = 0
    stdev_table_con_eco_only['cont_eco_age'] = stdev_table_con_eco_only[
        'gainEcoCon']

    # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
    stdev_age_dict = {
        'stdev_secondary_less_20': 10000,
        'stdev_secondary_greater_20': 20000,
        'stdev_primary': 30000
    }

    # Creates a unique value for each continent-ecozone-age category
    stdev_table_cont_eco_age = stdev_table_cont_eco_age.replace(
        {"variable": stdev_age_dict})
    stdev_table_cont_eco_age['cont_eco_age'] = stdev_table_cont_eco_age[
        'gainEcoCon'] + stdev_table_cont_eco_age['variable']

    # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
    stdev_table_all_combos = pd.concat(
        [stdev_table_con_eco_only, stdev_table_cont_eco_age])

    # Converts the continent-ecozone-age codes and corresponding standard deviations to a dictionary
    stdev_table_dict = pd.Series(
        stdev_table_all_combos.value.values,
        index=stdev_table_all_combos.cont_eco_age).to_dict()

    # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
    stdev_table_dict[0] = 0

    # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
    for key, value in stdev_age_dict.items():

        stdev_table_dict[value] = 0

    # Converts all the keys (continent-ecozone-age codes) to float type
    stdev_table_dict = {
        float(key): value
        for key, value in stdev_table_dict.items()
    }

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    if cn.count == 96:
        if sensit_type == 'biomass_swap':
            processes = 24  # 24 processors = 590 GB peak
        else:
            processes = 30  # 30 processors = 725 GB peak
    else:
        processes = 2
    uu.print_log('Annual gain rate natural forest max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_IPCC_defaults.annual_gain_rate,
                sensit_type=sensit_type,
                gain_table_dict=gain_table_dict,
                stdev_table_dict=stdev_table_dict,
                output_pattern_list=output_pattern_list), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     annual_gain_rate_IPCC_defaults.annual_gain_rate(tile_id, sensit_type,
    #       gain_table_dict, stdev_table_dict, output_pattern_list)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def mp_aggregate_results_to_4_km(sensit_type,
                                 thresh,
                                 tile_id_list,
                                 std_net_flux=None,
                                 run_date=None,
                                 no_upload=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # Checks whether the canopy cover argument is valid
    if thresh < 0 or thresh > 99:
        uu.exception_log(
            no_upload,
            'Invalid tcd. Please provide an integer between 0 and 99.')

    if uu.check_aws_creds():

        # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
        uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent
        uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                                sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                                cn.pattern_mangrove_biomass_2000,
                                cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Model outputs to process are:", download_dict)

    # List of output directories. Modified later for sensitivity analysis.
    # Output pattern is determined later.
    output_dir_list = [cn.output_aggreg_dir]

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through the types of tiles to be processed
    for dir, download_pattern in list(download_dict.items()):

        download_pattern_name = download_pattern[0]

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
        if uu.check_aws_creds():

            uu.s3_flexible_download(dir, download_pattern_name,
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

        # Gets an actual tile id to use as a dummy in creating the actual tile pattern
        local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir,
                                                    download_pattern_name)
        sample_tile_id = uu.get_tile_id(local_tile_list[0])

        # Renames the tiles according to the sensitivity analysis before creating dummy tiles.
        # The renaming function requires a whole tile name, so this passes a dummy tile name that is then stripped a few
        # lines later.
        tile_id = sample_tile_id  # a dummy tile id (but it has to be a real tile id). It is removed later.
        output_pattern = uu.sensit_tile_rename(sensit_type, tile_id,
                                               download_pattern_name)
        pattern = output_pattern[9:-4]
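        # For a hypothetical tile name like '00N_000E_net_flux_t_CO2e_ha.tif', [9:-4] strips the 9-character
        # '00N_000E_' tile id prefix and the '.tif' extension, leaving just the pattern.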

        # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis
        if (sensit_type != 'std') & (sensit_type not in pattern):
            uu.print_log(
                "{} not a sensitivity analysis output. Skipping aggregation..."
                .format(pattern))
            uu.print_log("")

            continue

        # Lists the tiles of the particular type that is being iterated through.
        # Excludes all intermediate files
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [i for i in tile_list if not ('hanson_2013' in i)]
        tile_list = [i for i in tile_list if not ('rewindow' in i)]
        tile_list = [i for i in tile_list if not ('0_4deg' in i)]
        tile_list = [i for i in tile_list if not ('.ovr' in i)]

        # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif']  # test tiles

        uu.print_log("There are {0} tiles to process for pattern {1}".format(
            str(len(tile_list)), download_pattern) + "\n")
        uu.print_log("Processing:", dir, "; ", pattern)

        # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to square windows that match
        # the 0.04x0.04 degree output resolution. This allows the 0.00025 degree (~30x30 m) pixels in each window to be summed.
        # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12  # 12 processors = XXX GB peak
            else:
                processes = 16  # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload),
            tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.rewindow(tile, no_upload)

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
        # and sums those values in each window.
        # The sum for each window is stored in a 2D array, which is then converted back into a raster at
        # 0.04x0.04 degree resolution (approximately 4 km at the equator).
        # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha).
        # The 0.04x0.04 degree tile is output.
        # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge
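        # Rough arithmetic sketch (assuming the nominal resolutions above): each 0.04 degree output cell spans
        # 0.04 / 0.00025 = 160 of the 0.00025 degree pixels per side, so roughly 160 * 160 = 25,600 per-pixel
        # values are summed into each aggregated cell.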
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 12  # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Conversion to per pixel and aggregate max processors=',
                     processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.aggregate,
                    thresh=thresh,
                    sensit_type=sensit_type,
                    no_upload=no_upload), tile_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)

        # Makes a vrt of all the output 10x10 degree tiles (0.04 degree resolution)
        out_vrt = "{}_0_4deg.vrt".format(pattern)
        os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format(
            out_vrt, pattern))

        # Creates the output name for the aggregated (0.04 degree, ~4 km) map
        out_pattern = uu.name_aggregated_output(download_pattern_name, thresh,
                                                sensit_type)
        uu.print_log(out_pattern)

        # Produces a single raster of all the 10x10 degree tiles (0.04 degree resolution)
        cmd = [
            'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0',
            '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt,
            '{}.tif'.format(out_pattern)
        ]
        uu.log_subprocess_output_full(cmd)

        # Adds metadata tags to output rasters
        uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern),
                                       sensit_type)

        # Units are different for annual removal factor, so metadata has to reflect that
        if 'annual_removal_factor' in out_pattern:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'scale=negative values are removals', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation)'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        else:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation)'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:

            uu.print_log("Tiles processed. Uploading to s3 now...")
            uu.upload_final_set(output_dir_list[0], out_pattern)

        # Cleans up the folder before starting on the next raster type
        vrtList = glob.glob('*vrt')
        for vrt in vrtList:
            os.remove(vrt)

        for tile_name in tile_list:
            tile_id = uu.get_tile_id(tile_name)
            # os.remove('{0}_{1}.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern))

    # Compares the net flux from the standard model and the sensitivity analysis in two ways.
    # This does not work for comparing the raw outputs of the biomass_swap and US_removals sensitivity models because their
    # extents are different from the standard model's extent (tropics and US tiles vs. global).
    # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to
    # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000.
    # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the
    # code below should work.
    if sensit_type not in [
            'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss'
    ]:

        if std_net_flux:

            uu.print_log(
                "Standard aggregated flux results provided. Creating comparison maps."
            )

            # Downloads the standard model aggregated net flux output from s3. Only net flux is used, though.
            uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type)

            # Identifies the standard model net flux map
            std_aggreg_flux = os.path.split(std_net_flux)[1]

            try:
                # Identifies the sensitivity model net flux map
                sensit_aggreg_flux = glob.glob(
                    'net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0]

                uu.print_log("Standard model net flux:", std_aggreg_flux)
                uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux)

            except IndexError:
                uu.print_log(
                    'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.'
                )
                return

            uu.print_log(
                "Creating map of percent difference between standard and {} net flux"
                .format(sensit_type))
            aggregate_results_to_4_km.percent_diff(std_aggreg_flux,
                                                   sensit_aggreg_flux,
                                                   sensit_type, no_upload)

            uu.print_log(
                "Creating map of which pixels change sign and which stay the same between standard and {}"
                .format(sensit_type))
            aggregate_results_to_4_km.sign_change(std_aggreg_flux,
                                                  sensit_aggreg_flux,
                                                  sensit_type, no_upload)

            # If no_upload flag is not activated, output is uploaded
            if not no_upload:

                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_perc_diff)
                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_sign_change)

        else:

            uu.print_log(
                "No standard aggregated flux results provided. Not creating comparison maps."
            )
def create_tile_statistics(tile, sensit_type, tile_stats_txt):

    # Extracts the tile id from the full tile name
    tile_id = uu.get_tile_id(tile)

    uu.print_log("Calculating tile statistics for {0}, tile id {1}...".format(
        tile, tile_id))

    # start time
    start = datetime.datetime.now()

    # Source: http://gis.stackexchange.com/questions/90726
    # Opens raster we're getting statistics on
    focus_tile = gdal.Open(tile)

    nodata = uu.get_raster_nodata_value(tile)
    uu.print_log("NoData value =", nodata)

    # Turns the raster into a numpy array
    tile_array = np.array(focus_tile.GetRasterBand(1).ReadAsArray())

    # Flattens the numpy array to a single dimension
    tile_array_flat = tile_array.flatten()

    # Removes NoData values from the array. NoData are generally either 0 or -9999.
    tile_array_flat_mask = tile_array_flat[tile_array_flat != nodata]
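    # For example, if the NoData value is 0, this keeps only the non-zero pixels for the statistics calculated below.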

    ### For converting value/hectare to value/pixel
    # Tile with the area of each pixel in m2
    area_tile = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id)

    # Output file name
    tile_short = tile[:-4]
    outname = '{0}_value_per_pixel.tif'.format(tile_short)

    # Equation argument for converting emissions from per hectare to per pixel.
    # First, multiplies the per hectare emissions by the area of the pixel in m2, then divides by the number of m2 in a hectare.
    calc = '--calc=A*B/{}'.format(cn.m2_per_ha)
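    # Worked example (assuming a ~30x30 m pixel near the equator, i.e. B of about 900 m2, and cn.m2_per_ha = 10,000):
    # A = 2 Mg/ha gives 2 * 900 / 10,000 = 0.18 Mg for that pixel.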

    # Argument for outputting file
    out = '--outfile={}'.format(outname)

    uu.print_log("Converting {} from /ha to /pixel...".format(tile))
    cmd = [
        'gdal_calc.py', '-A', tile, '-B', area_tile, calc, out,
        '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--overwrite', '--quiet'
    ]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("{} converted to /pixel".format(tile))

    uu.print_log(
        "Converting value/pixel tile {} to numpy array...".format(tile))
    # Opens raster with value per pixel
    value_per_pixel = gdal.Open(outname)

    # Turns the pixel area raster into a numpy array
    value_per_pixel_array = np.array(
        value_per_pixel.GetRasterBand(1).ReadAsArray())

    # Flattens the pixel area numpy array to a single dimension
    value_per_pixel_array_flat = value_per_pixel_array.flatten()

    uu.print_log("Converted {} to numpy array".format(tile))

    # Empty statistics list
    stats = [None] * 13
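    # Slot guide: 0 = tile id, 1 = tile pattern, 2 = full tile name, 3 = count of non-NoData pixels, 4 = mean,
    # 5 = median, 6-9 = 10th/25th/75th/90th percentiles, 10 = min, 11 = max, 12 = sum of per-pixel values.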

    # Calculates basic tile info
    stats[0] = tile_id
    stats[1] = tile[9:-4]
    stats[2] = tile
    stats[3] = tile_array_flat_mask.size

    # If there are no pixels with values in the tile (as determined by the length of the array when NoData values are removed),
    # the statistics are all N/A.
    if stats[3] == 0:

        stats[4] = "N/A"
        stats[5] = "N/A"
        stats[6] = "N/A"
        stats[7] = "N/A"
        stats[8] = "N/A"
        stats[9] = "N/A"
        stats[10] = "N/A"
        stats[11] = "N/A"
        stats[12] = "N/A"

    # If there are pixels with values in the tile, the following statistics are calculated
    else:

        stats[4] = np.mean(tile_array_flat_mask, dtype=np.float64)
        stats[5] = np.median(tile_array_flat_mask)
        stats[6] = np.percentile(tile_array_flat_mask, 10)
        stats[7] = np.percentile(tile_array_flat_mask, 25)
        stats[8] = np.percentile(tile_array_flat_mask, 75)
        stats[9] = np.percentile(tile_array_flat_mask, 90)
        stats[10] = np.amin(tile_array_flat_mask)
        stats[11] = np.amax(tile_array_flat_mask)
        stats[12] = np.sum(value_per_pixel_array_flat)

    stats_no_brackets = ', '.join(map(str, stats))

    uu.print_log(stats_no_brackets)

    # Adds the tile's statistics to the txt file
    with open(tile_stats_txt, 'a+') as f:
        f.write(stats_no_brackets + '\r\n')

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'value_per_pixel.tif')
def main():

    no_upload = False

    sensit_type = "legal_Amazon_loss"

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    Brazil_stages = ['all', 'create_forest_extent', 'create_loss']

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description=
        'Create tiles of forest extent in legal Amazon in 2000 and annual loss according to PRODES'
    )
    parser.add_argument(
        '--stages',
        '-s',
        required=True,
        help=
        'Stages of creating Brazil legal Amazon-specific gross cumulative removals. Options are {}'
        .format(Brazil_stages))
    parser.add_argument(
        '--run_through',
        '-r',
        required=True,
        help=
        'Options: true or false. true: run named stage and following stages. false: run only named stage.'
    )
    args = parser.parse_args()
    stage_input = args.stages
    run_through = args.run_through

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (stage_input not in Brazil_stages):
        uu.exception_log(
            no_upload, 'Invalid stage selection. Please provide a stage from',
            Brazil_stages)
    else:
        pass
    if (run_through not in ['true', 'false']):
        uu.exception_log(
            no_upload,
            'Invalid run through option. Please enter true or false.')
    else:
        pass

    actual_stages = uu.analysis_stages(Brazil_stages, stage_input, run_through,
                                       sensit_type)
    uu.print_log(actual_stages)

    # By definition, this script is for the legal Amazon loss sensitivity analysis
    sensit_type = 'legal_Amazon_loss'

    # List of output directories and output file name patterns
    master_output_dir_list = [
        cn.Brazil_forest_extent_2000_processed_dir,
        cn.Brazil_annual_loss_processed_dir
    ]

    master_output_pattern_list = [
        cn.pattern_Brazil_forest_extent_2000_processed,
        cn.pattern_Brazil_annual_loss_processed
    ]

    # Creates forest extent 2000 raster from multiple PRODES forest extent rasters
    ###NOTE: Didn't redo this for model v1.2.0, so I don't know if it still works.
    if 'create_forest_extent' in actual_stages:

        uu.print_log('Creating forest extent tiles')

        # List of tiles that could be run, based on the tiles that have WHRC biomass.
        tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
        # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
        # tile_id_list = ['50N_130W'] # test tiles
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir,
                              cn.docker_base_dir, sensit_type)
        raw_forest_extent_inputs = glob.glob(
            '*_AMZ_warped_*tif')  # The list of tiles to merge

        # Gets the resolution of a more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('*2019_AMZ_warped_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
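        # GDAL geotransforms are (top-left x, pixel width, row rotation, top-left y, column rotation, pixel height);
        # pixel height (index 5) is negative for north-up rasters, hence the sign flip above.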
        uu.print_log(pixelSizeX)
        uu.print_log(pixelSizeY)

        # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out
        # at about 300 GB as it progresses about 15% each time; then the memory drops back to 0 and slowly increases.
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged),
            '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
            '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            raw_forest_extent_inputs[0], raw_forest_extent_inputs[1],
            raw_forest_extent_inputs[2], raw_forest_extent_inputs[3],
            raw_forest_extent_inputs[4], raw_forest_extent_inputs[5]
        ]
        uu.log_subprocess_output_full(cmd)

        # Uploads the merged forest extent raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_forest_extent_2000_merged_dir,
                            cn.pattern_Brazil_forest_extent_2000_merged)

        # Creates legal Amazon extent 2000 tiles
        source_raster = '{}.tif'.format(
            cn.pattern_Brazil_forest_extent_2000_merged)
        out_pattern = cn.pattern_Brazil_forest_extent_2000_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)
        pool.close()
        pool.join()

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        upload_dir = master_output_dir_list[0]
        pattern = master_output_pattern_list[0]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates annual loss raster for 2001-2019 from multiple PRODES rasters
    if 'create_loss' in actual_stages:

        uu.print_log('Creating annual PRODES loss tiles')

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        cmd = [
            'aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.',
            '--recursive'
        ]
        uu.log_subprocess_output_full(cmd)

        uu.print_log(
            "Input loss rasters downloaded. Getting resolution of recent raster..."
        )

        # Gets the resolution of the more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('Prodes2019_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]

        uu.print_log("  Recent raster resolution: {0} by {1}".format(
            pixelSizeX, pixelSizeY))

        # This merges both loss rasters together, so it takes a lot of memory and time. It seems to max out
        # at about 180 GB, then go back to 0.
        # This took about 8 minutes.
        uu.print_log(
            "Merging input loss rasters into a composite for all years...")
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged), '-co',
            'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps',
            '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            'Prodes2019_annual_loss_2008_2019.tif',
            'Prodes2014_annual_loss_2001_2007.tif'
        ]
        uu.log_subprocess_output_full(cmd)
        uu.print_log("  Loss rasters combined into composite")

        # Uploads the merged loss raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_annual_loss_merged_dir,
                            cn.pattern_Brazil_annual_loss_merged)

        # Creates annual loss 2001-2019 tiles
        uu.print_log("Warping composite PRODES loss to Hansen tiles...")
        source_raster = '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged)
        out_pattern = cn.pattern_Brazil_annual_loss_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)
        pool.close()
        pool.join()
        uu.print_log("  PRODES composite loss raster warped to Hansen tiles")

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        # In practice, every Amazon tile has loss in it but I figured I'd do this just to be thorough.
        upload_dir = master_output_dir_list[1]
        pattern = master_output_pattern_list[1]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates forest age category tiles
    if 'forest_age_category' in actual_stages:

        uu.print_log('Creating forest age category tiles')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[2]

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # With processes=30, peak usage was about 350 GB using WHRC AGB.
        # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that.
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_forest_age_category,
                    sensit_type=sensit_type,
                    output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #
        #     legal_AMZ_loss.legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern)

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[2],
                            stage_output_pattern_list[2])

    # Creates tiles of the number of years of removals
    if 'gain_year_count' in actual_stages:

        uu.print_log('Creating gain year count tiles for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[3]

        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(legal_AMZ_loss.
                    legal_Amazon_create_gain_year_count_loss_and_gain_standard,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        pool = multiprocessing.Pool(
            int(cn.count / 8)
        )  # count/5 uses more than 160GB of memory. count/8 uses about 120GB of memory.
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge,
                    output_pattern=output_pattern), tile_id_list)

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        # legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern)

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change")
        uu.upload_final_set(stage_output_dir_list[3],
                            "growth_years_loss_and_gain")

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[3],
                            stage_output_pattern_list[3])

    # Creates tiles of annual AGB and BGB gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'annual_removals' in actual_stages:

        uu.print_log('Creating annual removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (annual removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[4:6])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[4:6])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Table with IPCC Table 4.9 default gain rates
        cmd = [
            'aws', 's3', 'cp',
            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
            cn.docker_base_dir
        ]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None
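        # Disables pandas' SettingWithCopyWarning, which the column assignments on table slices below would otherwise emit.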

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                           keep='first')

        # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
        gain_table_cont_eco_age = pd.melt(gain_table_simplified,
                                          id_vars=['gainEcoCon'],
                                          value_vars=[
                                              'growth_primary',
                                              'growth_secondary_greater_20',
                                              'growth_secondary_less_20'
                                          ])
        gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

        # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
        # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
        # Assigns removal rate of 0 when there's no age category.
        gain_table_con_eco_only = gain_table_cont_eco_age
        gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(
            subset='gainEcoCon', keep='first')
        gain_table_con_eco_only['value'] = 0
        gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only[
            'gainEcoCon']

        # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
        age_dict = {
            'growth_primary': 10000,
            'growth_secondary_greater_20': 20000,
            'growth_secondary_less_20': 30000
        }

        # Creates a unique value for each continent-ecozone-age category
        gain_table_cont_eco_age = gain_table_cont_eco_age.replace(
            {"variable": age_dict})
        gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age[
            'gainEcoCon'] + gain_table_cont_eco_age['variable']
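        # Illustrative example with a hypothetical gainEcoCon code of 402: the 'growth_primary' rows get
        # variable = 10000, so cont_eco_age = 402 + 10000 = 10402.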

        # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
        gain_table_all_combos = pd.concat(
            [gain_table_con_eco_only, gain_table_cont_eco_age])

        # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
        gain_table_dict = pd.Series(
            gain_table_all_combos.value.values,
            index=gain_table_all_combos.cont_eco_age).to_dict()

        # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
        gain_table_dict[0] = 0

        # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
        for key, value in age_dict.items():
            gain_table_dict[value] = 0

        # Converts all the keys (continent-ecozone-age codes) to float type
        gain_table_dict = {
            float(key): value
            for key, value in gain_table_dict.items()
        }

        uu.print_log(gain_table_dict)

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # processes=24 peaks at about 440 GB of memory on an r4.16xlarge machine
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(annual_gain_rate_natrl_forest.annual_gain_rate,
                    sensit_type=sensit_type,
                    gain_table_dict=gain_table_dict,
                    output_pattern_list=output_pattern_list), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_id_list:
        #
        #     annual_gain_rate_natrl_forest.annual_gain_rate(tile, sensit_type, gain_table_dict, stage_output_pattern_list)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates tiles of cumulative AGCO2 and BGCO2 gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'cumulative_removals' in actual_stages:

        uu.print_log('Creating cumulative removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.gain_year_count_natrl_forest_dir:
            [cn.pattern_gain_year_count_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[6:8])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[6:8])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Calculates cumulative aboveground carbon gain in non-mangrove planted forests
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(cumulative_gain_natrl_forest.cumulative_gain_AGCO2,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # Calculates cumulative belowground carbon gain in non-mangrove planted forests
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(cumulative_gain_natrl_forest.cumulative_gain_BGCO2,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_AGCO2(tile_id, stage_output_pattern_list[0], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_BGCO2(tile_id, stage_output_pattern_list[1], sensit_type)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates tiles of annual gain rate and cumulative removals for all forest types (above + belowground)
    if 'removals_merged' in actual_stages:

        uu.print_log(
            'Creating annual and cumulative removals for all forest types combined (above + belowground)'
        )

        # Files to download for this script
        download_dict = {
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_mangrove_dir:
            [cn.pattern_annual_gain_BGB_mangrove],
            cn.annual_gain_BGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_BGB_planted_forest_non_mangrove],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.cumul_gain_BGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_mangrove],
            cn.cumul_gain_BGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_BGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_BGCO2_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[8:10])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[8:10])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # For multiprocessing
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(merge_cumulative_annual_gain_all_forest_types.gain_merge,
                    output_pattern_list=output_pattern_list,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     merge_cumulative_annual_gain_all_forest_types.gain_merge(tile_id, output_pattern_list, sensit_type)

        # Uploads output tiles to s3
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i],
                                stage_output_pattern_list[i])

    # Creates carbon emitted_pools in loss year
    if 'carbon_pools' in actual_stages:

        uu.print_log('Creating emissions year carbon emitted_pools')

        # Specifies that carbon emitted_pools are created for loss year rather than in 2000
        extent = 'loss'

        # Files to download for this script
        download_dict = {
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir:
            [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [
                cn.pattern_JPL_unmasked_processed
            ]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [
                cn.pattern_WHRC_biomass_2000_unmasked
            ]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [
                cn.pattern_Brazil_annual_loss_processed
            ]
        else:
            download_dict[cn.loss_dir] = ['']

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(
                sensit_type, master_output_dir_list[10:16])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[10:16])

        # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
        cmd = [
            'aws', 's3', 'cp',
            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
            cn.docker_base_dir
        ]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="mangrove gain, for model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                           keep='first')

        mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.below_to_above_trop_dry_mang,
            cn.below_to_above_trop_wet_mang, cn.below_to_above_subtrop_mang)

        mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.deadwood_to_above_trop_dry_mang,
            cn.deadwood_to_above_trop_wet_mang,
            cn.deadwood_to_above_subtrop_mang)

        mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(
            gain_table_simplified, cn.litter_to_above_trop_dry_mang,
            cn.litter_to_above_trop_wet_mang, cn.litter_to_above_subtrop_mang)
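        # Each call above presumably returns a dictionary keyed by continent-ecozone code giving the ratio of that
        # pool (belowground biomass, deadwood, litter) to aboveground biomass for mangroves, built from the
        # climate-zone-specific constants passed in.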

        if extent == 'loss':

            uu.print_log(
                "Creating tiles of emitted aboveground carbon (carbon 2000 + carbon accumulation until loss year)"
            )
            # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 410-415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(int(cn.count / 4))
            pool.map(
                partial(create_carbon_pools.create_emitted_AGC,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_emitted_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0],
                                stage_output_pattern_list[0])

        elif extent == '2000':

            uu.print_log("Creating tiles of aboveground carbon in 2000")
            # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
            # 14 processors maxes out at 415 GB
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(processes=14)
            pool.map(
                partial(create_carbon_pools.create_2000_AGC,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_2000_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0],
                                stage_output_pattern_list[0])

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of belowground carbon")
        # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[1]
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(create_carbon_pools.create_BGC,
                    mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, extent, stage_output_pattern_list[1], sensit_type)

        uu.upload_final_set(stage_output_dir_list[1],
                            stage_output_pattern_list[1])

        uu.print_log("Creating tiles of deadwood carbon")
        # processes=16 maxes out at about 430 GB
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[2]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_deadwood,
                    mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_deadwood(tile_id, mang_deadwood_AGB_ratio, extent, stage_output_pattern_list[2], sensit_type)

        uu.upload_final_set(stage_output_dir_list[2],
                            stage_output_pattern_list[2])

        uu.print_log("Creating tiles of litter carbon")
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[3]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_litter,
                    mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_litter(tile_id, mang_litter_AGB_ratio, extent, stage_output_pattern_list[3], sensit_type)

        uu.upload_final_set(stage_output_dir_list[3],
                            stage_output_pattern_list[3])

        if extent == 'loss':

            uu.print_log("Creating tiles of soil carbon")
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[4]
            pool = multiprocessing.Pool(int(cn.count / 3))
            pool.map(
                partial(create_carbon_pools.create_soil,
                        pattern=pattern,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_soil(tile_id, stage_output_pattern_list[4], sensit_type)

            uu.upload_final_set(stage_output_dir_list[4],
                                stage_output_pattern_list[4])

        elif extent == '2000':
            uu.print_log("Skipping soil for 2000 carbon pool calculation")

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of total carbon")
        # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory
        # at peak. Probably could've handled 16 processors on an r4.16xlarge machine but I didn't feel like taking the time to check.
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[5]
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(
            partial(create_carbon_pools.create_total_C,
                    extent=extent,
                    pattern=pattern,
                    sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_total_C(tile_id, extent, stage_output_pattern_list[5], sensit_type)

        uu.upload_final_set(stage_output_dir_list[5],
                            stage_output_pattern_list[5])
Beispiel #21
def rewindow(tile):

    # start time
    start = datetime.datetime.now()

    uu.print_log(
        "Rewindowing {} to 200x200 pixel windows (0.04 degree x 0.04 degree)..."
        .format(tile))

    # Extracts the tile id, tile type, and bounding box for the tile
    tile_id = uu.get_tile_id(tile)
    tile_type = uu.get_tile_type(tile)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # Names of the original tiles and their rewindowed versions (160x160 pixel windows)
    input_rewindow = '{0}_{1}_rewindow.tif'.format(tile_id, tile_type)
    area_tile = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id)
    pixel_area_rewindow = '{0}_{1}_rewindow.tif'.format(
        cn.pattern_pixel_area, tile_id)
    tcd_tile = '{0}_{1}.tif'.format(cn.pattern_tcd, tile_id)
    tcd_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_tcd, tile_id)
    gain_tile = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id)
    gain_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_gain, tile_id)
    mangrove_tile = '{0}_{1}.tif'.format(tile_id,
                                         cn.pattern_mangrove_biomass_2000)
    mangrove_tile_rewindow = '{0}_{1}_rewindow.tif'.format(
        tile_id, cn.pattern_mangrove_biomass_2000)

    # Only rewindows the necessary files if they haven't already been processed (just in case
    # this was run on the spot machine before)

    if not os.path.exists(input_rewindow):
        uu.print_log(
            "Model output for {} not rewindowed. Rewindowing...".format(
                tile_id))

        # Converts the tile of interest to 160x160 pixel windows
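        # Note on the gdalwarp flags used here and in the commands below (explanatory, added for clarity):
        # -te sets the target extent to the tile's bounding box; -tap aligns that extent to the resolution
        # grid; -tr sets the output resolution to the Hansen pixel size; and the TILED/BLOCKXSIZE/BLOCKYSIZE
        # creation options write a tiled GeoTIFF whose internal blocks are 160x160 pixels
        # (0.04 x 0.04 degrees), which is what "rewindowing" refers to.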
        cmd = [
            'gdalwarp', '-co', 'COMPRESS=LZW', '-overwrite', '-te',
            str(xmin),
            str(ymin),
            str(xmax),
            str(ymax), '-tap', '-tr',
            str(cn.Hansen_res),
            str(cn.Hansen_res), '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=160',
            '-co', 'BLOCKYSIZE=160', tile, input_rewindow
        ]
        uu.log_subprocess_output_full(cmd)

    if not os.path.exists(tcd_rewindow):
        uu.print_log(
            "Canopy cover for {} not rewindowed. Rewindowing...".format(
                tile_id))

        # Converts the tree cover density tile to 160x160 pixel windows
        cmd = [
            'gdalwarp', '-co', 'COMPRESS=LZW', '-overwrite', '-dstnodata', '0',
            '-te',
            str(xmin),
            str(ymin),
            str(xmax),
            str(ymax), '-tap', '-tr',
            str(cn.Hansen_res),
            str(cn.Hansen_res), '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=160',
            '-co', 'BLOCKYSIZE=160', tcd_tile, tcd_rewindow
        ]
        uu.log_subprocess_output_full(cmd)

    else:

        uu.print_log("Canopy cover for {} already rewindowed.".format(tile_id))

    if not os.path.exists(pixel_area_rewindow):
        uu.print_log(
            "Pixel area for {} not rewindowed. Rewindowing...".format(tile_id))

        # Converts the pixel area tile to 160x160 pixel windows
        cmd = [
            'gdalwarp', '-co', 'COMPRESS=LZW', '-overwrite', '-dstnodata', '0',
            '-te',
            str(xmin),
            str(ymin),
            str(xmax),
            str(ymax), '-tap', '-tr',
            str(cn.Hansen_res),
            str(cn.Hansen_res), '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=160',
            '-co', 'BLOCKYSIZE=160', area_tile, pixel_area_rewindow
        ]
        uu.log_subprocess_output_full(cmd)

    else:

        uu.print_log("Pixel area for {} already rewindowed.".format(tile_id))

    if not os.path.exists(gain_rewindow):
        uu.print_log(
            "Hansen gain for {} not rewindowed. Rewindowing...".format(
                tile_id))

        # Converts the Hansen gain tile to 160x160 pixel windows
        cmd = [
            'gdalwarp', '-co', 'COMPRESS=LZW', '-overwrite', '-dstnodata', '0',
            '-te',
            str(xmin),
            str(ymin),
            str(xmax),
            str(ymax), '-tap', '-tr',
            str(cn.Hansen_res),
            str(cn.Hansen_res), '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=160',
            '-co', 'BLOCKYSIZE=160', gain_tile, gain_rewindow
        ]
        uu.log_subprocess_output_full(cmd)

    else:

        uu.print_log("Hansen gain for {} already rewindowed.".format(tile_id))

    if os.path.exists(mangrove_tile):
        uu.print_log(
            "Mangrove tile found for {}. Checking whether it needs rewindowing...".format(tile_id))

        if not os.path.exists(mangrove_tile_rewindow):

            # Converts the mangrove biomass tile to 160x160 pixel windows
            cmd = [
                'gdalwarp', '-co', 'COMPRESS=LZW', '-overwrite', '-dstnodata',
                '0', '-te',
                str(xmin),
                str(ymin),
                str(xmax),
                str(ymax), '-tap', '-tr',
                str(cn.Hansen_res),
                str(cn.Hansen_res), '-co', 'TILED=YES', '-co',
                'BLOCKXSIZE=160', '-co', 'BLOCKYSIZE=160', mangrove_tile,
                mangrove_tile_rewindow
            ]
            uu.log_subprocess_output_full(cmd)

        else:

            uu.print_log(
                "Mangrove tile for {} already rewindowed.".format(tile_id))
    else:

        uu.print_log("No mangrove tile found for {}".format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, '{}_rewindow'.format(tile_type))
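
# Hypothetical usage sketch (added for clarity, not part of the original example): rewindow() takes a
# single tile raster name, so it can be mapped over a list of tiles with multiprocessing, following the
# same pattern used elsewhere in these examples:
#
#   tile_list = uu.tile_list_spot_machine(".", ".tif")
#   pool = multiprocessing.Pool(int(cn.count / 2))
#   pool.map(rewindow, tile_list)
#   pool.close()
#   pool.join()
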
def mp_tile_statistics(sensit_type, tile_id_list):

    os.chdir(cn.docker_base_dir)

    # The column names for the tile summary statistics.
    # If the statistics calculations are changed in tile_statistics.py, the list here needs to be changed, too.
    headers = [
        'tile_id', 'tile_type', 'tile_name', 'pixel_count', 'mean', 'median',
        'percentile10', 'percentile25', 'percentile75', 'percentile90', 'min',
        'max', 'sum'
    ]
    header_no_brackets = ', '.join(headers)

    tile_stats_txt = '{0}_v{1}_{2}_{3}.csv'.format(cn.tile_stats_pattern,
                                                   cn.version, sensit_type,
                                                   uu.date_time_today)

    # Creates the output text file with the column names
    with open(tile_stats_txt, 'w+') as f:
        f.write(header_no_brackets + '\r\n')

    uu.print_log(tile_id_list)

    # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, 'std', tile_id_list)

    # For downloading all tiles in selected folders
    download_dict = {
        # cn.WHRC_biomass_2000_unmasked_dir: [cn.pattern_WHRC_biomass_2000_unmasked],
        # cn.JPL_processed_dir: [cn.pattern_JPL_unmasked_processed],
        # cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
        # cn.cont_eco_dir: [cn.pattern_cont_eco_processed],

        # cn.model_extent_dir: [cn.pattern_model_extent], # 15 = 370 GB peak
        #
        # # Mangrove removals
        # cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove], # 15 = 640 GB peak
        # cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove], # 15 = 640 GB peak
        cn.stdev_annual_gain_AGB_mangrove_dir:
        [cn.pattern_stdev_annual_gain_AGB_mangrove],  # 15 = 640 GB peak
        #
        # # European forest removals
        # cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe], # 15 = 630 GB peak
        # cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe], # 15 = 630 GB peak
        #
        # # Planted forest removals
        # cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked], # 15 = 600 GB peak
        # cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked], # 15 = 360 GB peak
        # cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked], # 15 = 600 GB peak
        #
        # # US forest removals
        # cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed], # 15 = 350 GB peak
        # cn.FIA_forest_group_processed_dir: [cn.pattern_FIA_forest_group_processed], # 15 = 340 GB peak
        # cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US], # 15 = 350 GB peak
        # cn.annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US], # 15 = 620 GB peak
        # # cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US],
        #
        # # Young natural forest removals
        # cn.annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_annual_gain_AGC_natrl_forest_young], # 15 = 710 GB peak
        # cn.stdev_annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young], # 15 = 700 GB peak
        #
        # # IPCC defaults forest removals
        # cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC], # 15 = 330 GB peak
        # cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults], # 15 = 620 GB peak
        # cn.annual_gain_BGB_IPCC_defaults_dir: [cn.pattern_annual_gain_BGB_IPCC_defaults], # 15 = 620 GB peak
        # cn.stdev_annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults], # 15 = 620 GB peak
        #
        # # Annual removals from all forest types
        # cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], # 15 = XXX GB peak
        # cn.annual_gain_BGC_all_types_dir: [cn.pattern_annual_gain_BGC_all_types], # 15 > 550 GB peak
        # cn.annual_gain_AGC_BGC_all_types_dir: [cn.pattern_annual_gain_AGC_BGC_all_types], # 15 = XXX GB peak
        # cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], # 15 = XXX GB peak
        cn.stdev_annual_gain_AGC_all_types_dir:
        [cn.pattern_stdev_annual_gain_AGC_all_types],  # 15 = XXX GB peak

        # # Gain year count
        # cn.gain_year_count_dir: [cn.pattern_gain_year_count], # 15 = XXX GB peak

        # # Gross removals from all forest types
        # cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types], # 15 = 630 GB peak
        # cn.cumul_gain_BGCO2_all_types_dir: [cn.pattern_cumul_gain_BGCO2_all_types], # 15 = XXX GB peak
        # cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], # 15 = XXX GB peak

        # # Carbon pool inputs
        # cn.elevation_processed_dir: [cn.pattern_elevation],
        # cn.precip_processed_dir: [cn.pattern_precip],
        # cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        # cn.drivers_processed_dir: [cn.pattern_drivers],
        # cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        # cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], # 15 = 430 GB peak
        cn.stdev_soil_C_full_extent_2000_dir:
        [cn.pattern_stdev_soil_C_full_extent],

        # Carbon pools in emissions year
        # cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year], # 14 = 590 GB peak
        # cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year], # 14 = > 520 GB peak
        # cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000], # 14 > 560 GB peak (error memory when using 15, so switched to 14)
        # cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000], # 14 = XXX GB peak
        # cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000], # 14 = XXX GB peak
        # cn.total_C_emis_year_dir: [cn.pattern_total_C_emis_year], # 14 = XXX GB peak

        # # Carbon pools in 2000
        # cn.AGC_2000_dir: [cn.pattern_AGC_2000],
        # cn.BGC_2000_dir: [cn.pattern_BGC_2000],
        # cn.deadwood_2000_dir: [cn.pattern_deadwood_2000],
        # cn.litter_2000_dir: [cn.pattern_litter_2000],
        # cn.total_C_2000_dir: [cn.pattern_total_C_2000],

        # # Net flux
        # cn.net_flux_dir: [cn.pattern_net_flux],  # 14 = XXX GB peak
        #
        # # Gross emissions from biomass and soil
        # cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_co2_only_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_non_co2_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_commod_biomass_soil_dir: [cn.pattern_gross_emis_commod_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_shifting_ag_biomass_soil_dir: [cn.pattern_gross_emis_shifting_ag_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_forestry_biomass_soil_dir: [cn.pattern_gross_emis_forestry_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_wildfire_biomass_soil_dir: [cn.pattern_gross_emis_wildfire_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_urban_biomass_soil_dir: [cn.pattern_gross_emis_urban_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_no_driver_biomass_soil_dir: [cn.pattern_gross_emis_no_driver_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_nodes_biomass_soil_dir: [cn.pattern_gross_emis_nodes_biomass_soil], # 14 = XXX GB peak

        # Gross emissions from soil only
        cn.gross_emis_all_gases_all_drivers_soil_only_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_soil_only],
        cn.gross_emis_co2_only_all_drivers_soil_only_dir:
        [cn.pattern_gross_emis_co2_only_all_drivers_soil_only],
        cn.gross_emis_non_co2_all_drivers_soil_only_dir:
        [cn.pattern_gross_emis_non_co2_all_drivers_soil_only],
        cn.gross_emis_commod_soil_only_dir:
        [cn.pattern_gross_emis_commod_soil_only],
        cn.gross_emis_shifting_ag_soil_only_dir:
        [cn.pattern_gross_emis_shifting_ag_soil_only]
        # cn.gross_emis_forestry_soil_only_dir: [cn.pattern_gross_emis_forestry_soil_only],
        # cn.gross_emis_wildfire_soil_only_dir: [cn.pattern_gross_emis_wildfire_soil_only],
        # cn.gross_emis_urban_soil_only_dir: [cn.pattern_gross_emis_urban_soil_only],
        # cn.gross_emis_no_driver_soil_only_dir: [cn.pattern_gross_emis_no_driver_soil_only],
        # cn.gross_emis_nodes_soil_only_dir: [cn.pattern_gross_emis_nodes_soil_only]
    }

    # Iterates through each set of tiles and gets statistics of it
    for key, values in download_dict.items():

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

        # List of all the tiles on the spot machine to be summarized. Excludes pixel area tiles and tiles created by gdal_calc,
        # in case this script was already run on this spot machine and created gdal_calc output.
        tile_list = uu.tile_list_spot_machine(".", ".tif")
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [
            i for i in tile_list
            if not ('hanson_2013' in i or 'value_per_pixel' in i)
        ]
        uu.print_log(tile_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_list))) + "\n")

        # For multiprocessor use.
        processes = 14
        uu.print_log('Tile statistics max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(tile_statistics.create_tile_statistics,
                    sensit_type=sensit_type,
                    tile_stats_txt=tile_stats_txt), tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #     tile_statistics.create_tile_statistics(tile, sensit_type)

        # Copies the text file to the tile statistics folder on s3
        cmd = ['aws', 's3', 'cp', tile_stats_txt, cn.tile_stats_dir]
        uu.log_subprocess_output_full(cmd)

        # Spot machine can't store all the tiles, so this cleans it up
        uu.print_log("Deleting tiles...")
        for tile in tile_list:
            os.remove(tile)
            tile_short = tile[:-4]
            outname = '{0}_value_per_pixel.tif'.format(tile_short)
            os.remove(outname)
            uu.print_log("  {} deleted".format(tile))

    uu.print_log("Script complete. All tiles analyzed!")
def mp_create_inputs_for_C_pools(tile_id_list, run_date=None, no_upload=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.bor_tem_trop_processed_dir, cn.elevation_processed_dir,
        cn.precip_processed_dir
    ]
    output_pattern_list = [
        cn.pattern_bor_tem_trop_processed, cn.pattern_elevation,
        cn.pattern_precip
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads two of the raw input files for creating carbon emitted_pools
    input_files = [cn.fao_ecozone_raw_dir, cn.precip_raw_dir]

    for input in input_files:
        uu.s3_file_download('{}'.format(input), cn.docker_base_dir,
                            sensit_type)

    uu.print_log(
        "Unzipping boreal/temperate/tropical file (from FAO ecozones)")
    cmd = [
        'unzip', '{}'.format(cn.pattern_fao_ecozone_raw), '-d',
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Copying elevation (srtm) files")
    uu.s3_folder_download(cn.srtm_raw_dir, './srtm', sensit_type)

    uu.print_log("Making elevation (srtm) vrt")
    check_call(
        'gdalbuildvrt srtm.vrt srtm/*.tif', shell=True
    )  # I don't know how to route this command's output to the log, so just leaving it as is

    # Worked with count/3 on an r4.16xlarge (140 out of 480 GB used). I think it should be fine with count/2 but didn't try it.
    processes = int(cn.count / 2)
    uu.print_log('Inputs for C emitted_pools max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(create_inputs_for_C_pools.create_input_files,
                no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_inputs_for_C_pools.create_input_files(tile_id, no_upload)

    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.print_log("Uploading output files")
        for i in range(0, len(output_dir_list)):
            uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def hansen_burnyear(tile_id, no_upload):

    # Start time
    start = datetime.datetime.now()

    uu.print_log("Processing", tile_id)

    # The tiles that are used. out_tile_no_tag is the output before metadata tags are added. out_tile is the output
    # once metadata tags have been added.
    out_tile_no_tag = '{0}_{1}_no_tag.tif'.format(tile_id,
                                                  cn.pattern_burn_year)
    out_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_burn_year)
    loss = '{0}.tif'.format(tile_id)

    # Does not continue processing tile if no loss (because there will not be any output)
    if not os.path.exists(loss):
        uu.print_log("No loss tile for", tile_id)
        return
    else:
        uu.print_log("Loss tile exists for", tile_id)

    # Downloads the burned area tiles for each year
    include = 'ba_*_{}.tif'.format(tile_id)
    burn_tiles_dir = 'burn_tiles'
    if not os.path.exists(burn_tiles_dir):
        os.mkdir(burn_tiles_dir)
    cmd = [
        'aws', 's3', 'cp', cn.burn_year_warped_to_Hansen_dir, burn_tiles_dir,
        '--recursive', '--exclude', "*", '--include', include
    ]
    uu.log_subprocess_output_full(cmd)

    # For each year tile, converts to array and stacks them
    array_list = []
    ba_tifs = glob.glob(burn_tiles_dir + '/*{}*'.format(tile_id))

    # Skips the tile if it has no burned area data for any year
    uu.print_log("There are {0} tiles to stack for {1}".format(
        len(ba_tifs), tile_id))
    if len(ba_tifs) == 0:
        uu.print_log(
            "Skipping {} because there are no tiles to stack".format(tile_id))
        return

    # NOTE: All of this could pretty easily be done in rasterio. However, Sam's use of GDAL for this still works fine,
    # so I've left it using GDAL.

    for ba_tif in ba_tifs:
        uu.print_log("Creating array with {}".format(ba_tif))
        array = utilities.raster_to_array(ba_tif)
        array_list.append(array)

    # Stacks arrays from each year
    uu.print_log("Stacking arrays for", tile_id)
    stacked_year_array = utilities.stack_arrays(array_list)

    # Converts Hansen tile to array
    uu.print_log("Creating loss year array for", tile_id)
    loss_array = utilities.raster_to_array(loss)

    # Determines what year to assign burned area
    lossarray_min1 = np.subtract(loss_array, 1)

    stack_con = (stacked_year_array >= lossarray_min1) & (stacked_year_array <=
                                                          loss_array)
    stack_con2 = stack_con * stacked_year_array
    lossyear_burn_array = stack_con2.max(0)
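
    # Worked example (added for clarity; assumes loss and burn years are coded 1-20 for 2001-2020):
    # if a pixel's loss year is 5 and its stacked burned-area values across years are [0, 4, 7], then
    # lossarray_min1 is 4, stack_con keeps only the 4 (the sole value within [loss year - 1, loss year] = [4, 5]),
    # stack_con2 becomes [0, 4, 0], and .max(0) assigns burn year 4 to that pixel.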

    utilities.array_to_raster_simple(lossyear_burn_array, out_tile_no_tag,
                                     loss)

    # Only copies to s3 if the tile has data
    uu.print_log("Checking if {} contains any data...".format(tile_id))
    empty = uu.check_for_data(out_tile_no_tag)

    # Checks output for data. There could be burned area but none of it coincides with tree cover loss,
    # so this is the final check for whether there is any data.
    if empty:
        uu.print_log("  No data found. Not copying {}.".format(tile_id))

        # Without this, the untagged version is counted and eventually copied to s3 if it has data in it
        os.remove(out_tile_no_tag)

        return

    else:
        uu.print_log(
            "  Data found in {}. Adding metadata tags...".format(tile_id))

        ### Thomas suggested these on 8/19/2020 but they didn't work. The first one wrote the tags but erased all the
        ### data in the tiles (everything became 0 according to gdalinfo). The second one had some other error.
        # with rasterio.open(out_tile_no_tag, 'r') as src:
        #
        #     profile = src.profile
        #
        # with rasterio.open(out_tile_no_tag, 'w', **profile) as dst:
        #
        #     dst.update_tags(units='year (2001, 2002, 2003...)',
        #                     source='MODIS collection 6 burned area',
        #                     extent='global')
        #
        # with rasterio.open(out_tile_no_tag, 'w+') as src:
        #
        #     dst.update_tags(units='year (2001, 2002, 2003...)',
        #                     source='MODIS collection 6 burned area',
        #                     extent='global')

        # All of the below is to add metadata tags to the output burn year masks.
        # For some reason, just doing what's at https://rasterio.readthedocs.io/en/latest/topics/tags.html
        # results in the data getting removed.
        # I found it necessary to copy the desired output and read its windows into a new copy of the file, to which the
        # metadata tags are added. I'm sure there's an easier way to do this but I couldn't figure out how.
        # I know it's very convoluted but I really couldn't figure out how to add the tags without erasing the data.

        copyfile(out_tile_no_tag, out_tile)

        with rasterio.open(out_tile_no_tag) as out_tile_no_tag_src:

            # Grabs metadata about the tif, like its location/projection/cellsize
            kwargs = out_tile_no_tag_src.meta  #### Use profile instead

            # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory
            windows = out_tile_no_tag_src.block_windows(1)

            # Updates kwargs for the output dataset
            kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0)

            out_tile_tagged = rasterio.open(out_tile, 'w', **kwargs)

            # Adds metadata tags to the output raster
            uu.add_rasterio_tags(out_tile_tagged, 'std')
            out_tile_tagged.update_tags(units='year (2001, 2002, 2003...)')
            out_tile_tagged.update_tags(
                source=
                'MODIS collection 6 burned area, https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.3.pdf'
            )
            out_tile_tagged.update_tags(extent='global')

            # Iterates across the windows (1 pixel strips) of the input tile
            for idx, window in windows:
                in_window = out_tile_no_tag_src.read(1, window=window)

                # Writes the output window to the output
                out_tile_tagged.write_band(1, in_window, window=window)

        # Without this, the untagged version is counted and eventually copied to s3 if it has data in it
        os.remove(out_tile_no_tag)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_burn_year, no_upload)
def mp_prep_other_inputs(tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)
    sensit_type='std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             set3=cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # List of output directories and output file name patterns
    output_dir_list = [cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
                       cn.drivers_processed_dir, cn.ifl_primary_processed_dir,
                       cn.annual_gain_AGC_natrl_forest_young_dir,
                       cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
                       cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.FIA_forest_group_processed_dir,
                       cn.age_cat_natrl_forest_US_dir,
                       cn.FIA_regions_processed_dir]
    output_pattern_list = [cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
                           cn.pattern_drivers, cn.pattern_ifl_primary,
                           cn.pattern_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_FIA_forest_group_processed,
                           cn.pattern_age_cat_natrl_forest_US,
                           cn.pattern_FIA_regions_processed]


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':

        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)


    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest
    uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.drivers_raw_dir, '{}.zip'.format(cn.pattern_drivers_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type)
    # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster.
    # Thus, using wget instead.
    cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)
    cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    uu.log_subprocess_output_full(cmd)

    uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Unzipping pre-2000 plantations...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping drivers...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_drivers_raw)]
    uu.log_subprocess_output_full(cmd)


    # Creates tree cover loss driver tiles
    source_raster = '{}.tif'.format(cn.pattern_drivers_raw)
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates young natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates young natural forest removal rate standard deviation tiles
    source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates pre-2000 oil palm plantation tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 100 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list)
    pool.close()
    pool.join()


    # Creates climate zone tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_climate_zone_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 60  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest standard deviation of removal rate tiles
    source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 32  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    uu.print_log("Creating vrt of humid tropial primary forest...")
    primary_vrt = 'primary_2001.vrt'
    os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    uu.print_log("  Humid tropical primary forest vrt created")

    # Creates primary forest tiles
    source_raster = primary_vrt
    out_pattern = 'primary_2001'
    dt = 'Byte'
    if cn.count == 96:
        processes = 45  # 45 processors = 650 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a combined IFL/primary forest raster
    # Uses very little memory since it's just file renaming
    if cn.count == 96:
        processes = 60  # 60 processors = 10 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list)
    pool.close()
    pool.join()


    # Creates forest age category tiles for US forests
    source_raster = cn.name_age_cat_natrl_forest_US_raw
    out_pattern = cn.pattern_age_cat_natrl_forest_US
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates forest groups for US forests
    source_raster = cn.name_FIA_forest_group_raw
    out_pattern = cn.pattern_FIA_forest_group_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 32 processors = 25 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates FIA regions for US forests
    source_raster = cn.name_FIA_regions_raw
    out_pattern = cn.pattern_FIA_regions_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    for output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData)
        # should be. These NaN values register as data when the check_and_delete_if_empty function runs, so the tiles aren't
        # deleted even if they have no real data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't
        # have this problem. So I'm forcing the young forest rate and stdev tiles to have their emptiness checked by the gdalinfo version.
        if output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        if cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        elif cn.count <= 2: # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        uu.print_log('\n')


    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Beispiel #26
def create_peat_mask_tiles(tile_id):

    # Start time
    start = datetime.datetime.now()

    uu.print_log("Getting bounding coordinates for tile", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)
    uu.print_log("  ymax:", ymax, "; ymin:", ymin, "; xmax", xmax, "; xmin:",
                 xmin)

    out_tile_no_tag = '{0}_{1}_no_tag.tif'.format(tile_id,
                                                  cn.pattern_peat_mask)
    out_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_peat_mask)

    # If the tile is outside the band covered by the CIFOR peat raster, SoilGrids250m is used
    if ymax > 40 or ymax < -60:

        uu.print_log(
            "{} is outside CIFOR band. Using SoilGrids250m organic soil mask..."
            .format(tile_id))

        out_intermediate = '{0}_{1}_intermediate.tif'.format(
            tile_id, cn.pattern_peat_mask)

        # Cuts the SoilGrids250m global raster to the focal tile
        uu.warp_to_Hansen('most_likely_soil_class.vrt', out_intermediate, xmin,
                          ymin, xmax, ymax, 'Byte')

        # Removes all non-histosol sub-groups from the SoilGrids raster.
        # Ideally, this would be done once on the entire SoilGrids raster in the main function but I didn't think of that.
        # Code 14 is the histosol subgroup in SoilGrids250 (https://files.isric.org/soilgrids/latest/data/wrb/MostProbable.qml).
        calc = '--calc=(A==14)'
        peat_mask_out_filearg = '--outfile={}'.format(out_tile_no_tag)
        cmd = [
            'gdal_calc.py', '-A', out_intermediate, calc,
            peat_mask_out_filearg, '--NoDataValue=0', '--overwrite', '--co',
            'COMPRESS=LZW', '--type=Byte', '--quiet'
        ]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("{} created.".format(tile_id))

    # If the tile is inside the band covered by CIFOR, CIFOR is used (and Jukka in the tiles where it occurs).
    # For some reason, the CIFOR raster has a color scheme that makes it symbolized from 0 to 255. This carries
    # over to the output file but that seems like a problem with the output symbology, not the values.
    # gdalinfo shows that the min and max values are 1, as they should be, and it visualizes correctly in ArcMap.
    else:

        uu.print_log(
            "{} is inside CIFOR band. Using CIFOR/Jukka combination...".format(
                tile_id))

        # Combines CIFOR and Jukka (if it occurs there)
        cmd = [
            'gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=LZW', '-tr',
            '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-tap',
            '-te',
            str(xmin),
            str(ymin),
            str(xmax),
            str(ymax), '-dstnodata', '0', '-overwrite',
            '{}'.format(cn.cifor_peat_file), 'jukka_peat.tif', out_tile_no_tag
        ]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("{} created.".format(tile_id))

    # All of the below is to add metadata tags to the output peat masks.
    # For some reason, just doing what's at https://rasterio.readthedocs.io/en/latest/topics/tags.html
    # results in the data getting removed.
    # I found it necessary to copy the peat mask and read its windows into a new copy of the file, to which the
    # metadata tags are added. I'm sure there's an easier way to do this but I couldn't figure out how.
    # I know it's very convoluted but I really couldn't figure out how to add the tags without erasing the data.
    # To make it even stranger, adding the tags before the gdal processing seemed to work fine for the non-tropical
    # (SoilGrids) tiles but not for the tropical (CIFOR/Jukka) tiles (i.e. data didn't disappear in the non-tropical
    # tiles if I added the tags before the GDAL steps but the tropical data did disappear).

    copyfile(out_tile_no_tag, out_tile)

    uu.print_log("Adding metadata tags to", tile_id)
    # Opens the output tile, only so that metadata tags can be added
    # Based on https://rasterio.readthedocs.io/en/latest/topics/tags.html
    with rasterio.open(out_tile_no_tag) as out_tile_no_tag_src:

        # Grabs metadata about the tif, like its location/projection/cellsize
        kwargs = out_tile_no_tag_src.meta

        # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory
        windows = out_tile_no_tag_src.block_windows(1)

        # Updates kwargs for the output dataset
        kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0)

        out_tile_tagged = rasterio.open(out_tile, 'w', **kwargs)

        # Adds metadata tags to the output raster
        uu.add_rasterio_tags(out_tile_tagged, 'std')
        out_tile_tagged.update_tags(key='1 = peat. 0 = not peat.')
        out_tile_tagged.update_tags(
            source=
            'Jukka for IDN and MYS; CIFOR for rest of tropics; SoilGrids250 (May 2020) most likely histosol for outside tropics'
        )
        out_tile_tagged.update_tags(extent='Full extent of input datasets')

        # Iterates across the windows (1 pixel strips) of the input tile
        for idx, window in windows:

            peat_mask_window = out_tile_no_tag_src.read(1, window=window)

            # Writes the output window to the output
            out_tile_tagged.write_band(1, peat_mask_window, window=window)

    # Otherwise, the untagged version is counted and eventually copied to s3 if it has data in it
    os.remove(out_tile_no_tag)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_peat_mask)
def mp_burn_year(tile_id_list, run_date = None, no_upload = None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.burn_year_dir]
    output_pattern_list = [cn.pattern_burn_year]

    # A date can optionally be provided.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    global_grid_hv = ["h00v08", "h00v09", "h00v10", "h01v07", "h01v08", "h01v09", "h01v10", "h01v11", "h02v06",
                      "h02v08", "h02v09", "h02v10", "h02v11", "h03v06", "h03v07", "h03v09", "h03v10", "h03v11",
                      "h04v09", "h04v10", "h04v11", "h05v10", "h05v11", "h05v13", "h06v03", "h06v11", "h07v03",
                      "h07v05", "h07v06", "h07v07", "h08v03", "h08v04", "h08v05", "h08v06", "h08v07", "h08v08",
                      "h08v09", "h08v11", "h09v02", "h09v03", "h09v04", "h09v05", "h09v06", "h09v07", "h09v08",
                      "h09v09", "h10v02", "h10v03", "h10v04", "h10v05", "h10v06", "h10v07", "h10v08", "h10v09",
                      "h10v10", "h10v11", "h11v02", "h11v03", "h11v04", "h11v05", "h11v06", "h11v07", "h11v08",
                      "h11v09", "h11v10", "h11v11", "h11v12", "h12v02", "h12v03", "h12v04", "h12v05", "h12v07",
                      "h12v08", "h12v09", "h12v10", "h12v11", "h12v12", "h12v13", "h13v02", "h13v03", "h13v04",
                      "h13v08", "h13v09", "h13v10", "h13v11", "h13v12", "h13v13", "h13v14", "h14v02", "h14v03",
                      "h14v04", "h14v09", "h14v10", "h14v11", "h14v14", "h15v02", "h15v03", "h15v05", "h15v07",
                      "h15v11", "h16v02", "h16v05", "h16v06", "h16v07", "h16v08", "h16v09", "h17v02", "h17v03",
                      "h17v04", "h17v05", "h17v06", "h17v07", "h17v08", "h17v10", "h17v12", "h17v13", "h18v02",
                      "h18v03", "h18v04", "h18v05", "h18v06", "h18v07", "h18v08", "h18v09", "h19v02", "h19v03",
                      "h19v04", "h19v05", "h19v06", "h19v07", "h19v08", "h19v09", "h19v10", "h19v11", "h19v12",
                      "h20v02", "h20v03", "h20v04", "h20v05", "h20v06", "h20v07", "h20v08", "h20v09", "h20v10",
                      "h20v11", "h20v12", "h20v13", "h21v02", "h21v03", "h21v04", "h21v05", "h21v06", "h21v07",
                      "h21v08", "h21v09", "h21v10", "h21v11", "h21v13", "h22v02", "h22v03", "h22v04", "h22v05",
                      "h22v06", "h22v07", "h22v08", "h22v09", "h22v10", "h22v11", "h22v13", "h23v02", "h23v03",
                      "h23v04", "h23v05", "h23v06", "h23v07", "h23v08", "h23v09", "h23v10", "h23v11", "h24v02",
                      "h24v03", "h24v04", "h24v05", "h24v06", "h24v07", "h24v12", "h25v02", "h25v03", "h25v04",
                      "h25v05", "h25v06", "h25v07", "h25v08", "h25v09", "h26v02", "h26v03", "h26v04", "h26v05",
                      "h26v06", "h26v07", "h26v08", "h27v03", "h27v04", "h27v05", "h27v06", "h27v07", "h27v08",
                      "h27v09", "h27v10", "h27v11", "h27v12", "h28v03", "h28v04", "h28v05", "h28v06", "h28v07",
                      "h28v08", "h28v09", "h28v10", "h28v11", "h28v12", "h28v13", "h29v03", "h29v05", "h29v06",
                      "h29v07", "h29v08", "h29v09", "h29v10", "h29v11", "h29v12", "h29v13", "h30v06", "h30v07",
                      "h30v08", "h30v09", "h30v10", "h30v11", "h30v12", "h30v13", "h31v06", "h31v07", "h31v08",
                      "h31v09", "h31v10", "h31v11", "h31v12", "h31v13", "h32v07", "h32v08", "h32v09", "h32v10",
                      "h32v11", "h32v12", "h33v07", "h33v08", "h33v09", "h33v10", "h33v11", "h34v07", "h34v08",
                      "h34v09", "h34v10", "h35v08", "h35v09", "h35v10"]


    # Step 1: download hdf files for relevant year(s) from sftp site.
    # This only needs to be done for the most recent year of data.

    '''
    Downloading the hdf files from the sftp burned area site is done outside the script in the sftp shell on the command line.
    This will download all the 2020 hdfs to the spot machine. It will take a few minutes before the first
    hdf is downloaded but then it should go quickly.
    Change 2020 to another year for future years of downloads.
    https://modis-fire.umd.edu/files/MODIS_C6_BA_User_Guide_1.3.pdf, page 24, section 4.1.3

    sftp [email protected]
    [For password] burnt
    cd data/MODIS/C6/MCD64A1/HDF
    ls [to check that it's the folder with all the tile folders]
    get h??v??/MCD64A1.A2020*
    bye    // exits the sftp shell
    '''
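
    # A minimal, untested sketch of how the manual sftp session above could be scripted with paramiko.
    # It is not part of this workflow; the host and username are placeholders, and the remote path and
    # password come from the notes above.
    # import paramiko
    # transport = paramiko.Transport(('<burned-area-sftp-host>', 22))  # placeholder host
    # transport.connect(username='<sftp-user>', password='burnt')      # placeholder user; password per notes above
    # sftp = paramiko.SFTPClient.from_transport(transport)
    # sftp.chdir('data/MODIS/C6/MCD64A1/HDF')
    # for hv_folder in sftp.listdir():
    #     for hdf in sftp.listdir(hv_folder):
    #         if hdf.startswith('MCD64A1.A2020'):
    #             sftp.get('{0}/{1}'.format(hv_folder, hdf), hdf)
    # sftp.close()
    # transport.close()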

    # Uploads the latest year of raw burn area hdfs to s3.
    # All hdfs go in this folder
    cmd = ['aws', 's3', 'cp', '{0}/burn_date/'.format(cn.docker_app), cn.burn_year_hdf_raw_dir, '--recursive', '--exclude', '*', '--include', '*hdf']
    uu.log_subprocess_output_full(cmd)


    # Step 2:
    # Makes burned area rasters for each year for each MODIS horizontal-vertical tile.
    # This only needs to be done for the most recent year of data (set in stack_ba_hv).
    uu.print_log("Stacking hdf into MODIS burned area tifs by year and MODIS hv tile...")

    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count - 10)
    pool.map(stack_ba_hv.stack_ba_hv, global_grid_hv)
    pool.close()
    pool.join()

    # # For single processor use
    # for hv_tile in global_grid_hv:
    #     stack_ba_hv.stack_ba_hv(hv_tile)


    # Step 3:
    # Creates 10x10 degree WGS84 tiles of burn year at 0.00025 degree resolution.
    # Downloads all MODIS hv tiles from s3,
    # makes a mosaic for each year, and warps to Hansen extent.
    # Range is inclusive at lower end and exclusive at upper end (e.g., 2001, 2021 goes from 2001 to 2020).
    # This only needs to be done for the most recent year of data.
    # NOTE: The first time I ran this for the 2020 TCL update, I got an error about uploading the log to s3
    # after most of the tiles were processed. I didn't know why it happened, so I reran the step and it went fine.
    for year in range(2020, 2021):

        uu.print_log("Processing", year)

        # Downloads all hv tifs for this year
        include = '{0}_*.tif'.format(year)
        year_tifs_folder = "{}_year_tifs".format(year)
        utilities.makedir(year_tifs_folder)

        uu.print_log("Downloading MODIS burn date files from s3...")

        cmd = ['aws', 's3', 'cp', cn.burn_year_stacked_hv_tif_dir, year_tifs_folder]
        cmd += ['--recursive', '--exclude', "*", '--include', include]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Creating vrt of MODIS files...")

        vrt_name = "global_vrt_{}.vrt".format(year)

        # Builds list of vrt files
        with open('vrt_files.txt', 'w') as vrt_files:
            vrt_tifs = glob.glob(year_tifs_folder + "/*.tif")
            for tif in vrt_tifs:
                vrt_files.write(tif + "\n")

        # Creates vrt with wgs84 MODIS tiles.
        cmd = ['gdalbuildvrt', '-input_file_list', 'vrt_files.txt', vrt_name]
        uu.log_subprocess_output_full(cmd)

        uu.print_log("Reprojecting vrt...")

        # Builds a new vrt and virtually reprojects it
        # This reprojection could be done as part of the clip_year_tiles function but Sam had it out here like this and
        # so I'm leaving it like that.
        vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)
        cmd = ['gdalwarp', '-of', 'VRT', '-t_srs', "EPSG:4326", '-tap', '-tr', '.00025', '.00025', '-overwrite',
               vrt_name, vrt_wgs84]
        uu.log_subprocess_output_full(cmd)

        # Creates a list of lists, with year and tile id to send to multi processor
        tile_year_list = []
        for tile_id in tile_id_list:
            tile_year_list.append([tile_id, year])

        # Given a list of [tile_id, year] pairs (e.g., ['00N_000E', 2017]) and the reprojected global VRT of burn data
        # (pixels representing burned or not burned), this step clips the global VRT to each 10x10 degree tile and
        # changes the pixel value to the year in which the pixel burned. Each output tile therefore contains burn
        # year values and NoData (a rough commented sketch follows the multiprocessing call below).
        count = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=count-5)
        pool.map(partial(clip_year_tiles.clip_year_tiles, no_upload=no_upload), tile_year_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_year in tile_year_list:
        #     clip_year_tiles.clip_year_tiles(tile_year, no_upload)
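
        # A rough, hypothetical sketch of the kind of per-tile work clip_year_tiles does (the real logic lives in
        # clip_year_tiles.py and may differ; the file names below are illustrative assumptions):
        # tile_id, year = tile_year
        # xmin, ymin, xmax, ymax = uu.coords(tile_id)
        # vrt_wgs84 = 'global_vrt_{}_wgs84.vrt'.format(year)  # same naming as above
        # clipped = '{0}_burn_clip_{1}.tif'.format(tile_id, year)
        # burn_year_tile = '{0}_burnyear_{1}.tif'.format(tile_id, year)
        # # Clips the reprojected global VRT to the 10x10 degree tile extent
        # uu.log_subprocess_output_full(['gdalwarp', '-te', str(xmin), str(ymin), str(xmax), str(ymax),
        #                                '-tr', '.00025', '.00025', '-co', 'COMPRESS=LZW', vrt_wgs84, clipped])
        # # Reclassifies burned pixels (any value > 0) to the year in which they burned
        # uu.log_subprocess_output_full(['gdal_calc.py', '-A', clipped, '--calc=(A>0)*{}'.format(year),
        #                                '--outfile={}'.format(burn_year_tile), '--NoDataValue=0',
        #                                '--type', 'UInt16', '--co', 'COMPRESS=LZW', '--quiet'])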

        uu.print_log("Processing for {} done. Moving to next year.".format(year))

    # Step 4:
    # Creates a single Hansen tile covering all years that represents where burning coincided with tree cover loss
    # or preceded TCL by one year.
    # This needs to be done on all years each time burned area is updated.

    # Downloads the loss tiles
    uu.s3_folder_download(cn.loss_dir, '.', 'std', cn.pattern_loss)

    uu.print_log("Extracting burn year data that coincides with tree cover loss...")

    # Downloads the 10x10 deg burn year tiles (1 for each year in which there was burned area), stacks them, and
    # evaluates them to return the burn year value on Hansen loss pixels that burned within 1 year of the loss date
    # (illustrated in the commented sketch below)
    if cn.count == 96:
        processes = 5
        # 6 processors = >750 GB peak (1 processor can use up to 130 GB of memory)
    else:
        processes = 1
    pool = multiprocessing.Pool(processes)
    pool.map(partial(hansen_burnyear_final.hansen_burnyear, no_upload=no_upload), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     hansen_burnyear_final.hansen_burnyear(tile_id, no_upload)
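
    # A hypothetical numpy illustration of the per-pixel rule described above (the burn year is kept only where the
    # pixel burned in the year of, or the year before, tree cover loss). The real logic is in
    # hansen_burnyear_final.hansen_burnyear; these array names and values are made up.
    # import numpy as np
    # loss_year = np.array([3, 5, 0])   # Hansen loss year (3 = 2003), 0 = no loss
    # burn_year = np.array([2, 9, 4])   # most recent burn year (2 = 2002), 0 = never burned
    # burn_with_loss = np.where((loss_year > 0) & (burn_year > 0) &
    #                           (burn_year >= loss_year - 1) & (burn_year <= loss_year),
    #                           burn_year, 0)
    # # burn_with_loss -> array([2, 0, 0]): only the first pixel burned within a year of its loss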


    # If no_upload flag is not activated, output is uploaded
    if not no_upload:

        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
def mp_peatland_processing(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.peat_mask_dir]
    output_pattern_list = [cn.pattern_peat_mask]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Download SoilGrids250 most probable soil class rasters.
    # There are 459 tiles and it takes about 20 minutes to download them
    cmd = [
        'wget', '--recursive', '--no-parent', '-nH', '--cut-dirs=7',
        '--accept', '*.geotiff', '{}'.format(cn.soilgrids250_peat_url)
    ]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Making SoilGrids250 most likely soil class vrt...")
    check_call('gdalbuildvrt most_likely_soil_class.vrt *{}*'.format(
        cn.pattern_soilgrids_most_likely_class),
               shell=True)
    uu.print_log("Done making SoilGrids250 most likely soil class vrt")

    # Downloads peat layers.
    # sensit_type is not a parameter of this function, so the standard model type is assumed for the downloads.
    sensit_type = 'std'
    uu.s3_file_download(
        os.path.join(cn.peat_unprocessed_dir, cn.cifor_peat_file),
        cn.docker_base_dir, sensit_type)
    uu.s3_file_download(
        os.path.join(cn.peat_unprocessed_dir, cn.jukka_peat_zip),
        cn.docker_base_dir, sensit_type)

    # Unzips the Jukka peat shapefile (IDN and MYS)
    cmd = ['unzip', '-o', '-j', cn.jukka_peat_zip]
    uu.log_subprocess_output_full(cmd)

    jukka_tif = 'jukka_peat.tif'

    # Converts the Jukka peat shapefile to a raster
    uu.print_log('Rasterizing jukka peat...')
    cmd = [
        'gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=LZW', '-tr',
        '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-tap', '-ot',
        'Byte', '-a_nodata', '0', cn.jukka_peat_shp, jukka_tif
    ]
    uu.log_subprocess_output_full(cmd)
    uu.print_log('   Jukka peat rasterized')

    # For multiprocessor use
    # count-10 maxes out at about 100 GB on an r5d.16xlarge
    processes = cn.count - 5
    uu.print_log('Peatland preprocessing max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(peatland_processing.create_peat_mask_tiles, tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use, for testing purposes
    # for tile_id in tile_id_list:
    #
    #     peatland_processing.create_peat_mask_tiles(tile_id)

    output_pattern = output_pattern_list[0]
    processes = 50  # 50 processors = XXX GB peak
    uu.print_log(
        "Checking for empty tiles of {0} pattern with {1} processors...".
        format(output_pattern, processes))
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.check_and_delete_if_empty, output_pattern=output_pattern),
        tile_id_list)
    pool.close()
    pool.join()

    uu.print_log("Uploading output files")
    uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
Beispiel #29
def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # Lists the tiles that have both mangrove biomass and FAO ecozone information because both of these are necessary for
        # calculating mangrove gain
        mangrove_biomass_tile_list = uu.tile_list_s3(
            cn.mangrove_biomass_2000_dir)
        ecozone_tile_list = uu.tile_list_s3(cn.cont_eco_dir)
        tile_id_list = list(
            set(mangrove_biomass_tile_list).intersection(ecozone_tile_list))

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    download_dict = {
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
        cn.stdev_annual_gain_AGB_mangrove_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_mangrove,
        cn.pattern_annual_gain_BGB_mangrove,
        cn.pattern_stdev_annual_gain_AGB_mangrove
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first')

    # Creates belowground:aboveground biomass ratio dictionary for the three mangrove types, where the keys correspond to
    # the "mangType" field in the gain rate spreadsheet.
    # If the assignment of mangTypes to ecozones changes, that column in the spreadsheet may need to change and the
    # keys in this dictionary would need to change accordingly.
    type_ratio_dict = {
        '1': cn.below_to_above_trop_dry_mang,
        '2': cn.below_to_above_trop_wet_mang,
        '3': cn.below_to_above_subtrop_mang
    }
    type_ratio_dict_final = {
        int(k): float(v)
        for k, v in list(type_ratio_dict.items())
    }

    # Applies the belowground:aboveground biomass ratios for the three mangrove types to the annual aboveground gain rates to get
    # a column of belowground annual gain rates by mangrove type
    gain_table_simplified['BGB_AGB_ratio'] = gain_table_simplified[
        'mangType'].map(type_ratio_dict_final)
    gain_table_simplified[
        'BGB_annual_rate'] = gain_table_simplified.AGB_gain_tons_ha_yr * gain_table_simplified.BGB_AGB_ratio
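    # Illustrative numbers only: a row with AGB_gain_tons_ha_yr = 2.0 and a belowground:aboveground ratio of 0.5
    # would get BGB_annual_rate = 2.0 * 0.5 = 1.0 tonnes biomass/ha/yr.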

    # Converts the continent-ecozone codes and corresponding gain rates to dictionaries for aboveground and belowground gain rates
    gain_above_dict = pd.Series(
        gain_table_simplified.AGB_gain_tons_ha_yr.values,
        index=gain_table_simplified.gainEcoCon).to_dict()
    gain_below_dict = pd.Series(
        gain_table_simplified.BGB_annual_rate.values,
        index=gain_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    gain_above_dict[0] = 0
    gain_below_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    gain_above_dict = {
        float(key): value
        for key, value in gain_above_dict.items()
    }
    gain_below_dict = {
        float(key): value
        for key, value in gain_below_dict.items()
    }

    ### To make the removal factor standard deviation dictionary

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                sheet_name="mangrove stdev, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts the continent-ecozone codes and corresponding gain rate standard deviations to dictionaries for aboveground and belowground gain rate stdevs
    stdev_dict = pd.Series(
        stdev_table_simplified.AGB_gain_stdev_tons_ha_yr.values,
        index=stdev_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    stdev_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    stdev_dict = {float(key): value for key, value in stdev_dict.items()}

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    # Ran with 18 processors on r4.16xlarge (430 GB memory peak)
    if cn.count == 96:
        processes = 20  #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak
    else:
        processes = 4
    uu.print_log('Mangrove annual gain rate max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_mangrove.annual_gain_rate,
                sensit_type=sensit_type,
                output_pattern_list=output_pattern_list,
                gain_above_dict=gain_above_dict,
                gain_below_dict=gain_below_dict,
                stdev_dict=stdev_dict), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #
    #     annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list,
    #           gain_above_dict, gain_below_dict, stdev_dict)

    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
Beispiel #30
def mp_create_carbon_pools(sensit_type, tile_id_list, carbon_pool_extent, run_date = None):

    os.chdir(cn.docker_base_dir)

    if (sensit_type != 'std') and (carbon_pool_extent != 'loss'):
        uu.exception_log("Sensitivity analysis run must use 'loss' extent")

    # Checks the validity of the carbon_pool_extent argument
    if (carbon_pool_extent not in ['loss', '2000', 'loss,2000', '2000,loss']):
        uu.exception_log("Invalid carbon_pool_extent input. Please choose loss, 2000, loss,2000 or 2000,loss.")


    # If a full model run is specified, the correct set of tiles for the particular script is listed.
    # For runs generating carbon pools in emissions year, only tiles with model extent and loss are relevant.
    if (tile_id_list == 'all') and (carbon_pool_extent == 'loss'):
        # Lists the tiles that have both model extent and loss pixels
        model_extent_tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)
        loss_tile_id_list = uu.tile_list_s3(cn.loss_dir, sensit_type=sensit_type)
        uu.print_log("Carbon pool at emissions year is combination of model_extent and loss tiles:")
        tile_id_list = list(set(model_extent_tile_id_list).intersection(loss_tile_id_list))

    # For runs generating carbon pools in 2000, all model extent tiles are relevant.
    if (tile_id_list == 'all') and (carbon_pool_extent != 'loss'):
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type=sensit_type)


    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    output_dir_list = []
    output_pattern_list = []

    # Output files, patterns, and files to download if carbon pools for 2000 are being generated
    if '2000' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_2000_dir, cn.BGC_2000_dir, cn.deadwood_2000_dir,
                           cn.litter_2000_dir, cn.soil_C_full_extent_2000_dir, cn.total_C_2000_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_2000, cn.pattern_BGC_2000, cn.pattern_deadwood_2000,
                               cn.pattern_litter_2000, cn.pattern_soil_C_full_extent_2000, cn.pattern_total_C_2000]

        # Files to download for this script
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]

    # Output files, patterns, and files to download if carbon pools for the loss year are being generated
    if 'loss' in carbon_pool_extent:

        # List of output directories and output file name patterns
        output_dir_list = output_dir_list + [cn.AGC_emis_year_dir, cn.BGC_emis_year_dir, cn.deadwood_emis_year_2000_dir,
                           cn.litter_emis_year_2000_dir, cn.soil_C_emis_year_2000_dir, cn.total_C_emis_year_dir]
        output_pattern_list = output_pattern_list + [cn.pattern_AGC_emis_year, cn.pattern_BGC_emis_year, cn.pattern_deadwood_emis_year_2000,
                               cn.pattern_litter_emis_year_2000, cn.pattern_soil_C_emis_year_2000, cn.pattern_total_C_emis_year]
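        # Note on indexing: when both extents are requested, the 2000 outputs occupy indices 0-5 of
        # output_dir_list/output_pattern_list and the emissions-year outputs occupy indices 6-11.
        # The hard-coded indices in the upload calls below rely on this ordering.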

        # Files to download for this script. This has the same items as the download_dict for 2000 pools plus
        # other tiles.
        download_dict = {
            cn.removal_forest_type_dir: [cn.pattern_removal_forest_type],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types],
            cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        elif sensit_type == 'Mekong_loss':
            download_dict[cn.Mekong_loss_processed_dir] = [cn.pattern_Mekong_loss_processed]
        else:
            download_dict[cn.loss_dir] = [cn.pattern_loss]


    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)
    else:
        uu.print_log("Output directory list for standard model:", output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
    uu.log_subprocess_output_full(cmd)

    pd.options.mode.chained_assignment = None

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

    mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                         cn.below_to_above_trop_dry_mang,
                                                                                         cn.below_to_above_trop_wet_mang,
                                                                                         cn.below_to_above_subtrop_mang)

    mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                              cn.deadwood_to_above_trop_dry_mang,
                                                                                              cn.deadwood_to_above_trop_wet_mang,
                                                                                              cn.deadwood_to_above_subtrop_mang)

    mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                                            cn.litter_to_above_trop_dry_mang,
                                                                                            cn.litter_to_above_trop_wet_mang,
                                                                                            cn.litter_to_above_subtrop_mang)
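
    # A speculative sketch of what a helper like create_carbon_pools.mangrove_pool_ratio_dict could look like,
    # modeled on the mangType-to-ratio mapping used in mp_annual_gain_rate_mangrove above (the real
    # implementation is in create_carbon_pools.py and may differ):
    # def mangrove_pool_ratio_dict(gain_table_simplified, trop_dry_ratio, trop_wet_ratio, subtrop_ratio):
    #     type_ratio_dict = {1: trop_dry_ratio, 2: trop_wet_ratio, 3: subtrop_ratio}
    #     ratios = gain_table_simplified['mangType'].map(type_ratio_dict)
    #     ratio_dict = pd.Series(ratios.values, index=gain_table_simplified.gainEcoCon).to_dict()
    #     ratio_dict[0] = 0          # pixels outside any continent-ecozone get a ratio of 0
    #     return {float(k): v for k, v in ratio_dict.items()}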

    uu.print_log("Creating tiles of aboveground carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 16  # 16 processors = XXX GB peak
            else:
                processes = 20  # 25 processors > 750 GB peak; 16 = 560 GB peak;
                # 18 = 570 GB peak; 19 = 620 GB peak; 20 = 670 GB peak; 21 > 750 GB peak
        else: # For 2000, or loss & 2000
            processes = 15  # 12 processors = 490 GB peak (stops around 455, then increases slowly); 15 = XXX GB peak
    else:
        processes = 2
    uu.print_log('AGC loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_AGC,
                     sensit_type=sensit_type, carbon_pool_extent=carbon_pool_extent), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_AGC(tile_id, sensit_type, carbon_pool_extent)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
    else:
        uu.upload_final_set(output_dir_list[0], output_pattern_list[0])
        uu.upload_final_set(output_dir_list[6], output_pattern_list[6])
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for belowground carbon creation; deleting unneeded tiles")
    tiles_to_delete = glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_all_types))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    uu.print_log("Creating tiles of belowground carbon in {}".format(carbon_pool_extent))
    # Creates a single filename pattern to pass to the multiprocessor call
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 30  # 30 processors = XXX GB peak
            else:
                processes = 38  # 20 processors = 370 GB peak; 32 = 590 GB peak; 36 = 670 GB peak; 38 = 700 GB peak
        else: # For 2000, or loss & 2000
            processes = 30  # 20 processors = 370 GB peak; 25 = 460 GB peak; 30 = XXX GB peak
    else:
        processes = 2
    uu.print_log('BGC max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                     carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
    else:
        uu.upload_final_set(output_dir_list[1], output_pattern_list[1])
        uu.upload_final_set(output_dir_list[7], output_pattern_list[7])
    uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while keeping AGC and BGC 2000 on the spot machine.
    # Thus BGC and soil C 2000 (along with other inputs that are no longer needed) must be deleted for the creation of
    # deadwood and litter, then copied back to the spot machine for the total C 2000 calculation.
    if '2000' in carbon_pool_extent:
        uu.print_log(":::::Freeing up memory for deadwood and litter carbon 2000 creation; deleting unneeded tiles")
        tiles_to_delete = []
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_BGC_2000)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_loss)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain)))
        tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_soil_C_full_extent_2000)))

        uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

        for tile_to_delete in tiles_to_delete:
            os.remove(tile_to_delete)
        uu.print_log(":::::Deleted unneeded tiles")
        uu.check_storage()


    uu.print_log("Creating tiles of deadwood and litter carbon in {}".format(carbon_pool_extent))
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 14  # 32 processors = >750 GB peak; 24 > 750 GB peak; 14 = 650 GB peak; 15 = 700 GB peak
        else: # For 2000, or loss & 2000
            ### Note: deleted precip, elevation, and WHRC AGB tiles at equatorial latitudes as deadwood and litter were produced.
            ### There wouldn't have been enough room for all deadwood and litter otherwise.
            ### For example, when deadwood and litter generation started getting up to around 50N, I deleted
            ### 00N precip, elevation, and WHRC AGB. I deleted all of those from 30N to 20S.
            processes = 16  # 7 processors = 320 GB peak; 14 = 620 GB peak; 16 = XXX GB peak
    else:
        processes = 2
    uu.print_log('Deadwood and litter max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(create_carbon_pools.create_deadwood_litter, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                carbon_pool_extent=carbon_pool_extent,
                sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_ratio, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
    else:
        uu.upload_final_set(output_dir_list[2], output_pattern_list[2])  # deadwood
        uu.upload_final_set(output_dir_list[3], output_pattern_list[3])  # litter
        uu.upload_final_set(output_dir_list[8], output_pattern_list[8])  # deadwood
        uu.upload_final_set(output_dir_list[9], output_pattern_list[9])  # litter
    uu.check_storage()

    uu.print_log(":::::Freeing up memory for soil and total carbon creation; deleting unneeded tiles")
    tiles_to_delete = []
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_elevation)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_precip)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_JPL_unmasked_processed)))
    tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed)))
    uu.print_log("  Deleting", len(tiles_to_delete), "tiles...")

    for tile_to_delete in tiles_to_delete:
        os.remove(tile_to_delete)
    uu.print_log(":::::Deleted unneeded tiles")
    uu.check_storage()


    if 'loss' in carbon_pool_extent:

        uu.print_log("Creating tiles of soil carbon in loss extent")

        # If pools in 2000 weren't generated, soil carbon in the emissions extent is at index 4 of the output lists.
        # If pools in 2000 were generated, soil carbon in the emissions extent is at index 10.
        if '2000' not in carbon_pool_extent:
            pattern = output_pattern_list[4]
        else:
            pattern = output_pattern_list[10]

        if cn.count == 96:
            # More processors can be used for loss carbon pools than for 2000 carbon pools
            if carbon_pool_extent == 'loss':
                if sensit_type == 'biomass_swap':
                    processes = 36  # 36 processors = XXX GB peak
                else:
                    processes = 42  # 24 processors = 360 GB peak; 32 = 490 GB peak; 38 = 580 GB peak; 42 = XXX GB peak
            else: # For 2000, or loss & 2000
                processes = 12  # 12 processors = XXX GB peak
        else:
            processes = 2
        uu.print_log('Soil carbon loss year max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(partial(create_carbon_pools.create_soil_emis_extent, pattern=pattern,
                         sensit_type=sensit_type), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_soil_emis_extent(tile_id, pattern, sensit_type)

        # If pools in 2000 weren't generated, soil carbon in the emissions extent is at index 4 of the output lists.
        # If pools in 2000 were generated, soil carbon in the emissions extent is at index 10.
        if '2000' not in carbon_pool_extent:
            uu.upload_final_set(output_dir_list[4], output_pattern_list[4])
        else:
            uu.upload_final_set(output_dir_list[10], output_pattern_list[10])

        uu.check_storage()

    if '2000' in carbon_pool_extent:
        uu.print_log("Skipping soil for 2000 carbon pool calculation. Soil carbon in 2000 already created.")
        uu.check_storage()


    # 825 GB isn't enough space to create deadwood and litter 2000 while keeping AGC and BGC 2000 on the spot machine.
    # Thus BGC and soil C 2000 had to be deleted for the creation of deadwood and litter, and are now copied back to
    # the spot machine for the total C 2000 calculation.
    if '2000' in carbon_pool_extent:

        # Files to download for total C 2000. Previously deleted to save space
        download_dict = {
            cn.BGC_2000_dir: [cn.pattern_BGC_2000],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000]
        }

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)


    uu.print_log("Creating tiles of total carbon")
    if cn.count == 96:
        # More processors can be used for loss carbon pools than for 2000 carbon pools
        if carbon_pool_extent == 'loss':
            if sensit_type == 'biomass_swap':
                processes = 14  # 14 processors = XXX GB peak
            else:
                processes = 18  # 20 processors > 750 GB peak (by just a bit, I think); 15 = 550 GB peak; 18 = XXX GB peak
        else: # For 2000, or loss & 2000
            processes = 12  # 12 processors = XXX GB peak
    else:
        processes = 2
    uu.print_log('Total carbon loss year max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(create_carbon_pools.create_total_C, carbon_pool_extent=carbon_pool_extent,
                     sensit_type=sensit_type), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #     create_carbon_pools.create_total_C(tile_id, carbon_pool_extent, sensit_type)

    if carbon_pool_extent in ['loss', '2000']:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
    else:
        uu.upload_final_set(output_dir_list[5], output_pattern_list[5])
        uu.upload_final_set(output_dir_list[11], output_pattern_list[11])
    uu.check_storage()