def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type):
    """Create the gain year count raster for pixels without loss in one tile.

    Writes ``<tile_id>_growth_years_no_change.tif`` with gdal_calc.py: pixels where
    the loss tile is 0, the extent tile is 1 and the biomass tile is > 0 receive
    ``cn.loss_years``; all other pixels evaluate to 0, which is the NoData value.

    :param tile_id: ID of the tile being processed; used in file names and logs.
    :param sensit_type: sensitivity analysis type, forwarded to ``tile_names``.
    """

    uu.print_log("Gain year count for non-loss pixels:", tile_id)

    # Reference time for the end-of-function summary
    start = datetime.datetime.now()

    # tile_names gives the loss, gain, extent and biomass tiles; gain is not used here
    loss, _gain, extent, biomass = tile_names(tile_id, sensit_type)

    # gdal_calc doesn't register the 0 (NoData) pixels in the loss tile directly, so the
    # loss tile is wrapped in a vrt without a NoData value so that the 0 pixels are recognized.
    loss_vrt = '{}_loss.vrt'.format(tile_id)
    os.system('gdalbuildvrt -vrtnodata None {0} {1}'.format(loss_vrt, loss))

    # No loss (A==0), inside the forest extent (B==1) and with biomass (C>0)
    calc_arg = '--calc=(A==0)*(B==1)*(C>0)*{}'.format(cn.loss_years)
    outfile_arg = '--outfile={}'.format('{}_growth_years_no_change.tif'.format(tile_id))

    gdal_calc_cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', extent, '-C', biomass]
    gdal_calc_cmd += [calc_arg, outfile_arg, '--NoDataValue=0', '--overwrite']
    gdal_calc_cmd += ['--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']

    # Subprocess output is streamed into the log; approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(gdal_calc_cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Logs information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change')
Exemple #2
0
def create_10x10_plantation_type(tile_id, plant_type_1x1_vrt):
    """Warp the plantation type vrt to one 10x10 tile and upload it if it has data.

    gdalwarp clips ``plant_type_1x1_vrt`` to the bounding box of ``tile_id`` at
    ``cn.Hansen_res`` resolution and writes a Byte raster with NoData 0. The tile is
    uploaded with ``uu.upload_final`` only when ``uu.check_for_data`` reports data.

    :param tile_id: ID of the 10x10 tile to create; its extent comes from ``uu.coords``.
    :param plant_type_1x1_vrt: source raster (vrt) given to gdalwarp as input.
    """

    uu.print_log("Getting bounding coordinates for tile", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)
    uu.print_log("  xmin:", xmin, "; xmax:", xmax, "; ymin", ymin, "; ymax:", ymax)

    tile_10x10 = '{0}_{1}.tif'.format(tile_id, cn.pattern_planted_forest_type_unmasked)
    uu.print_log("Rasterizing", tile_10x10)
    res = str(cn.Hansen_res)
    cmd = ['gdalwarp', '-tr', res, res,
           '-co', 'COMPRESS=LZW', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax),
           '-dstnodata', '0', '-t_srs', 'EPSG:4326', '-overwrite', '-ot', 'Byte', plant_type_1x1_vrt, tile_10x10]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.print_log("Checking if {} contains any data...".format(tile_id))
    stats = uu.check_for_data(tile_10x10)

    # NOTE(review): assumes the first element returned by uu.check_for_data is > 0 only
    # when the tile contains data -- confirm against that helper.
    if stats[0] > 0:

        uu.print_log("  Data found in {}. Copying tile to s3...".format(tile_id))
        uu.upload_final(cn.planted_forest_type_unmasked_dir, tile_id, cn.pattern_planted_forest_type_unmasked)
        uu.print_log("    Tile converted and copied to s3")

    else:

        # Goes through uu.print_log like every other message so it also reaches the log
        uu.print_log("  No data found. Not copying {}.".format(tile_id))
def rasterize_pre_2000_plantations(tile_id):
    """Rasterize the pre-2000 plantation shapefile to the extent of one tile.

    Burns the value 1 into a Byte raster (NoData 0) named
    ``<tile_id>_<cn.pattern_plant_pre_2000>.tif`` at ``cn.Hansen_res`` resolution.

    :param tile_id: ID of the tile to rasterize; its extent comes from ``uu.coords``.
    """

    # Reference time for the end-of-function summary
    start = datetime.datetime.now()

    uu.print_log("Getting extent of", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    pixel_size = '{}'.format(cn.Hansen_res)
    extent_args = [str(bound) for bound in (xmin, ymin, xmax, ymax)]
    source_shp = '{}.shp'.format(cn.pattern_plant_pre_2000_raw)
    out_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_plant_pre_2000)

    cmd = ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=LZW']
    cmd += ['-tr', pixel_size, pixel_size, '-tap', '-ot', 'Byte', '-a_nodata', '0']
    cmd += ['-te'] + extent_args
    cmd += [source_shp, out_tile]

    # Subprocess output is streamed into the log; approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Logs information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_plant_pre_2000)
def legal_Amazon_create_gain_year_count_loss_and_gain_standard(
        tile_id, sensit_type):
    """Create the gain year count raster for pixels with both loss and gain.

    Writes ``<tile_id>_growth_years_loss_and_gain.tif`` with gdal_calc.py. Pixels where
    the loss tile is > 0, the gain tile is 1 and the extent tile is 1 receive
    ``(A-1) + (cn.loss_years+1-A)/2`` (A being the loss value); all other pixels
    evaluate to 0, which is the NoData value.

    :param tile_id: ID of the tile being processed; used in file names and logs.
    :param sensit_type: sensitivity analysis type, forwarded to ``tile_names``.
    """

    uu.print_log("Gain year count for loss and gain pixels:", tile_id)

    # Reference time for the end-of-function summary
    start = datetime.datetime.now()

    # tile_names gives the loss, gain, extent and biomass tiles; biomass is not used here
    loss, gain, extent, _biomass = tile_names(tile_id, sensit_type)

    # Loss (A>0) and gain (B==1) inside the forest extent (C==1)
    calc_arg = '--calc=((A>0)*(B==1)*(C==1)*((A-1)+({}+1-A)/2))'.format(cn.loss_years)
    out_tile = '{}_growth_years_loss_and_gain.tif'.format(tile_id)
    outfile_arg = '--outfile={}'.format(out_tile)

    gdal_calc_cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent]
    gdal_calc_cmd += [calc_arg, outfile_arg, '--NoDataValue=0', '--overwrite']
    gdal_calc_cmd += ['--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']

    # Subprocess output is streamed into the log; approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(gdal_calc_cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Logs information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain')
def legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern):
    """Merge the per-category gain year count rasters of a tile into one raster.

    Three inputs are handed to gdal_merge.py, in this order: the loss-only,
    no-change and loss-and-gain rasters for ``tile_id``. The result is written to
    ``<tile_id>_<output_pattern>.tif`` as a Byte raster with NoData 0.

    :param tile_id: ID of the tile whose component rasters are merged.
    :param output_pattern: file name pattern of the merged output raster.
    """

    uu.print_log(
        "Merging loss, gain, no change, and loss/gain pixels into single raster for {}"
        .format(tile_id))

    # Reference time for the end-of-function summary
    start = datetime.datetime.now()

    # Component rasters, in the order they are given to gdal_merge.py
    component_suffixes = ('loss_only', 'no_change', 'loss_and_gain')
    component_tiles = [
        '{}_growth_years_'.format(tile_id) + suffix + '.tif'
        for suffix in component_suffixes
    ]

    age_outfile = '{}_{}.tif'.format(tile_id, output_pattern)
    merge_cmd = ['gdal_merge.py', '-o', age_outfile] + component_tiles
    merge_cmd += ['-co', 'COMPRESS=LZW', '-a_nodata', '0', '-ot', 'Byte']

    # Subprocess output is streamed into the log; approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(merge_cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Logs information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_pattern)
Exemple #6
0
def recode_tiles(annual_loss):
    """Recode one annual loss raster so that loss pixels hold the loss year index.

    The year is read from the four characters preceding the file extension of
    ``annual_loss``. Years outside 2001..(2000 + ``cn.loss_years``) are skipped.
    Otherwise gdal_calc.py writes ``Mekong_loss_recoded_<year>.tif`` in which pixels
    equal to 100 in the input become ``year - 2000`` and the rest become 0 (NoData).

    :param annual_loss: file name of the annual loss raster, ending in ``<year>.tif``
        (any four-character extension suffix works with the slicing used).
    """

    uu.print_log("Recoding loss tile by year")

    year = int(annual_loss[-8:-4])
    uu.print_log(year)

    # Guard clause: nothing to do for years the model does not cover
    last_model_year = 2000 + cn.loss_years
    if not (2001 <= year <= last_model_year):
        uu.print_log("Skipping {} because outside of model range".format(year))
        return

    years_since_2000 = year - 2000
    calc_arg = '--calc={}*(A==100)'.format(years_since_2000)
    outfile_arg = '--outfile={}'.format("Mekong_loss_recoded_{}.tif".format(year))

    recode_cmd = ['gdal_calc.py', '-A', annual_loss, calc_arg, outfile_arg]
    recode_cmd += ['--NoDataValue=0', '--co', 'COMPRESS=LZW', '--quiet']

    # Subprocess output is streamed into the log; approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(recode_cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
def create_1x1_plantation_type_from_1x1_planted(tile_1x1):
    """Rasterize the plantation type of the PostGIS ``all_plant`` layer for a 1x1 tile.

    The tile's top latitude and left longitude are parsed from the underscore-separated
    file name (third and fourth fields, the latter minus its 4-character extension).
    gdal_rasterize then writes ``plant_type_<ymax>_<xmin>.tif`` from the
    ``type_reclass`` attribute as a Byte raster with NoData 0.

    :param tile_1x1: file name of the 1x1 degree planted tile.
    """

    # Bounding coordinates of the 1x1 degree tile, taken from its file name
    name_parts = tile_1x1.split("_")
    xmin_1x1 = str(name_parts[3])[:-4]      # kept as text; drops the file extension
    ymax_1x1 = int(name_parts[2])
    xmax_1x1 = int(xmin_1x1) + 1
    ymin_1x1 = ymax_1x1 - 1

    uu.print_log("For", tile_1x1, "-- xmin_1x1:", xmin_1x1, "; xmax_1x1:",
                 xmax_1x1, "; ymin_1x1", ymin_1x1, "; ymax_1x1:", ymax_1x1)

    uu.print_log("There are plantations in {}. Converting to raster...".format(
        tile_1x1))

    pixel_size = '{}'.format(cn.Hansen_res)
    out_tile = 'plant_type_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1)
    extent_args = [str(bound) for bound in (xmin_1x1, ymin_1x1, xmax_1x1, ymax_1x1)]

    # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
    cmd = ['gdal_rasterize', '-tr', pixel_size, pixel_size, '-co', 'COMPRESS=LZW']
    cmd += ['PG:dbname=ubuntu', '-l', 'all_plant', out_tile]
    cmd += ['-te'] + extent_args
    cmd += ['-a', 'type_reclass', '-a_nodata', '0', '-ot', 'Byte']

    # Subprocess output is streamed into the log; approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
Exemple #8
0
def create_1x1_plantation_from_1x1_gadm(tile_1x1):
    """Rasterize plantation gain rate and type for a 1x1 GADM tile that has plantations.

    The tile's top latitude and left longitude are parsed from the underscore-separated
    file name (second and third fields, the latter minus its 4-character extension).
    The PostGIS ``all_plant`` table is intersected with the tile's bounding box; if any
    feature intersects, gdal_rasterize writes ``plant_gain_<ymax>_<xmin>.tif`` (from the
    ``growth`` attribute) and ``plant_type_<ymax>_<xmin>.tif`` (from ``type_reclass``).
    Otherwise nothing is written.

    :param tile_1x1: file name of the 1x1 degree GADM tile.
    """

    # Gets the bounding coordinates for the 1x1 degree tile
    coords = tile_1x1.split("_")
    uu.print_log(coords)
    xmin_1x1 = str(coords[2])[:-4]
    xmax_1x1 = int(xmin_1x1) + 1
    ymax_1x1 = int(coords[1])
    ymin_1x1 = ymax_1x1 - 1

    uu.print_log("For", tile_1x1, "-- xmin_1x1:", xmin_1x1, "; xmax_1x1:", xmax_1x1, "; ymin_1x1", ymin_1x1, "; ymax_1x1:", ymax_1x1)

    # Connects Python to PostGIS using psycopg2. The credentials work on spot machines as they are currently configured
    # and are based on this: https://github.com/wri/gfw-annual-loss-processing/blob/master/1b_Summary-AOIs-to-TSV/utilities/postgis_util.py
    creds = {'host': 'localhost', 'user': '******', 'dbname': 'ubuntu'}
    conn = psycopg2.connect(**creds)

    # The connection and cursor are released even if the query fails, so that
    # repeated calls do not accumulate open PostGIS connections.
    try:
        cursor = conn.cursor()
        try:
            # Intersects the plantations PostGIS table with the 1x1 tile
            # https://gis.stackexchange.com/questions/30267/how-to-create-a-valid-global-polygon-grid-in-postgis
            # https://stackoverflow.com/questions/48978616/best-way-to-run-st-intersects-on-features-inside-one-table
            # https://postgis.net/docs/ST_Intersects.html
            uu.print_log("Checking if {} has plantations in it".format(tile_1x1))

            # Does the intersect of the PostGIS table and the 1x1 GADM tile
            cursor.execute("SELECT growth FROM all_plant WHERE ST_Intersects(all_plant.wkb_geometry, ST_GeogFromText('POLYGON(({0} {1},{2} {1},{2} {3},{0} {3},{0} {1}))'))".format(
                    xmin_1x1, ymax_1x1, xmax_1x1, ymin_1x1))

            # The rows returned by the intersection; only their count is used, to tell
            # whether any PostGIS features intersect the tile.
            features = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    # If no features in the PostGIS table were intersected with the 1x1 GADM tile, nothing happens.
    if not features:
        uu.print_log("There are no plantations in {}. Not converting to raster.".format(tile_1x1))
        return

    uu.print_log("There are plantations in {}. Converting to gain rate and plantation type rasters...".format(tile_1x1))

    # Output file prefix and the attribute burned into it: gain rate first, then plantation type
    outputs = (('plant_gain', 'growth'), ('plant_type', 'type_reclass'))

    for prefix, attribute in outputs:

        # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
        cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res),
               '-co', 'COMPRESS=LZW', 'PG:dbname=ubuntu', '-l', 'all_plant',
               '{0}_{1}_{2}.tif'.format(prefix, ymax_1x1, xmin_1x1),
               '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1),
               '-a', attribute, '-a_nodata', '0']
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)
def rasterize_gadm_1x1(tile_id):
    """Rasterize GADM into 1x1 degree tiles covering one larger tile.

    The bounding box of ``tile_id`` is split into 1x1 degree cells. For each cell,
    gdal_rasterize burns the value 1 from ``cn.gadm_iso`` into
    ``GADM_<ymax>_<xmin>.tif`` (NoData 0). Cells for which ``uu.check_for_data``
    reports no data are deleted again.

    :param tile_id: ID of the tile to split; its extent comes from ``uu.coords``.
    """

    uu.print_log("Getting bounding coordinates for tile", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)
    uu.print_log("  xmin:", xmin, "; xmax:", xmax, "; ymin", ymin, "; ymax:",
                 ymax)

    # Integer origin and size (in degrees) of the tile
    x_origin = int(xmin)
    y_origin = int(ymin)
    x_size = abs(x_origin - int(xmax))
    y_size = abs(y_origin - int(ymax))

    pixel_size = str(cn.Hansen_res)

    # Walks through the input tile one 1x1 degree cell at a time, column by column
    for x_offset in range(x_size):

        xmin_1x1 = x_origin + x_offset
        xmax_1x1 = xmin_1x1 + 1

        for y_offset in range(y_size):

            ymin_1x1 = y_origin + y_offset
            ymax_1x1 = ymin_1x1 + 1

            uu.print_log("  xmin_1x1:", xmin_1x1, "; xmax_1x1:", xmax_1x1,
                         "; ymin_1x1", ymin_1x1, "; ymax_1x1:", ymax_1x1)

            tile_1x1 = 'GADM_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1)
            uu.print_log("Rasterizing", tile_1x1)

            cell_extent = [str(bound) for bound in (xmin_1x1, ymin_1x1, xmax_1x1, ymax_1x1)]
            cmd = ['gdal_rasterize', '-tr', pixel_size, pixel_size, '-co', 'COMPRESS=LZW']
            cmd += ['-te'] + cell_extent
            cmd += ['-burn', '1', '-a_nodata', '0', cn.gadm_iso, tile_1x1]

            # Subprocess output is streamed into the log; approach from
            # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Many 1x1 cells cut out of a larger tile don't include a country;
            # only cells that do are kept.
            uu.print_log(
                "Checking if {} contains any data...".format(tile_1x1))
            stats = uu.check_for_data(tile_1x1)

            if not stats[1] > 0:
                uu.print_log(
                    "  No data found in {}. Deleting.".format(tile_1x1))
                os.remove(tile_1x1)
                continue

            uu.print_log(
                "  Data found in {}. Keeping tile".format(tile_1x1))
Exemple #10
0
def download_df(year, hv_tile, output_dir):
    """Copy the raw burn year hdf files matching a year and tile from s3.

    Runs a recursive ``aws s3 cp`` from ``cn.burn_year_hdf_raw_dir`` that excludes
    everything and then includes only names matching ``*A<year>*<hv_tile>*``.

    :param year: year to match in the file names.
    :param hv_tile: tile identifier to match in the file names.
    :param output_dir: destination directory for the copied files.
    """
    name_filter = '*A{0}*{1}*'.format(year, hv_tile)

    copy_cmd = ['aws', 's3', 'cp', cn.burn_year_hdf_raw_dir, output_dir, '--recursive']
    copy_cmd += ['--exclude', "*", '--include', name_filter]

    # Subprocess output is streamed into the log; approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(copy_cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
Exemple #11
0
def loss_in_raster(tile_id, raster_type, output_name, lat, mask):
    """Combine the loss tile with another raster for tiles north of a latitude.

    The tile is processed only if its top edge (``ymax`` from ``uu.coords``) is above
    ``lat`` and ``<tile_id>_<raster_type>.tif`` exists. gdal_calc.py then writes
    ``<tile_id>_<output_name>.tif`` with NoData 0.

    :param tile_id: ID of the tile; ``<tile_id>.tif`` is used as the loss tile.
    :param raster_type: file name pattern of the raster that loss is analyzed inside.
    :param output_name: file name pattern of the output raster.
    :param lat: latitude threshold; tiles whose ymax is not above it are skipped.
    :param mask: the string "True" to output the raster of interest wherever loss >= 1,
        or the string "False" to output the product of loss and the raster of interest.
    :raises ValueError: if a tile is processed and ``mask`` is neither "True" nor "False".
    """

    uu.print_log("Calculating loss area for tile id {0}...".format(tile_id))

    # Only the top edge of the tile is needed for the latitude test
    _xmin, _ymin, _xmax, ymax = uu.coords(tile_id)

    # start time
    start = datetime.datetime.now()

    # Name of the loss tile
    loss_tile = '{0}.tif'.format(tile_id)

    # The raster that loss is being analyzed inside
    raster_of_interest = '{0}_{1}.tif'.format(tile_id, raster_type)

    # Output file name
    outname = '{0}_{1}.tif'.format(tile_id, output_name)

    inside_lat_band = ymax > lat

    # Only processes the tile if it is inside the latitude band (north of the specified latitude)
    if inside_lat_band and os.path.exists(raster_of_interest):

        uu.print_log("{} inside latitude band and peat tile exists. Processing tile.".format(tile_id))

        # If the user has asked to create just a mask of loss as opposed to the actual output values.
        # (A+1)/(A+1) is always 1, so this yields B wherever A >= 1 and 0 elsewhere.
        if mask == "True":
            calc = '--calc=(A>=1)*(A+1)/(A+1)*B'

        # If the user has asked to output the actual loss values: loss multiplied by the raster of interest
        elif mask == "False":
            calc = '--calc=A*B'

        # Previously an unrecognized value left calc unassigned and failed later with an
        # UnboundLocalError; fail with a clear message instead.
        else:
            raise ValueError(
                'mask must be the string "True" or "False", not {!r}'.format(mask))

        # Argument for outputting file
        out = '--outfile={}'.format(outname)

        uu.print_log("Masking loss in {} by raster of interest...".format(tile_id))
        cmd = ['gdal_calc.py', '-A', loss_tile, '-B', raster_of_interest, calc, out, '--NoDataValue=0', '--co', 'COMPRESS=LZW',
               '--overwrite', '--quiet']
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        uu.print_log("{} masked".format(tile_id))

    elif not inside_lat_band:

        uu.print_log("{} outside of latitude band. Skipped tile.".format(tile_id))

    else:

        # The tile is in the latitude band, so the reason for skipping is the missing raster
        uu.print_log("{0} not found for {1}. Skipped tile.".format(raster_of_interest, tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_name)
Exemple #12
0
def mp_mangrove_processing(tile_id_list, run_date = None):
    """Create mangrove biomass tiles from the raw mangrove rasters and upload those with data.

    Downloads and unzips the raw mangrove biomass file, builds a vrt over all tifs in
    the working directory, warps the vrt to tiles in parallel with
    ``uu.mp_warp_to_Hansen`` and then runs ``uu.check_and_upload`` on every tile.

    :param tile_id_list: list of tile IDs to process, or the string 'all' to use every
        tile listed by ``uu.tile_list_s3(cn.pixel_area_dir)``.
    :param run_date: accepted for interface compatibility; not used by this function.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # Downloads zipped raw mangrove files
    uu.s3_file_download(os.path.join(cn.mangrove_biomass_raw_dir, cn.mangrove_biomass_raw_file), cn.docker_base_dir, 'std')

    # Unzips mangrove images into a flat structure (all tifs into main folder using -j argument)
    # NOTE: Unzipping some tifs (e.g., Australia, Indonesia) takes a very long time, so don't worry if the script appears to stop on that.
    cmd = ['unzip', '-o', '-j', cn.mangrove_biomass_raw_file]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Creates vrt over all tifs in the working directory (the shell expands *.tif)
    mangrove_vrt = 'mangrove_biomass.vrt'
    os.system('gdalbuildvrt {} *.tif'.format(mangrove_vrt))

    # Converts the mangrove AGB vrt into Hansen tiles
    source_raster = mangrove_vrt
    out_pattern = cn.pattern_mangrove_biomass_2000
    dt = 'float32'
    processes=int(cn.count/4)
    uu.print_log('Mangrove preprocessing max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    # Releases the worker processes before the next pool is started
    pool.close()
    pool.join()

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.mangrove_biomass_2000_dir
    pattern = cn.pattern_mangrove_biomass_2000
    processes=int(cn.count-5)
    uu.print_log('Mangrove check for data max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
    pool.close()
    pool.join()
def mp_continent_ecozone_tiles(tile_id_list, run_date=None):
    """Create continent-ecozone tiles in parallel and upload them.

    Downloads and unzips the continent-ecozone shapefile, runs
    ``continent_ecozone_tiles.create_continent_ecozone_tiles`` on every tile in a
    multiprocessing pool and uploads the raw and processed outputs with
    ``uu.upload_final_set``.

    :param tile_id_list: list of tile IDs to process, or the string 'all' to build the
        list with ``uu.create_combined_tile_list``.
    :param run_date: optional date that replaces the date in the output directories
        (via ``uu.replace_output_dir_date``); ``None`` keeps the defaults.
    """

    # Imported locally so the pool below can be created even if the module-level
    # imports of this file do not include multiprocessing.
    import multiprocessing

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(
            cn.pattern_WHRC_biomass_2000_non_mang_non_planted,
            cn.mangrove_biomass_2000_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads the zipped continent-ecozone shapefile
    # (presumably skipped by uu.s3_file_download if already present -- TODO confirm)
    uu.s3_file_download(cn.cont_eco_s3_zip, cn.docker_base_dir, 'std')

    # Unzips ecozone shapefile
    cmd = ['unzip', cn.cont_eco_zip]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # List of output directories and output file name patterns
    output_dir_list = [cn.cont_eco_raw_dir, cn.cont_eco_dir]
    output_pattern_list = [
        cn.pattern_cont_eco_raw, cn.pattern_cont_eco_processed
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # For multiprocessor use. The pool was previously never created, so pool.map
    # failed with a NameError; it is now built from the computed processor count.
    processes = int(cn.count / 4)
    uu.print_log('Continent-ecozone tile creation max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(continent_ecozone_tiles.create_continent_ecozone_tiles,
             tile_id_list)
    pool.close()
    pool.join()

    # Uploads the continent-ecozone tile to s3 before the codes are expanded to pixels in 1024x1024 windows that don't have codes.
    # These are not used for the model. They are for reference and completeness.
    for output_dir, output_pattern in zip(output_dir_list, output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
Exemple #14
0
def create_mangrove_soil_C(tile_id):
    """Create a mangrove soil C tile masked to where mangrove biomass exists.

    Does nothing except log if the mangrove biomass tile for ``tile_id`` is missing.
    Otherwise ``mangrove_soil_C.vrt`` is clipped to the tile with ``uu.warp_to_Hansen``
    and gdal_calc.py writes ``<tile_id>_mangrove_masked_to_mangrove.tif`` (Int16,
    NoData 0), keeping soil values only where biomass is > 0.

    :param tile_id: ID of the tile to process.
    """

    # Reference time for the end-of-function summary
    start = datetime.datetime.now()

    mangrove_biomass = '{0}_{1}.tif'.format(
        tile_id, cn.pattern_mangrove_biomass_2000)

    # Without a mangrove biomass tile there is nothing to mask to
    if not os.path.exists(mangrove_biomass):

        uu.print_log("No mangrove aboveground biomass tile for", tile_id)

    else:

        uu.print_log("Mangrove aboveground biomass tile found for", tile_id)

        uu.print_log("Getting extent of", tile_id)
        xmin, ymin, xmax, ymax = uu.coords(tile_id)

        mangrove_soil = '{0}_mangrove_full_extent.tif'.format(tile_id)

        uu.print_log("Clipping mangrove soil C from mangrove soil vrt for",
                     tile_id)
        uu.warp_to_Hansen('mangrove_soil_C.vrt', mangrove_soil,
                          xmin, ymin, xmax, ymax, 'Int16')

        masked_tile = '{0}_mangrove_masked_to_mangrove.tif'.format(tile_id)
        outfile_arg = '--outfile={}'.format(masked_tile)
        type_arg = '--type={}'.format('Int16')

        uu.print_log("Masking mangrove soil to mangrove biomass for", tile_id)
        mask_cmd = ['gdal_calc.py', '-A', mangrove_soil, '-B', mangrove_biomass]
        mask_cmd += ['--calc=A*(B>0)', outfile_arg, '--NoDataValue=0']
        mask_cmd += ['--co', 'COMPRESS=DEFLATE', '--overwrite', type_arg, '--quiet']

        # Subprocess output is streamed into the log; approach from
        # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(mask_cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    # Logs information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'mangrove_masked_to_mangrove')
def mp_prep_other_inputs(tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)
    sensit_type='std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                             cn.mangrove_biomass_2000_dir,
                                             set3=cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir
                                             )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # List of output directories and output file name patterns
    output_dir_list = [cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
                       cn.drivers_processed_dir, cn.ifl_primary_processed_dir,
                       cn.annual_gain_AGC_natrl_forest_young_dir,
                       cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
                       cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.FIA_forest_group_processed_dir,
                       cn.age_cat_natrl_forest_US_dir,
                       cn.FIA_regions_processed_dir]
    output_pattern_list = [cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
                           cn.pattern_drivers, cn.pattern_ifl_primary,
                           cn.pattern_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_FIA_forest_group_processed,
                           cn.pattern_age_cat_natrl_forest_US,
                           cn.pattern_FIA_regions_processed]


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':

        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)


    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest
    uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.drivers_raw_dir, '{}.zip'.format(cn.pattern_drivers_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type)
    # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster.
    # Thus, using wget instead.
    cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)
    cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    uu.log_subprocess_output_full(cmd)

    uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Unzipping pre-2000 plantations...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping drivers...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_drivers_raw)]
    uu.log_subprocess_output_full(cmd)


    # Creates tree cover loss driver tiles
    source_raster = '{}.tif'.format(cn.pattern_drivers_raw)
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates young natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates young natural forest removal rate standard deviation tiles
    source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates pre-2000 oil palm plantation tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 100 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list)
    pool.close()
    pool.join()


    # Creates climate zone tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_climate_zone_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 60  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest standard deviation of removal rate tiles
    source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 32  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    uu.print_log("Creating vrt of humid tropial primary forest...")
    primary_vrt = 'primary_2001.vrt'
    os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    uu.print_log("  Humid tropical primary forest vrt created")

    # Creates primary forest tiles
    source_raster = primary_vrt
    out_pattern = 'primary_2001'
    dt = 'Byte'
    if cn.count == 96:
        processes = 45  # 45 processors = 650 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a combined IFL/primary forest raster
    # Uses very little memory since it's just file renaming
    if cn.count == 96:
        processes = 60  # 60 processors = 10 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list)
    pool.close()
    pool.join()


    # Creates forest age category tiles for US forests
    source_raster = cn.name_age_cat_natrl_forest_US_raw
    out_pattern = cn.pattern_age_cat_natrl_forest_US
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates forest groups for US forests
    source_raster = cn.name_FIA_forest_group_raw
    out_pattern = cn.pattern_FIA_forest_group_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 32 processors = 25 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates FIA regions for US forests
    source_raster = cn.name_FIA_regions_raw
    out_pattern = cn.pattern_FIA_regions_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    for output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData)
        # should be. These NaN values show up as values when the check_and_delete_if_empty function runs, making the tiles not
        # deleted even if they have no data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't
        # have this problem. So I'm forcing the young forest rates to and stdev to have their emptiness checked by the gdalinfo version.
        if output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        if cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        elif cn.count <= 2: # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        uu.print_log('\n')


    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
# Exemple #16
# 0
def main():
    """Create first-year-of-loss Hansen tiles for the Mekong region.

    Steps, in order:
      1. Download the folder of annual Mekong loss rasters from s3.
      2. Recode each annual raster via ``Mekong_loss.recode_tiles``.
      3. Merge the recoded rasters into one composite with ``gdal_merge.py``.
      4. Warp the composite to Hansen tiles, recode the tiles, and upload the
         tiles via ``uu.check_and_upload``.
    """

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # Tile ids listed from the unmasked WHRC biomass 2000 directory on s3.
    # These are the tiles the composite loss raster is warped into below.
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ['50N_130W'] # test tiles
    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads the Mekong loss folder. Each year of loss has its own raster.
    # NOTE(review): sensit_type is not defined inside this function; presumably
    # it is a module-level name -- confirm, otherwise this raises NameError.
    uu.s3_folder_download(cn.Mekong_loss_raw_dir, cn.docker_base_dir,
                          sensit_type)

    # The list of all annual loss rasters
    annual_loss_list = glob.glob('Loss_20*tif')
    uu.print_log(annual_loss_list)

    uu.print_log(
        "Creating first year of loss Hansen tiles for Mekong region...")

    # One pool is shared by every multiprocessing step below and is closed
    # and joined once all of them have finished.
    pool = multiprocessing.Pool(int(cn.count / 2))

    # Recodes raw loss rasters with their loss year (for model years only)
    pool.map(Mekong_loss.recode_tiles, annual_loss_list)

    # Makes a single raster of all first loss year pixels in the Mekong.
    # The recoded rasters are passed latest year first (2015 down to 2001);
    # presumably this ordering is what makes the earlier loss year take
    # precedence where loss occurred in multiple years -- TODO confirm against
    # gdal_merge's overlap handling.
    uu.print_log("Merging all loss years within model range...")
    loss_composite = "Mekong_loss_2001_2015.tif"
    recoded_rasters = [
        "Mekong_loss_recoded_{}.tif".format(year)
        for year in range(2015, 2000, -1)
    ]
    cmd = [
        'gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=LZW',
        '-a_nodata', '0', '-ot', 'Byte'
    ] + recoded_rasters
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Creates Hansen tiles out of the composite Mekong loss
    source_raster = loss_composite
    out_pattern = cn.pattern_Mekong_loss_processed
    dt = 'Byte'
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt), tile_id_list)

    # This is necessary for changing NoData values to 0s (so they are recognized as 0s)
    pool.map(Mekong_loss.recode_tiles, tile_id_list)

    # Only uploads tiles that actually have Mekong loss in them
    upload_dir = cn.Mekong_loss_processed_dir
    pattern = cn.pattern_Mekong_loss_processed
    pool.map(
        partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern),
        tile_id_list)

    # Releases the worker processes; the original never closed this pool.
    pool.close()
    pool.join()
def mp_plantation_preparation(gadm_index_shp, planted_index_shp):
    """Create 1x1 degree planted forest tiles from one of three entry points.

    Entry point 1: neither index shapefile is supplied. 1x1 country extent
        (GADM) tiles are rasterized from scratch, then 1x1 plantation tiles
        are created from them.
    Entry point 2: a GADM 1x1 tile index shapefile is supplied. The list of
        1x1 country extent tiles is read from its attribute table, then 1x1
        plantation tiles are created from them.
    Entry point 3: a planted forest 1x1 tile index shapefile is supplied. The
        plantation growth rate, type and standard deviation 1x1 tiles are
        rasterized directly for the tiles listed in its attribute table.

    NOTE(review): this function reads ``args``, ``gadm_index_path`` and
    ``planted_index_path``, none of which are parameters or locals; presumably
    they are module-level names set by the calling script -- confirm.

    Args:
        gadm_index_shp: Name (prefix) of the GADM 1x1 tile index shapefile,
            used as the ``--include`` pattern when copying it from s3.
        planted_index_shp: Name (prefix) of the planted forest 1x1 tile index
            shapefile, used as the ``--include`` pattern when copying it from s3.
    """

    def log_cmd(cmd):
        # Runs a command and sends its combined stdout/stderr to the log.
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    def map_in_pool(func, items, processes):
        # Maps func over items with a fresh pool, then releases the workers.
        pool = Pool(processes)
        pool.map(func, items)
        pool.close()
        pool.join()

    def tile_list_from_index(index_pattern):
        # Reads the 'location' column of the first matching index shapefile's
        # attribute table (.dbf) and returns it as a list of strings.
        dbf_path = glob.glob('{}*.dbf'.format(index_pattern))[0]
        df = Dbf5(dbf_path).to_dataframe()
        return [str(y) for y in df['location'].tolist()]

    os.chdir(cn.docker_base_dir)

    # ## Not actually using this but leaving it here in case I want to add this functionality eventually. This
    # # was to allow users to run plantations for a select (contiguous) area rather than for the whole planet.
    # # List of bounding box coordinates
    # bound_list = args.bounding_box
    # # Checks if bounding box coordinates are in multiples of 10 (10 degree tiles). If they're not, the script stops.
    # for bound in bound_list:
    #     if bound%10:
    #         uu.exception_log(bound, 'not a multiple of 10. Please make bounding box coordinates are multiples of 10.')

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (gadm_index_path not in cn.gadm_plant_1x1_index_dir or planted_index_path not in cn.gadm_plant_1x1_index_dir):
        uu.exception_log('Invalid inputs. Please provide None or s3 shapefile locations for both arguments.')

    # List of all possible 10x10 Hansen tiles except for those at very extreme latitudes (not just WHRC biomass tiles)
    total_tile_list = uu.tile_list_s3(cn.pixel_area_dir)
    uu.print_log("Number of possible 10x10 tiles to evaluate:", len(total_tile_list))

    # Removes the latitude bands that don't have any planted forests in them according to Liz Goldman.
    # i.e., Liz Goldman said by Slack on 1/2/19 that the northernmost planted forest is 69.5146 and the southernmost is -46.938968.
    # This creates a more focused list of 10x10 tiles to iterate through (removes ones that definitely don't have planted forest).
    # NOTE: If the planted forest gdb is updated, the list of latitudes to exclude below may need to be changed to not exclude certain latitude bands.
    excluded_lat_bands = ('90N', '80N', '50S', '60S', '70S', '80S')
    planted_lat_tile_list = [
        tile for tile in total_tile_list
        if not any(band in tile for band in excluded_lat_bands)
    ]
    # planted_lat_tile_list = ['10N_080W']

    uu.print_log(planted_lat_tile_list)
    uu.print_log("Number of 10x10 tiles to evaluate after extreme latitudes have been removed:", len(planted_lat_tile_list))

    # If a planted forest extent 1x1 tile index shapefile isn't supplied
    if 'None' in args.planted_tile_index:

        ### Entry point 1:
        # If no shapefile of 1x1 tiles for countries with planted forests is supplied, 1x1 tiles of country extents will be created.
        # This runs the process from the very beginning and will take a few days.
        if 'None' in args.gadm_tile_index:

            uu.print_log("No GADM 1x1 tile index shapefile provided. Creating 1x1 planted forest country tiles from scratch...")

            # Downloads and unzips the GADM shapefile, which will be used to create 1x1 tiles of land areas
            uu.s3_file_download(cn.gadm_path, cn.docker_base_dir)
            log_cmd(['unzip', cn.gadm_zip])

            # Creates a new GADM shapefile with just the countries that have planted forests in them.
            # This limits creation of 1x1 rasters of land area on the countries that have planted forests rather than on all countries.
            # NOTE: If the planted forest gdb is updated and has new countries added to it, the planted forest country list
            # in constants_and_names.py must be updated, too.
            uu.print_log("Creating shapefile of countries with planted forests...")
            os.system('''ogr2ogr -sql "SELECT * FROM gadm_3_6_adm2_final WHERE iso IN ({0})" {1} gadm_3_6_adm2_final.shp'''.format(str(cn.plantation_countries)[1:-1], cn.gadm_iso))

            # Creates 1x1 degree tiles of countries that have planted forests in them.
            # I think this can handle using 50 processors because it's not trying to upload files to s3 and the tiles are small.
            # This takes several days to run because it iterates through at least 250 10x10 tiles.
            # (For single processor use, call plantation_preparation.rasterize_gadm_1x1(tile) in a loop instead.)
            processes = 50
            uu.print_log('Rasterize GADM 1x1 max processors=', processes)
            map_in_pool(plantation_preparation.rasterize_gadm_1x1, planted_lat_tile_list, processes)

            # Creates a shapefile of the boundaries of the 1x1 GADM tiles in countries with planted forests
            # and copies it to s3
            os.system('''gdaltindex {0}_{1}.shp GADM_*.tif'''.format(cn.pattern_gadm_1x1_index, uu.date_time_today))
            log_cmd(['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_gadm_1x1_index), '--recursive'])

            # # Saves the 1x1 country extent tiles to s3
            # # Only use if the entire process can't run in one go on the spot machine
            # cmd = ['aws', 's3', 'cp', cn.docker_base_dir, 's3://gfw2-data/climate/carbon_model/temp_spotmachine_output/', '--exclude', '*', '--include', 'GADM_*.tif', '--recursive']

            # Delete the aux.xml files
            os.system('''rm GADM*.tif.*''')

            # List of all 1x1 degree country extent tiles created
            gadm_list_1x1 = uu.tile_list_spot_machine(".", "GADM_")
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log(len(gadm_list_1x1))

        ### Entry point 2:
        # If a shapefile of the boundaries of 1x1 degree tiles of countries with planted forests is supplied,
        # a list of the 1x1 tiles is created from the shapefile.
        # This avoids creating the 1x1 country extent tiles all over again because the relevant tile extent are supplied
        # in the shapefile.
        elif cn.gadm_plant_1x1_index_dir in args.gadm_tile_index:

            uu.print_log("Country extent 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest tiles...")

            uu.print_log('{}/'.format(gadm_index_path))

            # Copies the shapefile of 1x1 tiles of extent of countries with planted forests
            log_cmd(['aws', 's3', 'cp', '{}/'.format(gadm_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include', '{}*'.format(gadm_index_shp)])

            # Converts the tile-name column of the country extent 1x1 tile shapefile's attribute table
            # (names contain the tile coordinates) to a list
            gadm_list_1x1 = tile_list_from_index(cn.pattern_gadm_1x1_index)
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log("There are", len(gadm_list_1x1), "1x1 country extent tiles to iterate through.")

        # In case some other arguments are provided
        else:
            uu.exception_log('Invalid GADM tile index shapefile provided. Please provide a valid shapefile.')

        # Creates 1x1 degree tiles of plantation growth wherever there are plantations.
        # Because this is iterating through all 1x1 tiles in countries with planted forests, it first checks
        # whether each 1x1 tile intersects planted forests before creating a 1x1 planted forest tile for that
        # 1x1 country extent tile.
        # 55 processors seems to use about 350 GB of memory, which seems fine. But there was some error about "PQconnectdb failed-- sorry, too many clients already".
        # So, moved the number of processors down to 48.
        processes = 48
        uu.print_log('Create 1x1 plantation from 1x1 gadm max processors=', processes)
        map_in_pool(plantation_preparation.create_1x1_plantation_from_1x1_gadm, gadm_list_1x1, processes)

        # Creates a shapefile in which each feature is the extent of a plantation extent tile, and copies it to s3.
        # This index shapefile can be used the next time this process is run if starting with Entry Point 3.
        os.system('''gdaltindex {0}_{1}.shp plant_gain_*.tif'''.format(cn.pattern_plant_1x1_index, uu.date_time_today))
        log_cmd(['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_plant_1x1_index), '--recursive'])

    ### Entry point 3
    # If a shapefile of the extents of 1x1 planted forest tiles is provided.
    # This is the part that actually creates the sequestration rate and forest type tiles.
    if cn.pattern_plant_1x1_index in args.planted_tile_index:

        uu.print_log("Planted forest 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest growth rate and forest type tiles...")

        # Copies the shapefile of 1x1 tiles of extent of planted forests.
        # (The original passed '--recursive' twice; once is enough.)
        log_cmd(['aws', 's3', 'cp', '{}/'.format(planted_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include',
                 '{}*'.format(planted_index_shp)])

        # Converts the tile-name column of the planted forest extent 1x1 tile shapefile's attribute table
        # (names contain the tile coordinates) to a list
        planted_list_1x1 = tile_list_from_index(cn.pattern_plant_1x1_index)
        uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", planted_list_1x1)
        uu.print_log("There are", len(planted_list_1x1), "1x1 planted forest extent tiles to iterate through.")

        # Creates 1x1 degree tiles of plantation growth and type wherever there are plantations.
        # Because this is iterating through only 1x1 tiles that are known to have planted forests (from a previous run
        # of this script), it does not need to check whether there are planted forests in this tile. It goes directly
        # to intersecting the planted forest table with the 1x1 tile.
        # (For single processor use, call
        # plantation_preparation.create_1x1_plantation_growth_from_1x1_planted(tile) in a loop instead.)

        # processes=40 uses about 360 GB of memory. Works on r4.16xlarge with space to spare
        # processes=52 uses about 465 GB of memory (quite stably), so this is basically the max.
        num_of_processes = 52
        map_in_pool(plantation_preparation.create_1x1_plantation_growth_from_1x1_planted, planted_list_1x1, num_of_processes)

        # This works with 50 processors on an r4.16xlarge machine. Uses about 430 GB out of 480 GB.
        # (The original also created an unused 52-process pool here that was never closed; it has been removed.)
        processes = 50
        uu.print_log('Create 1x1 plantation type max processors=', processes)
        map_in_pool(plantation_preparation.create_1x1_plantation_type_from_1x1_planted, planted_list_1x1, processes)

        # This rasterizes the plantation removal factor standard deviations
        # processes=50 peaks at about 450 GB
        num_of_processes = 50
        map_in_pool(plantation_preparation.create_1x1_plantation_stdev_from_1x1_planted, planted_list_1x1, num_of_processes)
def create_climate_zone_tiles(tile_id):
    """Create a gap-filled climate zone tile for one Hansen tile.

    First warps the global climate zone raster (``cn.climate_zone_raw``) to
    the tile's extent and resolution as an intermediate GeoTIFF with
    1024x1024 blocks. Then, block by block, replaces every 0 (NoData) pixel
    with the most common non-zero climate zone code in that block and writes
    the result to ``<tile_id>_<cn.pattern_climate_zone>.tif``. Blocks that
    contain no climate zone pixels at all are left as 0.

    Args:
        tile_id: Tile identifier passed to ``uu.coords`` to get the bounding
            coordinates and used as the prefix of the output file names.
    """
    # Start time
    start = datetime.datetime.now()

    uu.print_log("Getting extent of", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # The output of gdalwarp ("climate_zone_intermediate") is not used anywhere else.
    intermediate_tile = '{0}_{1}.tif'.format(tile_id, "climate_zone_intermediate")

    # Makes a 10x10 degree chunk of the global climate zone raster conform to Hansen tile properties.
    # Rather than the usual 40000x1 windows, this creates 1024x1024 windows for filling in missing values (see below).
    uu.print_log("Warping climate zone tile", tile_id)
    cmd = [
        'gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=LZW',
        '-tr', str(cn.Hansen_res), str(cn.Hansen_res), '-tap',
        '-te', str(xmin), str(ymin), str(xmax), str(ymax),
        '-dstnodata', '0', '-ot', 'Byte', '-overwrite',
        '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=1024', '-co', 'BLOCKYSIZE=1024',
        cn.climate_zone_raw, intermediate_tile
    ]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Fills in empty pixels in the climate zone raster with whatever value is most common (mode) in its 1024x1024 pixel window.
    # That is, any 1024x1024 processing window that has >=1 climate zone pixel in it will have its empty pixels filled in
    # with whatever value is most common in that window.
    # This extends the climate zone raster out into coastal areas and better covers coasts/islands, meaning that more
    # loss pixels will have climate zone pixels available to them during emissions processing.
    # Everything from here down is used to assign pixels without climate zone to a climate zone in the 1024x1024 windows.
    uu.print_log("Re-tiling climate zone for tile", tile_id)

    # Output file name
    climate_zone_processed = '{0}_{1}.tif'.format(tile_id, cn.pattern_climate_zone)

    # Both datasets are opened as context managers so they are closed (and the
    # output flushed to disk) even if processing a window fails.
    with rasterio.open(intermediate_tile) as climate_zone_src:

        # Grabs metadata about the tif, like its location/projection/cellsize,
        # and updates it for the output dataset.
        kwargs = climate_zone_src.meta
        kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0)

        # The output file: climate zone with empty pixels filled in
        with rasterio.open(climate_zone_processed, 'w', **kwargs) as dst_climate_zone:

            # Iterates across the block windows of the input tile, so the
            # whole tif is never held in memory at once.
            for idx, window in climate_zone_src.block_windows(1):

                # Reads this window of the input raster
                climate_zone_window = climate_zone_src.read(1, window=window)

                # Flattens the 2D window and keeps only pixels with climate zone codes (non-zero)
                climate_zone_flat = climate_zone_window.flatten()
                non_zeros = climate_zone_flat[climate_zone_flat != 0]

                # If there were only pixels without climate zone codes in the window, the mode is assigned 0.
                # Otherwise, the mode is the most common code among those in the window.
                if non_zeros.size < 1:
                    mode = 0
                else:
                    mode = stats.mode(non_zeros)[0]

                # Assigns all pixels without a climate zone code in that window to that most common code
                climate_zone_window[climate_zone_window == 0] = mode

                # Writes the filled window to the same window of the output.
                # Original author's note: although the input windows are 1024 x 1024 pixels, the output files
                # use 40000 x 1 pixel windows like the other tiles in this model.
                dst_climate_zone.write_band(1, climate_zone_window, window=window)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_climate_zone)
# Exemple #19
# 0
def main():
    """Drive the creation of US-specific annual AGB and BGB removal rate tiles.

    Takes no arguments and returns nothing; all work happens through side effects
    (s3 downloads and uploads, subprocess calls, and files written in cn.docker_base_dir).

    The steps are:
      1. Get FIA region, forest age category and FIA forest group tiles onto the local machine.
         For each of the three, if 16 processed tiles are already counted on s3 they are downloaded;
         otherwise the raw input is downloaded and converted to tiles.
      2. Download the other inputs listed in download_dict and the US removal rate spreadsheet.
      3. Build two lookup dictionaries from the spreadsheet: group-region-age code -> rate,
         and group-region code -> young forest rate.
      4. Run US_removal_rates.US_removal_rate_calc on every US tile in parallel.
      5. Upload the output tiles to s3.
    """

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # Files to download for this script.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults]
    }

    # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
    tile_id_list = uu.tile_list_s3(cn.annual_gain_AGB_IPCC_defaults_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
    # tile_id_list = ['50N_130W'] # test tiles

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.US_annual_gain_AGB_natrl_forest_dir,
        cn.US_annual_gain_BGB_natrl_forest_dir
    ]
    output_pattern_list = [
        cn.pattern_US_annual_gain_AGB_natrl_forest,
        cn.pattern_US_annual_gain_BGB_natrl_forest
    ]

    # By definition, this script is for US-specific removals
    sensit_type = 'US_removals'

    # Number of worker processes used by every pool in this script.
    # count/2 on a m4.16xlarge maxes out at about 230 GB of memory (processing 16 tiles at once), so it's okay on an m4.16xlarge
    processes = int(cn.count / 2)

    # Counts how many processed FIA region tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_regions_tile_count = uu.count_tiles_s3(cn.FIA_regions_processed_dir)

    # Only creates FIA region tiles if they don't already exist on s3.
    if FIA_regions_tile_count == 16:
        # The tiles are downloaded from s3 here, so the message matches the other two inputs below
        uu.print_log("FIA region tiles already created. Copying to spot machine now...")
        uu.s3_flexible_download(cn.FIA_regions_processed_dir,
                                cn.pattern_FIA_regions_processed,
                                cn.docker_base_dir, 'std', 'all')

    else:
        uu.print_log(
            "FIA region tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw),
            cn.docker_base_dir, 'std')

        cmd = ['unzip', '-o', '-j', cn.name_FIA_regions_raw]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)
        # Reaps the child and makes sure unzipping has finished before the shapefile is used
        process.wait()

        # Converts the region shapefile to Hansen tiles
        pool = multiprocessing.Pool(processes)
        pool.map(US_removal_rates.prep_FIA_regions, tile_id_list)
        # Releases the worker processes instead of leaving them alive for the rest of the script
        pool.close()
        pool.join()

    # List of FIA region tiles on the spot machine. Only this list is used for the rest of the script.
    US_tile_list = uu.tile_list_spot_machine(
        cn.docker_base_dir, '{}.tif'.format(cn.pattern_FIA_regions_processed))
    # The first 8 characters of each file name are the tile id (e.g., 50N_130W)
    US_tile_id_list = [tile_name[0:8] for tile_name in US_tile_list]
    # US_tile_id_list = ['50N_130W']    # For testing
    uu.print_log(US_tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(US_tile_id_list))) +
        "\n")

    # Counts how many processed forest age category tiles there are on s3 already. 16 tiles cover the continental US.
    US_age_tile_count = uu.count_tiles_s3(cn.US_forest_age_cat_processed_dir)

    # Only creates FIA forest age category tiles if they don't already exist on s3.
    if US_age_tile_count == 16:
        uu.print_log(
            "Forest age category tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.US_forest_age_cat_processed_dir,
                                cn.pattern_US_forest_age_cat_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "Southern forest age category tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.US_forest_age_cat_raw_dir,
                         cn.name_US_forest_age_cat_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest age category raster to Hansen tiles
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=cn.name_US_forest_age_cat_raw,
                    out_pattern=cn.pattern_US_forest_age_cat_processed,
                    dt='Int16'), US_tile_id_list)
        pool.close()
        pool.join()

        uu.upload_final_set(cn.US_forest_age_cat_processed_dir,
                            cn.pattern_US_forest_age_cat_processed)

    # Counts how many processed FIA forest group tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_forest_group_tile_count = uu.count_tiles_s3(
        cn.FIA_forest_group_processed_dir)

    # Only creates FIA forest group tiles if they don't already exist on s3.
    if FIA_forest_group_tile_count == 16:
        uu.print_log(
            "FIA forest group tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.FIA_forest_group_processed_dir,
                                cn.pattern_FIA_forest_group_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "FIA forest group tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_forest_group_raw_dir,
                         cn.name_FIA_forest_group_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest group raster to Hansen forest group tiles
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=cn.name_FIA_forest_group_raw,
                    out_pattern=cn.pattern_FIA_forest_group_processed,
                    dt='Byte'), US_tile_id_list)
        pool.close()
        pool.join()

        uu.upload_final_set(cn.FIA_forest_group_processed_dir,
                            cn.pattern_FIA_forest_group_processed)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for download_dir, patterns in download_dict.items():
        uu.s3_flexible_download(download_dir, patterns[0], cn.docker_base_dir,
                                sensit_type, US_tile_id_list)

    # Table with US-specific removal rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    # Reaps the child and makes sure the copy has finished before the spreadsheet is read
    process.wait()

    # Imports the table with the region-group-age AGB removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_for_model")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    age_dict = {
        'growth_young': 1000,
        'growth_middle': 2000,
        'growth_old': 3000
    }

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    gain_table_group_region_age = gain_table_group_region_by_age.replace(
        {"variable": age_dict})
    # Age codes become 10000/20000/30000 so they cannot collide with the group and region digits added below
    gain_table_group_region_age[
        'age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                              gain_table_group_region_age['forest_group_code']*100 + \
                                              gain_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    gain_table_group_region_age_dict = pd.Series(
        gain_table_group_region_age.value.values,
        index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    # 10000 is the young forest age code (1000 from age_dict multiplied by 10 above).
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[
            gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \
                                                       gain_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    gain_table_group_region_dict = pd.Series(
        gain_table_group_region.value.values,
        index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(
            US_removal_rates.US_removal_rate_calc,
            gain_table_group_region_age_dict=gain_table_group_region_age_dict,
            gain_table_group_region_dict=gain_table_group_region_dict,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type), US_tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in US_tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_group_region_dict,
    #                                           output_pattern_list, sensit_type)

    # Uploads output tiles to s3
    for output_dir, output_pattern in zip(output_dir_list, output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
def mp_create_inputs_for_C_pools(tile_id_list, run_date=None):
    """Create the input tiles needed for the carbon pools, in parallel, and upload them to s3.

    Downloads the raw FAO ecozone, precipitation and srtm elevation inputs, then runs
    create_inputs_for_C_pools.create_input_files on every tile and uploads the
    boreal/temperate/tropical, elevation and precipitation output sets.

    :param tile_id_list: list of tile ids to process, or the string 'all' to process every tile
        listed for cn.model_extent_dir
    :param run_date: optional date that replaces the date in the output directories from
        constants_and_names (via uu.replace_output_dir_date). Left as None, the directories are unchanged.
    :return: None
    """

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.bor_tem_trop_processed_dir, cn.elevation_processed_dir,
        cn.precip_processed_dir
    ]
    output_pattern_list = [
        cn.pattern_bor_tem_trop_processed, cn.pattern_elevation,
        cn.pattern_precip
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads two of the raw input files for creating carbon emitted_pools
    input_files = [cn.fao_ecozone_raw_dir, cn.precip_raw_dir]

    for input_file in input_files:
        uu.s3_file_download('{}'.format(input_file), cn.docker_base_dir,
                            sensit_type)

    uu.print_log(
        "Unzipping boreal/temperate/tropical file (from FAO ecozones)")
    cmd = [
        'unzip', '{}'.format(cn.pattern_fao_ecozone_raw), '-d',
        cn.docker_base_dir
    ]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    # Reaps the child and makes sure unzipping has finished before the tiles are processed
    process.wait()

    uu.print_log("Copying elevation (srtm) files")
    uu.s3_folder_download(cn.srtm_raw_dir, './srtm', sensit_type)

    uu.print_log("Making elevation (srtm) vrt")
    # shell=True is needed so the shell expands srtm/*.tif. The command is a fixed string with no outside input.
    check_call(
        'gdalbuildvrt srtm.vrt srtm/*.tif', shell=True
    )  # I don't know how to convert this to output to the pipe, so just leaving as is

    # Worked with count/3 on an r4.16xlarge (140 out of 480 GB used). I think it should be fine with count/2 but didn't try it.
    processes = int(cn.count / 2)
    uu.print_log('Inputs for C emitted_pools max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(create_inputs_for_C_pools.create_input_files, tile_id_list)
    # Releases the worker processes before uploading instead of leaving them alive
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_inputs_for_C_pools.create_input_files(tile_id)

    uu.print_log("Uploading output files")
    for output_dir, output_pattern in zip(output_dir_list, output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
def main():

    no_upload = False

    sensit_type = "legal_Amazon_loss"

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    Brazil_stages = ['all', 'create_forest_extent', 'create_loss']

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description=
        'Create tiles of forest extent in legal Amazon in 2000 and annual loss according to PRODES'
    )
    parser.add_argument(
        '--stages',
        '-s',
        required=True,
        help=
        'Stages of creating Brazil legal Amazon-specific gross cumulative removals. Options are {}'
        .format(Brazil_stages))
    parser.add_argument(
        '--run_through',
        '-r',
        required=True,
        help=
        'Options: true or false. true: run named stage and following stages. false: run only named stage.'
    )
    args = parser.parse_args()
    stage_input = args.stages
    run_through = args.run_through

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (stage_input not in Brazil_stages):
        uu.exception_log(
            no_upload, 'Invalid stage selection. Please provide a stage from',
            Brazil_stages)
    else:
        pass
    if (run_through not in ['true', 'false']):
        uu.exception_log(
            no_upload,
            'Invalid run through option. Please enter true or false.')
    else:
        pass

    actual_stages = uu.analysis_stages(Brazil_stages, stage_input, run_through,
                                       sensit_type)
    uu.print_log(actual_stages)

    # By definition, this script is for the legal Amazon loss sensitivity analysis
    sensit_type = 'legal_Amazon_loss'

    # List of output directories and output file name patterns
    master_output_dir_list = [
        cn.Brazil_forest_extent_2000_processed_dir,
        cn.Brazil_annual_loss_processed_dir
    ]

    master_output_pattern_list = [
        cn.pattern_Brazil_forest_extent_2000_processed,
        cn.pattern_Brazil_annual_loss_processed
    ]

    # Creates forest extent 2000 raster from multiple PRODES forest extent rasters
    ###NOTE: Didn't redo this for model v1.2.0, so I don't know if it still works.
    if 'create_forest_extent' in actual_stages:

        uu.print_log('Creating forest extent tiles')

        # List of tiles that could be run. This list is only used to create the legal Amazon forest extent 2000 tiles.
        tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
        # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
        # tile_id_list = ['50N_130W'] # test tiles
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir,
                              cn.docker_base_dir, sensit_type)
        raw_forest_extent_inputs = glob.glob(
            '*_AMZ_warped_*tif')  # The list of tiles to merge

        # Gets the resolution of a more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('*2019_AMZ_warped_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log(pixelSizeX)
        uu.print_log(pixelSizeY)

        # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out
        # at about 300 GB as it progresses about 15% each time; then the memory drops back to 0 and slowly increases.
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged),
            '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
            '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            raw_forest_extent_inputs[0], raw_forest_extent_inputs[1],
            raw_forest_extent_inputs[2], raw_forest_extent_inputs[3],
            raw_forest_extent_inputs[4], raw_forest_extent_inputs[5]
        ]
        uu.log_subprocess_output_full(cmd)

        # Uploads the merged forest extent raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_forest_extent_2000_merged_dir,
                            cn.pattern_Brazil_forest_extent_2000_merged)

        # Creates legal Amazon extent 2000 tiles
        source_raster = '{}.tif'.format(
            cn.pattern_Brazil_forest_extent_2000_merged)
        out_pattern = cn.pattern_Brazil_forest_extent_2000_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        upload_dir = master_output_dir_list[0]
        pattern = master_output_pattern_list[0]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates annual loss raster for 2001-2019 from multiples PRODES rasters
    if 'create_loss' in actual_stages:

        uu.print_log('Creating annual PRODES loss tiles')

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input rasters and lists them
        cmd = [
            'aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.',
            '--recursive'
        ]
        uu.log_subprocess_output_full(cmd)

        uu.print_log(
            "Input loss rasters downloaded. Getting resolution of recent raster..."
        )

        # Gets the resolution of the more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('Prodes2019_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]

        uu.print_log("  Recent raster resolution: {0} by {1}".format(
            pixelSizeX, pixelSizeY))

        # This merges both loss rasters together, so it takes a lot of memory and time. It seems to max out
        # at about 180 GB, then go back to 0.
        # This took about 8 minutes.
        uu.print_log(
            "Merging input loss rasters into a composite for all years...")
        cmd = [
            'gdal_merge.py', '-o',
            '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged), '-co',
            'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps',
            '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
            'Prodes2019_annual_loss_2008_2019.tif',
            'Prodes2014_annual_loss_2001_2007.tif'
        ]
        uu.log_subprocess_output_full(cmd)
        uu.print_log("  Loss rasters combined into composite")

        # Uploads the merged loss raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_annual_loss_merged_dir,
                            cn.pattern_Brazil_annual_loss_merged)

        # Creates annual loss 2001-2019 tiles
        uu.print_log("Warping composite PRODES loss to Hansen tiles...")
        source_raster = '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged)
        out_pattern = cn.pattern_Brazil_annual_loss_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt,
                    no_upload=no_upload), tile_id_list)
        uu.print_log("  PRODES composite loss raster warped to Hansen tiles")

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        # In practice, every Amazon tile has loss in it but I figured I'd do this just to be thorough.
        upload_dir = master_output_dir_list[1]
        pattern = master_output_pattern_list[1]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(
            partial(uu.check_and_upload,
                    upload_dir=upload_dir,
                    pattern=pattern), tile_id_list)

    # Creates forest age category tiles
    if 'forest_age_category' in actual_stages:

        uu.print_log('Creating forest age category tiles')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[2]

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # With processes=30, peak usage was about 350 GB using WHRC AGB.
        # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that.
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_forest_age_category,
                    sensit_type=sensit_type,
                    output_pattern=output_pattern), tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #
        #     legal_AMZ_loss.legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern)

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[2],
                            stage_output_pattern_list[2])

    # Creates tiles of the number of years of removals
    if 'gain_year_count' in actual_stages:

        uu.print_log('Creating gain year count tiles for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir:
            [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir:
            [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir:
            [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir:
            [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[3]

        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(
                legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change,
                sensit_type=sensit_type), tile_id_list)

        pool.map(
            partial(legal_AMZ_loss.
                    legal_Amazon_create_gain_year_count_loss_and_gain_standard,
                    sensit_type=sensit_type), tile_id_list)

        pool = multiprocessing.Pool(
            int(cn.count / 8)
        )  # count/5 uses more than 160GB of memory. count/8 uses about 120GB of memory.
        pool.map(
            partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge,
                    output_pattern=output_pattern), tile_id_list)

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        # legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern)

        # Intermediate output tiles for checking outputs
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change")
        uu.upload_final_set(stage_output_dir_list[3],
                            "growth_years_loss_and_gain")

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[3],
                            stage_output_pattern_list[3])

    # Creates tiles of annual AGB and BGB gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'annual_removals' in actual_stages:

        uu.print_log('Creating annual removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (annual removals).
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[4:6])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[4:6])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Copies the table with IPCC Table 4.9 default gain rates from s3 into the working directory
        gain_spreadsheet_s3_path = os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet)
        cmd = ['aws', 's3', 'cp', gain_spreadsheet_s3_path, cn.docker_base_dir]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # Switches off pandas' chained-assignment warning; columns are assigned onto derived tables below
        pd.options.mode.chained_assignment = None

        # Reads the sheet holding the ecozone-continent codes and their carbon gain rates
        gain_table = pd.read_excel(
            "{}".format(cn.gain_spreadsheet),
            sheet_name="natrl fores gain, for std model")

        # Keeps only the first row for each code (N. and S. America share codes for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

        # Wide to long: one row per continent-ecozone-age category, with rows lacking a rate removed
        age_columns = [
            'growth_primary',
            'growth_secondary_greater_20',
            'growth_secondary_less_20',
        ]
        gain_table_cont_eco_age = pd.melt(
            gain_table_simplified, id_vars=['gainEcoCon'], value_vars=age_columns)
        gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

        # Table of the continent-ecozone codes on their own, for pixels that have a continent-ecozone
        # value but no forest age value. These codes are given a removal rate of 0.
        gain_table_con_eco_only = gain_table_cont_eco_age.drop_duplicates(
            subset='gainEcoCon', keep='first')
        gain_table_con_eco_only['value'] = 0
        gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only['gainEcoCon']

        # Numeric offset for each age category, so every continent-ecozone-age combination gets a unique code
        age_dict = {
            'growth_primary': 10000,
            'growth_secondary_greater_20': 20000,
            'growth_secondary_less_20': 30000
        }

        # Swaps the age category names for their offsets, then adds the offset to the continent-ecozone code
        gain_table_cont_eco_age = gain_table_cont_eco_age.replace({"variable": age_dict})
        cont_eco_codes = gain_table_cont_eco_age['gainEcoCon']
        age_offsets = gain_table_cont_eco_age['variable']
        gain_table_cont_eco_age['cont_eco_age'] = cont_eco_codes + age_offsets

        # Stacks the continent-ecozone-only rows on top of the continent-ecozone-age rows
        gain_table_all_combos = pd.concat(
            [gain_table_con_eco_only, gain_table_cont_eco_age])

        # Dictionary of code -> gain rate
        gain_rates_by_code = pd.Series(
            gain_table_all_combos.value.values,
            index=gain_table_all_combos.cont_eco_age)
        gain_table_dict = gain_rates_by_code.to_dict()

        # Code 0 (not in a continent) gets a rate of 0
        gain_table_dict[0] = 0

        # Bare age codes (forest age but no continent-ecozone) also get a rate of 0
        gain_table_dict.update(dict.fromkeys(age_dict.values(), 0))

        # All keys (continent-ecozone-age codes) are converted to float type
        gain_table_dict = {
            float(code): rate
            for code, rate in gain_table_dict.items()
        }

        uu.print_log(gain_table_dict)

        # functools.partial fixes every argument except the tile id, so pool.map can pass one tile id per call.
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # processes=24 peaks at about 440 GB of memory on an r4.16xlarge machine
        output_pattern_list = stage_output_pattern_list
        annual_gain_worker = partial(
            annual_gain_rate_natrl_forest.annual_gain_rate,
            sensit_type=sensit_type,
            gain_table_dict=gain_table_dict,
            output_pattern_list=output_pattern_list)
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(annual_gain_worker, tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_id_list:
        #
        #     annual_gain_rate_natrl_forest.annual_gain_rate(tile, sensit_type, gain_table_dict, stage_output_pattern_list)

        # Uploads each output directory of this stage together with the pattern at the same position
        for i, output_dir in enumerate(stage_output_dir_list):
            uu.upload_final_set(output_dir, stage_output_pattern_list[i])

    # Creates tiles of cumulative AGCO2 and BGCO2 gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'cumulative_removals' in actual_stages:

        uu.print_log('Creating cumulative removals for natural forest')

        # Files to download for this script: s3 directory -> one-item list with its file name pattern.
        download_dict = {
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.gain_year_count_natrl_forest_dir:
            [cn.pattern_gain_year_count_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(
            cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_id_list))) +
            "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        # NOTE(review): stage_output_dir_list and stage_output_pattern_list are only assigned in this stage
        # for non-standard runs -- confirm what a 'std' run is expected to use here.
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            stage_output_dir_list = uu.alter_dirs(sensit_type,
                                                  master_output_dir_list[6:8])
            stage_output_pattern_list = uu.alter_patterns(
                sensit_type, master_output_pattern_list[6:8])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for s3_dir, s3_patterns in download_dict.items():
            uu.s3_flexible_download(s3_dir, s3_patterns[0], cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        output_pattern_list = stage_output_pattern_list

        # Calculates cumulative aboveground, then belowground, carbon gain in natural forest.
        # Each pass gets its own pool, and that pool is closed and joined before the next one is created
        # so that no worker processes are left behind when the pool variable is rebound.
        cumulative_gain_functions = (
            cumulative_gain_natrl_forest.cumulative_gain_AGCO2,
            cumulative_gain_natrl_forest.cumulative_gain_BGCO2,
        )
        for cumulative_gain_fx in cumulative_gain_functions:
            pool = multiprocessing.Pool(int(cn.count / 3))
            pool.map(
                partial(cumulative_gain_fx,
                        output_pattern_list=output_pattern_list,
                        sensit_type=sensit_type), tile_id_list)
            pool.close()
            pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_AGCO2(tile_id, stage_output_pattern_list[0], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_BGCO2(tile_id, stage_output_pattern_list[1], sensit_type)

        # Uploads outputs from this stage, pairing each directory with the pattern at the same position
        for i, output_dir in enumerate(stage_output_dir_list):
            uu.upload_final_set(output_dir, stage_output_pattern_list[i])

    # Creates tiles of annual gain rate and cumulative removals for all forest types (above + belowground)
    if 'removals_merged' in actual_stages:

        uu.print_log(
            'Creating annual and cumulative removals for all forest types combined (above + belowground)'
        )

        # Inputs for this stage: annual AGB and BGB gain rates and cumulative AGCO2 and BGCO2 gain for
        # mangroves, non-mangrove planted forests and natural forests.
        # Each s3 directory is paired with a one-item list holding its file name pattern.
        download_dict = {
            # Annual aboveground biomass gain rates
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
            # Annual belowground biomass gain rates
            cn.annual_gain_BGB_mangrove_dir:
            [cn.pattern_annual_gain_BGB_mangrove],
            cn.annual_gain_BGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_BGB_planted_forest_non_mangrove],
            cn.annual_gain_BGB_natrl_forest_dir:
            [cn.pattern_annual_gain_BGB_natrl_forest],
            # Cumulative aboveground CO2 gain
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            # Cumulative belowground CO2 gain
            cn.cumul_gain_BGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_mangrove],
            cn.cumul_gain_BGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_BGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_BGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_BGCO2_natrl_forest],
        }

        # The tile list is obtained from the processed Brazil forest extent 2000 directory
        extent_2000_dir = cn.Brazil_forest_extent_2000_processed_dir
        tile_id_list = uu.tile_list_s3(extent_2000_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        tile_count = str(len(tile_id_list))
        uu.print_log("There are {} tiles to process".format(tile_count) + "\n")

        # Non-standard model runs get their own output directories and file name patterns.
        # Only slice 8:10 of the master lists is adapted for this stage.
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            merged_removals_dirs = master_output_dir_list[8:10]
            stage_output_dir_list = uu.alter_dirs(sensit_type, merged_removals_dirs)
            merged_removals_patterns = master_output_pattern_list[8:10]
            stage_output_pattern_list = uu.alter_patterns(sensit_type, merged_removals_patterns)

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for s3_dir, s3_patterns in download_dict.items():
            uu.s3_flexible_download(s3_dir, s3_patterns[0], cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # For multiprocessing: every argument except the tile id is fixed up front
        output_pattern_list = stage_output_pattern_list
        gain_merge_worker = partial(
            merge_cumulative_annual_gain_all_forest_types.gain_merge,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type)
        pool = multiprocessing.Pool(int(cn.count / 3))
        pool.map(gain_merge_worker, tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     merge_cumulative_annual_gain_all_forest_types.gain_merge(tile_id, output_pattern_list, sensit_type)

        # Uploads output tiles to s3, pairing each directory with the pattern at the same position
        for i, output_dir in enumerate(stage_output_dir_list):
            uu.upload_final_set(output_dir, stage_output_pattern_list[i])

    # Creates carbon emitted_pools in loss year
    if 'carbon_pools' in actual_stages:

        uu.print_log('Creating emissions year carbon emitted_pools')

        # Carbon emitted_pools are created for the loss year rather than for 2000.
        # The rest of this stage branches on this value ('loss' or '2000').
        extent = 'loss'

        # Inputs needed regardless of the model run: s3 directory -> one-item list with its file name pattern
        download_dict = {
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir:
            [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.cumul_gain_AGCO2_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir:
            [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir:
            [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.annual_gain_AGB_mangrove_dir:
            [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir:
            [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir:
            [cn.pattern_annual_gain_AGB_IPCC_defaults],
        }

        # The AGB tiles depend on the model run: JPL for the biomass_swap run, WHRC otherwise
        agb_dir, agb_pattern = (
            (cn.JPL_processed_dir, cn.pattern_JPL_unmasked_processed)
            if sensit_type == 'biomass_swap'
            else (cn.WHRC_biomass_2000_unmasked_dir, cn.pattern_WHRC_biomass_2000_unmasked)
        )
        download_dict[agb_dir] = [agb_pattern]

        # The loss tiles depend on the model run: Brazil annual loss for the legal_Amazon_loss run,
        # otherwise the standard loss directory with an empty pattern
        loss_dir, loss_pattern = (
            (cn.Brazil_annual_loss_processed_dir, cn.pattern_Brazil_annual_loss_processed)
            if sensit_type == 'legal_Amazon_loss'
            else (cn.loss_dir, '')
        )
        download_dict[loss_dir] = [loss_pattern]

        # The tile list is obtained from the processed Brazil forest extent 2000 directory
        extent_2000_dir = cn.Brazil_forest_extent_2000_processed_dir
        tile_id_list = uu.tile_list_s3(extent_2000_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        tile_count = str(len(tile_id_list))
        uu.print_log("There are {} tiles to process".format(tile_count) + "\n")

        # Downloads every input set listed above
        for s3_dir, s3_patterns in download_dict.items():
            uu.s3_flexible_download(s3_dir, s3_patterns[0], cn.docker_base_dir,
                                    sensit_type, tile_id_list)

        # Non-standard model runs get their own output directories and file name patterns
        # (slice 10:16 of the master lists for this stage)
        if sensit_type != 'std':
            uu.print_log(
                "Changing output directory and file name pattern based on sensitivity analysis"
            )
            carbon_pool_dirs = master_output_dir_list[10:16]
            stage_output_dir_list = uu.alter_dirs(sensit_type, carbon_pool_dirs)
            carbon_pool_patterns = master_output_pattern_list[10:16]
            stage_output_pattern_list = uu.alter_patterns(sensit_type, carbon_pool_patterns)

        # Copies the table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates from s3
        gain_spreadsheet_s3_path = os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet)
        cmd = ['aws', 's3', 'cp', gain_spreadsheet_s3_path, cn.docker_base_dir]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # Switches off pandas' chained-assignment warning
        pd.options.mode.chained_assignment = None

        # Reads the sheet holding the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                   sheet_name="mangrove gain, for model")

        # Keeps only the first row for each code (N. and S. America share codes for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

        # Each ratio dictionary is built by the same helper from the simplified table and three
        # constants (tropical dry, tropical wet, subtropical mangrove)
        build_ratio_dict = create_carbon_pools.mangrove_pool_ratio_dict

        # Belowground to aboveground
        mang_BGB_AGB_ratio = build_ratio_dict(
            gain_table_simplified,
            cn.below_to_above_trop_dry_mang,
            cn.below_to_above_trop_wet_mang,
            cn.below_to_above_subtrop_mang)

        # Deadwood to aboveground
        mang_deadwood_AGB_ratio = build_ratio_dict(
            gain_table_simplified,
            cn.deadwood_to_above_trop_dry_mang,
            cn.deadwood_to_above_trop_wet_mang,
            cn.deadwood_to_above_subtrop_mang)

        # Litter to aboveground
        mang_litter_AGB_ratio = build_ratio_dict(
            gain_table_simplified,
            cn.litter_to_above_trop_dry_mang,
            cn.litter_to_above_trop_wet_mang,
            cn.litter_to_above_subtrop_mang)

        # Aboveground carbon. Both valid extents run the same sequence (log, map over tiles, upload);
        # they differ only in the message, the worker function and the number of processes.
        if extent in ('loss', '2000'):

            if extent == 'loss':
                uu.print_log(
                    "Creating tiles of emitted aboveground carbon (carbon 2000 + carbon accumulation until loss year)"
                )
                # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
                # 14 processors maxes out at 410-415 GB
                agc_fx = create_carbon_pools.create_emitted_AGC
                agc_processes = int(cn.count / 4)
            else:
                uu.print_log("Creating tiles of aboveground carbon in 2000")
                # 16 processors seems to use more than 460 GB-- I don't know exactly how much it uses because I stopped it at 460
                # 14 processors maxes out at 415 GB
                agc_fx = create_carbon_pools.create_2000_AGC
                agc_processes = 14

            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(agc_processes)
            pool.map(
                partial(agc_fx, pattern=pattern, sensit_type=sensit_type),
                tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use (loss extent)
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_emitted_AGC(tile_id, stage_output_pattern_list[0], sensit_type)
            #
            # # For single processor use (2000 extent)
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_2000_AGC(tile_id, output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0],
                                stage_output_pattern_list[0])

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        # Belowground, deadwood and litter carbon are each derived with a mangrove ratio dictionary and
        # follow the same sequence, so they are driven from one table. Each row holds: log message,
        # worker function, the ratio keyword argument for that worker, the position in the stage output
        # lists, and the divisor applied to cn.count to size the pool.
        ratio_pool_runs = (
            # 18 processors used between 300 and 400 GB memory, so it was okay on a r4.16xlarge spot machine
            ("Creating tiles of belowground carbon",
             create_carbon_pools.create_BGC,
             {'mang_BGB_AGB_ratio': mang_BGB_AGB_ratio}, 1, 2),
            # processes=16 maxes out at about 430 GB
            ("Creating tiles of deadwood carbon",
             create_carbon_pools.create_deadwood,
             {'mang_deadwood_AGB_ratio': mang_deadwood_AGB_ratio}, 2, 4),
            ("Creating tiles of litter carbon",
             create_carbon_pools.create_litter,
             {'mang_litter_AGB_ratio': mang_litter_AGB_ratio}, 3, 4),
        )

        for log_message, pool_fx, ratio_kwarg, list_index, count_divisor in ratio_pool_runs:

            uu.print_log(log_message)

            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[list_index]
            pool = multiprocessing.Pool(int(cn.count / count_divisor))
            pool.map(
                partial(pool_fx,
                        extent=extent,
                        pattern=pattern,
                        sensit_type=sensit_type,
                        **ratio_kwarg), tile_id_list)
            pool.close()
            pool.join()

            uu.upload_final_set(stage_output_dir_list[list_index],
                                stage_output_pattern_list[list_index])

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, extent, stage_output_pattern_list[1], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_deadwood(tile_id, mang_deadwood_AGB_ratio, extent, stage_output_pattern_list[2], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_litter(tile_id, mang_litter_AGB_ratio, extent, stage_output_pattern_list[3], sensit_type)

        # Soil carbon is only created for the loss-year extent; it is skipped for the 2000 extent
        if extent == '2000':
            uu.print_log("Skipping soil for 2000 carbon pool calculation")

        elif extent == 'loss':

            uu.print_log("Creating tiles of soil carbon")
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[4]
            soil_worker = partial(create_carbon_pools.create_soil,
                                  pattern=pattern,
                                  sensit_type=sensit_type)
            pool = multiprocessing.Pool(int(cn.count / 3))
            pool.map(soil_worker, tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_soil(tile_id, stage_output_pattern_list[4], sensit_type)

            uu.upload_final_set(stage_output_dir_list[4],
                                stage_output_pattern_list[4])

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of total carbon")
        # I tried several different processor numbers for this. Ended up using 14 processors, which used about 380 GB memory
        # at peak. Probably could've handled 16 processors on an r4.16xlarge machine but I didn't feel like taking the time to check.
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[5]
        total_C_worker = partial(create_carbon_pools.create_total_C,
                                 extent=extent,
                                 pattern=pattern,
                                 sensit_type=sensit_type)
        pool = multiprocessing.Pool(int(cn.count / 4))
        pool.map(total_C_worker, tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_total_C(tile_id, extent, stage_output_pattern_list[5], sensit_type)

        # Uploads the total carbon tiles (item 5 of the stage output lists)
        uu.upload_final_set(stage_output_dir_list[5],
                            stage_output_pattern_list[5])