def rasterize_pre_2000_plantations(tile_id):

    # Start time
    start = datetime.datetime.now()

    uu.print_log("Getting extent of", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    out_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_plant_pre_2000)

    cmd = [
        'gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=LZW', '-tr',
        '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-tap', '-ot',
        'Byte', '-a_nodata', '0', '-te',
        str(xmin),
        str(ymin),
        str(xmax),
        str(ymax), '{}.shp'.format(cn.pattern_plant_pre_2000_raw), out_tile
    ]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_plant_pre_2000)
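# Note: uu.coords is assumed to parse tile IDs of the form "00N_110E" (the
# top-left corner of a 10x10 degree Hansen tile) into bounding coordinates.
# A minimal sketch of that behavior under those assumptions (coords_sketch is
# hypothetical, not the actual universal_util implementation):
def coords_sketch(tile_id):

    NS, EW = tile_id.split("_")  # e.g. "00N", "110E"

    # Latitude of the tile's top edge and longitude of its left edge
    ymax = int(NS[:2]) if NS[2] == 'N' else -int(NS[:2])
    xmin = int(EW[:3]) if EW[3] == 'E' else -int(EW[:3])

    return xmin, ymax - 10, xmin + 10, ymax

# coords_sketch("00N_110E") -> (110, -10, 120, 0)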
def legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern):

    uu.print_log(
        "Merging loss, gain, no change, and loss/gain pixels into single raster for {}"
        .format(tile_id))

    # start time
    start = datetime.datetime.now()

    # The three rasters from above that are to be merged
    loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id)
    no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
    loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(
        tile_id)

    # All three components are merged together to the final output raster
    age_outfile = '{}_{}.tif'.format(tile_id, output_pattern)
    cmd = [
        'gdal_merge.py', '-o', age_outfile, loss_outfilename,
        no_change_outfilename, loss_and_gain_outfilename, '-co',
        'COMPRESS=LZW', '-a_nodata', '0', '-ot', 'Byte'
    ]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_pattern)
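# A plausible implementation of uu.log_subprocess_output, following the
# Stack Overflow approach cited above (log_subprocess_output_sketch is a
# hypothetical stand-in): read the combined stdout/stderr pipe line by line
# and forward each line to the logger.
import logging

def log_subprocess_output_sketch(pipe):

    # The b'' sentinel ends the loop when the pipe reaches EOF
    for line in iter(pipe.readline, b''):
        logging.info('subprocess output: %s', line.decode(errors='replace').rstrip())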
def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type):

    # start time
    start = datetime.datetime.now()
    date = datetime.datetime.now()
    date_formatted = date.strftime("%Y_%m_%d")

    uu.print_log(sensit_aggreg_flux)
    uu.print_log(std_aggreg_flux)

    # This produces errors about dividing by 0. As far as I can tell, those are fine. It's just trying to divide NoData
    # pixels by NoData pixels, and it doesn't affect the output.
    # For model v1.2.0, this kept producing incorrect values for the biomass_swap analysis. I don't know why. I ended
    # up just using raster calculator in ArcMap to create the percent diff raster for biomass_swap. It worked
    # fine for all the other analyses, though (including legal_Amazon_loss).
    # Maybe that divide by 0 is throwing off other values now.
    perc_diff_calc = '--calc=(A-B)/absolute(B)*100'
    perc_diff_outfilename = '{0}_{1}_{2}.tif'.format(
        cn.pattern_aggreg_sensit_perc_diff, sensit_type, date_formatted)
    perc_diff_outfilearg = '--outfile={}'.format(perc_diff_outfilename)
    # cmd = ['gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux, perc_diff_calc, perc_diff_outfilearg,
    #        '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--quiet']
    cmd = [
        'gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux,
        perc_diff_calc, perc_diff_outfilearg, '--overwrite', '--co',
        'COMPRESS=LZW', '--quiet'
    ]
    uu.log_subprocess_output_full(cmd)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, 'global', sensit_aggreg_flux)
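# Illustration (not part of the model) of the percent difference that the
# gdal_calc expression above computes, with made-up flux values. Where B is 0
# (NoData), the division warns and yields nan, matching the benign
# divide-by-zero messages described in the comment above.
import numpy as np

A = np.array([110.0, 95.0, 0.0])   # sensitivity-analysis aggregated flux
B = np.array([100.0, 100.0, 0.0])  # standard-model aggregated flux
with np.errstate(divide='ignore', invalid='ignore'):
    perc_diff = (A - B) / np.absolute(B) * 100
# perc_diff -> [10., -5., nan]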
def legal_Amazon_create_gain_year_count_loss_and_gain_standard(
        tile_id, sensit_type):

    uu.print_log("Gain year count for loss and gain pixels:", tile_id)

    # start time
    start = datetime.datetime.now()

    # Names of the loss, gain, extent, and biomass tiles
    loss, gain, extent, biomass = tile_names(tile_id, sensit_type)

    # Pixels with both loss and gain, and in PRODES forest 2000
    loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C==1)*((A-1)+({}+1-A)/2))'.format(
        cn.loss_years)
    loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(
        tile_id)
    loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename)
    cmd = [
        'gdal_calc.py', '-A', loss, '-B', gain, '-C', extent,
        loss_and_gain_calc, loss_and_gain_outfilearg, '--NoDataValue=0',
        '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'
    ]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain')
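# Worked example of the gain-year expression (A-1) + (loss_years + 1 - A)/2 for
# loss-and-gain pixels, using an assumed cn.loss_years = 20 for illustration.
# A pixel that lost forest in year 5 accrues the 4 full years of growth before
# the loss, plus half of the remaining years for the regrowth after it:
loss_years = 20  # assumed value, for illustration only
A = 5            # Hansen loss year code
gain_years = (A - 1) + (loss_years + 1 - A) / 2
# gain_years -> 4 + 8.0 = 12.0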
def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type):

    uu.print_log("Gain year count for non-loss pixels:", tile_id)

    # start time
    start = datetime.datetime.now()

    # Names of the loss, gain, extent, and biomass tiles
    loss, gain, extent, biomass = tile_names(tile_id, sensit_type)

    # For unclear reasons, gdal_calc doesn't register the 0 (NoData) pixels in the loss tile, so I have to convert it
    # to a vrt so that the 0 pixels are recognized.
    loss_vrt = '{}_loss.vrt'.format(tile_id)
    os.system('gdalbuildvrt -vrtnodata None {0} {1}'.format(loss_vrt, loss))

    # Pixels without loss but in areas with PRODES forest 2000 and biomass >0 (same as standard model)
    no_change_calc = '--calc=(A==0)*(B==1)*(C>0)*{}'.format(cn.loss_years)
    no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
    no_change_outfilearg = '--outfile={}'.format(no_change_outfilename)
    cmd = [
        'gdal_calc.py', '-A', loss_vrt, '-B', extent, '-C', biomass,
        no_change_calc, no_change_outfilearg, '--NoDataValue=0', '--overwrite',
        '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'
    ]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change')
def create_mangrove_soil_C(tile_id, no_upload):

    # Start time
    start = datetime.datetime.now()

    # Checks if mangrove biomass exists. If not, it won't create a mangrove soil C tile.
    if os.path.exists('{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)):

        uu.print_log("Mangrove aboveground biomass tile found for", tile_id)

        uu.print_log("Getting extent of", tile_id)
        xmin, ymin, xmax, ymax = uu.coords(tile_id)

        uu.print_log("Clipping mangrove soil C from mangrove soil vrt for", tile_id)
        uu.warp_to_Hansen('mangrove_soil_C.vrt', '{0}_mangrove_full_extent.tif'.format(tile_id), xmin, ymin, xmax, ymax, 'Int16')

        mangrove_soil = '{0}_mangrove_full_extent.tif'.format(tile_id)
        mangrove_biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)
        outname = '{0}_mangrove_masked_to_mangrove.tif'.format(tile_id)
        out = '--outfile={}'.format(outname)
        calc = '--calc=A*(B>0)'
        datatype = '--type={}'.format('Int16')

        uu.print_log("Masking mangrove soil to mangrove biomass for", tile_id)
        cmd = ['gdal_calc.py', '-A', mangrove_soil, '-B', mangrove_biomass,
               calc, out, '--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--overwrite', datatype, '--quiet']
        uu.log_subprocess_output_full(cmd)

    else:

        uu.print_log("No mangrove aboveground biomass tile for", tile_id)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'mangrove_masked_to_mangrove', no_upload)
def create_gain_year_count_no_change_legal_Amazon_loss(tile_id, sensit_type,
                                                       no_upload):

    uu.print_log(
        "Gain year count for pixels without loss for legal_Amazon_loss:",
        tile_id)

    # Names of the loss, gain, and model extent tiles
    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    # start time
    start = datetime.datetime.now()

    # For unclear reasons, gdal_calc doesn't register the 0 (NoData) pixels in the loss tile, so I have to convert it
    # to a vrt so that the 0 pixels are recognized.
    # This was the case with PRODES loss in model v.1.1.2.
    loss_vrt = '{}_loss.vrt'.format(tile_id)
    os.system('gdalbuildvrt -vrtnodata None {0} {1}'.format(loss_vrt, loss))

    no_change_calc = '--calc=(A==0)*(B>0)*{}'.format(cn.loss_years)
    no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
    no_change_outfilearg = '--outfile={}'.format(no_change_outfilename)
    cmd = [
        'gdal_calc.py', '-A', loss_vrt, '-B', model_extent, no_change_calc,
        no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co',
        'COMPRESS=LZW', '--type', 'Byte', '--quiet'
    ]
    uu.log_subprocess_output_full(cmd)

    os.remove(loss_vrt)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change', no_upload)
def main():

    no_upload = False

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # The list of tiles to iterate through
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
    # tile_id_list = ['00N_110E'] # test tile
    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # By definition, this script is for the biomass swap analysis (replacing WHRC AGB with Saatchi/JPL AGB)
    sensit_type = 'biomass_swap'

    # Downloads a pan-tropical raster that has the erroneous integer values in the oceans removed
    uu.s3_file_download(cn.JPL_raw_dir, cn.JPL_raw_name, sensit_type)

    # Converts the Saatchi AGB vrt to Hansen tiles
    source_raster = cn.JPL_raw_name
    out_pattern = cn.pattern_JPL_unmasked_processed
    dt = 'Float32'
    pool = multiprocessing.Pool(cn.count-5)  # count-5 peaks at 320GB of memory
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt, no_upload=no_upload), tile_id_list)

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.JPL_processed_dir
    pattern = cn.pattern_JPL_unmasked_processed
    pool = multiprocessing.Pool(cn.count - 5)  # count-5 peaks at 410GB of memory
    pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
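# A rough sketch of what uu.mp_warp_to_Hansen presumably does (the signature is
# taken from the call above; the gdalwarp arguments are assumptions): warp the
# source raster to one Hansen tile's extent and resolution.
import subprocess

def mp_warp_to_Hansen_sketch(tile_id, source_raster, out_pattern, dt, no_upload):

    xmin, ymin, xmax, ymax = uu.coords(tile_id)
    out_tile = '{0}_{1}.tif'.format(tile_id, out_pattern)
    cmd = ['gdalwarp', '-t_srs', 'EPSG:4326',
           '-tr', str(cn.Hansen_res), str(cn.Hansen_res),
           '-te', str(xmin), str(ymin), str(xmax), str(ymax),
           '-ot', dt, '-co', 'COMPRESS=LZW', '-overwrite',
           source_raster, out_tile]
    subprocess.check_call(cmd)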
def create_1x1_plantation_type_from_1x1_planted(tile_1x1):

    # Gets the bounding coordinates for the 1x1 degree tile
    coords = tile_1x1.split("_")
    xmin_1x1 = str(coords[3])[:-4]
    xmax_1x1 = int(xmin_1x1) + 1
    ymax_1x1 = int(coords[2])
    ymin_1x1 = ymax_1x1 - 1

    uu.print_log("For", tile_1x1, "-- xmin_1x1:", xmin_1x1, "; xmax_1x1:",
                 xmax_1x1, "; ymin_1x1", ymin_1x1, "; ymax_1x1:", ymax_1x1)

    uu.print_log("There are plantations in {}. Converting to raster...".format(
        tile_1x1))

    # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
    cmd = [
        'gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res),
        '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW', 'PG:dbname=ubuntu',
        '-l', 'all_plant', 'plant_type_{0}_{1}.tif'.format(ymax_1x1,
                                                           xmin_1x1), '-te',
        str(xmin_1x1),
        str(ymin_1x1),
        str(xmax_1x1),
        str(ymax_1x1), '-a', 'type_reclass', '-a_nodata', '0', '-ot', 'Byte'
    ]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
def create_1x1_plantation_from_1x1_gadm(tile_1x1):

    # Gets the bounding coordinates for the 1x1 degree tile
    coords = tile_1x1.split("_")
    uu.print_log(coords)
    xmin_1x1 = str(coords[2])[:-4]
    xmax_1x1 = int(xmin_1x1) + 1
    ymax_1x1 = int(coords[1])
    ymin_1x1 = ymax_1x1 - 1

    uu.print_log("For", tile_1x1, "-- xmin_1x1:", xmin_1x1, "; xmax_1x1:", xmax_1x1, "; ymin_1x1", ymin_1x1, "; ymax_1x1:", ymax_1x1)

    # Connects Python to PostGIS using psycopg2. The credentials work on spot machines as they are currently configured
    # and are based on this: https://github.com/wri/gfw-annual-loss-processing/blob/master/1b_Summary-AOIs-to-TSV/utilities/postgis_util.py
    creds = {'host': 'localhost', 'user': '******', 'dbname': 'ubuntu'}
    conn = psycopg2.connect(**creds)
    cursor = conn.cursor()

    # Intersects the plantations PostGIS table with the 1x1 tile, then saves any growth rates in that tile as a 1x1 tile
    # https://gis.stackexchange.com/questions/30267/how-to-create-a-valid-global-polygon-grid-in-postgis
    # https://stackoverflow.com/questions/48978616/best-way-to-run-st-intersects-on-features-inside-one-table
    # https://postgis.net/docs/ST_Intersects.html
    uu.print_log("Checking if {} has plantations in it".format(tile_1x1))

    # Does the intersect of the PostGIS table and the 1x1 GADM tile
    cursor.execute("SELECT growth FROM all_plant WHERE ST_Intersects(all_plant.wkb_geometry, ST_GeogFromText('POLYGON(({0} {1},{2} {1},{2} {3},{0} {3},{0} {1}))'))".format(
            xmin_1x1, ymax_1x1, xmax_1x1, ymin_1x1))

    # A Python list of the output of the intersection, which in this case is a list of features that were successfully intersected.
    # This is what I use to determine if any PostGIS features were intersected.
    features = cursor.fetchall()
    cursor.close()

    # If any features in the PostGIS table were intersected with the 1x1 GADM tile, then the features in this 1x1 tile
    # are converted to a planted forest gain rate tile and a plantation type tile
    if len(features) > 0:

        uu.print_log("There are plantations in {}. Converting to gain rate and plantation type rasters...".format(tile_1x1))

        # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
        # For plantation gain rate
        cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW', 'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_gain_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1), '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-a', 'growth', '-a_nodata', '0']
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
        # For plantation type
        cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW', 'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_type_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1), '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-a', 'type_reclass', '-a_nodata', '0']
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    # If no features in the PostGIS table were intersected with the 1x1 GADM tile, nothing happens.
    else:
        uu.print_log("There are no plantations in {}. Not converting to raster.".format(tile_1x1))
def reset_nodata(tile_id):

    uu.print_log("Changing 0 from NoData to actual value for tile", tile_id)

    tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_Mekong_loss_processed)

    cmd = ['gdal_edit.py', '-unsetnodata', tile]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.print_log("Tile processed")
def loss_in_raster(tile_id, raster_type, output_name, lat, mask):

    uu.print_log("Calculating loss area for tile id {0}...".format(tile_id))

    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # start time
    start = datetime.datetime.now()

    # Name of the loss tile
    loss_tile = '{0}.tif'.format(tile_id)

    # The raster that loss is being analyzed inside
    raster_of_interest = '{0}_{1}.tif'.format(tile_id, raster_type)

    # Output file name
    outname = '{0}_{1}.tif'.format(tile_id, output_name)

    # Only processes the tile if it is inside the latitude band (north of the specified latitude)
    if ymax > lat and os.path.exists(raster_of_interest):

        uu.print_log("{} inside latitude band and peat tile exists. Processing tile.".format(tile_id))

        # If the user has asked to create just a mask of loss as opposed to the actual output values
        if mask == "True":

            calc = '--calc=(A>=1)*(A+1)/(A+1)*B'

        # If the user has asked to output the actual loss values,
        # the loss values are multiplied by the raster of interest
        elif mask == "False":

            calc = '--calc=A*B'

        # Argument for outputting file
        out = '--outfile={}'.format(outname)

        uu.print_log("Masking loss in {} by raster of interest...".format(tile_id))
        cmd = ['gdal_calc.py', '-A', loss_tile, '-B', raster_of_interest, calc, out, '--NoDataValue=0', '--co', 'COMPRESS=LZW',
               '--overwrite', '--quiet']
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        uu.print_log("{} masked".format(tile_id))

    else:

        uu.print_log("{} outside of latitude band. Skipped tile.".format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_name)
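# Note that the mask expression simplifies algebraically:
# (A>=1)*(A+1)/(A+1)*B == (A>=1)*B, i.e. keep the raster-of-interest value
# wherever any loss year is recorded. For example:
A, B = 7, 250                                # loss year 7, raster value 250
masked = (A >= 1) * (A + 1) / (A + 1) * B    # -> 250.0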
def calc_emissions(tile_id, emitted_pools, sensit_type, folder, no_upload):

    uu.print_log("Calculating gross emissions for", tile_id, "using",
                 sensit_type, "model type...")

    start = datetime.datetime.now()

    # Runs the correct c++ script given the emitted_pools (biomass+soil or soil_only) and model type selected.
    # soil_only, no_shifting_ag, and convert_to_grassland have special gross emissions C++ scripts.
    # The other sensitivity analyses and the standard model all use the same gross emissions C++ script.
    if (emitted_pools == 'soil_only') & (sensit_type == 'std'):
        cmd = [
            '{0}/calc_gross_emissions_soil_only.exe'.format(
                cn.c_emis_compile_dst), tile_id, sensit_type, folder
        ]

    elif (emitted_pools == 'biomass_soil') & (
            sensit_type in ['convert_to_grassland', 'no_shifting_ag']):
        cmd = [
            '{0}/calc_gross_emissions_{1}.exe'.format(cn.c_emis_compile_dst,
                                                      sensit_type), tile_id,
            sensit_type, folder
        ]

    # This C++ script has an extra argument that names the input carbon emitted_pools and output emissions correctly
    elif (emitted_pools == 'biomass_soil') & (
            sensit_type not in ['no_shifting_ag', 'convert_to_grassland']):
        cmd = [
            '{0}/calc_gross_emissions_generic.exe'.format(
                cn.c_emis_compile_dst), tile_id, sensit_type, folder
        ]

    else:
        uu.exception_log(no_upload,
                         'Pool and/or sensitivity analysis option not valid')

    uu.log_subprocess_output_full(cmd)

    # Identifies which pattern to use for counting tile completion
    pattern = cn.pattern_gross_emis_commod_biomass_soil
    if (emitted_pools == 'biomass_soil') & (sensit_type == 'std'):
        pattern = pattern

    elif (emitted_pools == 'biomass_soil') & (sensit_type != 'std'):
        pattern = pattern + "_" + sensit_type

    elif emitted_pools == 'soil_only':
        pattern = pattern.replace('biomass_soil', 'soil_only')

    else:
        uu.exception_log(no_upload, 'Pool option not valid')

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, pattern, no_upload)
def mp_mangrove_processing(tile_id_list, run_date=None, no_upload=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads zipped raw mangrove files
    uu.s3_file_download(
        os.path.join(cn.mangrove_biomass_raw_dir,
                     cn.mangrove_biomass_raw_file), cn.docker_base_dir, 'std')

    # Unzips mangrove images into a flat structure (all tifs into main folder using -j argument)
    # NOTE: Unzipping some tifs (e.g., Australia, Indonesia) takes a very long time, so don't worry if the script appears to stop on that.
    cmd = ['unzip', '-o', '-j', cn.mangrove_biomass_raw_file]
    uu.log_subprocess_output_full(cmd)

    # Creates vrt for the mangrove biomass rasters
    mangrove_vrt = 'mangrove_biomass.vrt'
    os.system('gdalbuildvrt {} *.tif'.format(mangrove_vrt))

    # Converts the mangrove AGB vrt into Hansen tiles
    source_raster = mangrove_vrt
    out_pattern = cn.pattern_mangrove_biomass_2000
    dt = 'float32'
    processes = int(cn.count / 4)
    uu.print_log('Mangrove preprocessing max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt,
                no_upload=no_upload), tile_id_list)

    # # For single processor use, for testing purposes
    # for tile_id in tile_id_list:
    #
    #     mangrove_processing.create_mangrove_tiles(tile_id, source_raster, out_pattern, no_upload)

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.mangrove_biomass_2000_dir
    pattern = cn.pattern_mangrove_biomass_2000
    processes = int(cn.count - 5)
    uu.print_log('Mangrove check for data max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern),
        tile_id_list)
def stack_ba_hv(hv_tile):

    for year in range(2019, 2020):  # end year is not included in burn year product

        # Download hdf files from s3 into folders by h and v
        output_dir = utilities.makedir('{0}/{1}/raw/'.format(hv_tile, year))
        utilities.download_df(year, hv_tile, output_dir)

        # convert hdf to array
        hdf_files = glob.glob(output_dir + "*hdf")

        if len(hdf_files) > 0:
            array_list = []
            for hdf in hdf_files:
                uu.print_log("converting hdf to array")
                array = utilities.hdf_to_array(hdf)
                array_list.append(array)

            # stack arrays, get 1 raster for the year and tile
            stacked_year_array = utilities.stack_arrays(array_list)
            max_stacked_year_array = stacked_year_array.max(0)

            # convert stacked month arrays to 1 raster for the year
            template_hdf = hdf_files[0]

            year_folder = utilities.makedir('{0}/{1}/stacked/'.format(
                hv_tile, year))

            stacked_year_raster = utilities.array_to_raster(
                hv_tile, year, max_stacked_year_array, template_hdf,
                year_folder)

            # upload to s3
            cmd = [
                'aws', 's3', 'cp', stacked_year_raster,
                cn.burn_year_stacked_hv_tif_dir
            ]
            uu.log_subprocess_output_full(cmd)

            # remove files
            shutil.rmtree(output_dir)

        else:
            pass
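# Presumed behavior of utilities.stack_arrays followed by .max(0): stack the
# per-hdf burn-date arrays along a new axis and keep, for each pixel, the
# maximum (latest) burn date across them. A small numpy illustration with
# made-up values (0 = no burn):
import numpy as np

arr_1 = np.array([[0, 32]])
arr_2 = np.array([[45, 0]])
stacked = np.stack([arr_1, arr_2])  # shape (2, 1, 2)
max_per_pixel = stacked.max(0)      # -> [[45, 32]]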
def create_mangrove_soil_C(tile_id):

    # Start time
    start = datetime.datetime.now()

    # Checks if mangrove biomass exists. If not, it won't create a mangrove soil C tile.
    if os.path.exists('{0}_{1}.tif'.format(tile_id,
                                           cn.pattern_mangrove_biomass_2000)):

        uu.print_log("Mangrove aboveground biomass tile found for", tile_id)

        uu.print_log("Getting extent of", tile_id)
        xmin, ymin, xmax, ymax = uu.coords(tile_id)

        uu.print_log("Clipping mangrove soil C from mangrove soil vrt for",
                     tile_id)
        uu.warp_to_Hansen('mangrove_soil_C.vrt',
                          '{0}_mangrove_full_extent.tif'.format(tile_id), xmin,
                          ymin, xmax, ymax, 'Int16')

        mangrove_soil = '{0}_mangrove_full_extent.tif'.format(tile_id)
        mangrove_biomass = '{0}_{1}.tif'.format(
            tile_id, cn.pattern_mangrove_biomass_2000)
        outname = '{0}_mangrove_masked_to_mangrove.tif'.format(tile_id)
        out = '--outfile={}'.format(outname)
        calc = '--calc=A*(B>0)'
        datatype = '--type={}'.format('Int16')

        uu.print_log("Masking mangrove soil to mangrove biomass for", tile_id)
        cmd = [
            'gdal_calc.py', '-A', mangrove_soil, '-B', mangrove_biomass, calc,
            out, '--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--overwrite',
            datatype, '--quiet'
        ]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    else:

        uu.print_log("No mangrove aboveground biomass tile for", tile_id)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'mangrove_masked_to_mangrove')
def recode_tiles(annual_loss):

    uu.print_log("Recoding loss tile by year")

    year = int(annual_loss[-8:-4])
    uu.print_log(year)

    if year < 2001 or year > (2000 + cn.loss_years):

        uu.print_log("Skipping {} because outside of model range".format(year))
        return

    else:

        calc = '--calc={}*(A==100)'.format(int((year - 2000)))
        recoded_output = "Mekong_loss_recoded_{}.tif".format(year)
        outfile = '--outfile={}'.format(recoded_output)

        cmd = [
            'gdal_calc.py', '-A', annual_loss, calc, outfile,
            '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--quiet'
        ]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)
def output_per_pixel(tile_id, input_pattern, output_pattern, sensit_type):

    uu.print_log("Calculating per pixel values for", tile_id)

    # Start time
    start = datetime.datetime.now()

    # Names of the input biomass and TCD tiles
    input_model_tile = '{0}_{1}.tif'.format(tile_id, input_pattern)
    area_tile = 'hanson_2013_area_{}.tif'.format(tile_id)
    output_model_tile = '{0}_{1}.tif'.format(tile_id, output_pattern)

    uu.print_log("Converting {} from Mg CO2/ha to Mg CO2/pixel...".format(
        input_model_tile))
    # Equation argument for converting emissions from per hectare to per pixel.
    # First, multiplies the per hectare emissions by the area of the pixel in m2, then divides by the number of m2 in a hectare.
    calc = '--calc=A*B/{}'.format(cn.m2_per_ha)
    out = '--outfile={}'.format(output_model_tile)
    cmd = [
        'gdal_calc.py', '-A', input_model_tile, '-B', area_tile, calc, out,
        '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--overwrite', '--quiet'
    ]
    uu.log_subprocess_output_full(cmd)

    uu.print_log(
        "  Per pixel values calculated for {}".format(output_model_tile))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_pattern)
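# Worked example of the per-hectare to per-pixel conversion A*B/m2_per_ha, with
# illustrative numbers (assuming cn.m2_per_ha = 10000):
per_ha_value = 50.0    # Mg CO2/ha in the input model tile (A)
pixel_area_m2 = 900.0  # roughly a 30 m x 30 m pixel near the equator (B)
per_pixel = per_ha_value * pixel_area_m2 / 10000
# per_pixel -> 4.5 Mg CO2 in that pixel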
def create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type):

    uu.print_log("Loss and gain pixel processing using standard function:",
                 tile_id)

    # Names of the loss, gain, and model extent tiles
    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    # start time
    start = datetime.datetime.now()

    if os.path.exists(loss):
        uu.print_log(
            "Loss tile found for {}. Using it in loss and gain pixel gain year count."
            .format(tile_id))
        loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C>0)*((A-1)+floor(({}+1-A)/2)))'.format(
            cn.loss_years)
        loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(
            tile_id)
        loss_and_gain_outfilearg = '--outfile={}'.format(
            loss_and_gain_outfilename)
        cmd = [
            'gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent,
            loss_and_gain_calc, loss_and_gain_outfilearg, '--NoDataValue=0',
            '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'
        ]
        uu.log_subprocess_output_full(cmd)
    else:
        uu.print_log(
            "No loss tile found for {}. Skipping loss and gain pixel gain year count."
            .format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain')
def create_gain_year_count_no_change_standard(tile_id, sensit_type, no_upload):

    uu.print_log("Gain year count for pixels with neither loss nor gain:", tile_id)

    # Names of the loss, gain, and model extent tiles
    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    # start time
    start = datetime.datetime.now()

    if os.path.exists(loss):
        uu.print_log("Loss tile found for {}. Using it in no change pixel gain year count.".format(tile_id))
        no_change_calc = '--calc=(A==0)*(B==0)*(C>0)*{}'.format(cn.loss_years)
        no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
        no_change_outfilearg = '--outfile={}'.format(no_change_outfilename)
        cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, no_change_calc,
               no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']
        uu.log_subprocess_output_full(cmd)
    else:
        uu.print_log("No loss tile found for {}. Not using it for no change pixel gain year count.".format(tile_id))
        no_change_calc = '--calc=(A==0)*(B>0)*{}'.format(cn.loss_years)
        no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
        no_change_outfilearg = '--outfile={}'.format(no_change_outfilename)
        cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, no_change_calc,
               no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']
        uu.log_subprocess_output_full(cmd)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change', no_upload)
def create_gain_year_count_loss_only(tile_id, sensit_type, no_upload):

    uu.print_log("Gain year count for loss only pixels:", tile_id)

    # start time
    start = datetime.datetime.now()

    # Names of the loss, gain, and model extent tiles
    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    if os.path.exists(loss):
        uu.print_log(
            "Loss tile found for {}. Using it in loss only pixel gain year count."
            .format(tile_id))
        loss_calc = '--calc=(A>0)*(B==0)*(C>0)*(A-1)'
        loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id)
        loss_outfilearg = '--outfile={}'.format(loss_outfilename)
        cmd = [
            'gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent,
            loss_calc, loss_outfilearg, '--NoDataValue=0', '--overwrite',
            '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'
        ]
        uu.log_subprocess_output_full(cmd)
    else:
        uu.print_log(
            "No loss tile found for {}. Skipping loss only pixel gain year count."
            .format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_only', no_upload)
def create_combined_soil_C(tile_id, no_upload):

    # Start time
    start = datetime.datetime.now()

    # Input files
    mangrove_soil = '{0}_mangrove_masked_to_mangrove.tif'.format(tile_id)
    mineral_soil = '{0}_{1}.tif'.format(tile_id, cn.pattern_soil_C_full_extent_2000_non_mang)

    # Output file
    combined_soil = '{0}_{1}.tif'.format(tile_id, cn.pattern_soil_C_full_extent_2000)

    # Checks if mangrove AGB tile exists. If not, mangrove soil C is not combined with mineral soil C.
    if os.path.exists('{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)):

        uu.print_log("Mangrove aboveground biomass tile found for", tile_id)

        mangrove_soil_src = rasterio.open(mangrove_soil)
        # Grabs metadata for one of the input tiles, like its location/projection/cellsize
        kwargs = mangrove_soil_src.meta
        # Grabs the windows of the tile (stripes) to iterate over the entire tif without running out of memory
        windows = mangrove_soil_src.block_windows(1)

        mineral_soil_src = rasterio.open(mineral_soil)

        # Updates kwargs for the output dataset.
        # Need to update data type to float 32 so that it can handle fractional gain rates
        kwargs.update(
            driver='GTiff',
            count=1,
            compress='lzw',
            nodata=0
        )

        # The output file: soil C with mangrove soil C taking precedence over mineral soil C
        dst_combined_soil = rasterio.open(combined_soil, 'w', **kwargs)

        uu.print_log("Replacing mineral soil C pixels with mangrove soil C pixels for", tile_id)

        # Iterates across the windows (1 pixel strips) of the input tiles
        for idx, window in windows:

            mangrove_soil_window = mangrove_soil_src.read(1, window=window)
            mineral_soil_window = mineral_soil_src.read(1, window=window)

            combined_soil_window = np.where(mangrove_soil_window>0, mangrove_soil_window, mineral_soil_window)

            dst_combined_soil.write_band(1, combined_soil_window, window=window)

    else:

        uu.print_log("No mangrove aboveground biomass tile for", tile_id)

        # If there is no mangrove soil C tile, the final output of the mineral soil function needs to receive the
        # correct final name.
        os.rename('{0}_{1}.tif'.format(tile_id, cn.pattern_soil_C_full_extent_2000_non_mang), combined_soil)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_soil_C_full_extent_2000, no_upload)
def create_combined_ifl_primary(tile_id):

    # Start time
    start = datetime.datetime.now()

    ifl_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_ifl)
    primary_tile = '{}_primary_2001.tif'.format(tile_id)

    ifl_primary_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_ifl_primary)

    uu.print_log("Getting extent of", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # Assigns the correct tile (primary forest or ifl)
    if ymax <= 30 and ymax >= -20:

        uu.print_log(
            "{} between 30N and 30S. Using primary forest tile.".format(
                tile_id))

        os.rename(primary_tile, ifl_primary_tile)

    else:

        uu.print_log(
            "{} not between 30N and 30S. Using IFL tile, if it exists.".format(
                tile_id))

        if os.path.exists(ifl_tile):

            os.rename(ifl_tile, ifl_primary_tile)

        else:

            uu.print_log("IFL tile does not exist for {}".format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_ifl_primary)
def mp_continent_ecozone_tiles(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(
            cn.pattern_WHRC_biomass_2000_non_mang_non_planted,
            cn.mangrove_biomass_2000_dir)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # if the continent-ecozone shapefile hasn't already been downloaded, it will be downloaded and unzipped
    uu.s3_file_download(cn.cont_eco_s3_zip, cn.docker_base_dir, 'std')

    # Unzips ecozone shapefile
    cmd = ['unzip', cn.cont_eco_zip]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # List of output directories and output file name patterns
    output_dir_list = [cn.cont_eco_raw_dir, cn.cont_eco_dir]
    output_pattern_list = [
        cn.pattern_cont_eco_raw, cn.pattern_cont_eco_processed
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # For multiprocessor use
    processes = int(cn.count / 4)
    uu.print_log('Continent-ecozone tile creation max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(continent_ecozone_tiles.create_continent_ecozone_tiles,
             tile_id_list)

    # Uploads the continent-ecozone tile to s3 before the codes are expanded to pixels in 1024x1024 windows that don't have codes.
    # These are not used for the model. They are for reference and completeness.
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)
    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # Lists the tiles that have both mangrove biomass and FAO ecozone information because both of these are necessary for
        # calculating mangrove gain
        mangrove_biomass_tile_list = uu.tile_list_s3(
            cn.mangrove_biomass_2000_dir)
        ecozone_tile_list = uu.tile_list_s3(cn.cont_eco_dir)
        tile_id_list = list(
            set(mangrove_biomass_tile_list).intersection(ecozone_tile_list))

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    download_dict = {
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000]
    }

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir,
        cn.stdev_annual_gain_AGB_mangrove_dir
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_mangrove,
        cn.pattern_annual_gain_BGB_mangrove,
        cn.pattern_stdev_annual_gain_AGB_mangrove
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
    if uu.check_aws_creds():

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir,
                                    sensit_type, tile_id_list)

    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
        cn.docker_base_dir
    ]
    uu.log_subprocess_output_full(cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon',
                                                       keep='first')

    # Creates belowground:aboveground biomass ratio dictionary for the three mangrove types, where the keys correspond to
    # the "mangType" field in the gain rate spreadsheet.
    # If the assignment of mangTypes to ecozones changes, that column in the spreadsheet may need to change and the
    # keys in this dictionary would need to change accordingly.
    type_ratio_dict = {
        '1': cn.below_to_above_trop_dry_mang,
        '2': cn.below_to_above_trop_wet_mang,
        '3': cn.below_to_above_subtrop_mang
    }
    type_ratio_dict_final = {
        int(k): float(v)
        for k, v in list(type_ratio_dict.items())
    }

    # Applies the belowground:aboveground biomass ratios for the three mangrove types to the annual aboveground gain rates to get
    # a column of belowground annual gain rates by mangrove type
    gain_table_simplified['BGB_AGB_ratio'] = gain_table_simplified[
        'mangType'].map(type_ratio_dict_final)
    gain_table_simplified[
        'BGB_annual_rate'] = gain_table_simplified.AGB_gain_tons_ha_yr * gain_table_simplified.BGB_AGB_ratio

    # Converts the continent-ecozone codes and corresponding gain rates to dictionaries for aboveground and belowground gain rates
    gain_above_dict = pd.Series(
        gain_table_simplified.AGB_gain_tons_ha_yr.values,
        index=gain_table_simplified.gainEcoCon).to_dict()
    gain_below_dict = pd.Series(
        gain_table_simplified.BGB_annual_rate.values,
        index=gain_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    gain_above_dict[0] = 0
    gain_below_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    gain_above_dict = {
        float(key): value
        for key, value in gain_above_dict.items()
    }
    gain_below_dict = {
        float(key): value
        for key, value in gain_below_dict.items()
    }

    ### To make the removal factor standard deviation dictionary

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                sheet_name="mangrove stdev, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon',
                                                         keep='first')

    # Converts the continent-ecozone codes and corresponding gain rate standard deviations to dictionaries for aboveground and belowground gain rate stdevs
    stdev_dict = pd.Series(
        stdev_table_simplified.AGB_gain_stdev_tons_ha_yr.values,
        index=stdev_table_simplified.gainEcoCon).to_dict()

    # Adds a dictionary entry for where the ecozone-continent code is 0 (not in a continent)
    stdev_dict[0] = 0

    # Converts all the keys (continent-ecozone codes) to float type
    stdev_dict = {float(key): value for key, value in stdev_dict.items()}

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    # Ran with 18 processors on r4.16xlarge (430 GB memory peak)
    if cn.count == 96:
        processes = 20  #26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak
    else:
        processes = 4
    uu.print_log('Mangrove annual gain rate max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(
        partial(annual_gain_rate_mangrove.annual_gain_rate,
                sensit_type=sensit_type,
                output_pattern_list=output_pattern_list,
                gain_above_dict=gain_above_dict,
                gain_below_dict=gain_below_dict,
                stdev_dict=stdev_dict), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #
    #     annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list,
    #           gain_above_dict, gain_below_dict, stdev_dict)

    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
if __name__ == '__main__':

    # The arguments for the model run: model type, tile list, and optional run date
    parser = argparse.ArgumentParser(
        description='Creates tiles of annual removal rates for mangrove forests')
    parser.add_argument('--model-type',
                        '-t',
                        required=True,
                        help='Model type. Either "std" or the name of a sensitivity analysis.')
    parser.add_argument(
        '--tile_id_list',
        '-l',
        required=True,
        help=
        'List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.'
    )
    parser.add_argument('--run-date',
                        '-d',
                        required=False,
                        help='Date of run. Must be format YYYYMMDD.')
    args = parser.parse_args()
    sensit_type = args.model_type
    tile_id_list = args.tile_id_list
    run_date = args.run_date

    # Disables upload to s3 if no AWS credentials are found in environment
    if not uu.check_aws_creds():
        no_upload = True
        uu.print_log("s3 credentials not found. Uploading to s3 disabled.")

    # Create the output log
    uu.initiate_log(tile_id_list=tile_id_list,
                    sensit_type=sensit_type,
                    run_date=run_date)

    # Checks whether the sensitivity analysis and tile_id_list arguments are valid
    uu.check_sensit_type(sensit_type)
    tile_id_list = uu.tile_id_list_check(tile_id_list)

    mp_annual_gain_rate_mangrove(sensit_type=sensit_type,
                                 tile_id_list=tile_id_list,
                                 run_date=run_date)
def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict,
                     output_pattern_list, no_upload):

    # Converts the forest age category decision tree output values to the three age categories--
    # 10000: primary forest; 20000: secondary forest > 20 years; 30000: secondary forest <= 20 years
    # These are five digits so they can easily be added to the four digits of the continent-ecozone code to make unique codes
    # for each continent-ecozone-age combination.
    # The key in the dictionary is the forest age category decision tree endpoints.
    age_dict = {0: 0, 1: 10000, 2: 20000, 3: 30000}

    uu.print_log("Processing:", tile_id)

    # Start time
    start = datetime.datetime.now()

    # Names of the forest age category and continent-ecozone tiles
    age_cat = uu.sensit_tile_rename(sensit_type, tile_id,
                                    cn.pattern_age_cat_IPCC)
    cont_eco = uu.sensit_tile_rename(sensit_type, tile_id,
                                     cn.pattern_cont_eco_processed)

    # Names of the output natural forest gain rate tiles (above and belowground)
    AGB_IPCC_default_gain_rate = '{0}_{1}.tif'.format(tile_id,
                                                      output_pattern_list[0])
    BGB_IPCC_default_gain_rate = '{0}_{1}.tif'.format(tile_id,
                                                      output_pattern_list[1])
    AGB_IPCC_default_gain_stdev = '{0}_{1}.tif'.format(tile_id,
                                                       output_pattern_list[2])

    uu.print_log(
        "  Creating IPCC default biomass gain rates and standard deviation for {}"
        .format(tile_id))

    # Opens the input tiles if they exist. Skips the tile if either input doesn't exist.
    try:
        age_cat_src = rasterio.open(age_cat)
        uu.print_log("   Age category tile found for {}".format(tile_id))
    except:
        return uu.print_log(
            "   No age category tile found for {}. Skipping tile.".format(
                tile_id))

    try:
        cont_eco_src = rasterio.open(cont_eco)
        uu.print_log("   Continent-ecozone tile found for {}".format(tile_id))
    except:
        return uu.print_log(
            "   No continent-ecozone tile found for {}. Skipping tile.".format(
                tile_id))

    # Grabs metadata about the continent ecozone tile, like its location/projection/cellsize
    kwargs = cont_eco_src.meta

    # Grabs the windows of the tile (stripes) to iterate over the entire tif without running out of memory
    windows = cont_eco_src.block_windows(1)

    # Updates kwargs for the output dataset.
    # Need to update data type to float 32 so that it can handle fractional gain rates
    kwargs.update(driver='GTiff',
                  count=1,
                  compress='lzw',
                  nodata=0,
                  dtype='float32')

    # The output files, aboveground and belowground biomass gain rates
    dst_above = rasterio.open(AGB_IPCC_default_gain_rate, 'w', **kwargs)
    # Adds metadata tags to the output raster
    uu.add_rasterio_tags(dst_above, sensit_type)
    dst_above.update_tags(
        units='megagrams aboveground biomass (AGB or dry matter)/ha/yr')
    dst_above.update_tags(
        source='IPCC Guidelines 2019 refinement, forest section, Table 4.9')
    dst_above.update_tags(
        extent=
        'Full model extent, even though these rates will not be used over the full model extent'
    )

    dst_below = rasterio.open(BGB_IPCC_default_gain_rate, 'w', **kwargs)
    # Adds metadata tags to the output raster
    uu.add_rasterio_tags(dst_below, sensit_type)
    dst_below.update_tags(
        units='megagrams belowground biomass (BGB or dry matter)/ha/yr')
    dst_below.update_tags(
        source='IPCC Guidelines 2019 refinement, forest section, Table 4.9')
    dst_below.update_tags(
        extent=
        'Full model extent, even though these rates will not be used over the full model extent'
    )

    dst_stdev_above = rasterio.open(AGB_IPCC_default_gain_stdev, 'w', **kwargs)
    # Adds metadata tags to the output raster
    uu.add_rasterio_tags(dst_stdev_above, sensit_type)
    dst_stdev_above.update_tags(
        units=
        'standard deviation, in terms of megagrams aboveground biomass (AGB or dry matter)/ha/yr'
    )
    dst_stdev_above.update_tags(
        source='IPCC Guidelines 2019 refinement, forest section, Table 4.9')
    dst_stdev_above.update_tags(
        extent=
        'Full model extent, even though these standard deviations will not be used over the full model extent'
    )

    # Iterates across the windows (1 pixel strips) of the input tiles
    for idx, window in windows:

        # Creates a processing window for each input raster
        try:
            cont_eco_window = cont_eco_src.read(1, window=window)
        except:
            cont_eco_window = np.zeros((window.height, window.width),
                                       dtype='uint8')

        try:
            age_cat_window = age_cat_src.read(1, window=window)
        except:
            age_cat_window = np.zeros((window.height, window.width),
                                      dtype='uint8')

        # Recodes the input forest age category array with 10 different decision tree end values into the 3 actual age categories
        age_recode = np.vectorize(age_dict.get)(age_cat_window)

        # Adds the age category codes to the continent-ecozone codes to create an array of unique continent-ecozone-age codes
        cont_eco_age = cont_eco_window + age_recode

        ## Aboveground removal factors
        # Converts the continent-ecozone array to float so that the values can be replaced with fractional gain rates
        gain_rate_AGB = cont_eco_age.astype('float32')

        # Applies the dictionary of continent-ecozone-age gain rates to the continent-ecozone-age array to
        # get annual gain rates (metric tons aboveground biomass/yr) for each pixel
        for key, value in gain_table_dict.items():
            gain_rate_AGB[gain_rate_AGB == key] = value

        # Writes the output window to the output file
        dst_above.write_band(1, gain_rate_AGB, window=window)

        ## Belowground removal factors
        # Calculates belowground annual removal rates
        gain_rate_BGB = gain_rate_AGB * cn.below_to_above_non_mang

        # Writes the output window to the output file
        dst_below.write_band(1, gain_rate_BGB, window=window)

        ## Aboveground removal factor standard deviation
        # Converts the continent-ecozone array to float so that the values can be replaced with fractional standard deviations
        gain_stdev_AGB = cont_eco_age.astype('float32')

        # Applies the dictionary of continent-ecozone-age gain rate standard deviations to the continent-ecozone-age array to
        # get annual gain rate standard deviations (metric tons aboveground biomass/yr) for each pixel
        for key, value in stdev_table_dict.items():
            gain_stdev_AGB[gain_stdev_AGB == key] = value

        # Writes the output window to the output file
        dst_stdev_above.write_band(1, gain_stdev_AGB, window=window)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_pattern_list[0], no_upload)
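# Illustration of the continent-ecozone-age coding described at the top of the
# function, using a hypothetical four-digit continent-ecozone code of 1402:
age_dict = {0: 0, 1: 10000, 2: 20000, 3: 30000}
cont_eco_code = 1402                        # hypothetical continent-ecozone code
cont_eco_age = cont_eco_code + age_dict[2]  # secondary forest > 20 years
# cont_eco_age -> 21402, a unique continent-ecozone-age combination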
def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload):

    uu.print_log(
        "Merging loss, gain, no change, and loss/gain pixels into single raster for {}"
        .format(tile_id))

    # start time
    start = datetime.datetime.now()

    # The four rasters from above that are to be merged
    no_change_gain_years = '{}_growth_years_no_change.tif'.format(tile_id)
    loss_only_gain_years = '{}_growth_years_loss_only.tif'.format(tile_id)
    gain_only_gain_years = '{}_growth_years_gain_only.tif'.format(tile_id)
    loss_and_gain_gain_years = '{}_growth_years_loss_and_gain.tif'.format(
        tile_id)

    # Names of the output tiles
    gain_year_count_merged = '{0}_{1}.tif'.format(tile_id, pattern)

    # Opens no change gain year count tile. This should exist for all tiles.
    with rasterio.open(no_change_gain_years) as no_change_gain_years_src:

        # Grabs metadata about the tif, like its location/projection/cellsize
        kwargs = no_change_gain_years_src.meta

        # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory
        windows = no_change_gain_years_src.block_windows(1)

        # Updates kwargs for the output dataset
        kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0)

        uu.print_log(
            "   No change tile exists for {} by default".format(tile_id))

        # Opens the other gain year count tiles. They may not exist for all tiles,
        # so missing ones are recorded as None and their windows are filled with zeros below.
        try:
            loss_only_gain_years_src = rasterio.open(loss_only_gain_years)
            uu.print_log("   Loss only tile found for {}".format(tile_id))
        except rasterio.errors.RasterioIOError:
            loss_only_gain_years_src = None
            uu.print_log("   No loss only tile found for {}".format(tile_id))

        try:
            gain_only_gain_years_src = rasterio.open(gain_only_gain_years)
            uu.print_log("   Gain only tile found for {}".format(tile_id))
        except rasterio.errors.RasterioIOError:
            gain_only_gain_years_src = None
            uu.print_log("   No gain only tile found for {}".format(tile_id))

        try:
            loss_and_gain_gain_years_src = rasterio.open(
                loss_and_gain_gain_years)
            uu.print_log("   Loss and gain tile found for {}".format(tile_id))
        except rasterio.errors.RasterioIOError:
            loss_and_gain_gain_years_src = None
            uu.print_log(
                "   No loss and gain tile found for {}".format(tile_id))

        # Opens the output tile, giving it the arguments of the input tiles
        gain_year_count_merged_dst = rasterio.open(gain_year_count_merged, 'w',
                                                   **kwargs)

        # Adds metadata tags to the output raster
        uu.add_rasterio_tags(gain_year_count_merged_dst, sensit_type)
        gain_year_count_merged_dst.update_tags(units='years')
        gain_year_count_merged_dst.update_tags(min_possible_value='0')
        gain_year_count_merged_dst.update_tags(
            max_possible_value=cn.loss_years)
        gain_year_count_merged_dst.update_tags(
            source=
            'Gain years are assigned based on the combination of Hansen loss and gain in each pixel. There are four combinations: neither loss nor gain, loss only, gain only, loss and gain.'
        )
        gain_year_count_merged_dst.update_tags(extent='Full model extent')

        # Iterates across the windows (1 pixel strips) of the input tile
        for idx, window in windows:

            no_change_gain_years_window = no_change_gain_years_src.read(
                1, window=window)

            # Reads each optional input's window if its tile exists;
            # otherwise substitutes a window of zeros
            if loss_only_gain_years_src is not None:
                loss_only_gain_years_window = loss_only_gain_years_src.read(
                    1, window=window)
            else:
                loss_only_gain_years_window = np.zeros(
                    (window.height, window.width), dtype='uint8')

            if gain_only_gain_years_src is not None:
                gain_only_gain_years_window = gain_only_gain_years_src.read(
                    1, window=window)
            else:
                gain_only_gain_years_window = np.zeros(
                    (window.height, window.width), dtype='uint8')

            if loss_and_gain_gain_years_src is not None:
                loss_and_gain_gain_years_window = loss_and_gain_gain_years_src.read(
                    1, window=window)
            else:
                loss_and_gain_gain_years_window = np.zeros(
                    (window.height, window.width), dtype='uint8')


            # The four categories are mutually exclusive for any given pixel,
            # so adding the windows combines them without double counting
            gain_year_count_merged_window = loss_only_gain_years_window + gain_only_gain_years_window + \
                                            no_change_gain_years_window + loss_and_gain_gain_years_window

            gain_year_count_merged_dst.write_band(
                1, gain_year_count_merged_window, window=window)

        # Closes the output dataset explicitly so all windows are flushed to disk
        gain_year_count_merged_dst.close()

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, pattern, no_upload)
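
# --- Illustration (not from the original script) ---
# A minimal sketch of the windowed read pattern used above, with an in-memory
# raster so it runs without any tile files. All values here are toy data.
import numpy as np
import rasterio
from rasterio.io import MemoryFile
from rasterio.transform import from_origin

data = np.arange(16, dtype='uint8').reshape(1, 4, 4)   # hypothetical 4x4 single-band raster
profile = dict(driver='GTiff', height=4, width=4, count=1, dtype='uint8',
               crs='EPSG:4326', transform=from_origin(0, 4, 1, 1))

with MemoryFile() as memfile:
    with memfile.open(**profile) as dst:
        dst.write(data)
    with memfile.open() as src:
        # block_windows yields (index, Window) pairs one block/strip at a time,
        # so arbitrarily large rasters can be processed without loading them whole
        for idx, window in src.block_windows(1):
            block = src.read(1, window=window)
            print(idx, window, block.shape)
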
Example no. 29
def mp_prep_other_inputs(tile_id_list, run_date):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                                    cn.mangrove_biomass_2000_dir,
                                                    set3=cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir
                                                    )

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")


    # List of output directories and output file name patterns
    output_dir_list = [cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
                       cn.drivers_processed_dir, cn.ifl_primary_processed_dir,
                       cn.annual_gain_AGC_natrl_forest_young_dir,
                       cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
                       cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.FIA_forest_group_processed_dir,
                       cn.age_cat_natrl_forest_US_dir,
                       cn.FIA_regions_processed_dir]
    output_pattern_list = [cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
                           cn.pattern_drivers, cn.pattern_ifl_primary,
                           cn.pattern_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_FIA_forest_group_processed,
                           cn.pattern_age_cat_natrl_forest_US,
                           cn.pattern_FIA_regions_processed]


    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':

        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)


    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)


    # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest
    uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.drivers_raw_dir, '{}.zip'.format(cn.pattern_drivers_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type)
    # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster.
    # Thus, using wget instead.
    cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)
    cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    uu.log_subprocess_output_full(cmd)

    uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Unzipping pre-2000 plantations...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping drivers...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_drivers_raw)]
    uu.log_subprocess_output_full(cmd)


    # Creates tree cover loss driver tiles
    source_raster = '{}.tif'.format(cn.pattern_drivers_raw)
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates young natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates young natural forest removal rate standard deviation tiles
    source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates pre-2000 oil palm plantation tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 100 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list)
    pool.close()
    pool.join()


    # Creates climate zone tiles
    if cn.count == 96:
        processes = 80  # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_climate_zone_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 60  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest standard deviation of removal rate tiles
    source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 32  # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    uu.print_log("Creating vrt of humid tropial primary forest...")
    primary_vrt = 'primary_2001.vrt'
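    # os.system runs the command through a shell, which is what expands the *2001_primary.tif
    # wildcard; a plain Popen argument list would pass the pattern through literally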
    os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    uu.print_log("  Humid tropical primary forest vrt created")

    # Creates primary forest tiles
    source_raster = primary_vrt
    out_pattern = 'primary_2001'
    dt = 'Byte'
    if cn.count == 96:
        processes = 45  # 45 processors = 650 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    # Creates a combined IFL/primary forest raster
    # Uses very little memory since it's just file renaming
    if cn.count == 96:
        processes = 60  # 60 processors = 10 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list)
    pool.close()
    pool.join()


    # Creates forest age category tiles for US forests
    source_raster = cn.name_age_cat_natrl_forest_US_raw
    out_pattern = cn.pattern_age_cat_natrl_forest_US
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates forest groups for US forests
    source_raster = cn.name_FIA_forest_group_raw
    out_pattern = cn.pattern_FIA_forest_group_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 80  # 32 processors = 25 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates FIA regions for US forests
    source_raster = cn.name_FIA_regions_raw
    out_pattern = cn.pattern_FIA_regions_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 70  # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()


    for output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places
        # where 0 (NoData) should be. Those NaNs count as data when the check_and_delete_if_empty function runs, so empty
        # tiles are not deleted. The light version (which uses gdalinfo rather than rasterio masks) doesn't have this
        # problem, so the young forest rate and stdev tiles have their emptiness checked with the gdalinfo version first.
        # (This condition is always true for the two patterns iterated here; it is kept in case the loop's pattern list
        # changes. Note that the standard checks below still run afterward.)
        if output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        if cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest removal rates); 50 = XXX GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        elif cn.count <= 2: # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        else:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()
        uu.print_log('\n')


    # Uploads output tiles to s3
    for output_dir, output_pattern in zip(output_dir_list, output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
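
# --- Illustration (not from the original script) ---
# A minimal sketch of the Pool + partial fan-out pattern used repeatedly above,
# with a toy worker standing in for uu.mp_warp_to_Hansen. All names are illustrative.
import multiprocessing
from functools import partial

# Workers must be defined at module level so multiprocessing can pickle them
def warp_one(tile_id, source_raster, out_pattern, dt):
    return '{0}_{1}.tif ({2}, {3})'.format(tile_id, out_pattern, source_raster, dt)

if __name__ == '__main__':
    tile_id_list = ['00N_000E', '00N_010E']   # hypothetical tile ids
    with multiprocessing.Pool(2) as pool:
        # partial fixes the keyword arguments; map supplies one tile_id per call
        results = pool.map(partial(warp_one, source_raster='raw.tif',
                                   out_pattern='processed', dt='Byte'), tile_id_list)
    print(results)
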
Example no. 30
def main():

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # sensit_type is used in the s3 download below but is not defined anywhere in this
    # excerpt; 'std' is assumed here so the function can run
    sensit_type = 'std'

    # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ['50N_130W'] # test tiles
    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads the Mekong loss folder. Each year of loss has its own raster
    uu.s3_folder_download(cn.Mekong_loss_raw_dir, cn.docker_base_dir,
                          sensit_type)

    # The list of all annual loss rasters
    annual_loss_list = glob.glob('Loss_20*tif')
    uu.print_log(annual_loss_list)

    uu.print_log(
        "Creating first year of loss Hansen tiles for Mekong region...")
    # Recodes raw loss rasters with their loss year (for model years only)
    pool = multiprocessing.Pool(int(cn.count / 2))
    pool.map(Mekong_loss.recode_tiles, annual_loss_list)

    # Makes a single raster of all first loss year pixels in the Mekong (i.e. where loss occurred in multiple years,
    # the earliest loss year takes precedence)
    uu.print_log("Merging all loss years within model range...")
    loss_composite = "Mekong_loss_2001_2015.tif"
    cmd = [
        'gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=LZW',
        '-a_nodata', '0', '-ot', 'Byte', "Mekong_loss_recoded_2015.tif",
        "Mekong_loss_recoded_2014.tif", "Mekong_loss_recoded_2013.tif",
        "Mekong_loss_recoded_2012.tif", "Mekong_loss_recoded_2011.tif",
        "Mekong_loss_recoded_2010.tif", "Mekong_loss_recoded_2009.tif",
        "Mekong_loss_recoded_2008.tif", "Mekong_loss_recoded_2007.tif",
        "Mekong_loss_recoded_2006.tif", "Mekong_loss_recoded_2005.tif",
        "Mekong_loss_recoded_2004.tif", "Mekong_loss_recoded_2003.tif",
        "Mekong_loss_recoded_2002.tif", "Mekong_loss_recoded_2001.tif"
    ]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Creates Hansen tiles out of the composite Mekong loss
    source_raster = loss_composite
    out_pattern = cn.pattern_Mekong_loss_processed
    dt = 'Byte'
    pool.map(
        partial(uu.mp_warp_to_Hansen,
                source_raster=source_raster,
                out_pattern=out_pattern,
                dt=dt), tile_id_list)

    # This is necessary for changing NoData values to 0s (so they are recognized as 0s)
    pool.map(Mekong_loss.recode_tiles, tile_id_list)

    # Only uploads tiles that actually have Mekong loss in them
    upload_dir = cn.Mekong_loss_processed_dir
    pattern = cn.pattern_Mekong_loss_processed
    pool.map(
        partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern),
        tile_id_list)

    pool.close()
    pool.join()
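
# --- Illustration (not from the original script) ---
# The 15 recoded filenames above are hard-coded in reverse chronological order
# (2015 first, 2001 last) so that, as gdal_merge.py paints later-listed files on
# top of earlier ones, the earliest loss year wins. The same list could be built
# programmatically; a minimal sketch:
recoded_rasters = ['Mekong_loss_recoded_{}.tif'.format(year) for year in range(2015, 2000, -1)]
print(recoded_rasters[0], recoded_rasters[-1])   # Mekong_loss_recoded_2015.tif Mekong_loss_recoded_2001.tif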