def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type):

    uu.print_log("Gain year count for non-loss pixels:", tile_id)

    # Start time
    start = datetime.datetime.now()

    # Names of the loss, gain, tree cover extent, and biomass tiles
    loss, gain, extent, biomass = tile_names(tile_id, sensit_type)

    # For unclear reasons, gdal_calc doesn't register the 0 (NoData) pixels in the loss tile, so the loss tile is
    # converted to a vrt so that the 0 pixels are recognized.
    loss_vrt = '{}_loss.vrt'.format(tile_id)
    os.system('gdalbuildvrt -vrtnodata None {0} {1}'.format(loss_vrt, loss))

    # Pixels without loss, in areas with PRODES forest 2000 and biomass >0 (same approach as the standard model)
    no_change_calc = '--calc=(A==0)*(B==1)*(C>0)*{}'.format(cn.loss_years)
    no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
    no_change_outfilearg = '--outfile={}'.format(no_change_outfilename)
    cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', extent, '-C', biomass, no_change_calc, no_change_outfilearg,
           '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_no_change')
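# --- Illustration only (hypothetical, not called by the pipeline) ---
# A minimal NumPy sketch of what the gdal_calc expression above computes per pixel,
# assuming A = loss year (0 = no loss), B = PRODES forest 2000 extent (1 = forest),
# C = biomass. Pixels with no loss, inside 2000 forest, with biomass get the full
# cn.loss_years of growth; every other pixel gets 0 (NoData).
def _sketch_no_change_calc():
    import numpy as np
    loss_years = 15                     # stand-in for cn.loss_years
    A = np.array([0, 0, 7, 0])          # loss year per pixel
    B = np.array([1, 1, 1, 0])          # PRODES extent per pixel
    C = np.array([120, 0, 80, 90])      # biomass per pixel
    gain_years = (A == 0) * (B == 1) * (C > 0) * loss_years
    return gain_years                   # -> array([15, 0, 0, 0])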
def create_10x10_plantation_type(tile_id, plant_type_1x1_vrt):

    uu.print_log("Getting bounding coordinates for tile", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)
    uu.print_log("  xmin:", xmin, "; xmax:", xmax, "; ymin:", ymin, "; ymax:", ymax)

    tile_10x10 = '{0}_{1}.tif'.format(tile_id, cn.pattern_planted_forest_type_unmasked)
    uu.print_log("Rasterizing", tile_10x10)
    cmd = ['gdalwarp', '-tr', '{}'.format(str(cn.Hansen_res)), '{}'.format(str(cn.Hansen_res)),
           '-co', 'COMPRESS=LZW', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax),
           '-dstnodata', '0', '-t_srs', 'EPSG:4326', '-overwrite', '-ot', 'Byte',
           plant_type_1x1_vrt, tile_10x10]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.print_log("Checking if {} contains any data...".format(tile_id))
    stats = uu.check_for_data(tile_10x10)

    if stats[0] > 0:
        uu.print_log("  Data found in {}. Copying tile to s3...".format(tile_id))
        uu.upload_final(cn.planted_forest_type_unmasked_dir, tile_id, cn.pattern_planted_forest_type_unmasked)
        uu.print_log("  Tile converted and copied to s3")

    else:
        uu.print_log("  No data found. Not copying {}.".format(tile_id))
def rasterize_pre_2000_plantations(tile_id):

    # Start time
    start = datetime.datetime.now()

    uu.print_log("Getting extent of", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    out_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_plant_pre_2000)

    cmd = ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=LZW',
           '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res),
           '-tap', '-ot', 'Byte', '-a_nodata', '0',
           '-te', str(xmin), str(ymin), str(xmax), str(ymax),
           '{}.shp'.format(cn.pattern_plant_pre_2000_raw), out_tile]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_plant_pre_2000)
def legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type):

    uu.print_log("Gain year count for loss and gain pixels:", tile_id)

    # Start time
    start = datetime.datetime.now()

    # Names of the loss, gain, tree cover extent, and biomass tiles
    loss, gain, extent, biomass = tile_names(tile_id, sensit_type)

    # Pixels with both loss and gain, and in PRODES forest 2000
    loss_and_gain_calc = '--calc=((A>0)*(B==1)*(C==1)*((A-1)+({}+1-A)/2))'.format(cn.loss_years)
    loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id)
    loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename)
    cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent, loss_and_gain_calc, loss_and_gain_outfilearg,
           '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'growth_years_loss_and_gain')
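# --- Illustration only (hypothetical, not called by the pipeline) ---
# A minimal NumPy sketch of the loss-and-gain expression above. A pixel that was
# lost in year A and also shows gain is assumed to have grown for the A-1 years
# before loss plus half of the years remaining after loss:
# (A-1) + (loss_years+1-A)/2. For example, with loss_years=15 and loss in year 5:
# (5-1) + (15+1-5)/2 = 4 + 5.5 = 9.5 (truncated when written as Byte).
def _sketch_loss_and_gain_calc():
    import numpy as np
    loss_years = 15                     # stand-in for cn.loss_years
    A = np.array([5, 0, 10])            # loss year per pixel
    B = np.array([1, 1, 1])             # Hansen gain per pixel
    C = np.array([1, 1, 0])             # PRODES extent per pixel
    gain_years = (A > 0) * (B == 1) * (C == 1) * ((A - 1) + (loss_years + 1 - A) / 2)
    return gain_years                   # -> array([9.5, 0. , 0. ])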
def legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern):

    uu.print_log("Merging loss, no change, and loss/gain pixels into single raster for {}".format(tile_id))

    # Start time
    start = datetime.datetime.now()

    # The three rasters from above that are to be merged
    loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id)
    no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id)
    loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id)

    # All three components are merged together to the final output raster
    age_outfile = '{}_{}.tif'.format(tile_id, output_pattern)
    cmd = ['gdal_merge.py', '-o', age_outfile, loss_outfilename, no_change_outfilename, loss_and_gain_outfilename,
           '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-ot', 'Byte']

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_pattern)
def recode_tiles(annual_loss):

    uu.print_log("Recoding loss tile by year")

    year = int(annual_loss[-8:-4])
    uu.print_log(year)

    if year < 2001 or year > (2000 + cn.loss_years):
        uu.print_log("Skipping {} because outside of model range".format(year))
        return

    else:
        calc = '--calc={}*(A==100)'.format(int(year - 2000))
        recoded_output = "Mekong_loss_recoded_{}.tif".format(year)
        outfile = '--outfile={}'.format(recoded_output)

        cmd = ['gdal_calc.py', '-A', annual_loss, calc, outfile, '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--quiet']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)
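# --- Illustration only (hypothetical values) ---
# A minimal sketch of the filename parsing and recoding above: the year is the
# four characters at positions -8 to -4 of the raster name (just before ".tif"),
# and raw loss pixels carrying the value 100 are recoded to year - 2000.
def _sketch_recode():
    name = 'Loss_2015.tif'
    year = int(name[-8:-4])             # -> 2015
    recoded_value = year - 2000         # -> 15, burned wherever A == 100
    return year, recoded_value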
def create_1x1_plantation_type_from_1x1_planted(tile_1x1):

    # Gets the bounding coordinates for the 1x1 degree tile
    coords = tile_1x1.split("_")
    xmin_1x1 = str(coords[3])[:-4]
    xmax_1x1 = int(xmin_1x1) + 1
    ymax_1x1 = int(coords[2])
    ymin_1x1 = ymax_1x1 - 1

    uu.print_log("For", tile_1x1, "-- xmin_1x1:", xmin_1x1, "; xmax_1x1:", xmax_1x1, "; ymin_1x1:", ymin_1x1, "; ymax_1x1:", ymax_1x1)

    uu.print_log("There are plantations in {}. Converting to raster...".format(tile_1x1))

    # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
    cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW',
           'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_type_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1),
           '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1),
           '-a', 'type_reclass', '-a_nodata', '0', '-ot', 'Byte']

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
def create_1x1_plantation_from_1x1_gadm(tile_1x1):

    # Gets the bounding coordinates for the 1x1 degree tile
    coords = tile_1x1.split("_")
    uu.print_log(coords)
    xmin_1x1 = str(coords[2])[:-4]
    xmax_1x1 = int(xmin_1x1) + 1
    ymax_1x1 = int(coords[1])
    ymin_1x1 = ymax_1x1 - 1

    uu.print_log("For", tile_1x1, "-- xmin_1x1:", xmin_1x1, "; xmax_1x1:", xmax_1x1, "; ymin_1x1:", ymin_1x1, "; ymax_1x1:", ymax_1x1)

    # Connects Python to PostGIS using psycopg2. The credentials work on spot machines as they are currently configured
    # and are based on this: https://github.com/wri/gfw-annual-loss-processing/blob/master/1b_Summary-AOIs-to-TSV/utilities/postgis_util.py
    creds = {'host': 'localhost', 'user': '******', 'dbname': 'ubuntu'}
    conn = psycopg2.connect(**creds)
    cursor = conn.cursor()

    # Intersects the plantations PostGIS table with the 1x1 tile, then saves any growth rates in that tile as a 1x1 tile
    # https://gis.stackexchange.com/questions/30267/how-to-create-a-valid-global-polygon-grid-in-postgis
    # https://stackoverflow.com/questions/48978616/best-way-to-run-st-intersects-on-features-inside-one-table
    # https://postgis.net/docs/ST_Intersects.html
    uu.print_log("Checking if {} has plantations in it".format(tile_1x1))

    # Does the intersect of the PostGIS table and the 1x1 GADM tile
    cursor.execute("SELECT growth FROM all_plant WHERE ST_Intersects(all_plant.wkb_geometry, ST_GeogFromText('POLYGON(({0} {1},{2} {1},{2} {3},{0} {3},{0} {1}))'))".format(
        xmin_1x1, ymax_1x1, xmax_1x1, ymin_1x1))

    # A Python list of the output of the intersection, which in this case is a list of features that were successfully intersected.
    # This is what I use to determine if any PostGIS features were intersected.
    features = cursor.fetchall()
    cursor.close()

    # If any features in the PostGIS table were intersected with the 1x1 GADM tile, then the features in this 1x1 tile
    # are converted to a planted forest gain rate tile and a plantation type tile
    if len(features) > 0:

        uu.print_log("There are plantations in {}. Converting to gain rate and plantation type rasters...".format(tile_1x1))

        # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
        # For plantation gain rate
        cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW',
               'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_gain_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1),
               '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1),
               '-a', 'growth', '-a_nodata', '0']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
        # For plantation type
        cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW',
               'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_type_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1),
               '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1),
               '-a', 'type_reclass', '-a_nodata', '0']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    # If no features in the PostGIS table were intersected with the 1x1 GADM tile, nothing happens.
    else:
        uu.print_log("There are no plantations in {}. Not converting to raster.".format(tile_1x1))
def rasterize_gadm_1x1(tile_id):

    uu.print_log("Getting bounding coordinates for tile", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)
    uu.print_log("  xmin:", xmin, "; xmax:", xmax, "; ymin:", ymin, "; ymax:", ymax)

    # Degrees of tile in x and y dimensions
    x_size = abs(int(xmin) - int(xmax))
    y_size = abs(int(ymin) - int(ymax))

    # Iterates through the input 10x10 tile by 1x1 degree
    for x in range(x_size):

        xmin_1x1 = int(xmin) + x
        xmax_1x1 = int(xmin) + x + 1

        for y in range(y_size):

            ymin_1x1 = int(ymin) + y
            ymax_1x1 = int(ymin) + y + 1

            uu.print_log("  xmin_1x1:", xmin_1x1, "; xmax_1x1:", xmax_1x1, "; ymin_1x1:", ymin_1x1, "; ymax_1x1:", ymax_1x1)

            tile_1x1 = 'GADM_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1)
            uu.print_log("Rasterizing", tile_1x1)
            cmd = ['gdal_rasterize', '-tr', '{}'.format(str(cn.Hansen_res)), '{}'.format(str(cn.Hansen_res)),
                   '-co', 'COMPRESS=LZW', '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1),
                   '-burn', '1', '-a_nodata', '0', cn.gadm_iso, tile_1x1]

            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Only keeps 1x1 GADM tiles if they actually include a country; many 1x1 tiles created out of 10x10 tiles
            # don't actually include a country.
            uu.print_log("Checking if {} contains any data...".format(tile_1x1))
            stats = uu.check_for_data(tile_1x1)

            if stats[1] > 0:
                uu.print_log("  Data found in {}. Keeping tile".format(tile_1x1))

            else:
                uu.print_log("  No data found in {}. Deleting.".format(tile_1x1))
                os.remove(tile_1x1)
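# --- Illustration only (hypothetical, not called by the pipeline) ---
# A minimal sketch of the 10x10 -> 1x1 decomposition above: a tile with bounding
# box (xmin, ymin, xmax, ymax) yields one 1x1 degree cell per degree of extent,
# each named by its northwest corner (ymax_1x1, xmin_1x1), matching GADM_*.tif.
def _sketch_1x1_cells(xmin, ymin, xmax, ymax):
    cells = []
    for x in range(abs(int(xmin) - int(xmax))):
        for y in range(abs(int(ymin) - int(ymax))):
            xmin_1x1 = int(xmin) + x
            ymax_1x1 = int(ymin) + y + 1
            cells.append('GADM_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1))
    return cells                        # 100 names for a full 10x10 tile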
def download_df(year, hv_tile, output_dir):

    include = '*A{0}*{1}*'.format(year, hv_tile)
    cmd = ['aws', 's3', 'cp', cn.burn_year_hdf_raw_dir, output_dir, '--recursive', '--exclude', "*", '--include', include]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
def loss_in_raster(tile_id, raster_type, output_name, lat, mask):

    uu.print_log("Calculating loss area for tile id {0}...".format(tile_id))

    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # Start time
    start = datetime.datetime.now()

    # Name of the loss tile
    loss_tile = '{0}.tif'.format(tile_id)

    # The raster that loss is being analyzed inside
    raster_of_interest = '{0}_{1}.tif'.format(tile_id, raster_type)

    # Output file name
    outname = '{0}_{1}.tif'.format(tile_id, output_name)

    # Only processes the tile if it is inside the latitude band (north of the specified latitude)
    # and the raster of interest exists
    if ymax > lat and os.path.exists(raster_of_interest):

        uu.print_log("{} inside latitude band and raster of interest exists. Processing tile.".format(tile_id))

        # If the user has asked to create just a mask of loss as opposed to the actual output values
        if mask == "True":
            calc = '--calc=(A>=1)*(A+1)/(A+1)*B'

        # If the user has asked to output the actual loss values
        if mask == "False":
            # Multiplies the loss year by the raster of interest, keeping the loss value wherever the raster of interest has data
            calc = '--calc=A*B'

        # Argument for outputting file
        out = '--outfile={}'.format(outname)

        uu.print_log("Masking loss in {} by raster of interest...".format(tile_id))
        cmd = ['gdal_calc.py', '-A', loss_tile, '-B', raster_of_interest, calc, out,
               '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--overwrite', '--quiet']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        uu.print_log("{} masked".format(tile_id))

    else:
        uu.print_log("{} outside of latitude band or raster of interest does not exist. Skipped tile.".format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_name)
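# --- Illustration only (hypothetical, not called by the pipeline) ---
# A minimal NumPy sketch of the mask expression above. For the small loss-year
# values used here, (A+1)/(A+1) is always 1, so (A>=1)*(A+1)/(A+1)*B appears to
# reduce to "B where there is loss, else 0" (the extra factor just forces a
# floating-point result).
def _sketch_loss_mask():
    import numpy as np
    A = np.array([0, 3, 12])            # loss year per pixel
    B = np.array([7, 7, 7])             # raster of interest
    masked = (A >= 1) * (A + 1) / (A + 1) * B
    return masked                       # -> array([0., 7., 7.])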
def mp_mangrove_processing(tile_id_list, run_date = None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads zipped raw mangrove files
    uu.s3_file_download(os.path.join(cn.mangrove_biomass_raw_dir, cn.mangrove_biomass_raw_file), cn.docker_base_dir, 'std')

    # Unzips mangrove images into a flat structure (all tifs into main folder using -j argument)
    # NOTE: Unzipping some tifs (e.g., Australia, Indonesia) takes a very long time, so don't worry if the script appears to stop on that.
    cmd = ['unzip', '-o', '-j', cn.mangrove_biomass_raw_file]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Creates vrt for the Saatchi biomass rasters
    mangrove_vrt = 'mangrove_biomass.vrt'
    os.system('gdalbuildvrt {} *.tif'.format(mangrove_vrt))

    # Converts the mangrove AGB vrt into Hansen tiles
    source_raster = mangrove_vrt
    out_pattern = cn.pattern_mangrove_biomass_2000
    dt = 'float32'
    processes = int(cn.count/4)
    uu.print_log('Mangrove preprocessing max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use, for testing purposes
    # for tile_id in tile_id_list:
    #
    #     mangrove_processing.create_mangrove_tiles(tile_id, source_raster, out_pattern)

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.mangrove_biomass_2000_dir
    pattern = cn.pattern_mangrove_biomass_2000
    processes = int(cn.count-5)
    uu.print_log('Mangrove check for data max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
    pool.close()
    pool.join()
def mp_continent_ecozone_tiles(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.pattern_WHRC_biomass_2000_non_mang_non_planted, cn.mangrove_biomass_2000_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # If the continent-ecozone shapefile hasn't already been downloaded, it will be downloaded and unzipped
    uu.s3_file_download(cn.cont_eco_s3_zip, cn.docker_base_dir, 'std')

    # Unzips ecozone shapefile
    cmd = ['unzip', cn.cont_eco_zip]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # List of output directories and output file name patterns
    output_dir_list = [cn.cont_eco_raw_dir, cn.cont_eco_dir]
    output_pattern_list = [cn.pattern_cont_eco_raw, cn.pattern_cont_eco_processed]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # For multiprocessor use
    processes = int(cn.count / 4)
    uu.print_log('Continent-ecozone tile creation max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(continent_ecozone_tiles.create_continent_ecozone_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Uploads the continent-ecozone tiles to s3 before the codes are expanded to pixels in 1024x1024 windows that don't have codes.
    # These are not used for the model. They are for reference and completeness.
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def create_mangrove_soil_C(tile_id):

    # Start time
    start = datetime.datetime.now()

    # Checks if mangrove biomass exists. If not, it won't create a mangrove soil C tile.
    if os.path.exists('{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)):

        uu.print_log("Mangrove aboveground biomass tile found for", tile_id)

        uu.print_log("Getting extent of", tile_id)
        xmin, ymin, xmax, ymax = uu.coords(tile_id)

        uu.print_log("Clipping mangrove soil C from mangrove soil vrt for", tile_id)
        uu.warp_to_Hansen('mangrove_soil_C.vrt', '{0}_mangrove_full_extent.tif'.format(tile_id), xmin, ymin, xmax, ymax, 'Int16')

        mangrove_soil = '{0}_mangrove_full_extent.tif'.format(tile_id)
        mangrove_biomass = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)
        outname = '{0}_mangrove_masked_to_mangrove.tif'.format(tile_id)
        out = '--outfile={}'.format(outname)
        calc = '--calc=A*(B>0)'
        datatype = '--type={}'.format('Int16')

        uu.print_log("Masking mangrove soil to mangrove biomass for", tile_id)
        cmd = ['gdal_calc.py', '-A', mangrove_soil, '-B', mangrove_biomass,
               calc, out, '--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--overwrite', datatype, '--quiet']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    else:
        uu.print_log("No mangrove aboveground biomass tile for", tile_id)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, 'mangrove_masked_to_mangrove')
def mp_prep_other_inputs(tile_id_list, run_date = None):

    os.chdir(cn.docker_base_dir)
    sensit_type='std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                                    cn.mangrove_biomass_2000_dir,
                                                    set3=cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
                       cn.drivers_processed_dir, cn.ifl_primary_processed_dir,
                       cn.annual_gain_AGC_natrl_forest_young_dir,
                       cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
                       cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.FIA_forest_group_processed_dir,
                       cn.age_cat_natrl_forest_US_dir,
                       cn.FIA_regions_processed_dir]
    output_pattern_list = [cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
                           cn.pattern_drivers, cn.pattern_ifl_primary,
                           cn.pattern_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_FIA_forest_group_processed,
                           cn.pattern_age_cat_natrl_forest_US,
                           cn.pattern_FIA_regions_processed]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combined IFL and primary forest
    uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.drivers_raw_dir, '{}.zip'.format(cn.pattern_drivers_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type)

    # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster.
    # Thus, using wget instead.
    cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)

    cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    uu.log_subprocess_output_full(cmd)

    uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Unzipping pre-2000 plantations...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping drivers...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_drivers_raw)]
    uu.log_subprocess_output_full(cmd)

    # Creates tree cover loss driver tiles
    source_raster = '{}.tif'.format(cn.pattern_drivers_raw)
    out_pattern = cn.pattern_drivers
    dt = 'Byte'
    if cn.count == 96:
        processes = 80   # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates young natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80   # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates young natural forest removal rate standard deviation tiles
    source_raster = cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_natrl_forest_young
    dt = 'float32'
    if cn.count == 96:
        processes = 80   # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates pre-2000 oil palm plantation tiles
    if cn.count == 96:
        processes = 80   # 45 processors = 100 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list)
    pool.close()
    pool.join()

    # Creates climate zone tiles
    if cn.count == 96:
        processes = 80   # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_climate_zone_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest removal rate tiles
    source_raster = cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 60   # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates European natural forest standard deviation of removal rate tiles
    source_raster = cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw
    out_pattern = cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe
    dt = 'float32'
    if cn.count == 96:
        processes = 32   # 32 processors = 60 GB peak; 60 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    uu.print_log("Creating vrt of humid tropical primary forest...")
    primary_vrt = 'primary_2001.vrt'
    os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    uu.print_log("  Humid tropical primary forest vrt created")

    # Creates primary forest tiles
    source_raster = primary_vrt
    out_pattern = 'primary_2001'
    dt = 'Byte'
    if cn.count == 96:
        processes = 45   # 45 processors = 650 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates a combined IFL/primary forest raster.
    # Uses very little memory since it's just file renaming.
    if cn.count == 96:
        processes = 60   # 60 processors = 10 GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(prep_other_inputs.create_combined_ifl_primary, tile_id_list)
    pool.close()
    pool.join()

    # Creates forest age category tiles for US forests
    source_raster = cn.name_age_cat_natrl_forest_US_raw
    out_pattern = cn.pattern_age_cat_natrl_forest_US
    dt = 'Byte'
    if cn.count == 96:
        processes = 70   # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates forest groups for US forests
    source_raster = cn.name_FIA_forest_group_raw
    out_pattern = cn.pattern_FIA_forest_group_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 80   # 32 processors = 25 GB peak; 80 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    # Creates FIA regions for US forests
    source_raster = cn.name_FIA_regions_raw
    out_pattern = cn.pattern_FIA_regions_processed
    dt = 'Byte'
    if cn.count == 96:
        processes = 70   # 32 processors = 35 GB peak; 70 = XXX GB peak
    else:
        processes = int(cn.count/2)
    uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)
    pool.close()
    pool.join()

    for output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData)
        # should be. These NaN values show up as values when the check_and_delete_if_empty function runs, making the tiles not
        # deleted even if they have no data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't
        # have this problem. So I'm forcing the young forest rates and stdev to have their emptiness checked by the gdalinfo version.
        if output_pattern in [cn.pattern_annual_gain_AGC_natrl_forest_young, cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        elif cn.count == 96:
            processes = 50   # 60 processors = >730 GB peak (for European natural forest removal rates); 50 = XXX GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        elif cn.count <= 2:   # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        else:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            pool = multiprocessing.Pool(processes)
            pool.map(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern), tile_id_list)
            pool.close()
            pool.join()

        uu.print_log('\n')

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
def main():

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # This script runs on the standard model only
    sensit_type = 'std'

    # List of tiles that could be run
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ['50N_130W'] # test tiles
    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads the Mekong loss folder. Each year of loss has its own raster
    uu.s3_folder_download(cn.Mekong_loss_raw_dir, cn.docker_base_dir, sensit_type)

    # The list of all annual loss rasters
    annual_loss_list = glob.glob('Loss_20*tif')
    uu.print_log(annual_loss_list)

    uu.print_log("Creating first year of loss Hansen tiles for Mekong region...")

    # Recodes raw loss rasters with their loss year (for model years only)
    pool = multiprocessing.Pool(int(cn.count / 2))
    pool.map(Mekong_loss.recode_tiles, annual_loss_list)

    # Makes a single raster of all first loss year pixels in the Mekong (i.e. where loss occurred in multiple years,
    # the earliest loss year is kept, because gdal_merge gives precedence to inputs later in the list)
    uu.print_log("Merging all loss years within model range...")
    loss_composite = "Mekong_loss_2001_2015.tif"
    cmd = ['gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-ot', 'Byte',
           "Mekong_loss_recoded_2015.tif", "Mekong_loss_recoded_2014.tif", "Mekong_loss_recoded_2013.tif",
           "Mekong_loss_recoded_2012.tif", "Mekong_loss_recoded_2011.tif", "Mekong_loss_recoded_2010.tif",
           "Mekong_loss_recoded_2009.tif", "Mekong_loss_recoded_2008.tif", "Mekong_loss_recoded_2007.tif",
           "Mekong_loss_recoded_2006.tif", "Mekong_loss_recoded_2005.tif", "Mekong_loss_recoded_2004.tif",
           "Mekong_loss_recoded_2003.tif", "Mekong_loss_recoded_2002.tif", "Mekong_loss_recoded_2001.tif"]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Creates Hansen tiles out of the composite Mekong loss
    source_raster = loss_composite
    out_pattern = cn.pattern_Mekong_loss_processed
    dt = 'Byte'
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt), tile_id_list)

    # This is necessary for changing NoData values to 0s (so they are recognized as 0s)
    pool.map(Mekong_loss.recode_tiles, tile_id_list)

    # Only uploads tiles that actually have Mekong loss in them
    upload_dir = cn.Mekong_loss_processed_dir
    pattern = cn.pattern_Mekong_loss_processed
    pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
    pool.close()
    pool.join()
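# --- Illustration only (hypothetical, not called by the pipeline) ---
# A minimal NumPy sketch of why the recoded rasters are listed from 2015 down to
# 2001 in the gdal_merge command above: gdal_merge paints later-listed inputs
# over earlier ones (nodata excepted), so listing 2001 last means the earliest
# loss year wins wherever a pixel was lost in more than one annual raster.
def _sketch_merge_precedence():
    import numpy as np
    composite = np.zeros(3, dtype=int)                # 0 = nodata
    for layer in [np.array([15, 15, 0]),              # recoded 2015 raster, listed first
                  np.array([0, 1, 1])]:               # recoded 2001 raster, listed last
        composite = np.where(layer > 0, layer, composite)
    return composite                                  # -> array([15, 1, 1])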
def mp_plantation_preparation(gadm_index_shp, planted_index_shp):

    os.chdir(cn.docker_base_dir)

    # ## Not actually using this but leaving it here in case I want to add this functionality eventually. This
    # # was to allow users to run plantations for a select (contiguous) area rather than for the whole planet.
    # # List of bounding box coordinates
    # bound_list = args.bounding_box
    # # Checks if bounding box coordinates are in multiples of 10 (10 degree tiles). If they're not, the script stops.
    # for bound in bound_list:
    #     if bound%10:
    #         uu.exception_log(bound, 'not a multiple of 10. Please make sure bounding box coordinates are multiples of 10.')

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (gadm_index_path not in cn.gadm_plant_1x1_index_dir or planted_index_path not in cn.gadm_plant_1x1_index_dir):
        uu.exception_log('Invalid inputs. Please provide None or s3 shapefile locations for both arguments.')

    # List of all possible 10x10 Hansen tiles except for those at very extreme latitudes (not just WHRC biomass tiles)
    total_tile_list = uu.tile_list_s3(cn.pixel_area_dir)
    uu.print_log("Number of possible 10x10 tiles to evaluate:", len(total_tile_list))

    # Removes the latitude bands that don't have any planted forests in them according to Liz Goldman.
    # i.e., Liz Goldman said by Slack on 1/2/19 that the northernmost planted forest is 69.5146 and the southernmost is -46.938968.
    # This creates a more focused list of 10x10 tiles to iterate through (removes ones that definitely don't have planted forest).
    # NOTE: If the planted forest gdb is updated, the list of latitudes to exclude below may need to be changed to not exclude certain latitude bands.
    planted_lat_tile_list = [tile for tile in total_tile_list if '90N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '50S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '60S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '70S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80S' not in tile]
    # planted_lat_tile_list = ['10N_080W']

    uu.print_log(planted_lat_tile_list)
    uu.print_log("Number of 10x10 tiles to evaluate after extreme latitudes have been removed:", len(planted_lat_tile_list))

    # If a planted forest extent 1x1 tile index shapefile isn't supplied
    if 'None' in args.planted_tile_index:

        ### Entry point 1:
        # If no shapefile of 1x1 tiles for countries with planted forests is supplied, 1x1 tiles of country extents will be created.
        # This runs the process from the very beginning and will take a few days.
        if 'None' in args.gadm_tile_index:

            uu.print_log("No GADM 1x1 tile index shapefile provided. Creating 1x1 planted forest country tiles from scratch...")

            # Downloads and unzips the GADM shapefile, which will be used to create 1x1 tiles of land areas
            uu.s3_file_download(cn.gadm_path, cn.docker_base_dir)
            cmd = ['unzip', cn.gadm_zip]

            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Creates a new GADM shapefile with just the countries that have planted forests in them.
            # This limits creation of 1x1 rasters of land area to the countries that have planted forests rather than all countries.
            # NOTE: If the planted forest gdb is updated and has new countries added to it, the planted forest country list
            # in constants_and_names.py must be updated, too.
            uu.print_log("Creating shapefile of countries with planted forests...")
            os.system('''ogr2ogr -sql "SELECT * FROM gadm_3_6_adm2_final WHERE iso IN ({0})" {1} gadm_3_6_adm2_final.shp'''.format(str(cn.plantation_countries)[1:-1], cn.gadm_iso))

            # Creates 1x1 degree tiles of countries that have planted forests in them.
            # I think this can handle using 50 processors because it's not trying to upload files to s3 and the tiles are small.
            # This takes several days to run because it iterates through at least 250 10x10 tiles.
            # For multiprocessor use.
            processes = 50
            uu.print_log('Rasterize GADM 1x1 max processors=', processes)
            pool = Pool(processes)
            pool.map(plantation_preparation.rasterize_gadm_1x1, planted_lat_tile_list)
            pool.close()
            pool.join()

            # # Creates 1x1 degree tiles of countries that have planted forests in them.
            # # For single processor use.
            # for tile in planted_lat_tile_list:
            #
            #     plantation_preparation.rasterize_gadm_1x1(tile)

            # Creates a shapefile of the boundaries of the 1x1 GADM tiles in countries with planted forests
            os.system('''gdaltindex {0}_{1}.shp GADM_*.tif'''.format(cn.pattern_gadm_1x1_index, uu.date_time_today))
            cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_gadm_1x1_index), '--recursive']

            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # # Saves the 1x1 country extent tiles to s3
            # # Only use if the entire process can't run in one go on the spot machine
            # cmd = ['aws', 's3', 'cp', cn.docker_base_dir, 's3://gfw2-data/climate/carbon_model/temp_spotmachine_output/', '--exclude', '*', '--include', 'GADM_*.tif', '--recursive']
            #
            # # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            # process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            # with process.stdout:
            #     uu.log_subprocess_output(process.stdout)

            # Delete the aux.xml files
            os.system('''rm GADM*.tif.*''')

            # List of all 1x1 degree country extent tiles created
            gadm_list_1x1 = uu.tile_list_spot_machine(".", "GADM_")
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log(len(gadm_list_1x1))

        ### Entry point 2:
        # If a shapefile of the boundaries of 1x1 degree tiles of countries with planted forests is supplied,
        # a list of the 1x1 tiles is created from the shapefile.
        # This avoids creating the 1x1 country extent tiles all over again because the relevant tile extents are supplied
        # in the shapefile.
        elif cn.gadm_plant_1x1_index_dir in args.gadm_tile_index:

            uu.print_log("Country extent 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest tiles...")

            uu.print_log('{}/'.format(gadm_index_path))

            # Copies the shapefile of 1x1 tiles of extent of countries with planted forests
            cmd = ['aws', 's3', 'cp', '{}/'.format(gadm_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include', '{}*'.format(gadm_index_shp)]

            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Gets the attribute table of the country extent 1x1 tile shapefile
            gadm = glob.glob('{}*.dbf'.format(cn.pattern_gadm_1x1_index))[0]

            # Converts the attribute table to a dataframe
            dbf = Dbf5(gadm)
            df = dbf.to_dataframe()

            # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
            gadm_list_1x1 = df['location'].tolist()
            gadm_list_1x1 = [str(y) for y in gadm_list_1x1]
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log("There are", len(gadm_list_1x1), "1x1 country extent tiles to iterate through.")

        # In case some other arguments are provided
        else:
            uu.exception_log('Invalid GADM tile index shapefile provided. Please provide a valid shapefile.')

        # Creates 1x1 degree tiles of plantation growth wherever there are plantations.
        # Because this is iterating through all 1x1 tiles in countries with planted forests, it first checks
        # whether each 1x1 tile intersects planted forests before creating a 1x1 planted forest tile for that
        # 1x1 country extent tile.
        # 55 processors seems to use about 350 GB of memory, which seems fine. But there was some error about
        # "PQconnectdb failed-- sorry, too many clients already". So, moved the number of processors down to 48.
        # For multiprocessor use
        processes = 48
        uu.print_log('Create 1x1 plantation from 1x1 gadm max processors=', processes)
        pool = Pool(processes)
        pool.map(plantation_preparation.create_1x1_plantation_from_1x1_gadm, gadm_list_1x1)
        pool.close()
        pool.join()

        # # Creates 1x1 degree tiles of plantation growth wherever there are plantations
        # # For single processor use
        # for tile in gadm_list_1x1:
        #
        #     plantation_preparation.create_1x1_plantation(tile)

        # Creates a shapefile in which each feature is the extent of a plantation extent tile.
        # This index shapefile can be used the next time this process is run if starting with Entry Point 3.
        os.system('''gdaltindex {0}_{1}.shp plant_gain_*.tif'''.format(cn.pattern_plant_1x1_index, uu.date_time_today))
        cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_plant_1x1_index), '--recursive']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    ### Entry point 3
    # If a shapefile of the extents of 1x1 planted forest tiles is provided.
    # This is the part that actually creates the sequestration rate and forest type tiles.
    if cn.pattern_plant_1x1_index in args.planted_tile_index:

        uu.print_log("Planted forest 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest growth rate and forest type tiles...")

        # Copies the shapefile of 1x1 tiles of extent of planted forests
        cmd = ['aws', 's3', 'cp', '{}/'.format(planted_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include', '{}*'.format(planted_index_shp)]

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # Gets the attribute table of the planted forest extent 1x1 tile shapefile
        gadm = glob.glob('{}*.dbf'.format(cn.pattern_plant_1x1_index))[0]

        # Converts the attribute table to a dataframe
        dbf = Dbf5(gadm)
        df = dbf.to_dataframe()

        # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
        planted_list_1x1 = df['location'].tolist()
        planted_list_1x1 = [str(y) for y in planted_list_1x1]
        uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", planted_list_1x1)
        uu.print_log("There are", len(planted_list_1x1), "1x1 planted forest extent tiles to iterate through.")

        # Creates 1x1 degree tiles of plantation growth and type wherever there are plantations.
        # Because this is iterating through only 1x1 tiles that are known to have planted forests (from a previous run
        # of this script), it does not need to check whether there are planted forests in this tile. It goes directly
        # to intersecting the planted forest table with the 1x1 tile.

        # # For single processor use
        # for tile in planted_list_1x1:
        #
        #     plantation_preparation.create_1x1_plantation_growth_from_1x1_planted(tile)

        # For multiprocessor use
        # processes=40 uses about 360 GB of memory. Works on r4.16xlarge with space to spare
        # processes=52 uses about 465 GB of memory (quite stably), so this is basically the max.
        num_of_processes = 52
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_growth_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()

        # This works with 50 processors on an r4.16xlarge machine. Uses about 430 GB out of 480 GB.
        processes = 50
        uu.print_log('Create 1x1 plantation type max processors=', processes)
        pool = Pool(processes)
        pool.map(plantation_preparation.create_1x1_plantation_type_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()

        # This rasterizes the plantation removal factor standard deviations
        # processes=50 peaks at about 450 GB
        num_of_processes = 50
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_stdev_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()
def create_climate_zone_tiles(tile_id):

    # Start time
    start = datetime.datetime.now()

    uu.print_log("Getting extent of", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # Makes a 10x10 degree chunk of the global climate zone raster conform to Hansen tile properties.
    # Rather than the usual 40000x1 windows, this creates 1024x1024 windows for filling in missing values (see below).
    # The output of gdalwarp ("climate_zone_intermediate") is not used anywhere else.
    uu.print_log("Warping climate zone tile", tile_id)
    cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=LZW', '-tr', str(cn.Hansen_res), str(cn.Hansen_res),
           '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax),
           '-dstnodata', '0', '-ot', 'Byte', '-overwrite',
           '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=1024', '-co', 'BLOCKYSIZE=1024',
           cn.climate_zone_raw, '{0}_{1}.tif'.format(tile_id, "climate_zone_intermediate")]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Fills in empty pixels in the climate zone raster with whatever value is most common (mode) in its 1024x1024 pixel window.
    # That is, any 1024x1024 processing window that has >=1 climate zone pixel in it will have its empty pixels filled in
    # with whatever value is most common in that window.
    # This extends the climate zone raster out into coastal areas and better covers coasts/islands, meaning that more
    # loss pixels will have climate zone pixels available to them during emissions processing.
    # Everything from here down is used to assign pixels without climate zone to a climate zone in the 1024x1024 windows.
    uu.print_log("Re-tiling climate zone for tile", tile_id)

    # Opens climate zone tile
    climate_zone_src = rasterio.open("{0}_{1}.tif".format(tile_id, "climate_zone_intermediate"))

    # Grabs metadata about the tif, like its location/projection/cellsize
    kwargs = climate_zone_src.meta

    # Grabs the windows of the tile (stripes) to iterate over the entire tif without running out of memory
    windows = climate_zone_src.block_windows(1)

    # Updates kwargs for the output dataset.
    kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0)

    # Output file name
    climate_zone_processed = '{0}_{1}.tif'.format(tile_id, cn.pattern_climate_zone)

    # The output file: climate zone with empty pixels filled in
    dst_climate_zone = rasterio.open(climate_zone_processed, 'w', **kwargs)

    # Iterates across the windows (1024 x 1024 pixel boxes) of the input tile
    for idx, window in windows:

        # Creates window for input raster
        climate_zone_window = climate_zone_src.read(1, window=window)

        # Turns the 2D array into a 1D array that is n x n long.
        # This makes it easier to remove 0s and find the mode of the remaining climate zone codes
        climate_zone_flat = climate_zone_window.flatten()

        # Removes all zeros from the array, leaving just pixels with climate zone codes
        non_zeros = np.delete(climate_zone_flat, np.where(climate_zone_flat == 0))

        # If there were only pixels without climate zone codes in the array, the mode is assigned 0
        if non_zeros.size < 1:
            mode = 0

        # If there were pixels with climate zone codes, the mode is the most common code among those in the window
        else:
            mode = stats.mode(non_zeros)[0]

        # Assigns all pixels without a climate zone code in that window to that most common code
        climate_zone_window[climate_zone_window == 0] = mode

        # Writes the output window to the output.
        # Although the windows for the input tiles are 1024 x 1024 pixels,
        # the windows for these output files are 40000 x 1 pixels, like all the other tiles in this model,
        # so they should work fine with all the other tiles.
        dst_climate_zone.write_band(1, climate_zone_window, window=window)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_climate_zone)
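# --- Illustration only (hypothetical, not called by the pipeline) ---
# A minimal sketch of the window-filling rule above, on a 3x3 window instead of
# 1024x1024: zeros are dropped, the mode of the remaining climate zone codes is
# taken, and every empty pixel in the window is assigned that mode.
def _sketch_fill_window_with_mode():
    import numpy as np
    from scipy import stats
    window = np.array([[0, 2, 2],
                       [0, 2, 3],
                       [0, 0, 0]])
    flat = window.flatten()
    non_zeros = np.delete(flat, np.where(flat == 0))
    mode = stats.mode(non_zeros)[0] if non_zeros.size > 0 else 0
    window[window == 0] = mode
    return window                       # all former zeros become 2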
def main():

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # Files to download for this script.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults]
    }

    # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
    tile_id_list = uu.tile_list_s3(cn.annual_gain_AGB_IPCC_defaults_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"]  # test tiles
    # tile_id_list = ['50N_130W']  # test tiles

    # List of output directories and output file name patterns
    output_dir_list = [cn.US_annual_gain_AGB_natrl_forest_dir, cn.US_annual_gain_BGB_natrl_forest_dir]
    output_pattern_list = [cn.pattern_US_annual_gain_AGB_natrl_forest, cn.pattern_US_annual_gain_BGB_natrl_forest]

    # By definition, this script is for US-specific removals
    sensit_type = 'US_removals'

    # Counts how many processed FIA region tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_regions_tile_count = uu.count_tiles_s3(cn.FIA_regions_processed_dir)

    # Only creates FIA region tiles if they don't already exist on s3.
    if FIA_regions_tile_count == 16:
        uu.print_log("FIA region tiles already created. Copying to spot machine now...")
        uu.s3_flexible_download(cn.FIA_regions_processed_dir, cn.pattern_FIA_regions_processed,
                                cn.docker_base_dir, 'std', 'all')
    else:
        uu.print_log("FIA region tiles do not exist. Creating tiles, then copying to s3 for future use...")
        uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, 'std')

        cmd = ['unzip', '-o', '-j', cn.name_FIA_regions_raw]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # Converts the region shapefile to Hansen tiles
        pool = multiprocessing.Pool(int(cn.count/2))
        pool.map(US_removal_rates.prep_FIA_regions, tile_id_list)

    # List of FIA region tiles on the spot machine. Only this list is used for the rest of the script.
    US_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir, '{}.tif'.format(cn.pattern_FIA_regions_processed))
    US_tile_id_list = [i[0:8] for i in US_tile_list]
    # US_tile_id_list = ['50N_130W']  # For testing
    uu.print_log(US_tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(US_tile_id_list))) + "\n")

    # Counts how many processed forest age category tiles there are on s3 already. 16 tiles cover the continental US.
    US_age_tile_count = uu.count_tiles_s3(cn.US_forest_age_cat_processed_dir)

    # Only creates FIA forest age category tiles if they don't already exist on s3.
    if US_age_tile_count == 16:
        uu.print_log("Forest age category tiles already created. Copying to spot machine now...")
        uu.s3_flexible_download(cn.US_forest_age_cat_processed_dir, cn.pattern_US_forest_age_cat_processed,
                                '', 'std', US_tile_id_list)
    else:
        uu.print_log("Forest age category tiles do not exist. Creating tiles, then copying to s3 for future use...")
        uu.s3_file_download(os.path.join(cn.US_forest_age_cat_raw_dir, cn.name_US_forest_age_cat_raw),
                            cn.docker_base_dir, 'std')

        # Converts the national forest age category raster to Hansen tiles
        source_raster = cn.name_US_forest_age_cat_raw
        out_pattern = cn.pattern_US_forest_age_cat_processed
        dt = 'Int16'
        pool = multiprocessing.Pool(int(cn.count/2))
        pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt),
                 US_tile_id_list)

        uu.upload_final_set(cn.US_forest_age_cat_processed_dir, cn.pattern_US_forest_age_cat_processed)

    # Counts how many processed FIA forest group tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_forest_group_tile_count = uu.count_tiles_s3(cn.FIA_forest_group_processed_dir)

    # Only creates FIA forest group tiles if they don't already exist on s3.
    if FIA_forest_group_tile_count == 16:
        uu.print_log("FIA forest group tiles already created. Copying to spot machine now...")
        uu.s3_flexible_download(cn.FIA_forest_group_processed_dir, cn.pattern_FIA_forest_group_processed,
                                '', 'std', US_tile_id_list)
    else:
        uu.print_log("FIA forest group tiles do not exist. Creating tiles, then copying to s3 for future use...")
        uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw),
                            cn.docker_base_dir, 'std')

        # Converts the national forest group raster to Hansen forest group tiles
        source_raster = cn.name_FIA_forest_group_raw
        out_pattern = cn.pattern_FIA_forest_group_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count/2))
        pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt),
                 US_tile_id_list)

        uu.upload_final_set(cn.FIA_forest_group_processed_dir, cn.pattern_FIA_forest_group_processed)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, US_tile_id_list)

    # Table with US-specific removal rates
    cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate), cn.docker_base_dir]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Imports the table with the region-group-age AGB removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate), sheet_name="US_rates_for_model")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(gain_table, id_vars=['FIA_region_code', 'forest_group_code'],
                                             value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    age_dict = {'growth_young': 1000, 'growth_middle': 2000, 'growth_old': 3000}

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
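    # Illustrative example with assumed values: FIA_region_code = 3, forest_group_code = 5, and
    # 'growth_young' (age_dict value 1000) give age_cat = 1000 * 10 = 10000, so
    # group_region_age_combined = 10000 + 5*100 + 3 = 10503.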
    gain_table_group_region_age = gain_table_group_region_by_age.replace({"variable": age_dict})
    gain_table_group_region_age['age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                                               gain_table_group_region_age['forest_group_code'] * 100 + \
                                                               gain_table_group_region_age['FIA_region_code']

    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    gain_table_group_region_age_dict = pd.Series(gain_table_group_region_age.value.values,
                                                 index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code'] * 100 + \
                                                       gain_table_group_region['FIA_region_code']

    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    gain_table_group_region_dict = pd.Series(gain_table_group_region.value.values,
                                             index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    # count/2 processors max out at about 230 GB of memory (processing 16 tiles at once),
    # so this is fine on an m4.16xlarge machine.
    pool = multiprocessing.Pool(int(cn.count/2))
    pool.map(partial(US_removal_rates.US_removal_rate_calc,
                     gain_table_group_region_age_dict=gain_table_group_region_age_dict,
                     gain_table_group_region_dict=gain_table_group_region_dict,
                     output_pattern_list=output_pattern_list,
                     sensit_type=sensit_type),
             US_tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in US_tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_group_region_dict,
    #                                           output_pattern_list, sensit_type)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
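
# The pd.Series(values, index=keys).to_dict() idiom used above is a compact way to turn two DataFrame
# columns into a lookup table. A minimal illustration with assumed toy values:
#
#   import pandas as pd
#
#   df = pd.DataFrame({'group_region_age_combined': [10503, 20503], 'value': [2.5, 1.1]})
#   rate_dict = pd.Series(df.value.values, index=df.group_region_age_combined).to_dict()
#   # rate_dict == {10503: 2.5, 20503: 1.1}
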
def mp_create_inputs_for_C_pools(tile_id_list, run_date=None):

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.model_extent_dir, sensit_type)

    # List of output directories and output file name patterns
    output_dir_list = [cn.bor_tem_trop_processed_dir, cn.elevation_processed_dir, cn.precip_processed_dir]
    output_pattern_list = [cn.pattern_bor_tem_trop_processed, cn.pattern_elevation, cn.pattern_precip]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads two of the raw input files for creating carbon pools
    input_files = [cn.fao_ecozone_raw_dir, cn.precip_raw_dir]

    for input in input_files:
        uu.s3_file_download('{}'.format(input), cn.docker_base_dir, sensit_type)

    uu.print_log("Unzipping boreal/temperate/tropical file (from FAO ecozones)")
    cmd = ['unzip', '{}'.format(cn.pattern_fao_ecozone_raw), '-d', cn.docker_base_dir]
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.print_log("Copying elevation (srtm) files")
    uu.s3_folder_download(cn.srtm_raw_dir, './srtm', sensit_type)

    uu.print_log("Making elevation (srtm) vrt")
    # This output is not piped into the log like the other subprocess calls; see the sketch below this
    # function for one way that could be done.
    check_call('gdalbuildvrt srtm.vrt srtm/*.tif', shell=True)

    # Worked with count/3 on an r4.16xlarge (140 out of 480 GB used); count/2 is expected to be fine but was not tested.
    processes = int(cn.count/2)
    uu.print_log('Inputs for C pools max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(create_inputs_for_C_pools.create_input_files, tile_id_list)

    # # For single processor use
    # for tile_id in tile_id_list:
    #
    #     create_inputs_for_C_pools.create_input_files(tile_id)

    uu.print_log("Uploading output files")
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])
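
# A possible way to route the gdalbuildvrt output through the log like the other subprocess calls in
# this file (untested sketch; shell=True is kept so that the srtm/*.tif glob is expanded by the shell):
#
#   process = Popen('gdalbuildvrt srtm.vrt srtm/*.tif', shell=True, stdout=PIPE, stderr=STDOUT)
#   with process.stdout:
#       uu.log_subprocess_output(process.stdout)
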
def main():

    no_upload = False

    sensit_type = "legal_Amazon_loss"

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # All stages are listed here so that later stage checks (forest age category, gain year count, removals,
    # carbon pools) can actually be selected.
    Brazil_stages = ['all', 'create_forest_extent', 'create_loss', 'forest_age_category', 'gain_year_count',
                     'annual_removals', 'cumulative_removals', 'removals_merged', 'carbon_pools']

    # The argument for what kind of model run is being done: standard conditions or a sensitivity analysis run
    parser = argparse.ArgumentParser(
        description='Create tiles of forest extent in legal Amazon in 2000 and annual loss according to PRODES')
    parser.add_argument('--stages', '-s', required=True,
                        help='Stages of creating Brazil legal Amazon-specific gross cumulative removals. Options are {}'.format(Brazil_stages))
    parser.add_argument('--run_through', '-r', required=True,
                        help='Options: true or false. true: run named stage and following stages. false: run only named stage.')
    args = parser.parse_args()
    stage_input = args.stages
    run_through = args.run_through

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if stage_input not in Brazil_stages:
        uu.exception_log(no_upload, 'Invalid stage selection. Please provide a stage from', Brazil_stages)
    if run_through not in ['true', 'false']:
        uu.exception_log(no_upload, 'Invalid run through option. Please enter true or false.')

    actual_stages = uu.analysis_stages(Brazil_stages, stage_input, run_through, sensit_type)
    uu.print_log(actual_stages)

    # By definition, this script is for the legal Amazon loss sensitivity analysis
    sensit_type = 'legal_Amazon_loss'

    # List of output directories and output file name patterns
    master_output_dir_list = [cn.Brazil_forest_extent_2000_processed_dir, cn.Brazil_annual_loss_processed_dir]
    master_output_pattern_list = [cn.pattern_Brazil_forest_extent_2000_processed, cn.pattern_Brazil_annual_loss_processed]

    # Creates forest extent 2000 raster from multiple PRODES forest extent rasters
    ### NOTE: This was not redone for model v1.2.0, so it may no longer work.
    if 'create_forest_extent' in actual_stages:
        uu.print_log('Creating forest extent tiles')

        # List of tiles that could be run. This list is only used for creating the forest extent tiles.
        tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
        # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"]  # test tiles
        # tile_id_list = ['50N_130W']  # test tiles
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input rasters and lists them
        uu.s3_folder_download(cn.Brazil_forest_extent_2000_raw_dir, cn.docker_base_dir, sensit_type)
        raw_forest_extent_inputs = glob.glob('*_AMZ_warped_*tif')  # The list of tiles to merge

        # Gets the resolution of a more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('*2019_AMZ_warped_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log(pixelSizeX)
        uu.print_log(pixelSizeY)

        # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out
        # at about 300 GB as it progresses about 15% each time; then the memory drops back to 0 and slowly increases.
        cmd = ['gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged),
               '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
               '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
               raw_forest_extent_inputs[0], raw_forest_extent_inputs[1], raw_forest_extent_inputs[2],
               raw_forest_extent_inputs[3], raw_forest_extent_inputs[4], raw_forest_extent_inputs[5]]
        uu.log_subprocess_output_full(cmd)

        # Uploads the merged forest extent raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_forest_extent_2000_merged_dir, cn.pattern_Brazil_forest_extent_2000_merged)

        # Creates legal Amazon extent 2000 tiles
        source_raster = '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged)
        out_pattern = cn.pattern_Brazil_forest_extent_2000_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count/2))
        pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt,
                         no_upload=no_upload), tile_id_list)

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        upload_dir = master_output_dir_list[0]
        pattern = master_output_pattern_list[0]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)

    # Creates annual loss raster for 2001-2019 from multiple PRODES rasters
    if 'create_loss' in actual_stages:
        uu.print_log('Creating annual PRODES loss tiles')

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input rasters and lists them
        cmd = ['aws', 's3', 'cp', cn.Brazil_annual_loss_raw_dir, '.', '--recursive']
        uu.log_subprocess_output_full(cmd)
        uu.print_log("Input loss rasters downloaded. Getting resolution of recent raster...")

        # Gets the resolution of the more recent PRODES raster, which has a higher resolution. The merged output matches that.
        raw_forest_extent_input_2019 = glob.glob('Prodes2019_*tif')
        prodes_2019 = gdal.Open(raw_forest_extent_input_2019[0])
        transform_2019 = prodes_2019.GetGeoTransform()
        pixelSizeX = transform_2019[1]
        pixelSizeY = -transform_2019[5]
        uu.print_log("  Recent raster resolution: {0} by {1}".format(pixelSizeX, pixelSizeY))

        # This merges both loss rasters together, so it takes a lot of memory and time. It seems to max out
        # at about 180 GB, then go back to 0. This took about 8 minutes.
        uu.print_log("Merging input loss rasters into a composite for all years...")
        cmd = ['gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged),
               '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte',
               '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY),
               'Prodes2019_annual_loss_2008_2019.tif', 'Prodes2014_annual_loss_2001_2007.tif']
        uu.log_subprocess_output_full(cmd)
        uu.print_log("  Loss rasters combined into composite")

        # Uploads the merged loss raster to s3 for future reference
        uu.upload_final_set(cn.Brazil_annual_loss_merged_dir, cn.pattern_Brazil_annual_loss_merged)

        # Creates annual loss 2001-2019 tiles
        uu.print_log("Warping composite PRODES loss to Hansen tiles...")
        source_raster = '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged)
        out_pattern = cn.pattern_Brazil_annual_loss_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count/2))
        pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern, dt=dt,
                         no_upload=no_upload), tile_id_list)
        uu.print_log("  PRODES composite loss raster warped to Hansen tiles")

        # Checks if each tile has data in it. Only tiles with data are uploaded.
        # In practice, every Amazon tile has loss in it, but the check is run anyway to be thorough.
        upload_dir = master_output_dir_list[1]
        pattern = master_output_pattern_list[1]
        pool = multiprocessing.Pool(cn.count - 5)
        pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)

    # Creates forest age category tiles
    if 'forest_age_category' in actual_stages:
        uu.print_log('Creating forest age category tiles')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir: [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[2]

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function.
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # With processes=30, peak usage was about 350 GB using WHRC AGB.
        # processes=26 maxes out above 480 GB for biomass_swap, so better to use fewer than that.
        pool = multiprocessing.Pool(int(cn.count/2))
        pool.map(partial(legal_AMZ_loss.legal_Amazon_forest_age_category,
                         sensit_type=sensit_type, output_pattern=output_pattern),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #
        #     legal_AMZ_loss.legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern)

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[2], stage_output_pattern_list[2])

    # Creates tiles of the number of years of removals
    if 'gain_year_count' in actual_stages:
        uu.print_log('Creating gain year count tiles for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.Brazil_annual_loss_processed_dir: [cn.pattern_Brazil_annual_loss_processed],
            cn.gain_dir: [cn.pattern_gain],
            cn.WHRC_biomass_2000_non_mang_non_planted_dir: [cn.pattern_WHRC_biomass_2000_non_mang_non_planted],
            cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked],
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.Brazil_forest_extent_2000_processed_dir: [cn.pattern_Brazil_forest_extent_2000_processed]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list)
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list)

        output_pattern = stage_output_pattern_list[3]

        pool = multiprocessing.Pool(int(cn.count/3))
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only, sensit_type=sensit_type),
                 tile_id_list)
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change, sensit_type=sensit_type),
                 tile_id_list)
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard, sensit_type=sensit_type),
                 tile_id_list)
        pool.close()
        pool.join()

        # count/5 uses more than 160 GB of memory; count/8 uses about 120 GB of memory.
        pool = multiprocessing.Pool(int(cn.count/8))
        pool.map(partial(legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge, output_pattern=output_pattern),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type)
        #
        # for tile_id in tile_id_list:
        #     legal_AMZ_loss.legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern)

        # Uploads intermediate output tiles for checking outputs
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_gain_only")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_no_change")
        uu.upload_final_set(stage_output_dir_list[3], "growth_years_loss_and_gain")

        # Uploads output from this stage
        uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3])

    # Creates tiles of annual AGB and BGB gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'annual_removals' in actual_stages:
        uu.print_log('Creating annual removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.plant_pre_2000_processed_dir: [cn.pattern_plant_pre_2000]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (annual removals).
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[4:6])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[4:6])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # Table with IPCC Table 4.9 default gain rates
        cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="natrl fores gain, for std model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

        # Converts gain table from wide to long, so each continent-ecozone-age category has its own row
        gain_table_cont_eco_age = pd.melt(gain_table_simplified, id_vars=['gainEcoCon'],
                                          value_vars=['growth_primary', 'growth_secondary_greater_20',
                                                      'growth_secondary_less_20'])
        gain_table_cont_eco_age = gain_table_cont_eco_age.dropna()

        # Creates a table that has just the continent-ecozone combinations for adding to the dictionary.
        # These will be used whenever there is just a continent-ecozone pixel without a forest age pixel.
        # Assigns removal rate of 0 when there's no age category.
        gain_table_con_eco_only = gain_table_cont_eco_age
        gain_table_con_eco_only = gain_table_con_eco_only.drop_duplicates(subset='gainEcoCon', keep='first')
        gain_table_con_eco_only['value'] = 0
        gain_table_con_eco_only['cont_eco_age'] = gain_table_con_eco_only['gainEcoCon']

        # Creates a code for each age category so that each continent-ecozone-age combo can have its own unique value
        age_dict = {'growth_primary': 10000, 'growth_secondary_greater_20': 20000, 'growth_secondary_less_20': 30000}

        # Creates a unique value for each continent-ecozone-age category
        gain_table_cont_eco_age = gain_table_cont_eco_age.replace({"variable": age_dict})
        gain_table_cont_eco_age['cont_eco_age'] = gain_table_cont_eco_age['gainEcoCon'] + gain_table_cont_eco_age['variable']

        # Merges the table of just continent-ecozone codes and the table of continent-ecozone-age codes
        gain_table_all_combos = pd.concat([gain_table_con_eco_only, gain_table_cont_eco_age])

        # Converts the continent-ecozone-age codes and corresponding gain rates to a dictionary
        gain_table_dict = pd.Series(gain_table_all_combos.value.values,
                                    index=gain_table_all_combos.cont_eco_age).to_dict()

        # Adds a dictionary entry for where the ecozone-continent-age code is 0 (not in a continent)
        gain_table_dict[0] = 0

        # Adds a dictionary entry for each forest age code for pixels that have forest age but no continent-ecozone
        for key, value in age_dict.items():
            gain_table_dict[value] = 0

        # Converts all the keys (continent-ecozone-age codes) to float type
        gain_table_dict = {float(key): value for key, value in gain_table_dict.items()}
        uu.print_log(gain_table_dict)

        # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function.
        # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
        # processes=24 peaks at about 440 GB of memory on an r4.16xlarge machine.
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count/2))
        pool.map(partial(annual_gain_rate_natrl_forest.annual_gain_rate,
                         sensit_type=sensit_type,
                         gain_table_dict=gain_table_dict,
                         output_pattern_list=output_pattern_list),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_id_list:
        #
        #     annual_gain_rate_natrl_forest.annual_gain_rate(tile, sensit_type, gain_table_dict, stage_output_pattern_list)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i])

    # Creates tiles of cumulative AGCO2 and BGCO2 gain rate for non-mangrove, non-planted forest using the standard model
    # removal function
    if 'cumulative_removals' in actual_stages:
        uu.print_log('Creating cumulative removals for natural forest')

        # Files to download for this script.
        download_dict = {
            cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_natrl_forest_dir: [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.gain_year_count_natrl_forest_dir: [cn.pattern_gain_year_count_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (cumulative removals).
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[6:8])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[6:8])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        output_pattern_list = stage_output_pattern_list

        # Calculates cumulative aboveground carbon gain in natural (non-mangrove, non-planted) forests
        pool = multiprocessing.Pool(int(cn.count/3))
        pool.map(partial(cumulative_gain_natrl_forest.cumulative_gain_AGCO2,
                         output_pattern_list=output_pattern_list, sensit_type=sensit_type),
                 tile_id_list)

        # Calculates cumulative belowground carbon gain in natural (non-mangrove, non-planted) forests
        pool.map(partial(cumulative_gain_natrl_forest.cumulative_gain_BGCO2,
                         output_pattern_list=output_pattern_list, sensit_type=sensit_type),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_AGCO2(tile_id, stage_output_pattern_list[0], sensit_type)
        #
        # for tile_id in tile_id_list:
        #     cumulative_gain_natrl_forest.cumulative_gain_BGCO2(tile_id, stage_output_pattern_list[1], sensit_type)

        # Uploads outputs from this stage
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i])

    # Creates tiles of annual gain rate and cumulative removals for all forest types (above + belowground)
    if 'removals_merged' in actual_stages:
        uu.print_log('Creating annual and cumulative removals for all forest types combined (above + belowground)')

        # Files to download for this script
        download_dict = {
            cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults],
            cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove],
            cn.annual_gain_BGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_BGB_planted_forest_non_mangrove],
            cn.annual_gain_BGB_natrl_forest_dir: [cn.pattern_annual_gain_BGB_natrl_forest],
            cn.cumul_gain_AGCO2_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.cumul_gain_BGCO2_mangrove_dir: [cn.pattern_cumul_gain_BGCO2_mangrove],
            cn.cumul_gain_BGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_BGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_BGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_BGCO2_natrl_forest]
        }

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        # If the model run isn't the standard one, the output directory and file names are changed.
        # This adapts just the relevant items in the output directory and pattern lists (merged removals).
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[8:10])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[8:10])

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # For multiprocessing
        output_pattern_list = stage_output_pattern_list
        pool = multiprocessing.Pool(int(cn.count/3))
        pool.map(partial(merge_cumulative_annual_gain_all_forest_types.gain_merge,
                         output_pattern_list=output_pattern_list, sensit_type=sensit_type),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     merge_cumulative_annual_gain_all_forest_types.gain_merge(tile_id, output_pattern_list, sensit_type)

        # Uploads output tiles to s3
        for i in range(0, len(stage_output_dir_list)):
            uu.upload_final_set(stage_output_dir_list[i], stage_output_pattern_list[i])

    # Creates carbon pools in loss year
    if 'carbon_pools' in actual_stages:
        uu.print_log('Creating emissions year carbon pools')

        # Specifies that carbon pools are created for loss year rather than in 2000
        extent = 'loss'

        # Files to download for this script
        download_dict = {
            cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
            cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
            cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
            cn.precip_processed_dir: [cn.pattern_precip],
            cn.elevation_processed_dir: [cn.pattern_elevation],
            cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000],
            cn.gain_dir: [cn.pattern_gain],
            cn.cumul_gain_AGCO2_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_mangrove],
            cn.cumul_gain_AGCO2_planted_forest_non_mangrove_dir: [cn.pattern_cumul_gain_AGCO2_planted_forest_non_mangrove],
            cn.cumul_gain_AGCO2_natrl_forest_dir: [cn.pattern_cumul_gain_AGCO2_natrl_forest],
            cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove],
            cn.annual_gain_AGB_planted_forest_non_mangrove_dir: [cn.pattern_annual_gain_AGB_planted_forest_non_mangrove],
            cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults]
        }

        # Adds the correct AGB tiles to the download dictionary depending on the model run
        if sensit_type == 'biomass_swap':
            download_dict[cn.JPL_processed_dir] = [cn.pattern_JPL_unmasked_processed]
        else:
            download_dict[cn.WHRC_biomass_2000_unmasked_dir] = [cn.pattern_WHRC_biomass_2000_unmasked]

        # Adds the correct loss tile to the download dictionary depending on the model run
        if sensit_type == 'legal_Amazon_loss':
            download_dict[cn.Brazil_annual_loss_processed_dir] = [cn.pattern_Brazil_annual_loss_processed]
        else:
            download_dict[cn.loss_dir] = ['']

        tile_id_list = uu.tile_list_s3(cn.Brazil_forest_extent_2000_processed_dir)
        # tile_id_list = ['00N_050W']
        uu.print_log(tile_id_list)
        uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

        for key, values in download_dict.items():
            dir = key
            pattern = values[0]
            uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type, tile_id_list)

        # If the model run isn't the standard one, the output directory and file names are changed
        if sensit_type != 'std':
            uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
            stage_output_dir_list = uu.alter_dirs(sensit_type, master_output_dir_list[10:16])
            stage_output_pattern_list = uu.alter_patterns(sensit_type, master_output_pattern_list[10:16])

        # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
        cmd = ['aws', 's3', 'cp', os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet), cn.docker_base_dir]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        pd.options.mode.chained_assignment = None

        # Imports the table with the ecozone-continent codes and the carbon gain rates
        gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet), sheet_name="mangrove gain, for model")

        # Removes rows with duplicate codes (N. and S. America for the same ecozone)
        gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

        mang_BGB_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                          cn.below_to_above_trop_dry_mang,
                                                                          cn.below_to_above_trop_wet_mang,
                                                                          cn.below_to_above_subtrop_mang)

        mang_deadwood_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                               cn.deadwood_to_above_trop_dry_mang,
                                                                               cn.deadwood_to_above_trop_wet_mang,
                                                                               cn.deadwood_to_above_subtrop_mang)

        mang_litter_AGB_ratio = create_carbon_pools.mangrove_pool_ratio_dict(gain_table_simplified,
                                                                             cn.litter_to_above_trop_dry_mang,
                                                                             cn.litter_to_above_trop_wet_mang,
                                                                             cn.litter_to_above_subtrop_mang)

        if extent == 'loss':
            uu.print_log("Creating tiles of emitted aboveground carbon (carbon 2000 + carbon accumulation until loss year)")
            # 16 processors seem to use more than 460 GB; the run was stopped at 460 GB, so the true peak is unknown.
            # 14 processors max out at 410-415 GB.
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(int(cn.count/4))
            pool.map(partial(create_carbon_pools.create_emitted_AGC, pattern=pattern, sensit_type=sensit_type),
                     tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_emitted_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0])

        elif extent == '2000':
            uu.print_log("Creating tiles of aboveground carbon in 2000")
            # 16 processors seem to use more than 460 GB; the run was stopped at 460 GB, so the true peak is unknown.
            # 14 processors max out at 415 GB.
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[0]
            pool = multiprocessing.Pool(processes=14)
            pool.map(partial(create_carbon_pools.create_2000_AGC, pattern=pattern, sensit_type=sensit_type),
                     tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_2000_AGC(tile_id, stage_output_pattern_list[0], sensit_type)

            uu.upload_final_set(stage_output_dir_list[0], stage_output_pattern_list[0])

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of belowground carbon")
        # 18 processors used between 300 and 400 GB of memory, so this was okay on an r4.16xlarge spot machine
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[1]
        pool = multiprocessing.Pool(int(cn.count/2))
        pool.map(partial(create_carbon_pools.create_BGC, mang_BGB_AGB_ratio=mang_BGB_AGB_ratio,
                         extent=extent, pattern=pattern, sensit_type=sensit_type),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_BGC(tile_id, mang_BGB_AGB_ratio, extent, stage_output_pattern_list[1], sensit_type)

        uu.upload_final_set(stage_output_dir_list[1], stage_output_pattern_list[1])

        uu.print_log("Creating tiles of deadwood carbon")
        # processes=16 maxes out at about 430 GB
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[2]
        pool = multiprocessing.Pool(int(cn.count/4))
        pool.map(partial(create_carbon_pools.create_deadwood, mang_deadwood_AGB_ratio=mang_deadwood_AGB_ratio,
                         extent=extent, pattern=pattern, sensit_type=sensit_type),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_deadwood(tile_id, mang_deadwood_AGB_ratio, extent, stage_output_pattern_list[2], sensit_type)

        uu.upload_final_set(stage_output_dir_list[2], stage_output_pattern_list[2])

        uu.print_log("Creating tiles of litter carbon")
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[3]
        pool = multiprocessing.Pool(int(cn.count/4))
        pool.map(partial(create_carbon_pools.create_litter, mang_litter_AGB_ratio=mang_litter_AGB_ratio,
                         extent=extent, pattern=pattern, sensit_type=sensit_type),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_litter(tile_id, mang_litter_AGB_ratio, extent, stage_output_pattern_list[3], sensit_type)

        uu.upload_final_set(stage_output_dir_list[3], stage_output_pattern_list[3])

        if extent == 'loss':
            uu.print_log("Creating tiles of soil carbon")
            # Creates a single filename pattern to pass to the multiprocessor call
            pattern = stage_output_pattern_list[4]
            pool = multiprocessing.Pool(int(cn.count/3))
            pool.map(partial(create_carbon_pools.create_soil, pattern=pattern, sensit_type=sensit_type),
                     tile_id_list)
            pool.close()
            pool.join()

            # # For single processor use
            # for tile_id in tile_id_list:
            #     create_carbon_pools.create_soil(tile_id, stage_output_pattern_list[4], sensit_type)

            uu.upload_final_set(stage_output_dir_list[4], stage_output_pattern_list[4])

        elif extent == '2000':
            uu.print_log("Skipping soil for 2000 carbon pool calculation")

        else:
            uu.exception_log(no_upload, "Extent argument not valid")

        uu.print_log("Creating tiles of total carbon")
        # Several processor counts were tried for this; 14 processors peaked at about 380 GB of memory.
        # An r4.16xlarge machine could probably handle 16 processors, but that was not tested.
        # Creates a single filename pattern to pass to the multiprocessor call
        pattern = stage_output_pattern_list[5]
        pool = multiprocessing.Pool(int(cn.count/4))
        pool.map(partial(create_carbon_pools.create_total_C, extent=extent, pattern=pattern, sensit_type=sensit_type),
                 tile_id_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile_id in tile_id_list:
        #     create_carbon_pools.create_total_C(tile_id, extent, stage_output_pattern_list[5], sensit_type)

        uu.upload_final_set(stage_output_dir_list[5], stage_output_pattern_list[5])
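
    # For reference, a minimal sketch of the Pool + functools.partial pattern used throughout this script
    # to pass fixed keyword arguments alongside the iterated tile_id (illustrative only; 'worker' and its
    # arguments are made-up names, not part of this model):
    #
    #   from functools import partial
    #   import multiprocessing
    #
    #   def worker(tile_id, pattern=None, sensit_type=None):
    #       return '{0}_{1}_{2}'.format(tile_id, pattern, sensit_type)
    #
    #   pool = multiprocessing.Pool(2)
    #   results = pool.map(partial(worker, pattern='demo', sensit_type='std'), ['00N_000E', '00N_010E'])
    #   pool.close()
    #   pool.join()
    #   # results == ['00N_000E_demo_std', '00N_010E_demo_std']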