def rasterize_pre_2000_plantations(tile_id):
    """Rasterize the pre-2000 plantation shapefile over the extent of one tile.

    Runs gdal_rasterize on '<cn.pattern_plant_pre_2000_raw>.shp', burning the
    value 1 into a Byte, LZW-compressed GeoTIFF named
    '<tile_id>_<cn.pattern_plant_pre_2000>.tif' with 0 as NoData. The
    subprocess output is forwarded to the log.

    Args:
        tile_id: Tile identifier; passed to uu.coords() to get the bounding box.
    """
    started_at = datetime.datetime.now()

    uu.print_log("Getting extent of", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    raster_name = '{0}_{1}.tif'.format(tile_id, cn.pattern_plant_pre_2000)
    shapefile = '{}.shp'.format(cn.pattern_plant_pre_2000_raw)
    pixel_size = '{}'.format(cn.Hansen_res)
    bounds = [str(edge) for edge in (xmin, ymin, xmax, ymax)]

    # Assembled piecewise; the resulting argument order is what gdal_rasterize receives.
    cmd = ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=LZW']
    cmd += ['-tr', pixel_size, pixel_size, '-tap']
    cmd += ['-ot', 'Byte', '-a_nodata', '0']
    cmd += ['-te'] + bounds
    cmd += [shapefile, raster_name]

    # Streams the subprocess's combined stdout/stderr into the log. Approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    proc = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with proc.stdout:
        uu.log_subprocess_output(proc.stdout)

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, cn.pattern_plant_pre_2000)
def legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern):
    """Merge a tile's growth-year component rasters into one raster with gdal_merge.py.

    The inputs are '<tile_id>_growth_years_loss_only.tif',
    '<tile_id>_growth_years_no_change.tif' and
    '<tile_id>_growth_years_loss_and_gain.tif' (three files, in that order).
    The output is '<tile_id>_<output_pattern>.tif', Byte type, LZW-compressed,
    with 0 as NoData.

    Args:
        tile_id: Tile identifier used as the file-name prefix.
        output_pattern: File-name pattern of the merged output raster.
    """
    uu.print_log("Merging loss, gain, no change, and loss/gain pixels into single raster for {}".format(tile_id))

    started_at = datetime.datetime.now()

    # Component rasters, in the order they are handed to gdal_merge.py
    component_names = ('loss_only', 'no_change', 'loss_and_gain')
    component_tiles = ['{}_growth_years_{}.tif'.format(tile_id, name) for name in component_names]

    merged_tile = '{}_{}.tif'.format(tile_id, output_pattern)

    cmd = ['gdal_merge.py', '-o', merged_tile]
    cmd.extend(component_tiles)
    cmd.extend(['-co', 'COMPRESS=LZW', '-a_nodata', '0', '-ot', 'Byte'])

    # Streams the subprocess's combined stdout/stderr into the log. Approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    proc = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with proc.stdout:
        uu.log_subprocess_output(proc.stdout)

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, output_pattern)
def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type):
    """Create a percent-difference raster between a sensitivity run and the standard run.

    Computes (A-B)/absolute(B)*100 with gdal_calc.py, where A is the sensitivity
    raster and B is the standard raster. The output is named
    '<cn.pattern_aggreg_sensit_perc_diff>_<sensit_type>_<YYYY_MM_DD>.tif'.

    Args:
        std_aggreg_flux: Path of the standard-model aggregated raster (input B).
        sensit_aggreg_flux: Path of the sensitivity-analysis aggregated raster (input A).
        sensit_type: Sensitivity analysis name, used in the output file name.
    """
    started_at = datetime.datetime.now()
    date_stamp = datetime.datetime.now().strftime("%Y_%m_%d")

    uu.print_log(sensit_aggreg_flux)
    uu.print_log(std_aggreg_flux)

    # NOTE (carried over from the original author): gdal_calc.py reports divide-by-zero errors here.
    # They were judged harmless (NoData divided by NoData) and not to affect the output.
    # For model v1.2.0, however, this produced incorrect values for the biomass_swap analysis for
    # unknown reasons, and that raster was made with the ArcMap raster calculator instead. All other
    # analyses (including legal_Amazon_loss) worked. The divide by 0 may be what throws the values off.
    # An earlier version of this command also passed '--NoDataValue=0'; the current one does not.
    diff_tile = '{0}_{1}_{2}.tif'.format(cn.pattern_aggreg_sensit_perc_diff, sensit_type, date_stamp)

    cmd = ['gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux]
    cmd.append('--calc=(A-B)/absolute(B)*100')
    cmd.append('--outfile={}'.format(diff_tile))
    cmd.extend(['--overwrite', '--co', 'COMPRESS=LZW', '--quiet'])
    uu.log_subprocess_output_full(cmd)

    # Logs the end-of-function summary (labelled 'global' rather than by tile)
    uu.end_of_fx_summary(started_at, 'global', sensit_aggreg_flux)
def legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type):
    """Create the gain-year-count raster for pixels that have both loss and gain.

    Runs gdal_calc.py with A=loss, B=gain, C=extent and writes
    '<tile_id>_growth_years_loss_and_gain.tif' (Byte, LZW, NoData 0).

    Args:
        tile_id: Tile identifier.
        sensit_type: Sensitivity analysis name, forwarded to tile_names().
    """
    uu.print_log("Gain year count for loss and gain pixels:", tile_id)

    started_at = datetime.datetime.now()

    # tile_names() yields four names; the biomass one is not needed here
    loss, gain, extent, _biomass = tile_names(tile_id, sensit_type)

    # Selects pixels where loss > 0, gain == 1 and extent == 1. Their value is
    # (A-1) plus half of the years remaining after the loss year.
    calc_arg = '--calc=((A>0)*(B==1)*(C==1)*((A-1)+({}+1-A)/2))'.format(cn.loss_years)
    out_tile = '{}_growth_years_loss_and_gain.tif'.format(tile_id)
    out_arg = '--outfile={}'.format(out_tile)

    cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent, calc_arg, out_arg]
    cmd += ['--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']

    # Streams the subprocess's combined stdout/stderr into the log. Approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    proc = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with proc.stdout:
        uu.log_subprocess_output(proc.stdout)

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, 'growth_years_loss_and_gain')
def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type):
    """Create the gain-year-count raster for pixels without loss.

    Builds a VRT of the loss tile, then runs gdal_calc.py with A=loss VRT,
    B=extent, C=biomass and writes '<tile_id>_growth_years_no_change.tif'
    (Byte, LZW, NoData 0). The VRT '<tile_id>_loss.vrt' is left on disk.

    Args:
        tile_id: Tile identifier.
        sensit_type: Sensitivity analysis name, forwarded to tile_names().
    """
    uu.print_log("Gain year count for non-loss pixels:", tile_id)

    started_at = datetime.datetime.now()

    # tile_names() yields four names; the gain one is not needed here
    loss, _gain, extent, biomass = tile_names(tile_id, sensit_type)

    # Per the original author: gdal_calc does not register the 0 (NoData) pixels in the loss tile,
    # so the tile is wrapped in a VRT with NoData unset to make the 0 pixels visible.
    loss_vrt = '{}_loss.vrt'.format(tile_id)
    vrt_command = 'gdalbuildvrt -vrtnodata None {0} {1}'.format(loss_vrt, loss)
    os.system(vrt_command)

    # Pixels with no loss (A==0), extent == 1 and biomass > 0 get the full number of loss years
    calc_arg = '--calc=(A==0)*(B==1)*(C>0)*{}'.format(cn.loss_years)
    out_tile = '{}_growth_years_no_change.tif'.format(tile_id)
    out_arg = '--outfile={}'.format(out_tile)

    cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', extent, '-C', biomass, calc_arg, out_arg]
    cmd += ['--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']

    # Streams the subprocess's combined stdout/stderr into the log. Approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    proc = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with proc.stdout:
        uu.log_subprocess_output(proc.stdout)

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, 'growth_years_no_change')
def create_mangrove_soil_C(tile_id, no_upload):
    """Create a mangrove soil C tile masked to where mangrove biomass is present.

    Nothing is created unless '<tile_id>_<cn.pattern_mangrove_biomass_2000>.tif'
    exists. When it does, 'mangrove_soil_C.vrt' is warped to the tile extent and
    then multiplied by (biomass > 0) with gdal_calc.py, producing
    '<tile_id>_mangrove_masked_to_mangrove.tif' (Int16, DEFLATE, NoData 0).

    Args:
        tile_id: Tile identifier.
        no_upload: Forwarded unchanged to uu.end_of_fx_summary().
    """
    started_at = datetime.datetime.now()

    biomass_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)

    if not os.path.exists(biomass_tile):
        uu.print_log("No mangrove aboveground biomass tile for", tile_id)
    else:
        uu.print_log("Mangrove aboveground biomass tile found for", tile_id)

        uu.print_log("Getting extent of", tile_id)
        xmin, ymin, xmax, ymax = uu.coords(tile_id)

        full_extent_tile = '{0}_mangrove_full_extent.tif'.format(tile_id)

        uu.print_log("Clipping mangrove soil C from mangrove soil vrt for", tile_id)
        uu.warp_to_Hansen('mangrove_soil_C.vrt', full_extent_tile, xmin, ymin, xmax, ymax, 'Int16')

        masked_tile = '{0}_mangrove_masked_to_mangrove.tif'.format(tile_id)

        uu.print_log("Masking mangrove soil to mangrove biomass for", tile_id)

        # A = soil C clipped to the tile, B = mangrove biomass; keeps A only where B > 0
        cmd = ['gdal_calc.py', '-A', full_extent_tile, '-B', biomass_tile]
        cmd += ['--calc=A*(B>0)', '--outfile={}'.format(masked_tile)]
        cmd += ['--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--overwrite', '--type=Int16', '--quiet']
        uu.log_subprocess_output_full(cmd)

    # Logs the end-of-function summary for this tile (runs whether or not a tile was made)
    uu.end_of_fx_summary(started_at, tile_id, 'mangrove_masked_to_mangrove', no_upload)
def create_gain_year_count_no_change_legal_Amazon_loss(tile_id, sensit_type, no_upload):
    """Create the gain-year-count raster for pixels without loss (legal_Amazon_loss variant).

    Builds a temporary VRT of the loss tile, runs gdal_calc.py with A=loss VRT
    and B=model extent to write '<tile_id>_growth_years_no_change.tif'
    (Byte, LZW, NoData 0), then deletes the VRT.

    Args:
        tile_id: Tile identifier.
        sensit_type: Sensitivity analysis name, forwarded to tile_names().
        no_upload: Forwarded unchanged to uu.end_of_fx_summary().
    """
    uu.print_log("Gain year count for pixels without loss for legal_Amazon_loss:", tile_id)

    # tile_names() yields three names here; gain is not used
    loss, _gain, model_extent = tile_names(tile_id, sensit_type)

    started_at = datetime.datetime.now()

    # Per the original author: gdal_calc does not register the 0 (NoData) pixels in the loss tile,
    # so the tile is wrapped in a VRT with NoData unset. This was the case with PRODES loss in
    # model v.1.1.2.
    loss_vrt = '{}_loss.vrt'.format(tile_id)
    vrt_command = 'gdalbuildvrt -vrtnodata None {0} {1}'.format(loss_vrt, loss)
    os.system(vrt_command)

    # Pixels with no loss (A==0) inside the model extent (B>0) get the full number of loss years
    calc_arg = '--calc=(A==0)*(B>0)*{}'.format(cn.loss_years)
    out_arg = '--outfile={}'.format('{}_growth_years_no_change.tif'.format(tile_id))

    cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', model_extent, calc_arg, out_arg]
    cmd += ['--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']
    uu.log_subprocess_output_full(cmd)

    # The VRT was only needed as gdal_calc input
    os.remove(loss_vrt)

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, 'growth_years_no_change', no_upload)
def main():
    """Convert the raw JPL/Saatchi biomass raster into tiles and upload the tiles that hold data.

    Steps, in order:
      1. Start the log and change to cn.docker_base_dir.
      2. List the tiles found in cn.WHRC_biomass_2000_unmasked_dir.
      3. Download the raw raster (cn.JPL_raw_name) from cn.JPL_raw_dir.
      4. Warp it to tiles in parallel with uu.mp_warp_to_Hansen.
      5. Run uu.check_and_upload on every tile in parallel.

    Each worker pool is closed and joined as soon as its map() call returns, so
    the first pool's worker processes do not linger while the second pool runs.
    """
    no_upload = False

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # The list of tiles to iterate through
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
    # tile_id_list = ['00N_110E'] # test tile
    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # By definition, this script is for the biomass swap analysis (replacing WHRC AGB with Saatchi/JPL AGB)
    sensit_type = 'biomass_swap'

    # Downloads a pan-tropical raster that has the erroneous integer values in the oceans removed
    uu.s3_file_download(cn.JPL_raw_dir, cn.JPL_raw_name, sensit_type)

    # Converts the Saatchi AGB vrt to Hansen tiles
    source_raster = cn.JPL_raw_name
    out_pattern = cn.pattern_JPL_unmasked_processed
    dt = 'Float32'
    pool = multiprocessing.Pool(cn.count - 5)  # count-5 peaks at 320GB of memory
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern,
                     dt=dt, no_upload=no_upload), tile_id_list)
    # Releases the worker processes before the next pool is started
    pool.close()
    pool.join()

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.JPL_processed_dir
    pattern = cn.pattern_JPL_unmasked_processed
    pool = multiprocessing.Pool(cn.count - 5)  # count-5 peaks at 410GB of memory
    pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
    pool.close()
    pool.join()
def create_1x1_plantation_type_from_1x1_planted(tile_1x1):
    """Rasterize the plantation type attribute from PostGIS for one 1x1 degree tile.

    The bounding box is parsed from the '_'-separated tile file name. The
    'type_reclass' attribute of the PostGIS layer 'all_plant' is burned into
    'plant_type_<ymax>_<xmin>.tif' (Byte, LZW, NoData 0).

    Args:
        tile_1x1: File name of the 1x1 tile. Its third '_'-separated field is
            read as ymax and its fourth (minus the last four characters,
            presumably a '.tif' extension -- TODO confirm) as xmin.
    """
    # Derives the 1x1 degree bounding box from the file name
    name_parts = tile_1x1.split("_")
    xmin_1x1 = str(name_parts[3])[:-4]
    xmax_1x1 = int(xmin_1x1) + 1
    ymax_1x1 = int(name_parts[2])
    ymin_1x1 = ymax_1x1 - 1

    uu.print_log("For", tile_1x1, "-- xmin_1x1:", xmin_1x1, "; xmax_1x1:", xmax_1x1, "; ymin_1x1", ymin_1x1, "; ymax_1x1:", ymax_1x1)

    uu.print_log("There are plantations in {}. Converting to raster...".format(tile_1x1))

    pixel_size = '{}'.format(cn.Hansen_res)
    out_tile = 'plant_type_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1)
    extent = [str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1)]

    # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
    cmd = ['gdal_rasterize', '-tr', pixel_size, pixel_size, '-co', 'COMPRESS=LZW']
    cmd += ['PG:dbname=ubuntu', '-l', 'all_plant', out_tile]
    cmd += ['-te'] + extent
    cmd += ['-a', 'type_reclass', '-a_nodata', '0', '-ot', 'Byte']

    # Streams the subprocess's combined stdout/stderr into the log. Approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    proc = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with proc.stdout:
        uu.log_subprocess_output(proc.stdout)
def create_1x1_plantation_from_1x1_gadm(tile_1x1):
    """Rasterize plantation gain rate and type for a 1x1 degree tile if it contains plantations.

    The bounding box is parsed from the '_'-separated tile file name. The
    PostGIS table 'all_plant' is queried for features intersecting that box.
    If any are found, two rasters are written with gdal_rasterize:
    'plant_gain_<ymax>_<xmin>.tif' (attribute 'growth') and
    'plant_type_<ymax>_<xmin>.tif' (attribute 'type_reclass'). Otherwise
    nothing is written.

    The database cursor and connection are always closed, even if the query fails.

    Args:
        tile_1x1: File name of the 1x1 tile. Its second '_'-separated field is
            read as ymax and its third (minus the last four characters,
            presumably a '.tif' extension -- TODO confirm) as xmin.
    """
    # Gets the bounding coordinates for the 1x1 degree tile
    coords = tile_1x1.split("_")
    uu.print_log(coords)
    xmin_1x1 = str(coords[2])[:-4]
    xmax_1x1 = int(xmin_1x1) + 1
    ymax_1x1 = int(coords[1])
    ymin_1x1 = ymax_1x1 - 1

    uu.print_log("For", tile_1x1, "-- xmin_1x1:", xmin_1x1, "; xmax_1x1:", xmax_1x1, "; ymin_1x1", ymin_1x1, "; ymax_1x1:", ymax_1x1)

    # Connects Python to PostGIS using psycopg2. The credentials work on spot machines as they are currently configured
    # and are based on this: https://github.com/wri/gfw-annual-loss-processing/blob/master/1b_Summary-AOIs-to-TSV/utilities/postgis_util.py
    creds = {'host': 'localhost', 'user': '******', 'dbname': 'ubuntu'}
    conn = psycopg2.connect(**creds)
    try:
        cursor = conn.cursor()
        try:
            # Intersects the plantations PostGIS table with the 1x1 tile
            # https://gis.stackexchange.com/questions/30267/how-to-create-a-valid-global-polygon-grid-in-postgis
            # https://stackoverflow.com/questions/48978616/best-way-to-run-st-intersects-on-features-inside-one-table
            # https://postgis.net/docs/ST_Intersects.html
            uu.print_log("Checking if {} has plantations in it".format(tile_1x1))

            # The interpolated values are the coordinates parsed above: int(xmin_1x1) has already
            # succeeded and the others are ints, so only integers reach the SQL text.
            cursor.execute("SELECT growth FROM all_plant WHERE ST_Intersects(all_plant.wkb_geometry, ST_GeogFromText('POLYGON(({0} {1},{2} {1},{2} {3},{0} {3},{0} {1}))'))".format(
                xmin_1x1, ymax_1x1, xmax_1x1, ymin_1x1))

            # The rows returned by the intersection; only their presence matters below
            features = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # psycopg2 connections are not closed automatically; release it explicitly
        conn.close()

    # If no features in the PostGIS table were intersected with the 1x1 tile, nothing happens
    if not features:
        uu.print_log("There are no plantations in {}. Not converting to raster.".format(tile_1x1))
        return

    uu.print_log("There are plantations in {}. Converting to gain rate and plantation type rasters...".format(tile_1x1))

    # (output file prefix, attribute burned into the raster): gain rate first, then plantation type
    rasters_to_create = [('plant_gain', 'growth'), ('plant_type', 'type_reclass')]

    for out_prefix, attribute in rasters_to_create:

        # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector
        cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW',
               'PG:dbname=ubuntu', '-l', 'all_plant', '{0}_{1}_{2}.tif'.format(out_prefix, ymax_1x1, xmin_1x1),
               '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1),
               '-a', attribute, '-a_nodata', '0']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)
def reset_nodata(tile_id):
    """Unset the NoData value of a processed Mekong loss tile so that 0 becomes a real value.

    Runs 'gdal_edit.py -unsetnodata' on
    '<tile_id>_<cn.pattern_Mekong_loss_processed>.tif' and forwards the
    subprocess output to the log.

    Args:
        tile_id: Tile identifier used as the file-name prefix.
    """
    uu.print_log("Changing 0 from NoData to actual value for tile", tile_id)

    tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_Mekong_loss_processed)

    cmd = ['gdal_edit.py', '-unsetnodata', tile]

    # Actually runs the command (it was previously built but never executed).
    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    uu.print_log("Tile processed")
def loss_in_raster(tile_id, raster_type, output_name, lat, mask):
    """Mask a tile's loss by a raster of interest, for tiles north of a given latitude.

    The tile is processed only if its ymax (from uu.coords) is greater than
    `lat` and '<tile_id>_<raster_type>.tif' exists. The result is written to
    '<tile_id>_<output_name>.tif' with gdal_calc.py (A=loss, B=raster of interest).

    Args:
        tile_id: Tile identifier; the loss tile is '<tile_id>.tif'.
        raster_type: File-name pattern of the raster that loss is analyzed inside.
        output_name: File-name pattern of the output raster.
        lat: Latitude threshold; only tiles with ymax above it are processed.
        mask: The string "True" to use the loss-mask expression
            ((A>=1)*(A+1)/(A+1)*B), or the string "False" to output A*B.

    Raises:
        ValueError: If the tile is eligible for processing and `mask` is
            neither "True" nor "False".
    """
    uu.print_log("Calculating loss area for tile id {0}...".format(tile_id))

    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # start time
    start = datetime.datetime.now()

    # Name of the loss tile
    loss_tile = '{0}.tif'.format(tile_id)

    # The raster that loss is being analyzed inside
    raster_of_interest = '{0}_{1}.tif'.format(tile_id, raster_type)

    # Output file name
    outname = '{0}_{1}.tif'.format(tile_id, output_name)

    if not ymax > lat:
        uu.print_log("{} outside of latitude band. Skipped tile.".format(tile_id))

    elif not os.path.exists(raster_of_interest):
        # Reported separately so the log does not blame latitude for a missing input
        uu.print_log("{0} does not exist for {1}. Skipped tile.".format(raster_of_interest, tile_id))

    else:
        uu.print_log("{} inside latitude band and peat tile exists. Processing tile.".format(tile_id))

        if mask == "True":
            # The user has asked to create just a mask of loss as opposed to the actual output values
            calc = '--calc=(A>=1)*(A+1)/(A+1)*B'
        elif mask == "False":
            # The user has asked to output the actual loss values within the raster of interest
            calc = '--calc=A*B'
        else:
            # Fails clearly instead of hitting an unbound 'calc' further down
            raise ValueError('mask must be "True" or "False", not {!r}'.format(mask))

        # Argument for outputting file
        out = '--outfile={}'.format(outname)

        uu.print_log("Masking loss in {} by raster of interest...".format(tile_id))
        cmd = ['gdal_calc.py', '-A', loss_tile, '-B', raster_of_interest, calc, out, '--NoDataValue=0', '--co', 'COMPRESS=LZW',
               '--overwrite', '--quiet']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        uu.print_log("{} masked".format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_name)
def calc_emissions(tile_id, emitted_pools, sensit_type, folder, no_upload):
    """Run the compiled gross-emissions executable that matches the pools and model type.

    Executable selection (all live in cn.c_emis_compile_dst):
      * soil_only + std -> calc_gross_emissions_soil_only.exe
      * biomass_soil + convert_to_grassland or no_shifting_ag
        -> calc_gross_emissions_<sensit_type>.exe
      * biomass_soil + anything else -> calc_gross_emissions_generic.exe
    Any other combination is reported through uu.exception_log().

    Args:
        tile_id: Tile identifier, passed to the executable.
        emitted_pools: 'biomass_soil' or 'soil_only'.
        sensit_type: Model type / sensitivity analysis name, passed to the executable.
        folder: Passed to the executable as its third argument.
        no_upload: Forwarded to uu.exception_log() and uu.end_of_fx_summary().
    """
    uu.print_log("Calculating gross emissions for", tile_id, "using", sensit_type, "model type...")

    started_at = datetime.datetime.now()

    # soil_only, no_shifting_ag, and convert_to_grassland have their own gross emissions C++ programs.
    # The other sensitivity analyses and the standard model share the generic one.
    own_exe_sensit_types = ['convert_to_grassland', 'no_shifting_ag']

    if emitted_pools == 'soil_only' and sensit_type == 'std':
        exe_name = 'calc_gross_emissions_soil_only.exe'
    elif emitted_pools == 'biomass_soil' and sensit_type in own_exe_sensit_types:
        exe_name = 'calc_gross_emissions_{}.exe'.format(sensit_type)
    elif emitted_pools == 'biomass_soil':
        # Reached only when sensit_type is not one of the types with its own executable
        exe_name = 'calc_gross_emissions_generic.exe'
    else:
        uu.exception_log(no_upload, 'Pool and/or sensitivity analysis option not valid')

    cmd = ['{0}/{1}'.format(cn.c_emis_compile_dst, exe_name), tile_id, sensit_type, folder]
    uu.log_subprocess_output_full(cmd)

    # Works out which file-name pattern to use for counting tile completion
    pattern = cn.pattern_gross_emis_commod_biomass_soil
    if emitted_pools == 'biomass_soil':
        # The standard model keeps the base pattern; other model types get a suffix
        if sensit_type != 'std':
            pattern = pattern + "_" + sensit_type
    elif emitted_pools == 'soil_only':
        pattern = pattern.replace('biomass_soil', 'soil_only')
    else:
        uu.exception_log(no_upload, 'Pool option not valid')

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, pattern, no_upload)
def mp_mangrove_processing(tile_id_list, run_date=None, no_upload=None):
    """Convert the raw mangrove biomass rasters into tiles and upload the tiles that hold data.

    Downloads and unzips the raw mangrove archive, builds 'mangrove_biomass.vrt'
    from every .tif in the working directory, warps it to tiles in parallel with
    uu.mp_warp_to_Hansen, then runs uu.check_and_upload on every tile in parallel.

    Each worker pool is closed and joined as soon as its map() call returns, so
    the first pool's worker processes do not linger while the second pool runs.

    Args:
        tile_id_list: List of tile ids, or the string 'all' to use every tile
            listed in cn.pixel_area_dir.
        run_date: Accepted for interface compatibility; not used in this function.
        no_upload: Forwarded to uu.mp_warp_to_Hansen.
    """
    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.pixel_area_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads zipped raw mangrove files
    uu.s3_file_download(os.path.join(cn.mangrove_biomass_raw_dir, cn.mangrove_biomass_raw_file), cn.docker_base_dir, 'std')

    # Unzips mangrove images into a flat structure (all tifs into main folder using -j argument)
    # NOTE: Unzipping some tifs (e.g., Australia, Indonesia) takes a very long time, so don't worry if the script appears to stop on that.
    cmd = ['unzip', '-o', '-j', cn.mangrove_biomass_raw_file]
    uu.log_subprocess_output_full(cmd)

    # Creates vrt for the mangrove biomass rasters. The *.tif glob is expanded by the shell.
    mangrove_vrt = 'mangrove_biomass.vrt'
    os.system('gdalbuildvrt {} *.tif'.format(mangrove_vrt))

    # Converts the mangrove AGB vrt into Hansen tiles
    source_raster = mangrove_vrt
    out_pattern = cn.pattern_mangrove_biomass_2000
    dt = 'float32'
    processes = int(cn.count / 4)
    uu.print_log('Mangrove preprocessing max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.mp_warp_to_Hansen, source_raster=source_raster, out_pattern=out_pattern,
                     dt=dt, no_upload=no_upload), tile_id_list)
    # Releases the worker processes before the next pool is started
    pool.close()
    pool.join()

    # # For single processor use, for testing purposes
    # for tile_id in tile_id_list:
    #
    #     mangrove_processing.create_mangrove_tiles(tile_id, source_raster, out_pattern, no_upload)

    # Checks if each tile has data in it. Only tiles with data are uploaded.
    upload_dir = cn.mangrove_biomass_2000_dir
    pattern = cn.pattern_mangrove_biomass_2000
    processes = int(cn.count - 5)
    uu.print_log('Mangrove check for data max processors=', processes)
    pool = multiprocessing.Pool(processes)
    pool.map(partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern), tile_id_list)
    pool.close()
    pool.join()
def stack_ba_hv(hv_tile):
    """Stack one h/v tile's burned-area hdf files into a single raster per year and upload it.

    For each year in the loop range, the hdf files are downloaded into
    '<hv_tile>/<year>/raw/', converted to arrays, stacked, reduced with max
    along axis 0, written as a raster in '<hv_tile>/<year>/stacked/', and
    copied to cn.burn_year_stacked_hv_tif_dir. The raw folder is then deleted.
    Years for which no hdf file was downloaded are skipped (their raw folder is
    left in place).

    Args:
        hv_tile: The h/v tile name, used in folder names and passed to the
            utilities helpers.
    """
    for year in range(2019, 2020):  # End year is not included in burn year product

        # Downloads hdf files from s3 into folders by h and v
        raw_dir = utilities.makedir('{0}/{1}/raw/'.format(hv_tile, year))
        utilities.download_df(year, hv_tile, raw_dir)

        hdf_files = glob.glob(raw_dir + "*hdf")

        # Nothing to stack for this year
        if not hdf_files:
            continue

        # Converts every hdf to an array
        arrays = []
        for hdf_path in hdf_files:
            uu.print_log("converting hdf to array")
            arrays.append(utilities.hdf_to_array(hdf_path))

        # Stacks the arrays and collapses them to one array for the year and tile
        year_stack = utilities.stack_arrays(arrays)
        year_max = year_stack.max(0)

        # Writes the collapsed array as one raster, using the first hdf as the template
        stacked_dir = utilities.makedir('{0}/{1}/stacked/'.format(hv_tile, year))
        year_raster = utilities.array_to_raster(hv_tile, year, year_max, hdf_files[0], stacked_dir)

        # Uploads to s3
        upload_cmd = ['aws', 's3', 'cp', year_raster, cn.burn_year_stacked_hv_tif_dir]
        uu.log_subprocess_output_full(upload_cmd)

        # Removes the downloaded raw files
        shutil.rmtree(raw_dir)
def create_mangrove_soil_C(tile_id):
    """Create a mangrove soil C tile masked to where mangrove biomass is present.

    Nothing is created unless '<tile_id>_<cn.pattern_mangrove_biomass_2000>.tif'
    exists. When it does, 'mangrove_soil_C.vrt' is warped to the tile extent and
    then multiplied by (biomass > 0) with gdal_calc.py, producing
    '<tile_id>_mangrove_masked_to_mangrove.tif' (Int16, DEFLATE, NoData 0).

    Args:
        tile_id: Tile identifier.
    """
    started_at = datetime.datetime.now()

    biomass_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)

    if not os.path.exists(biomass_tile):
        uu.print_log("No mangrove aboveground biomass tile for", tile_id)
    else:
        uu.print_log("Mangrove aboveground biomass tile found for", tile_id)

        uu.print_log("Getting extent of", tile_id)
        xmin, ymin, xmax, ymax = uu.coords(tile_id)

        full_extent_tile = '{0}_mangrove_full_extent.tif'.format(tile_id)

        uu.print_log("Clipping mangrove soil C from mangrove soil vrt for", tile_id)
        uu.warp_to_Hansen('mangrove_soil_C.vrt', full_extent_tile, xmin, ymin, xmax, ymax, 'Int16')

        masked_tile = '{0}_mangrove_masked_to_mangrove.tif'.format(tile_id)

        uu.print_log("Masking mangrove soil to mangrove biomass for", tile_id)

        # A = soil C clipped to the tile, B = mangrove biomass; keeps A only where B > 0
        cmd = ['gdal_calc.py', '-A', full_extent_tile, '-B', biomass_tile]
        cmd += ['--calc=A*(B>0)', '--outfile={}'.format(masked_tile)]
        cmd += ['--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--overwrite', '--type=Int16', '--quiet']

        # Streams the subprocess's combined stdout/stderr into the log. Approach from
        # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        proc = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with proc.stdout:
            uu.log_subprocess_output(proc.stdout)

    # Logs the end-of-function summary for this tile (runs whether or not a tile was made)
    uu.end_of_fx_summary(started_at, tile_id, 'mangrove_masked_to_mangrove')
def recode_tiles(annual_loss):
    """Recode one annual loss raster so that loss pixels carry the loss year (year - 2000).

    The year is read from characters [-8:-4] of the file name. Years outside
    2001..(2000 + cn.loss_years) are skipped. Otherwise gdal_calc.py writes
    'Mekong_loss_recoded_<year>.tif', in which pixels equal to 100 in the input
    become (year - 2000) and NoData is 0.

    Args:
        annual_loss: File name of the annual loss raster; the four characters
            before the last four must be the year.
    """
    uu.print_log("Recoding loss tile by year")

    year = int(annual_loss[-8:-4])
    uu.print_log(year)

    last_model_year = 2000 + cn.loss_years
    if year < 2001 or year > last_model_year:
        uu.print_log("Skipping {} because outside of model range".format(year))
        return

    year_code = int(year - 2000)
    recoded_tile = "Mekong_loss_recoded_{}.tif".format(year)

    cmd = ['gdal_calc.py', '-A', annual_loss]
    cmd.append('--calc={}*(A==100)'.format(year_code))
    cmd.append('--outfile={}'.format(recoded_tile))
    cmd.extend(['--NoDataValue=0', '--co', 'COMPRESS=LZW', '--quiet'])

    # Streams the subprocess's combined stdout/stderr into the log. Approach from
    # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    proc = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with proc.stdout:
        uu.log_subprocess_output(proc.stdout)
def output_per_pixel(tile_id, input_pattern, output_pattern, sensit_type):
    """Convert a per-hectare model output tile to per-pixel values.

    Runs gdal_calc.py computing A*B/cn.m2_per_ha, where A is
    '<tile_id>_<input_pattern>.tif' and B is 'hanson_2013_area_<tile_id>.tif',
    and writes '<tile_id>_<output_pattern>.tif' (LZW, NoData 0).

    Args:
        tile_id: Tile identifier.
        input_pattern: File-name pattern of the per-hectare input tile.
        output_pattern: File-name pattern of the per-pixel output tile.
        sensit_type: Accepted for interface compatibility; not used in this function.
    """
    uu.print_log("Calculating per pixel values for", tile_id)

    started_at = datetime.datetime.now()

    per_ha_tile = '{0}_{1}.tif'.format(tile_id, input_pattern)
    pixel_area_tile = 'hanson_2013_area_{}.tif'.format(tile_id)
    per_pixel_tile = '{0}_{1}.tif'.format(tile_id, output_pattern)

    uu.print_log("Converting {} from Mg CO2/ha to Mg CO2/pixel...".format(per_ha_tile))

    # Multiplies the per hectare value by the pixel area tile, then divides by the number of m2 in a hectare
    cmd = ['gdal_calc.py', '-A', per_ha_tile, '-B', pixel_area_tile]
    cmd.append('--calc=A*B/{}'.format(cn.m2_per_ha))
    cmd.append('--outfile={}'.format(per_pixel_tile))
    cmd.extend(['--NoDataValue=0', '--co', 'COMPRESS=LZW', '--overwrite', '--quiet'])
    uu.log_subprocess_output_full(cmd)

    uu.print_log("  Per pixel values calculated for {}".format(per_pixel_tile))

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, output_pattern)
def create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type):
    """Create the gain-year-count raster for pixels with both loss and gain (standard function).

    If the loss tile exists, gdal_calc.py is run with A=loss, B=gain,
    C=model extent and writes '<tile_id>_growth_years_loss_and_gain.tif'
    (Byte, LZW, NoData 0). If it does not exist, no raster is created.

    Args:
        tile_id: Tile identifier.
        sensit_type: Sensitivity analysis name, forwarded to tile_names().
    """
    uu.print_log("Loss and gain pixel processing using standard function:", tile_id)

    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    started_at = datetime.datetime.now()

    if not os.path.exists(loss):
        uu.print_log("No loss tile found for {}. Skipping loss and gain pixel gain year count.".format(tile_id))
    else:
        uu.print_log("Loss tile found for {}. Using it in loss and gain pixel gain year count.".format(tile_id))

        # Selects pixels with loss > 0, gain == 1 and inside the model extent. Their value is
        # (A-1) plus the floor of half the years remaining after the loss year.
        calc_arg = '--calc=((A>0)*(B==1)*(C>0)*((A-1)+floor(({}+1-A)/2)))'.format(cn.loss_years)
        out_arg = '--outfile={}'.format('{}_growth_years_loss_and_gain.tif'.format(tile_id))

        cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, calc_arg, out_arg]
        cmd += ['--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']
        uu.log_subprocess_output_full(cmd)

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, 'growth_years_loss_and_gain')
def create_gain_year_count_no_change_standard(tile_id, sensit_type, no_upload):
    """Create the gain-year-count raster for pixels with neither loss nor gain.

    Writes '<tile_id>_growth_years_no_change.tif' (Byte, LZW, NoData 0) with
    gdal_calc.py. With a loss tile, the inputs are A=loss, B=gain,
    C=model extent; without one, they are A=gain, B=model extent. Selected
    pixels receive cn.loss_years.

    Args:
        tile_id: Tile identifier.
        sensit_type: Sensitivity analysis name, forwarded to tile_names().
        no_upload: Forwarded unchanged to uu.end_of_fx_summary().
    """
    uu.print_log("Gain year count for pixels with neither loss nor gain:", tile_id)

    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    started_at = datetime.datetime.now()

    # Only the inputs and the selection expression differ between the two cases
    if os.path.exists(loss):
        uu.print_log("Loss tile found for {}. Using it in no change pixel gain year count.".format(tile_id))
        input_args = ['-A', loss, '-B', gain, '-C', model_extent]
        selection = '(A==0)*(B==0)*(C>0)'
    else:
        uu.print_log("No loss tile found for {}. Not using it for no change pixel gain year count.".format(tile_id))
        input_args = ['-A', gain, '-B', model_extent]
        selection = '(A==0)*(B>0)'

    calc_arg = '--calc={0}*{1}'.format(selection, cn.loss_years)
    out_arg = '--outfile={}'.format('{}_growth_years_no_change.tif'.format(tile_id))

    cmd = ['gdal_calc.py'] + input_args + [calc_arg, out_arg]
    cmd += ['--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']
    uu.log_subprocess_output_full(cmd)

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, 'growth_years_no_change', no_upload)
def create_gain_year_count_loss_only(tile_id, sensit_type, no_upload):
    """Create the gain-year-count raster for pixels with loss but no gain.

    If the loss tile exists, gdal_calc.py is run with A=loss, B=gain,
    C=model extent and writes '<tile_id>_growth_years_loss_only.tif'
    (Byte, LZW, NoData 0), in which selected pixels get (A-1). If it does not
    exist, no raster is created.

    Args:
        tile_id: Tile identifier.
        sensit_type: Sensitivity analysis name, forwarded to tile_names().
        no_upload: Forwarded unchanged to uu.end_of_fx_summary().
    """
    uu.print_log("Gain year count for loss only pixels:", tile_id)

    started_at = datetime.datetime.now()

    loss, gain, model_extent = tile_names(tile_id, sensit_type)

    if not os.path.exists(loss):
        uu.print_log("No loss tile found for {}. Skipping loss only pixel gain year count.".format(tile_id))
    else:
        uu.print_log("Loss tile found for {}. Using it in loss only pixel gain year count.".format(tile_id))

        # Selects pixels with loss > 0, gain == 0 and inside the model extent
        out_arg = '--outfile={}'.format('{}_growth_years_loss_only.tif'.format(tile_id))

        cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent]
        cmd += ['--calc=(A>0)*(B==0)*(C>0)*(A-1)', out_arg]
        cmd += ['--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet']
        uu.log_subprocess_output_full(cmd)

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, 'growth_years_loss_only', no_upload)
def create_combined_soil_C(tile_id, no_upload):
    """Combine mangrove soil C and mineral soil C into one tile, mangrove taking precedence.

    If '<tile_id>_<cn.pattern_mangrove_biomass_2000>.tif' exists, the output
    '<tile_id>_<cn.pattern_soil_C_full_extent_2000>.tif' is written window by
    window: the mangrove soil value where it is > 0, otherwise the mineral soil
    value. If it does not exist, the mineral soil tile is simply renamed to the
    output name.

    All rasters are opened with context managers so they are closed (and the
    output flushed to disk) even if an error occurs mid-way.

    Args:
        tile_id: Tile identifier.
        no_upload: Forwarded unchanged to uu.end_of_fx_summary().
    """
    # Start time
    start = datetime.datetime.now()

    # Input files
    mangrove_soil = '{0}_mangrove_masked_to_mangrove.tif'.format(tile_id)
    mineral_soil = '{0}_{1}.tif'.format(tile_id, cn.pattern_soil_C_full_extent_2000_non_mang)

    # Output file
    combined_soil = '{0}_{1}.tif'.format(tile_id, cn.pattern_soil_C_full_extent_2000)

    # Checks if mangrove AGB tile exists. If not, mangrove soil C is not combined with mineral soil C.
    if os.path.exists('{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)):

        uu.print_log("Mangrove aboveground biomass tile found for", tile_id)

        with rasterio.open(mangrove_soil) as mangrove_soil_src, \
                rasterio.open(mineral_soil) as mineral_soil_src:

            # The output inherits the mangrove soil tile's metadata (location/projection/cellsize/dtype),
            # with the driver, band count, compression and NoData value set explicitly
            kwargs = mangrove_soil_src.meta
            kwargs.update(
                driver='GTiff',
                count=1,
                compress='lzw',
                nodata=0
            )

            # The output file: soil C with mangrove soil C taking precedence over mineral soil C
            with rasterio.open(combined_soil, 'w', **kwargs) as dst_combined_soil:

                uu.print_log("Replacing mineral soil C pixels with mangrove soil C pixels for", tile_id)

                # Iterates over the block windows of band 1 so the whole tif is never held in memory at once
                for idx, window in mangrove_soil_src.block_windows(1):

                    mangrove_soil_window = mangrove_soil_src.read(1, window=window)
                    mineral_soil_window = mineral_soil_src.read(1, window=window)

                    combined_soil_window = np.where(mangrove_soil_window > 0, mangrove_soil_window, mineral_soil_window)

                    dst_combined_soil.write_band(1, combined_soil_window, window=window)

    else:

        uu.print_log("No mangrove aboveground biomass tile for", tile_id)

        # If there is no mangrove soil C tile, the final output of the mineral soil function needs to receive the
        # correct final name.
        os.rename('{0}_{1}.tif'.format(tile_id, cn.pattern_soil_C_full_extent_2000_non_mang), combined_soil)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, cn.pattern_soil_C_full_extent_2000, no_upload)
def create_combined_ifl_primary(tile_id):
    """Give a tile its combined IFL/primary forest raster by renaming the appropriate input.

    When the tile's ymax (from uu.coords) is between -20 and 30 inclusive, the
    primary forest tile '<tile_id>_primary_2001.tif' is renamed to
    '<tile_id>_<cn.pattern_ifl_primary>.tif'. Otherwise the IFL tile
    '<tile_id>_<cn.pattern_ifl>.tif' is renamed to that name, if it exists.

    Args:
        tile_id: Tile identifier.
    """
    started_at = datetime.datetime.now()

    ifl_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_ifl)
    primary_tile = '{}_primary_2001.tif'.format(tile_id)
    combined_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_ifl_primary)

    uu.print_log("Getting extent of", tile_id)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # NOTE(review): the log text says 30N-30S while the test is on ymax down to -20; presumably
    # tiles are 10 degrees tall so ymax = -20 reaches 30S -- confirm.
    uses_primary_forest = -20 <= ymax <= 30

    # Picks the source tile (primary forest or IFL)
    if uses_primary_forest:
        uu.print_log("{} between 30N and 30S. Using primary forest tile.".format(tile_id))
        os.rename(primary_tile, combined_tile)
    else:
        uu.print_log("{} not between 30N and 30S. Using IFL tile, if it exists.".format(tile_id))

        if not os.path.exists(ifl_tile):
            uu.print_log("IFL tile does not exist for {}".format(tile_id))
        else:
            os.rename(ifl_tile, combined_tile)

    # Logs the end-of-function summary for this tile
    uu.end_of_fx_summary(started_at, tile_id, cn.pattern_ifl_primary)
def mp_continent_ecozone_tiles(tile_id_list, run_date=None):
    """Create continent-ecozone tiles for a list of tiles and upload them to s3.

    Downloads and unzips the continent-ecozone shapefile, runs
    ``continent_ecozone_tiles.create_continent_ecozone_tiles`` over every tile in a
    multiprocessing pool, then uploads each output directory/pattern pair.

    Args:
        tile_id_list: List of tile ids, or the string ``'all'`` to build the list
            with ``uu.create_combined_tile_list``.
        run_date: Optional date that replaces the date in the output directories.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.pattern_WHRC_biomass_2000_non_mang_non_planted,
                                                    cn.mangrove_biomass_2000_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # if the continent-ecozone shapefile hasn't already been downloaded, it will be downloaded and unzipped
    uu.s3_file_download(cn.cont_eco_s3_zip, cn.docker_base_dir, 'std')

    # Unzips ecozone shapefile
    cmd = ['unzip', cn.cont_eco_zip]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    # Reaps the child process so it does not linger as a zombie
    process.wait()

    # List of output directories and output file name patterns
    output_dir_list = [cn.cont_eco_raw_dir, cn.cont_eco_dir]
    output_pattern_list = [cn.pattern_cont_eco_raw, cn.pattern_cont_eco_processed]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # For multiprocessor use. multiprocessing.Pool requires at least one worker, so the
    # count is clamped for machines where cn.count is below 4.
    processes = max(1, int(cn.count / 4))
    uu.print_log('Continent-ecozone tile creation max processors=', processes)

    # The pool must be created before it can be mapped over; previously `pool` was never defined here.
    pool = multiprocessing.Pool(processes)
    pool.map(continent_ecozone_tiles.create_continent_ecozone_tiles, tile_id_list)
    pool.close()
    pool.join()

    # Uploads the continent-ecozone tile to s3 before the codes are expanded to pixels in 1024x1024 windows that don't have codes.
    # These are not used for the model. They are for reference and completeness.
    for output_dir, output_pattern in zip(output_dir_list, output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
def mp_annual_gain_rate_mangrove(sensit_type, tile_id_list, run_date=None):
    """Create mangrove annual gain rate tiles (AGB, BGB, AGB stdev) and upload them.

    Builds lookup dictionaries from the "mangrove gain, for model" and
    "mangrove stdev, for model" sheets of the gain spreadsheet, keyed by
    continent-ecozone code, and passes them to
    ``annual_gain_rate_mangrove.annual_gain_rate`` for every tile via a
    multiprocessing pool.

    Args:
        sensit_type: Model/sensitivity type, forwarded to the download helper and the worker.
        tile_id_list: List of tile ids, or ``'all'`` to use every tile that has both
            mangrove biomass and continent-ecozone tiles on s3.
        run_date: Optional date that replaces the date in the output directories.
    """

    def _code_lookup(table, value_column):
        # Maps continent-ecozone code -> value, adds code 0 (not in a continent) -> 0,
        # and converts all the keys to float type.
        lookup = pd.Series(table[value_column].values, index=table.gainEcoCon).to_dict()
        lookup[0] = 0
        return {float(code): rate for code, rate in lookup.items()}

    os.chdir(cn.docker_base_dir)

    pd.options.mode.chained_assignment = None

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # Only tiles with both mangrove biomass and FAO ecozone information can be processed
        # because both of these are necessary for calculating mangrove gain
        mangrove_tiles = uu.tile_list_s3(cn.mangrove_biomass_2000_dir)
        ecozone_tiles = uu.tile_list_s3(cn.cont_eco_dir)
        tile_id_list = list(set(mangrove_tiles).intersection(ecozone_tiles))

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    download_dict = {
        cn.cont_eco_dir: [cn.pattern_cont_eco_processed],
        cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
    }

    # Output directories and their matching output file name patterns
    output_dir_list = [
        cn.annual_gain_AGB_mangrove_dir,
        cn.annual_gain_BGB_mangrove_dir,
        cn.stdev_annual_gain_AGB_mangrove_dir,
    ]
    output_pattern_list = [
        cn.pattern_annual_gain_AGB_mangrove,
        cn.pattern_annual_gain_BGB_mangrove,
        cn.pattern_stdev_annual_gain_AGB_mangrove,
    ]

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list,
    # if AWS credentials are found
    if uu.check_aws_creds():
        for s3_dir, patterns in download_dict.items():
            uu.s3_flexible_download(s3_dir, patterns[0], cn.docker_base_dir, sensit_type, tile_id_list)

    # Table with IPCC Wetland Supplement Table 4.4 default mangrove gain rates
    spreadsheet_copy_cmd = ['aws', 's3', 'cp',
                            os.path.join(cn.gain_spreadsheet_dir, cn.gain_spreadsheet),
                            cn.docker_base_dir]
    uu.log_subprocess_output_full(spreadsheet_copy_cmd)

    ### To make the removal factor dictionaries

    # Imports the table with the ecozone-continent codes and the carbon gain rates
    gain_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                               sheet_name="mangrove gain, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    gain_table_simplified = gain_table.drop_duplicates(subset='gainEcoCon', keep='first')

    # Belowground:aboveground biomass ratios for the three mangrove types, where the keys correspond to
    # the "mangType" field in the gain rate spreadsheet.
    # If the assignment of mangTypes to ecozones changes, that column in the spreadsheet may need to change and the
    # keys in this dictionary would need to change accordingly.
    type_ratio_dict_final = {
        1: float(cn.below_to_above_trop_dry_mang),
        2: float(cn.below_to_above_trop_wet_mang),
        3: float(cn.below_to_above_subtrop_mang),
    }

    # Applies the belowground:aboveground biomass ratios for the three mangrove types to the annual aboveground
    # gain rates to get a column of belowground annual gain rates by mangrove type
    gain_table_simplified['BGB_AGB_ratio'] = gain_table_simplified['mangType'].map(type_ratio_dict_final)
    gain_table_simplified['BGB_annual_rate'] = \
        gain_table_simplified['AGB_gain_tons_ha_yr'] * gain_table_simplified['BGB_AGB_ratio']

    # Converts the continent-ecozone codes and corresponding gain rates to dictionaries
    # for aboveground and belowground gain rates
    gain_above_dict = _code_lookup(gain_table_simplified, 'AGB_gain_tons_ha_yr')
    gain_below_dict = _code_lookup(gain_table_simplified, 'BGB_annual_rate')

    ### To make the removal factor standard deviation dictionary

    # Imports the table with the ecozone-continent codes and the gain rate standard deviations
    stdev_table = pd.read_excel("{}".format(cn.gain_spreadsheet),
                                sheet_name="mangrove stdev, for model")

    # Removes rows with duplicate codes (N. and S. America for the same ecozone)
    stdev_table_simplified = stdev_table.drop_duplicates(subset='gainEcoCon', keep='first')

    # Converts the continent-ecozone codes and corresponding standard deviations to a dictionary
    stdev_dict = _code_lookup(stdev_table_simplified, 'AGB_gain_stdev_tons_ha_yr')

    # This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
    # It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
    # Ran with 18 processors on r4.16xlarge (430 GB memory peak)
    # 26 processors = >740 GB peak; 18 = 550 GB peak; 20 = 610 GB peak; 23 = 700 GB peak; 24 > 750 GB peak
    processes = 20 if cn.count == 96 else 4
    uu.print_log('Mangrove annual gain rate max processors=', processes)

    worker = partial(annual_gain_rate_mangrove.annual_gain_rate,
                     sensit_type=sensit_type,
                     output_pattern_list=output_pattern_list,
                     gain_above_dict=gain_above_dict,
                     gain_below_dict=gain_below_dict,
                     stdev_dict=stdev_dict)
    pool = multiprocessing.Pool(processes)
    pool.map(worker, tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile in tile_id_list:
    #
    #     annual_gain_rate_mangrove.annual_gain_rate(tile, sensit_type, output_pattern_list,
    #           gain_above_dict, gain_below_dict, stdev_dict)

    for output_dir, output_pattern in zip(output_dir_list, output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)
'-l', required=True, help= 'List of tile ids to use in the model. Should be of form 00N_110E or 00N_110E,00N_120E or all.' ) parser.add_argument('--run-date', '-d', required=False, help='Date of run. Must be format YYYYMMDD.') args = parser.parse_args() sensit_type = args.model_type tile_id_list = args.tile_id_list run_date = args.run_date # Disables upload to s3 if no AWS credentials are found in environment if not uu.check_aws_creds(): no_upload = True uu.print_log("s3 credentials not found. Uploading to s3 disabled.") # Create the output log uu.initiate_log(tile_id_list=tile_id_list, sensit_type=sensit_type, run_date=run_date) # Checks whether the sensitivity analysis and tile_id_list arguments are valid uu.check_sensit_type(sensit_type) tile_id_list = uu.tile_id_list_check(tile_id_list) mp_annual_gain_rate_mangrove(sensit_type=sensit_type, tile_id_list=tile_id_list, run_date=run_date)
def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, output_pattern_list, no_upload):
    """Create IPCC default AGB/BGB gain rate tiles and the AGB gain rate stdev tile for one tile.

    The forest age category raster is recoded to 0/10000/20000/30000 and added to the
    continent-ecozone raster to form continent-ecozone-age codes. Those codes are then
    replaced by the values in ``gain_table_dict`` (aboveground rates) and
    ``stdev_table_dict`` (standard deviations). Belowground rates are the aboveground
    rates multiplied by ``cn.below_to_above_non_mang``.

    Args:
        tile_id: Tile identifier used to build the input and output file names.
        sensit_type: Model/sensitivity type; used for input naming and output tags.
        gain_table_dict: Mapping of continent-ecozone-age code to aboveground gain rate.
        stdev_table_dict: Mapping of continent-ecozone-age code to gain rate stdev.
        output_pattern_list: Output name patterns, in the order AGB rate, BGB rate, AGB stdev.
        no_upload: Passed through unchanged to ``uu.end_of_fx_summary``.

    Returns:
        The result of ``uu.print_log`` if either input tile cannot be opened (the tile
        is skipped); otherwise ``None``.
    """

    # Converts the forest age category decision tree output values to the three age categories--
    # 10000: primary forest; 20000: secondary forest > 20 years; 30000: secondary forest <= 20 years
    # These are five digits so they can easily be added to the four digits of the continent-ecozone code to make unique codes
    # for each continent-ecozone-age combination.
    # The key in the dictionary is the forest age category decision tree endpoints.
    age_dict = {0: 0, 1: 10000, 2: 20000, 3: 30000}

    uu.print_log("Processing:", tile_id)

    # Start time
    start = datetime.datetime.now()

    # Names of the forest age category and continent-ecozone tiles
    age_cat = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_age_cat_IPCC)
    cont_eco = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_cont_eco_processed)

    # Names of the output natural forest gain rate tiles (above and belowground)
    AGB_IPCC_default_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[0])
    BGB_IPCC_default_gain_rate = '{0}_{1}.tif'.format(tile_id, output_pattern_list[1])
    AGB_IPCC_default_gain_stdev = '{0}_{1}.tif'.format(tile_id, output_pattern_list[2])

    uu.print_log(" Creating IPCC default biomass gain rates and standard deviation for {}".format(tile_id))

    # Opens the input tiles if they exist. Skips tile if either input doesn't exist.
    # rasterio reports a dataset that cannot be opened with RasterioIOError, an OSError subclass.
    try:
        age_cat_src = rasterio.open(age_cat)
    except OSError:
        return uu.print_log(" No age category tile found for {}. Skipping tile.".format(tile_id))
    uu.print_log(" Age category tile found for {}".format(tile_id))

    try:
        cont_eco_src = rasterio.open(cont_eco)
    except OSError:
        # Releases the input that was already opened before skipping the tile
        age_cat_src.close()
        return uu.print_log(" No continent-ecozone tile found for {}. Skipping tile.".format(tile_id))
    uu.print_log(" Continent-ecozone tile found for {}".format(tile_id))

    # The with-blocks close every dataset (and flush the outputs) before the end-of-function summary runs
    with age_cat_src, cont_eco_src:

        # Grabs metadata about the continent ecozone tile, like its location/projection/cellsize
        kwargs = cont_eco_src.meta

        # Grabs the windows of the tile (stripes) to iterate over the entire tif without running out of memory
        windows = cont_eco_src.block_windows(1)

        # Updates kwargs for the output dataset.
        # Need to update data type to float 32 so that it can handle fractional gain rates
        kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0, dtype='float32')

        # The output files: aboveground and belowground biomass gain rates, and aboveground stdev
        with rasterio.open(AGB_IPCC_default_gain_rate, 'w', **kwargs) as dst_above, \
                rasterio.open(BGB_IPCC_default_gain_rate, 'w', **kwargs) as dst_below, \
                rasterio.open(AGB_IPCC_default_gain_stdev, 'w', **kwargs) as dst_stdev_above:

            # Adds metadata tags to the output rasters
            uu.add_rasterio_tags(dst_above, sensit_type)
            dst_above.update_tags(
                units='megagrams aboveground biomass (AGB or dry matter)/ha/yr')
            dst_above.update_tags(
                source='IPCC Guidelines 2019 refinement, forest section, Table 4.9')
            dst_above.update_tags(
                extent='Full model extent, even though these rates will not be used over the full model extent')

            uu.add_rasterio_tags(dst_below, sensit_type)
            dst_below.update_tags(
                units='megagrams belowground biomass (AGB or dry matter)/ha/yr')
            dst_below.update_tags(
                source='IPCC Guidelines 2019 refinement, forest section, Table 4.9')
            dst_below.update_tags(
                extent='Full model extent, even though these rates will not be used over the full model extent')

            uu.add_rasterio_tags(dst_stdev_above, sensit_type)
            dst_stdev_above.update_tags(
                units='standard deviation, in terms of megagrams aboveground biomass (AGB or dry matter)/ha/yr')
            dst_stdev_above.update_tags(
                source='IPCC Guidelines 2019 refinement, forest section, Table 4.9')
            dst_stdev_above.update_tags(
                extent='Full model extent, even though these standard deviations will not be used over the full model extent')

            # Iterates across the windows of the input tiles
            for _, window in windows:

                # Creates a processing window for each input raster.
                # The zero-filled fallback on a failed read is kept from the original best-effort behavior.
                try:
                    cont_eco_window = cont_eco_src.read(1, window=window)
                except Exception:
                    cont_eco_window = np.zeros((window.height, window.width), dtype='uint8')

                try:
                    age_cat_window = age_cat_src.read(1, window=window)
                except Exception:
                    age_cat_window = np.zeros((window.height, window.width), dtype='uint8')

                # Recodes the input forest age category array into the age category codes in age_dict
                age_recode = np.vectorize(age_dict.get)(age_cat_window)

                # Adds the age category codes to the continent-ecozone codes to create an array of unique continent-ecozone-age codes
                cont_eco_age = cont_eco_window + age_recode

                ## Aboveground removal factors
                # Converts the continent-ecozone array to float so that the values can be replaced with fractional gain rates
                gain_rate_AGB = cont_eco_age.astype('float32')

                # Applies the dictionary of continent-ecozone-age gain rates to the continent-ecozone-age array to
                # get annual gain rates (metric tons aboveground biomass/yr) for each pixel
                for key, value in gain_table_dict.items():
                    gain_rate_AGB[gain_rate_AGB == key] = value

                # Writes the output window to the output file
                dst_above.write_band(1, gain_rate_AGB, window=window)

                ## Belowground removal factors
                # Calculates belowground annual removal rates
                gain_rate_BGB = gain_rate_AGB * cn.below_to_above_non_mang

                # Writes the output window to the output file
                dst_below.write_band(1, gain_rate_BGB, window=window)

                ## Aboveground removal factor standard deviation
                # Converts the continent-ecozone array to float so that the values can be replaced with fractional standard deviations
                gain_stdev_AGB = cont_eco_age.astype('float32')

                # Applies the dictionary of continent-ecozone-age gain rate standard deviations to the continent-ecozone-age array to
                # get annual gain rate standard deviations (metric tons aboveground biomass/yr) for each pixel
                for key, value in stdev_table_dict.items():
                    gain_stdev_AGB[gain_stdev_AGB == key] = value

                # Writes the output window to the output file
                dst_stdev_above.write_band(1, gain_stdev_AGB, window=window)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_pattern_list[0], no_upload)
def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload):
    """Merge the four gain year count rasters of one tile into a single raster.

    The "no change" tile must exist. The "loss only", "gain only" and "loss and gain"
    tiles are optional; a missing one contributes zeros. The output window is the sum
    of the four input windows.

    Args:
        tile_id: Tile identifier used as the prefix of every file name.
        pattern: Name pattern of the merged output tile.
        sensit_type: Model/sensitivity type, used for the output metadata tags.
        no_upload: Passed through unchanged to ``uu.end_of_fx_summary``.
    """

    uu.print_log(
        "Merging loss, gain, no change, and loss/gain pixels into single raster for {}".format(tile_id))

    # start time
    start = datetime.datetime.now()

    # The four rasters from above that are to be merged
    no_change_gain_years = '{}_growth_years_no_change.tif'.format(tile_id)
    loss_only_gain_years = '{}_growth_years_loss_only.tif'.format(tile_id)
    gain_only_gain_years = '{}_growth_years_gain_only.tif'.format(tile_id)
    loss_and_gain_gain_years = '{}_growth_years_loss_and_gain.tif'.format(tile_id)

    # Names of the output tiles
    gain_year_count_merged = '{0}_{1}.tif'.format(tile_id, pattern)

    # Opens no change gain year count tile. This should exist for all tiles.
    with rasterio.open(no_change_gain_years) as no_change_gain_years_src:

        # Grabs metadata about the tif, like its location/projection/cellsize
        kwargs = no_change_gain_years_src.meta

        # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory
        windows = no_change_gain_years_src.block_windows(1)

        # Updates kwargs for the output dataset
        kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0)

        uu.print_log(" No change tile exists for {} by default".format(tile_id))

        # Opens the other gain year count tiles. They may not exist for all other tiles,
        # in which case None is stored and zeros are used for that input.
        optional_srcs = []
        try:
            for path, label in ((loss_only_gain_years, "Loss only"),
                                (gain_only_gain_years, "Gain only"),
                                (loss_and_gain_gain_years, "Loss and gain")):
                optional_srcs.append(_open_optional_gain_year_tile(path, label, tile_id))
            loss_only_gain_years_src, gain_only_gain_years_src, loss_and_gain_gain_years_src = optional_srcs

            # Opens the output tile, giving it the arguments of the input tiles.
            # The with-block closes (and flushes) it before the end-of-function summary runs.
            with rasterio.open(gain_year_count_merged, 'w', **kwargs) as gain_year_count_merged_dst:

                # Adds metadata tags to the output raster
                uu.add_rasterio_tags(gain_year_count_merged_dst, sensit_type)
                gain_year_count_merged_dst.update_tags(units='years')
                gain_year_count_merged_dst.update_tags(min_possible_value='0')
                gain_year_count_merged_dst.update_tags(max_possible_value=cn.loss_years)
                gain_year_count_merged_dst.update_tags(
                    source='Gain years are assigned based on the combination of Hansen loss and gain in each pixel. There are four combinations: neither loss nor gain, loss only, gain only, loss and gain.')
                gain_year_count_merged_dst.update_tags(extent='Full model extent')

                # Iterates across the windows of the input tile
                for _, window in windows:

                    no_change_gain_years_window = no_change_gain_years_src.read(1, window=window)
                    loss_only_gain_years_window = _read_gain_year_window(loss_only_gain_years_src, window)
                    gain_only_gain_years_window = _read_gain_year_window(gain_only_gain_years_src, window)
                    loss_and_gain_gain_years_window = _read_gain_year_window(loss_and_gain_gain_years_src, window)

                    gain_year_count_merged_window = loss_only_gain_years_window + gain_only_gain_years_window + \
                        no_change_gain_years_window + loss_and_gain_gain_years_window

                    gain_year_count_merged_dst.write_band(1, gain_year_count_merged_window, window=window)
        finally:
            # Releases whichever optional inputs were actually opened
            for optional_src in optional_srcs:
                if optional_src is not None:
                    optional_src.close()

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, pattern, no_upload)


def _open_optional_gain_year_tile(path, label, tile_id):
    """Open an optional gain year count tile; log and return None if it cannot be opened."""
    # rasterio reports a dataset that cannot be opened with RasterioIOError, an OSError subclass
    try:
        src = rasterio.open(path)
    except OSError:
        uu.print_log(" No {0} tile found for {1}".format(label.lower(), tile_id))
        return None
    uu.print_log(" {0} tile found for {1}".format(label, tile_id))
    return src


def _read_gain_year_window(src, window):
    """Read band 1 of ``src`` for ``window``, or return uint8 zeros when the tile is absent."""
    if src is None:
        return np.zeros((window.height, window.width), dtype='uint8')
    return src.read(1, window=window)
def mp_prep_other_inputs(tile_id_list, run_date):
    """Download the raw ancillary inputs, convert them to model tiles and upload them.

    Steps, in order: download the raw inputs; unzip the plantation and driver archives;
    create the driver, young forest rate/stdev, pre-2000 plantation, climate zone,
    European rate/stdev, primary forest, combined IFL/primary, US age category,
    FIA forest group and FIA region tiles; delete empty young-forest tiles; upload
    every output directory/pattern pair.

    Args:
        tile_id_list: List of tile ids, or the string ``'all'`` to build the list
            with ``uu.create_combined_tile_list``.
        run_date: Optional date that replaces the date in the output directories.
    """

    os.chdir(cn.docker_base_dir)
    sensit_type = 'std'

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.create_combined_tile_list(cn.WHRC_biomass_2000_unmasked_dir,
                                                    cn.mangrove_biomass_2000_dir,
                                                    set3=cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir)

    uu.print_log(tile_id_list)
    uu.print_log("There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # List of output directories and output file name patterns
    output_dir_list = [cn.climate_zone_processed_dir, cn.plant_pre_2000_processed_dir,
                       cn.drivers_processed_dir, cn.ifl_primary_processed_dir,
                       cn.annual_gain_AGC_natrl_forest_young_dir,
                       cn.stdev_annual_gain_AGC_natrl_forest_young_dir,
                       cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir,
                       cn.FIA_forest_group_processed_dir,
                       cn.age_cat_natrl_forest_US_dir,
                       cn.FIA_regions_processed_dir]
    output_pattern_list = [cn.pattern_climate_zone, cn.pattern_plant_pre_2000,
                           cn.pattern_drivers, cn.pattern_ifl_primary,
                           cn.pattern_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_stdev_annual_gain_AGC_natrl_forest_young,
                           cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe,
                           cn.pattern_FIA_forest_group_processed,
                           cn.pattern_age_cat_natrl_forest_US,
                           cn.pattern_FIA_regions_processed]

    # If the model run isn't the standard one, the output directory and file names are changed
    if sensit_type != 'std':
        uu.print_log("Changing output directory and file name pattern based on sensitivity analysis")
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)
        output_pattern_list = uu.alter_patterns(sensit_type, output_pattern_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Files to process: climate zone, IDN/MYS plantations before 2000, tree cover loss drivers, combine IFL and primary forest
    uu.s3_file_download(os.path.join(cn.climate_zone_raw_dir, cn.climate_zone_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.plant_pre_2000_raw_dir, '{}.zip'.format(cn.pattern_plant_pre_2000_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.drivers_raw_dir, '{}.zip'.format(cn.pattern_drivers_raw)), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw_dir, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.age_cat_natrl_forest_US_raw_dir, cn.name_age_cat_natrl_forest_US_raw), cn.docker_base_dir, sensit_type)
    uu.s3_file_download(os.path.join(cn.FIA_forest_group_raw_dir, cn.name_FIA_forest_group_raw), cn.docker_base_dir, sensit_type)

    # For some reason, using uu.s3_file_download or otherwise using AWSCLI as a subprocess doesn't work for this raster.
    # Thus, using wget instead.
    cmd = ['wget', '{}'.format(cn.annual_gain_AGC_natrl_forest_young_raw_URL), '-P', '{}'.format(cn.docker_base_dir)]
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)
    # Reaps the child process so it does not linger as a zombie
    process.wait()

    uu.s3_file_download(cn.stdev_annual_gain_AGC_natrl_forest_young_raw_URL, cn.docker_base_dir, sensit_type)

    cmd = ['aws', 's3', 'cp', cn.primary_raw_dir, cn.docker_base_dir, '--recursive']
    uu.log_subprocess_output_full(cmd)

    uu.s3_flexible_download(cn.ifl_dir, cn.pattern_ifl, cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Unzipping pre-2000 plantations...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_plant_pre_2000_raw)]
    uu.log_subprocess_output_full(cmd)

    uu.print_log("Unzipping drivers...")
    cmd = ['unzip', '-j', '{}.zip'.format(cn.pattern_drivers_raw)]
    uu.log_subprocess_output_full(cmd)

    # Creates tree cover loss driver tiles
    processes = _prep_pool_size(80)  # 45 processors = 70 GB peak; 70 = 90 GB peak; 80 = XXX GB peak
    uu.print_log("Creating tree cover loss driver tiles with {} processors...".format(processes))
    _prep_warp_tiles(tile_id_list, '{}.tif'.format(cn.pattern_drivers_raw), cn.pattern_drivers, 'Byte', processes)

    # Creates young natural forest removal rate tiles
    processes = _prep_pool_size(80)  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    uu.print_log("Creating young natural forest gain rate tiles with {} processors...".format(processes))
    _prep_warp_tiles(tile_id_list, cn.name_annual_gain_AGC_natrl_forest_young_raw,
                     cn.pattern_annual_gain_AGC_natrl_forest_young, 'float32', processes)

    # Creates young natural forest removal rate standard deviation tiles
    processes = _prep_pool_size(80)  # 32 processors = 210 GB peak; 60 = 370 GB peak; 80 = XXX GB peak
    uu.print_log("Creating standard deviation for young natural forest removal rate tiles with {} processors...".format(processes))
    _prep_warp_tiles(tile_id_list, cn.name_stdev_annual_gain_AGC_natrl_forest_young_raw,
                     cn.pattern_stdev_annual_gain_AGC_natrl_forest_young, 'float32', processes)

    # Creates pre-2000 oil palm plantation tiles
    processes = _prep_pool_size(80)  # 45 processors = 100 GB peak; 80 = XXX GB peak
    uu.print_log("Creating pre-2000 oil palm plantation tiles with {} processors...".format(processes))
    _prep_map_tiles(prep_other_inputs.rasterize_pre_2000_plantations, tile_id_list, processes)

    # Creates climate zone tiles
    processes = _prep_pool_size(80)  # 45 processors = 230 GB peak (on second step); 80 = XXX GB peak
    uu.print_log("Creating climate zone tiles with {} processors...".format(processes))
    _prep_map_tiles(prep_other_inputs.create_climate_zone_tiles, tile_id_list, processes)

    # Creates European natural forest removal rate tiles
    processes = _prep_pool_size(60)  # 32 processors = 60 GB peak; 60 = XXX GB peak
    uu.print_log("Creating European natural forest gain rate tiles with {} processors...".format(processes))
    _prep_warp_tiles(tile_id_list, cn.name_annual_gain_AGC_BGC_natrl_forest_Europe_raw,
                     cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe, 'float32', processes)

    # Creates European natural forest standard deviation of removal rate tiles
    processes = _prep_pool_size(32)  # 32 processors = 60 GB peak; 60 = XXX GB peak
    uu.print_log("Creating standard deviation for European natural forest gain rate tiles with {} processors...".format(processes))
    _prep_warp_tiles(tile_id_list, cn.name_stdev_annual_gain_AGC_BGC_natrl_forest_Europe_raw,
                     cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe, 'float32', processes)

    # Creates a vrt of the primary forests with nodata=0 from the continental primary forest rasters
    uu.print_log("Creating vrt of humid tropial primary forest...")
    primary_vrt = 'primary_2001.vrt'
    os.system('gdalbuildvrt -srcnodata 0 {} *2001_primary.tif'.format(primary_vrt))
    uu.print_log(" Humid tropical primary forest vrt created")

    # Creates primary forest tiles
    processes = _prep_pool_size(45)  # 45 processors = 650 GB peak
    uu.print_log("Creating primary forest tiles with {} processors...".format(processes))
    _prep_warp_tiles(tile_id_list, primary_vrt, 'primary_2001', 'Byte', processes)

    # Creates a combined IFL/primary forest raster
    # Uses very little memory since it's just file renaming
    processes = _prep_pool_size(60)  # 60 processors = 10 GB peak
    uu.print_log("Assigning each tile to ifl2000 or primary forest with {} processors...".format(processes))
    _prep_map_tiles(prep_other_inputs.create_combined_ifl_primary, tile_id_list, processes)

    # Creates forest age category tiles for US forests
    processes = _prep_pool_size(70)  # 32 processors = 35 GB peak; 70 = XXX GB peak
    uu.print_log("Creating US forest age category tiles with {} processors...".format(processes))
    _prep_warp_tiles(tile_id_list, cn.name_age_cat_natrl_forest_US_raw,
                     cn.pattern_age_cat_natrl_forest_US, 'Byte', processes)

    # Creates forest groups for US forests
    processes = _prep_pool_size(80)  # 32 processors = 25 GB peak; 80 = XXX GB peak
    uu.print_log("Creating US forest group tiles with {} processors...".format(processes))
    _prep_warp_tiles(tile_id_list, cn.name_FIA_forest_group_raw,
                     cn.pattern_FIA_forest_group_processed, 'Byte', processes)

    # Creates FIA regions for US forests
    processes = _prep_pool_size(70)  # 32 processors = 35 GB peak; 70 = XXX GB peak
    uu.print_log("Creating US forest region tiles with {} processors...".format(processes))
    _prep_warp_tiles(tile_id_list, cn.name_FIA_regions_raw,
                     cn.pattern_FIA_regions_processed, 'Byte', processes)

    young_forest_patterns = [cn.pattern_annual_gain_AGC_natrl_forest_young,
                             cn.pattern_stdev_annual_gain_AGC_natrl_forest_young]

    for output_pattern in young_forest_patterns:

        # For some reason I can't figure out, the young forest rasters (rate and stdev) have NaN values in some places where 0 (NoData)
        # should be. These NaN values show up as values when the check_and_delete_if_empty function runs, making the tiles not
        # deleted even if they have no data. However, the light version (which uses gdalinfo rather than rasterio masks) doesn't
        # have this problem. So I'm forcing the young forest rates and stdev to have their emptiness checked by the gdalinfo version.
        if output_pattern in young_forest_patterns:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            _prep_map_tiles(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern),
                            tile_id_list, processes)

        # NOTE(review): this second check runs in addition to the light check above, exactly as in the
        # original code; confirm whether both passes are intended for the young forest patterns.
        if cn.count == 96:
            processes = 50  # 60 processors = >730 GB peak (for European natural forest forest removal rates); 50 = XXX GB peak
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            _prep_map_tiles(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern),
                            tile_id_list, processes)
        elif cn.count <= 2:  # For local tests
            processes = 1
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors using light function...".format(output_pattern, processes))
            _prep_map_tiles(partial(uu.check_and_delete_if_empty_light, output_pattern=output_pattern),
                            tile_id_list, processes)
        else:
            processes = int(cn.count / 2)
            uu.print_log("Checking for empty tiles of {0} pattern with {1} processors...".format(output_pattern, processes))
            _prep_map_tiles(partial(uu.check_and_delete_if_empty, output_pattern=output_pattern),
                            tile_id_list, processes)

    uu.print_log('\n')

    # Uploads output tiles to s3
    for output_dir, output_pattern in zip(output_dir_list, output_pattern_list):
        uu.upload_final_set(output_dir, output_pattern)


def _prep_pool_size(processes_on_96_cores):
    """Return the given worker count when cn.count is 96, otherwise half of cn.count."""
    if cn.count == 96:
        return processes_on_96_cores
    return int(cn.count / 2)


def _prep_map_tiles(func, tile_id_list, processes):
    """Map ``func`` over ``tile_id_list`` in a pool of ``processes`` workers, then close and join it."""
    pool = multiprocessing.Pool(processes)
    pool.map(func, tile_id_list)
    pool.close()
    pool.join()


def _prep_warp_tiles(tile_id_list, source_raster, out_pattern, dt, processes):
    """Run ``uu.mp_warp_to_Hansen`` for every tile with the given source raster, output pattern and data type."""
    _prep_map_tiles(partial(uu.mp_warp_to_Hansen, source_raster=source_raster,
                            out_pattern=out_pattern, dt=dt),
                    tile_id_list, processes)
def main():
    """Build Hansen-grid tiles of the first year of tree cover loss in the Mekong region.

    Workflow, in order:
      1. Start the log and change into the working directory.
      2. List candidate tiles and download the raw annual Mekong loss rasters.
      3. Recode each annual raster with its loss year.
      4. Merge the recoded rasters into one composite raster with gdal_merge.py.
      5. Warp the composite to Hansen tiles, recode the tiles, and upload the
         tiles that pass uu.check_and_upload.

    Takes no arguments and returns nothing; all results are files and uploads.
    """

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # List of tiles that could be run. These are the tile ids that the composite
    # loss raster is warped to, recoded for, and checked for upload below.
    tile_id_list = uu.tile_list_s3(cn.WHRC_biomass_2000_unmasked_dir)
    # tile_id_list = ['50N_130W']  # test tiles
    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Downloads the Mekong loss folder. Each year of loss has its own raster.
    # NOTE(review): sensit_type is not defined inside this function; presumably
    # it is a module-level name set elsewhere in the file -- confirm.
    uu.s3_folder_download(cn.Mekong_loss_raw_dir, cn.docker_base_dir, sensit_type)

    # The list of all annual loss rasters
    annual_loss_list = glob.glob('Loss_20*tif')
    uu.print_log(annual_loss_list)

    uu.print_log(
        "Creating first year of loss Hansen tiles for Mekong region...")

    # Half the available processors, but never fewer than one: Pool(0) raises
    # ValueError, which int(cn.count / 2) would trigger on a one-CPU machine.
    processes = max(1, int(cn.count / 2))
    pool = multiprocessing.Pool(processes)

    try:
        # Recodes raw loss rasters with their loss year (for model years only)
        pool.map(Mekong_loss.recode_tiles, annual_loss_list)

        # Makes a single raster of all first loss year pixels in the Mekong.
        # The inputs are listed from the latest year to the earliest because
        # gdal_merge.py lets later inputs overwrite earlier ones where they overlap,
        # so where loss occurred in multiple years the earliest loss year is kept.
        uu.print_log("Merging all loss years within model range...")
        loss_composite = "Mekong_loss_2001_2015.tif"
        recoded_rasters = [
            "Mekong_loss_recoded_{}.tif".format(year)
            for year in range(2015, 2000, -1)
        ]
        cmd = [
            'gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=LZW',
            '-a_nodata', '0', '-ot', 'Byte'
        ] + recoded_rasters

        # Solution for adding subprocess output to log is from
        # https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)
        # Reap the child process and surface a failed merge in the log instead of
        # silently ignoring its exit status.
        exit_code = process.wait()
        if exit_code != 0:
            uu.print_log(
                "gdal_merge.py exited with code {}".format(exit_code))

        # Creates Hansen tiles out of the composite Mekong loss
        source_raster = loss_composite
        out_pattern = cn.pattern_Mekong_loss_processed
        dt = 'Byte'
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt), tile_id_list)

        # This is necessary for changing NoData values to 0s (so they are recognized as 0s)
        pool.map(Mekong_loss.recode_tiles, tile_id_list)

        # Only uploads tiles that actually have Mekong loss in them
        upload_dir = cn.Mekong_loss_processed_dir
        pattern = cn.pattern_Mekong_loss_processed
        pool.map(
            partial(uu.check_and_upload, upload_dir=upload_dir, pattern=pattern),
            tile_id_list)
    finally:
        # Release the worker processes whether or not a step failed.
        pool.close()
        pool.join()