### This script calculates the cumulative above and belowground carbon gain in mangrove forest pixels from 2001-2015.
### It multiplies the annual biomass gain rate by the number of years of gain and by the biomass-to-carbon conversion.

import multiprocessing
import cumulative_gain_mangrove
import sys
sys.path.append('../')
import constants_and_names as cn
import universal_util as uu

mangrove_biomass_tile_list = uu.tile_list(cn.annual_gain_AGB_mangrove_dir)
# mangrove_biomass_tile_list = ['20S_110E', '30S_110E'] # test tiles
# mangrove_biomass_tile_list = ['10N_080W'] # test tiles
print mangrove_biomass_tile_list
print "There are {} tiles to process".format(str(len(mangrove_biomass_tile_list)))

# For downloading all tiles in the input folders
download_list = [cn.annual_gain_AGB_mangrove_dir, cn.annual_gain_BGB_mangrove_dir, cn.gain_year_count_mangrove_dir]

for input in download_list:
    uu.s3_folder_download(input, '.')

# # For copying individual tiles to spot machine for testing
# for tile in mangrove_biomass_tile_list:
#
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.annual_gain_AGB_mangrove_dir, tile, cn.pattern_annual_gain_AGB_mangrove), '.')  # annual AGB gain rate tiles
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.annual_gain_BGB_mangrove_dir, tile, cn.pattern_annual_gain_BGB_mangrove), '.')  # annual BGB gain rate tiles
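### Illustrative sketch (not part of the pipeline): the per-pixel arithmetic that the header above describes is
### cumulative carbon gain = annual biomass gain rate x number of gain years x biomass-to-carbon conversion.
### The rasterio-based function below is a hypothetical minimal version of that calculation; the function name,
### single-band layout, and the 0.47 carbon fraction are assumptions, not the cumulative_gain_mangrove internals.

import rasterio

def cumul_gain_sketch(gain_rate_tif, gain_year_count_tif, out_tif, c_fraction=0.47):
    with rasterio.open(gain_rate_tif) as rate_src, rasterio.open(gain_year_count_tif) as years_src:
        kwargs = rate_src.meta
        kwargs.update(dtype=rasterio.float32)
        rate = rate_src.read(1)     # annual biomass gain rate (Mg biomass/ha/yr)
        years = years_src.read(1)   # number of years of gain (0-15)
        # Cumulative carbon gain (Mg C/ha) = rate x years x carbon fraction
        cumul_gain = rate * years * c_fraction
        with rasterio.open(out_tif, 'w', **kwargs) as dst:
            dst.write(cumul_gain.astype(rasterio.float32), 1)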
### This script creates mangrove biomass tiles on the 10x10 degree Hansen grid from the raw mangrove biomass rasters.

import multiprocessing
import subprocess
import os
import utilities
import mangrove_processing
import sys
sys.path.append('../')
import constants_and_names as cn
import universal_util as uu

# Downloads zipped raw mangrove files
uu.s3_file_download(os.path.join(cn.mangrove_biomass_raw_dir, cn.mangrove_biomass_raw_file), '.')

# Unzips mangrove images into a flat structure (all tifs into main folder using -j argument)
# NOTE: Unzipping some tifs (e.g., Australia, Indonesia) takes a very long time, so don't worry if the script appears to stall there.
cmd = ['unzip', '-j', cn.mangrove_biomass_raw_file]
subprocess.check_call(cmd)

# Creates a vrt of all the raw mangrove tifs
utilities.build_vrt(utilities.mangrove_vrt)

# Iterates through all possible tiles (not just WHRC biomass tiles) to create mangrove biomass tiles that don't have analogous WHRC tiles
total_tile_list = uu.tile_list(cn.pixel_area_dir)
# total_tile_list = ['00N_000E', '20S_120W', '00N_120E'] # test tiles
print total_tile_list

# For multiprocessor use
# This script worked with count/4 on an r3.16xlarge machine.
count = multiprocessing.cpu_count()
pool = multiprocessing.Pool(processes=count / 4)
pool.map(mangrove_processing.create_mangrove_tiles, total_tile_list)

# # For single processor use, for testing purposes
# for tile in total_tile_list:
#
#     mangrove_processing.create_mangrove_tiles(tile)
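### Illustrative sketch (not part of the pipeline): one plausible way create_mangrove_tiles could clip a 10x10 degree
### Hansen-grid tile out of the mangrove vrt. Parsing the northwest corner out of a tile ID like '00N_110E', the
### 0.00025 degree output resolution, and the gdalwarp call are assumptions about the helper's internals.

import subprocess

def clip_tile_sketch(tile_id, vrt, out_tif):
    # Tile IDs name the northwest corner, e.g. '10N_080W' -> ymax = 10, xmin = -80
    ymax = int(tile_id[:2]) * (1 if tile_id[2] == 'N' else -1)
    xmin = int(tile_id[4:7]) * (1 if tile_id[7] == 'E' else -1)
    ymin = ymax - 10
    xmax = xmin + 10
    # Clips the vrt to the 10x10 degree window (-te xmin ymin xmax ymax)
    cmd = ['gdalwarp', '-te', str(xmin), str(ymin), str(xmax), str(ymax),
           '-tr', '0.00025', '0.00025', '-tap', '-co', 'COMPRESS=LZW', vrt, out_tif]
    subprocess.check_call(cmd)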
### This script calculates the cumulative above and belowground carbon gain in non-mangrove, non-planted natural forest pixels from 2001-2015.
### It multiplies the annual biomass gain rate by the number of years of gain and by the biomass-to-carbon conversion.

import multiprocessing
import cumulative_gain_natrl_forest
import sys
sys.path.append('../')
import constants_and_names as cn
import universal_util as uu

biomass_tile_list = uu.tile_list(cn.WHRC_biomass_2000_non_mang_non_planted_dir)
# biomass_tile_list = ['20S_110E', '30S_110E'] # test tiles
# biomass_tile_list = ['20S_110E'] # test tiles
print biomass_tile_list
print "There are {} tiles to process".format(str(len(biomass_tile_list)))

# For downloading all tiles in the input folders
download_list = [cn.annual_gain_AGB_natrl_forest_dir, cn.annual_gain_BGB_natrl_forest_dir, cn.gain_year_count_natrl_forest_dir]

for input in download_list:
    uu.s3_folder_download(input, '.')

# # For copying individual tiles to spot machine for testing
# for tile in biomass_tile_list:
#
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.annual_gain_AGB_natrl_forest_dir, tile, cn.pattern_annual_gain_AGB_natrl_forest), '.')  # annual AGB gain rate tiles
#     uu.s3_file_download('{0}{1}_{2}.tif'.format(cn.annual_gain_BGB_natrl_forest_dir, tile, cn.pattern_annual_gain_BGB_natrl_forest), '.')  # annual BGB gain rate tiles
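### Illustrative sketch (not part of the pipeline): uu.tile_list is used by every script in this section to enumerate
### the tile IDs in an s3 folder. A hypothetical minimal version might list the folder with the aws cli and keep the
### 'XXN_XXXE'-style prefix of each tif, as below; the real universal_util implementation may differ.

import subprocess

def tile_list_sketch(s3_dir):
    # Lists the s3 folder and keeps the tile ID (first 8 characters, e.g. '00N_110E') of each tif
    out = subprocess.check_output(['aws', 's3', 'ls', s3_dir]).decode('utf-8')
    tiles = [line.split()[-1][:8] for line in out.splitlines() if line.strip().endswith('.tif')]
    return sorted(set(tiles))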
### Calculates the net emissions over the study period, with units of CO2/ha on a pixel-by-pixel basis

import multiprocessing
import utilities
import net_emissions
import sys
sys.path.append('../')
import constants_and_names as cn
import universal_util as uu

biomass_tile_list = uu.tile_list(cn.natrl_forest_biomass_2000_dir)
# biomass_tile_list = ['10N_080W', '40N_120E'] # test tiles
# biomass_tile_list = ['00N_000E'] # test tiles
print biomass_tile_list
print "There are {} tiles to process".format(str(len(biomass_tile_list)))

# For downloading all tiles in the input folders
download_list = [cn.cumul_gain_combo_dir, cn.gross_emissions_dir]

for input in download_list:
    utilities.s3_folder_download('{}'.format(input), '.')

# # For copying individual tiles to spot machine for testing
# for tile in biomass_tile_list:
#
#     utilities.s3_file_download('{0}{1}_{2}.tif'.format(cn.cumul_gain_combo_dir, tile, cn.pattern_cumul_gain_combo), '.')  # cumulative above and belowground carbon gain for all forest types
#     utilities.s3_file_download('{0}{1}_{2}.tif'.format(cn.gross_emissions_dir, tile, cn.pattern_gross_emissions), '.')  # emissions from all drivers

count = multiprocessing.cpu_count()
pool = multiprocessing.Pool(count / 4)
pool.map(net_emissions.net_calc, biomass_tile_list)
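### Illustrative sketch (not part of the pipeline): net_emissions.net_calc nets gross emissions against cumulative gain
### for each pixel. The function below is a hypothetical minimal version of that arithmetic; the assumption that gain is
### stored as carbon and converted to CO2 with 44/12, and the input units, are illustrative, not confirmed internals.

import numpy as np

C_TO_CO2 = 44. / 12.  # molecular weight ratio for converting carbon to CO2 (assumed input units)

def net_calc_sketch(gross_emissions_co2, cumul_gain_c):
    # Net emissions (CO2/ha) = gross emissions (CO2/ha) - cumulative gain (C/ha, converted to CO2/ha).
    # Negative values are net removals.
    return gross_emissions_co2 - cumul_gain_c * C_TO_CO2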
### Creates tiles of annual above and belowground biomass gain rates for mangroves, based on mangrove biomass and FAO ecozones.

from multiprocessing.pool import Pool
from functools import partial
import annual_gain_rate_mangrove
import pandas as pd
import subprocess
import os
import sys
sys.path.append('../')
import constants_and_names as cn
import universal_util as uu

pd.options.mode.chained_assignment = None

# Lists the tiles that have both mangrove biomass and FAO ecozone information because both of these are necessary for
# calculating mangrove gain
mangrove_biomass_tile_list = uu.tile_list(cn.mangrove_biomass_2000_dir)
ecozone_tile_list = uu.tile_list(cn.cont_eco_dir)
mangrove_ecozone_list = list(set(mangrove_biomass_tile_list).intersection(ecozone_tile_list))
# mangrove_ecozone_list = ['10N_080W', '00N_110E'] # test tiles
# mangrove_ecozone_list = ['10N_080W'] # test tiles
print mangrove_ecozone_list
print "There are {} tiles to process".format(str(len(mangrove_ecozone_list)))

# For downloading all tiles in the input folders
download_list = [cn.cont_eco_dir, cn.mangrove_biomass_2000_dir]

for input in download_list:
    uu.s3_folder_download(input, '.')

# # For copying individual tiles to spot machine for testing
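### Illustrative sketch (not part of the pipeline): assigning a mangrove gain rate per pixel from the continent-ecozone
### raster amounts to a reclassification. The function below, the rate dictionary, and the example values are all
### assumptions for illustration, not the annual_gain_rate_mangrove internals.

import numpy as np

def assign_gain_rate_sketch(cont_eco_array, rate_by_zone):
    # rate_by_zone maps continent-ecozone codes to annual biomass gain rates (Mg/ha/yr), e.g. {1: 2.2, 2: 1.8}
    out = np.zeros(cont_eco_array.shape, dtype=np.float32)
    for zone, rate in rate_by_zone.items():
        out[cont_eco_array == zone] = rate
    return out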
### they are the best we have for parsing planted forests into the component values.
### We want to separate the above+below rate into above and below and convert to biomass so that we can make global
### maps of annual above and below biomass gain rates separately; the natural forests and mangroves already use
### separate above and below annual biomass gain rate files, so this brings planted forests into line with them.

import multiprocessing
import annual_gain_rate_planted_forest
import pandas as pd
import sys
sys.path.append('../')
import constants_and_names as cn
import universal_util as uu

pd.options.mode.chained_assignment = None

tile_list = uu.tile_list(cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir)
# tile_list = ['80N_020E', '00N_000E', '00N_020E', '00N_110E'] # test tiles: no mangrove or planted forest, mangrove only, planted forest only, mangrove and planted forest
# tile_list = ['00N_020E', '00N_110E'] # test tiles: mangrove and planted forest
print tile_list
print "There are {} tiles to process".format(str(len(tile_list)))

# For downloading all tiles in the input folders
download_list = [cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir, cn.mangrove_biomass_2000_dir]

for input in download_list:
    uu.s3_folder_download(input, '.')

# # For copying individual tiles to spot machine for testing
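### Illustrative sketch (not part of the pipeline): splitting a combined above+belowground carbon gain rate into separate
### aboveground and belowground biomass gain rates, as the header above describes. The belowground:aboveground ratio and
### carbon fraction below are placeholder values, not the model's constants, and the function is an assumption.

def split_agc_bgc_sketch(agc_bgc_rate, below_to_above_ratio=0.26, c_fraction=0.47):
    # Aboveground carbon share of the combined rate, then converted from carbon to biomass
    agc_rate = agc_bgc_rate / (1. + below_to_above_ratio)
    agb_rate = agc_rate / c_fraction              # annual aboveground biomass gain rate
    bgb_rate = agb_rate * below_to_above_ratio    # annual belowground biomass gain rate
    return agb_rate, bgb_rate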
### Masks out mangrove and planted forest pixels from the WHRC biomass 2000 raster so that
### only non-mangrove, non-planted forest pixels are left in the WHRC biomass 2000 raster

import multiprocessing
import non_mangrove_non_planted_WHRC_biomass_2000
import pandas as pd
import sys
sys.path.append('../')
import constants_and_names as cn
import universal_util as uu

biomass_tile_list = uu.tile_list(cn.WHRC_biomass_2000_unmasked_dir)
# biomass_tile_list = ['80N_020E', '00N_000E', '00N_020E', '00N_110E'] # test tiles: no mangrove or planted forest, mangrove only, planted forest only, mangrove and planted forest
# biomass_tile_list = ['80N_020E', '00N_020E', '00N_000E', '00N_110E'] # test tiles: no mangrove or planted forest, planted forest only, mangrove only, mangrove and planted forest
# biomass_tile_list = ['00N_000E']
print biomass_tile_list
print "There are {} tiles to process".format(str(len(biomass_tile_list)))

# For downloading all tiles in the input folders.
# Mangrove biomass and full-extent planted forests are used to mask mangroves and planted forests out of the natural forests.
download_list = [cn.mangrove_biomass_2000_dir, cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir, cn.WHRC_biomass_2000_unmasked_dir]

for input in download_list:
    uu.s3_folder_download(input, '.')

# # For copying individual tiles to spot machine for testing
# for tile in biomass_tile_list:
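### Illustrative sketch (not part of the pipeline): the masking described above amounts to setting WHRC biomass to nodata
### wherever the mangrove biomass or planted forest gain rate rasters have data. A hypothetical numpy version (the
### function name and the 0 nodata value are assumptions):

import numpy as np

def mask_biomass_sketch(whrc_biomass, mangrove_biomass, planted_gain_rate, nodata=0):
    # Keeps WHRC biomass only where neither mangrove nor planted forest is present
    masked = whrc_biomass.copy()
    masked[(mangrove_biomass != nodata) | (planted_gain_rate != nodata)] = nodata
    return masked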
### The gain years for each of these conditions are calculated according to rules that are found in the function called by the multiprocessor commands.
### At this point, those rules are the same as for mangrove forests.
### Then it combines those four rasters into a single gain year raster for each tile using gdal_merge.
### More gdal_calc commands can be run at the same time than gdal_merge commands, which is why more processors are used
### for the first four processing steps (which use gdal_calc).
### If different input rasters for loss (e.g., 2001-2017) and gain (e.g., 2000-2018) are used, the year count constants in constants_and_names.py must be changed.

import multiprocessing
import gain_year_count_planted_forest
import sys
sys.path.append('../')
import constants_and_names as cn
import universal_util as uu

# The list of tiles to iterate through
tile_list = uu.tile_list(cn.annual_gain_AGB_planted_forest_non_mangrove_dir)
# tile_list = ['10N_080W'] # test tile
print tile_list
print "There are {} tiles to process".format(str(len(tile_list)))

# For downloading all tiles in the folders
download_list = [cn.loss_dir, cn.gain_dir, cn.ifl_dir, cn.annual_gain_AGB_planted_forest_non_mangrove_dir]

for input in download_list:
    uu.s3_folder_download(input, '.')

# # For copying individual tiles to s3 for testing
# for tile in tile_list:
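### Illustrative sketch (not part of the pipeline): the header above describes four loss/gain conditions whose gain year
### rasters are later merged. One common rule set, stated here purely as an assumption rather than the model's exact
### rules: no loss and no gain accrues the full period; loss only accrues years up to the year before loss; gain only
### accrues half the period; loss and gain accrues half the years up to loss.

import numpy as np

def gain_year_count_sketch(loss_year, gain, n_years=15):
    # loss_year: 0 = no loss, 1-15 = year of loss; gain: 0/1 gain flag
    loss_only = (loss_year > 0) & (gain == 0)
    gain_only = (loss_year == 0) & (gain == 1)
    loss_and_gain = (loss_year > 0) & (gain == 1)
    years = np.full(loss_year.shape, n_years, dtype=np.uint8)  # neither loss nor gain: full period
    years[loss_only] = loss_year[loss_only] - 1                # gain years up to the year before loss
    years[gain_only] = n_years // 2                            # growth assumed for half the period
    years[loss_and_gain] = loss_year[loss_and_gain] // 2       # half the years up to loss
    return years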
def main():

    parser = argparse.ArgumentParser(description='Create planted forest carbon gain rate tiles')
    parser.add_argument('--gadm-tile-index', '-gi', required=True,
                        help='Shapefile of 1x1 degree tiles of countries that contain planted forests (i.e. countries with planted forests rasterized to 1x1 deg). If no shapefile, write None.')
    parser.add_argument('--planted-tile-index', '-pi', required=True,
                        help='Shapefile of 1x1 degree tiles that contain planted forests (i.e. planted forest extent rasterized to 1x1 deg). If no shapefile, write None.')
    args = parser.parse_args()

    # Creates the directory and shapefile names for the two possible arguments (index shapefiles)
    gadm_index = os.path.split(args.gadm_tile_index)
    gadm_index_path = gadm_index[0]
    gadm_index_shp = gadm_index[1]
    gadm_index_shp = gadm_index_shp[:-4]
    planted_index = os.path.split(args.planted_tile_index)
    planted_index_path = planted_index[0]
    planted_index_shp = planted_index[1]
    planted_index_shp = planted_index_shp[:-4]

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (gadm_index_path not in cn.gadm_plant_1x1_index_dir or planted_index_path not in cn.gadm_plant_1x1_index_dir):
        raise Exception('Invalid inputs. Please provide None or s3 shapefile locations for both arguments.')

    # List of all possible 10x10 Hansen tiles except for those at very extreme latitudes (not just WHRC biomass tiles)
    total_tile_list = uu.tile_list(cn.pixel_area_dir)
    print "Number of possible 10x10 tiles to evaluate:", len(total_tile_list)

    # Removes the latitude bands that don't have any planted forests in them according to Liz Goldman.
    # i.e., Liz Goldman said by Slack on 1/2/19 that the northernmost planted forest is at 69.5146 and the southernmost is at -46.938968.
    # This creates a more focused list of 10x10 tiles to iterate through (removes ones that definitely don't have planted forest).
    # NOTE: If the planted forest gdb is updated, the list of latitudes to exclude below may need to be changed so that it does not exclude certain latitude bands.
    planted_lat_tile_list = [tile for tile in total_tile_list if '90N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '50S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '60S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '70S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80S' not in tile]
    # planted_lat_tile_list = ['10N_080W'] # test tile
    print planted_lat_tile_list
    print "Number of 10x10 tiles to evaluate after extreme latitudes have been removed:", len(planted_lat_tile_list)

    # If a planted forest extent 1x1 tile index shapefile isn't supplied
    if 'None' in args.planted_tile_index:

        ### Entry point 1:
        # If no shapefile of 1x1 tiles for countries with planted forests is supplied, 1x1 tiles of country extents will be created.
        # This runs the process from the very beginning and will take a few days.
        if 'None' in args.gadm_tile_index:

            print "No GADM 1x1 tile index shapefile provided. Creating 1x1 planted forest country tiles from scratch..."

            # Downloads and unzips the GADM shapefile, which will be used to create 1x1 tiles of land areas
            uu.s3_file_download(cn.gadm_path, '.')
            cmd = ['unzip', cn.gadm_zip]
            subprocess.check_call(cmd)

            # Creates a new GADM shapefile with just the countries that have planted forests in them.
            # This limits creation of 1x1 rasters of land area to the countries that have planted forests rather than all countries.
            # NOTE: If the planted forest gdb is updated and has new countries added to it, the planted forest country list
            # in constants_and_names.py must be updated, too.
            print "Creating shapefile of countries with planted forests..."
            os.system('''ogr2ogr -sql "SELECT * FROM gadm_3_6_adm2_final WHERE iso IN ({0})" {1} gadm_3_6_adm2_final.shp'''.format(str(cn.plantation_countries)[1:-1], cn.gadm_iso))

            # Creates 1x1 degree tiles of countries that have planted forests in them.
            # I assume this can handle using 50 processors because it's not trying to upload files to s3 and the tiles are small.
            # This takes several days to run because it iterates through at least 250 10x10 tiles.
            # For multiprocessor use.
            num_of_processes = 50
            pool = Pool(num_of_processes)
            pool.map(plantation_preparation.rasterize_gadm_1x1, planted_lat_tile_list)
            pool.close()
            pool.join()

            # # Creates 1x1 degree tiles of countries that have planted forests in them.
            # # For single processor use.
            # for tile in planted_lat_tile_list:
            #
            #     plantation_preparation.rasterize_gadm_1x1(tile)

            # Creates a shapefile of the boundaries of the 1x1 GADM tiles in countries with planted forests
            os.system('''gdaltindex {0}_{1}.shp GADM_*.tif'''.format(cn.pattern_gadm_1x1_index, uu.date))
            cmd = ['aws', 's3', 'cp', '.', cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_gadm_1x1_index), '--recursive']
            subprocess.check_call(cmd)

            # # Saves the 1x1 country extent tiles to s3
            # # Only use if the entire process can't run in one go on the spot machine
            # cmd = ['aws', 's3', 'cp', '.', 's3://gfw2-data/climate/carbon_model/temp_spotmachine_output/', '--exclude', '*', '--include', 'GADM_*.tif', '--recursive']
            # subprocess.check_call(cmd)

            # Deletes the aux.xml files
            os.system('''rm GADM*.tif.*''')

            # List of all 1x1 degree country extent tiles created
            gadm_list_1x1 = uu.tile_list_spot_machine(".", "GADM_")
            print "List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1
            print len(gadm_list_1x1)

        ### Entry point 2:
        # If a shapefile of the boundaries of 1x1 degree tiles of countries with planted forests is supplied,
        # a list of the 1x1 tiles is created from the shapefile.
        # This avoids creating the 1x1 country extent tiles all over again because the relevant tile extents are supplied
        # in the shapefile.
        elif cn.gadm_plant_1x1_index_dir in args.gadm_tile_index:

            print "Country extent 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest tiles..."
            # Copies the shapefile of 1x1 tiles of the extent of countries with planted forests
            cmd = ['aws', 's3', 'cp', '{}/'.format(gadm_index_path), '.', '--exclude', '*', '--include', '{}*'.format(gadm_index_shp), '--recursive']
            subprocess.check_call(cmd)

            # Gets the attribute table of the country extent 1x1 tile shapefile
            gadm = glob.glob('{}*.dbf'.format(cn.pattern_gadm_1x1_index))[0]

            # Converts the attribute table to a dataframe
            dbf = Dbf5(gadm)
            df = dbf.to_dataframe()

            # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
            gadm_list_1x1 = df['location'].tolist()
            gadm_list_1x1 = [str(y) for y in gadm_list_1x1]
            print "List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1
            print "There are", len(gadm_list_1x1), "1x1 country extent tiles to iterate through."

        # In case some other arguments are provided
        else:
            raise Exception('Invalid GADM tile index shapefile provided. Please provide a valid shapefile.')

        # Creates 1x1 degree tiles of plantation growth wherever there are plantations.
        # Because this is iterating through all 1x1 tiles in countries with planted forests, it first checks
        # whether each 1x1 tile intersects planted forests before creating a 1x1 planted forest tile for that
        # 1x1 country extent tile.
        # For multiprocessor use
        num_of_processes = 30
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_from_1x1_gadm, gadm_list_1x1)
        pool.close()
        pool.join()

        # # Creates 1x1 degree tiles of plantation growth wherever there are plantations
        # # For single processor use
        # for tile in gadm_list_1x1:
        #
        #     plantation_preparation.create_1x1_plantation_from_1x1_gadm(tile)

        # Creates a shapefile in which each feature is the extent of a plantation extent tile.
        # This index shapefile can be used the next time this process is run if starting with Entry Point 3.
        os.system('''gdaltindex {0}_{1}.shp plant_*.tif'''.format(cn.pattern_plant_1x1_index, uu.date))
        cmd = ['aws', 's3', 'cp', '.', cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_plant_1x1_index), '--recursive']
        subprocess.check_call(cmd)

    ### Entry point 3:
    # If a shapefile of the extents of 1x1 planted forest tiles is provided
    if cn.pattern_plant_1x1_index in args.planted_tile_index:

        print "Planted forest 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest growth tiles..."

        # Copies the shapefile of 1x1 tiles of the extent of planted forests
        cmd = ['aws', 's3', 'cp', '{}/'.format(planted_index_path), '.', '--exclude', '*', '--include', '{}*'.format(planted_index_shp), '--recursive']
        subprocess.check_call(cmd)

        # Gets the attribute table of the planted forest extent 1x1 tile shapefile
        planted = glob.glob('{}*.dbf'.format(cn.pattern_plant_1x1_index))[0]

        # Converts the attribute table to a dataframe
        dbf = Dbf5(planted)
        df = dbf.to_dataframe()

        # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
        planted_list_1x1 = df['location'].tolist()
        planted_list_1x1 = [str(y) for y in planted_list_1x1]
        print "List of 1x1 degree tiles of planted forest extent, with defining coordinate in the northwest corner:", planted_list_1x1
        print "There are", len(planted_list_1x1), "1x1 planted forest extent tiles to iterate through."

        # Creates 1x1 degree tiles of plantation growth wherever there are plantations.
        # Because this is iterating through only 1x1 tiles that are known to have planted forests (from a previous run
        # of this script), it does not need to check whether there are planted forests in this tile. It goes directly
        # to intersecting the planted forest table with the 1x1 tile.
        # For multiprocessor use
        # This worked with 30 processors on an r4.16xlarge.
        num_of_processes = 50
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()

    ### All entry points meet here: creation of 10x10 degree planted forest tiles from 1x1 degree planted forest tiles

    # Name of the vrt of 1x1 planted forest tiles
    plant_1x1_vrt = 'plant_1x1.vrt'

    # Creates a mosaic of all the 1x1 plantation growth rate tiles
    print "Creating vrt of 1x1 plantation growth rate tiles"
    os.system('gdalbuildvrt {} plant_*.tif'.format(plant_1x1_vrt))

    # Creates 10x10 degree tiles of plantation growth by iterating over the pixel area tiles that are in latitudes with planted forests
    # For multiprocessor use
    num_of_processes = 20
    pool = Pool(num_of_processes)
    pool.map(partial(plantation_preparation.create_10x10_plantation, plant_1x1_vrt=plant_1x1_vrt), planted_lat_tile_list)
    pool.close()
    pool.join()
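### Usage note (illustrative, not from the source): given the argparse flags above, the three entry points would be
### selected by argument combinations along these lines (the script name and s3 paths are placeholders):
###   python mp_plantation_preparation.py -gi None -pi None
###       -> Entry point 1: rasterize GADM country extents to 1x1 tiles from scratch
###   python mp_plantation_preparation.py -gi s3://<gadm_plant_1x1_index_dir>/<gadm_index>.shp -pi None
###       -> Entry point 2: reuse the country extent 1x1 tile index
###   python mp_plantation_preparation.py -gi None -pi s3://<gadm_plant_1x1_index_dir>/<plant_index>.shp
###       -> Entry point 3: reuse the planted forest extent 1x1 tile index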