Ejemplos de tile_list_spot_machine en Python

Lenguaje de programación: Python

Namespace/Package Name: universal_util

Método / Función: tile_list_spot_machine

Ejemplos en hotexamples.com: 5

Python tile_list_spot_machine - 5 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de universal_util.tile_list_spot_machine extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

Mostrar archivo

Archivo: mp_plantation_preparation.py Proyecto: xiajz/carbon-budget

def mp_plantation_preparation(gadm_index_shp, planted_index_shp):

    os.chdir(cn.docker_base_dir)

    # ## Not actually using this but leaving it here in case I want to add this functionality eventually. This
    # # was to allow users to run plantations for a select (contiguous) area rather than for the whole planet.
    # # List of bounding box coordinates
    # bound_list = args.bounding_box
    # # Checks if bounding box coordinates are in multiples of 10 (10 degree tiles). If they're not, the script stops.
    # for bound in bound_list:
    #     if bound%10:
    #         uu.exception_log(bound, 'not a multiple of 10. Please make bounding box coordinates are multiples of 10.')

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (gadm_index_path not in cn.gadm_plant_1x1_index_dir or planted_index_path not in cn.gadm_plant_1x1_index_dir):
        uu.exception_log('Invalid inputs. Please provide None or s3 shapefile locations for both arguments.')

    # List of all possible 10x10 Hansen tiles except for those at very extreme latitudes (not just WHRC biomass tiles)
    total_tile_list = uu.tile_list_s3(cn.pixel_area_dir)
    uu.print_log("Number of possible 10x10 tiles to evaluate:", len(total_tile_list))

    # Removes the latitude bands that don't have any planted forests in them according to Liz Goldman.
    # i.e., Liz Goldman said by Slack on 1/2/19 that the nothernmost planted forest is 69.5146 and the southernmost is -46.938968.
    # This creates a more focused list of 10x10 tiles to iterate through (removes ones that definitely don't have planted forest).
    # NOTE: If the planted forest gdb is updated, the list of latitudes to exclude below may need to be changed to not exclude certain latitude bands.
    planted_lat_tile_list = [tile for tile in total_tile_list if '90N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80N' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '50S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '60S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '70S' not in tile]
    planted_lat_tile_list = [tile for tile in planted_lat_tile_list if '80S' not in tile]
    # planted_lat_tile_list = ['10N_080W']

    uu.print_log(planted_lat_tile_list)
    uu.print_log("Number of 10x10 tiles to evaluate after extreme latitudes have been removed:", len(planted_lat_tile_list))


    # If a planted forest extent 1x1 tile index shapefile isn't supplied
    if 'None' in args.planted_tile_index:

        ### Entry point 1:
        # If no shapefile of 1x1 tiles for countries with planted forests is supplied, 1x1 tiles of country extents will be created.
        # This runs the process from the very beginning and will take a few days.
        if 'None' in args.gadm_tile_index:

            uu.print_log("No GADM 1x1 tile index shapefile provided. Creating 1x1 planted forest country tiles from scratch...")

            # Downloads and unzips the GADM shapefile, which will be used to create 1x1 tiles of land areas
            uu.s3_file_download(cn.gadm_path, cn.docker_base_dir)
            cmd = ['unzip', cn.gadm_zip]
            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Creates a new GADM shapefile with just the countries that have planted forests in them.
            # This limits creation of 1x1 rasters of land area on the countries that have planted forests rather than on all countries.
            # NOTE: If the planted forest gdb is updated and has new countries added to it, the planted forest country list
            # in constants_and_names.py must be updated, too.
            uu.print_log("Creating shapefile of countries with planted forests...")
            os.system('''ogr2ogr -sql "SELECT * FROM gadm_3_6_adm2_final WHERE iso IN ({0})" {1} gadm_3_6_adm2_final.shp'''.format(str(cn.plantation_countries)[1:-1], cn.gadm_iso))

            # Creates 1x1 degree tiles of countries that have planted forests in them.
            # I think this can handle using 50 processors because it's not trying to upload files to s3 and the tiles are small.
            # This takes several days to run because it iterates through at least 250 10x10 tiles.
            # For multiprocessor use.
            processes = 50
            uu.print_log('Rasterize GADM 1x1 max processors=', processes)
            pool = Pool(processes)
            pool.map(plantation_preparation.rasterize_gadm_1x1, planted_lat_tile_list)
            pool.close()
            pool.join()

            # # Creates 1x1 degree tiles of countries that have planted forests in them.
            # # For single processor use.
            # for tile in planted_lat_tile_list:
            #
            #     plantation_preparation.rasterize_gadm_1x1(tile)

            # Creates a shapefile of the boundaries of the 1x1 GADM tiles in countries with planted forests
            os.system('''gdaltindex {0}_{1}.shp GADM_*.tif'''.format(cn.pattern_gadm_1x1_index, uu.date_time_today))
            cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_gadm_1x1_index), '--recursive']

            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)


            # # Saves the 1x1 country extent tiles to s3
            # # Only use if the entire process can't run in one go on the spot machine
            # cmd = ['aws', 's3', 'cp', cn.docker_base_dir, 's3://gfw2-data/climate/carbon_model/temp_spotmachine_output/', '--exclude', '*', '--include', 'GADM_*.tif', '--recursive']

            # # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            # process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            # with process.stdout:
            #     uu.log_subprocess_output(process.stdout)


            # Delete the aux.xml files
            os.system('''rm GADM*.tif.*''')

            # List of all 1x1 degree countey extent tiles created
            gadm_list_1x1 = uu.tile_list_spot_machine(".", "GADM_")
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log(len(gadm_list_1x1))

        ### Entry point 2:
        # If a shapefile of the boundaries of 1x1 degree tiles of countries with planted forests is supplied,
        # a list of the 1x1 tiles is created from the shapefile.
        # This avoids creating the 1x1 country extent tiles all over again because the relevant tile extent are supplied
        # in the shapefile.
        elif cn.gadm_plant_1x1_index_dir in args.gadm_tile_index:

            uu.print_log("Country extent 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest tiles...")

            uu.print_log('{}/'.format(gadm_index_path))

            # Copies the shapefile of 1x1 tiles of extent of countries with planted forests
            cmd = ['aws', 's3', 'cp', '{}/'.format(gadm_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include', '{}*'.format(gadm_index_shp)]

            # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
            process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
            with process.stdout:
                uu.log_subprocess_output(process.stdout)

            # Gets the attribute table of the country extent 1x1 tile shapefile
            gadm = glob.glob('{}*.dbf'.format(cn.pattern_gadm_1x1_index))[0]

            # Converts the attribute table to a dataframe
            dbf = Dbf5(gadm)
            df = dbf.to_dataframe()

            # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
            gadm_list_1x1 = df['location'].tolist()
            gadm_list_1x1 = [str(y) for y in gadm_list_1x1]
            uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1)
            uu.print_log("There are", len(gadm_list_1x1), "1x1 country extent tiles to iterate through.")

        # In case some other arguments are provided
        else:
            uu.exception_log('Invalid GADM tile index shapefile provided. Please provide a valid shapefile.')

        # Creates 1x1 degree tiles of plantation growth wherever there are plantations.
        # Because this is iterating through all 1x1 tiles in countries with planted forests, it first checks
        # whether each 1x1 tile intersects planted forests before creating a 1x1 planted forest tile for that
        # 1x1 country extent tile.
        # 55 processors seems to use about 350 GB of memory, which seems fine. But there was some error about "PQconnectdb failed-- sorry, too many clients already".
        # So, moved the number of processors down to 48.
        # For multiprocessor use
        processes = 48
        uu.print_log('Create 1x1 plantation from 1x1 gadm max processors=', processes)
        pool = Pool(processes)
        pool.map(plantation_preparation.create_1x1_plantation_from_1x1_gadm, gadm_list_1x1)
        pool.close()
        pool.join()

        # # Creates 1x1 degree tiles of plantation growth wherever there are plantations
        # # For single processor use
        # for tile in gadm_list_1x1:
        #
        #     plantation_preparation.create_1x1_plantation(tile)

        # Creates a shapefile in which each feature is the extent of a plantation extent tile.
        # This index shapefile can be used the next time this process is run if starting with Entry Point 3.
        os.system('''gdaltindex {0}_{1}.shp plant_gain_*.tif'''.format(cn.pattern_plant_1x1_index, uu.date_time_today))
        cmd = ['aws', 's3', 'cp', cn.docker_base_dir, cn.gadm_plant_1x1_index_dir, '--exclude', '*', '--include', '{}*'.format(cn.pattern_plant_1x1_index), '--recursive']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

    ### Entry point 3
    # If a shapefile of the extents of 1x1 planted forest tiles is provided.
    # This is the part that actually creates the sequestration rate and forest type tiles.
    
    if cn.pattern_plant_1x1_index in args.planted_tile_index:

        uu.print_log("Planted forest 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest growth rate and forest type tiles...")

        # Copies the shapefile of 1x1 tiles of extent of planted forests
        cmd = ['aws', 's3', 'cp', '{}/'.format(planted_index_path), cn.docker_base_dir, '--recursive', '--exclude', '*', '--include',
               '{}*'.format(planted_index_shp), '--recursive']

        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)


        # Gets the attribute table of the planted forest extent 1x1 tile shapefile
        gadm = glob.glob('{}*.dbf'.format(cn.pattern_plant_1x1_index))[0]

        # Converts the attribute table to a dataframe
        dbf = Dbf5(gadm)
        df = dbf.to_dataframe()

        # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
        planted_list_1x1 = df['location'].tolist()
        planted_list_1x1 = [str(y) for y in planted_list_1x1]
        uu.print_log("List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", planted_list_1x1)
        uu.print_log("There are", len(planted_list_1x1), "1x1 planted forest extent tiles to iterate through.")

        # Creates 1x1 degree tiles of plantation growth and type wherever there are plantations.
        # Because this is iterating through only 1x1 tiles that are known to have planted forests (from a previous run
        # of this script), it does not need to check whether there are planted forests in this tile. It goes directly
        # to intersecting the planted forest table with the 1x1 tile.

        # For single processor use
        #for tile in planted_list_1x1:
        #    plantation_preparation.create_1x1_plantation_growth_from_1x1_planted(tile)

        # For multiprocessor use
        # processes=40 uses about 360 GB of memory. Works on r4.16xlarge with space to spare
      	# processes=52 uses about 465 GB of memory (quite stably), so this is basically the max.
        num_of_processes = 52
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_growth_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()

        # This works with 50 processors on an r4.16xlarge marchine. Uses about 430 GB out of 480 GB.
        num_of_processes = 52
        pool = Pool(num_of_processes)
        processes = 50
        uu.print_log('Create 1x1 plantation type max processors=', processes)
        pool = Pool(processes)
        pool.map(plantation_preparation.create_1x1_plantation_type_from_1x1_planted, planted_list_1x1)
        pool.close()
        pool.join()

        # This rasterizes the plantation removal factor standard deviations 
	      # processes=50 peaks at about 450 GB
        num_of_processes = 50
    	  pool = Pool(num_of_processes)
	      pool.map(plantation_preparation.create_1x1_plantation_stdev_from_1x1_planted, planted_list_1x1)
	      pool.close()
	      pool.join()

Ejemplo n.º 2

Mostrar archivo

Archivo: mp_tile_statistics.py Proyecto: xiaotuxiaotu520/carbon-budget

def mp_tile_statistics(sensit_type, tile_id_list):

    os.chdir(cn.docker_base_dir)

    # The column names for the tile summary statistics.
    # If the statistics calculations are changed in tile_statistics.py, the list here needs to be changed, too.
    headers = [
        'tile_id', 'tile_type', 'tile_name', 'pixel_count', 'mean', 'median',
        'percentile10', 'percentile25', 'percentile75', 'percentile90', 'min',
        'max', 'sum'
    ]
    header_no_brackets = ', '.join(headers)

    tile_stats_txt = '{0}_v{1}_{2}_{3}.csv'.format(cn.tile_stats_pattern,
                                                   cn.version, sensit_type,
                                                   uu.date_time_today)

    # Creates the output text file with the column names
    with open(tile_stats_txt, 'w+') as f:
        f.write(header_no_brackets + '\r\n')
    f.close()

    uu.print_log(tile_id_list)

    # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
    uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                            cn.docker_base_dir, 'std', tile_id_list)

    # For downloading all tiles in selected folders
    download_dict = {
        # cn.WHRC_biomass_2000_unmasked_dir: [cn.pattern_WHRC_biomass_2000_unmasked],
        # cn.JPL_processed_dir: [cn.pattern_JPL_unmasked_processed],
        # cn.mangrove_biomass_2000_dir: [cn.pattern_mangrove_biomass_2000],
        # cn.cont_eco_dir: [cn.pattern_cont_eco_processed],

        # cn.model_extent_dir: [cn.pattern_model_extent], # 15 = 370 GB peak
        #
        # # Mangrove removals
        # cn.annual_gain_AGB_mangrove_dir: [cn.pattern_annual_gain_AGB_mangrove], # 15 = 640 GB peak
        # cn.annual_gain_BGB_mangrove_dir: [cn.pattern_annual_gain_BGB_mangrove], # 15 = 640 GB peak
        cn.stdev_annual_gain_AGB_mangrove_dir:
        [cn.pattern_stdev_annual_gain_AGB_mangrove],  # 15 = 640 GB peak
        #
        # # European forest removals
        # cn.annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe], # 15 = 630 GB peak
        # cn.stdev_annual_gain_AGC_BGC_natrl_forest_Europe_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe], # 15 = 630 GB peak
        #
        # # Planted forest removals
        # cn.annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked], # 15 = 600 GB peak
        # cn.planted_forest_type_unmasked_dir: [cn.pattern_planted_forest_type_unmasked], # 15 = 360 GB peak
        # cn.stdev_annual_gain_AGC_BGC_planted_forest_unmasked_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_planted_forest_unmasked], # 15 = 600 GB peak
        #
        # # US forest removals
        # cn.FIA_regions_processed_dir: [cn.pattern_FIA_regions_processed], # 15 = 350 GB peak
        # cn.FIA_forest_group_processed_dir: [cn.pattern_FIA_forest_group_processed], # 15 = 340 GB peak
        # cn.age_cat_natrl_forest_US_dir: [cn.pattern_age_cat_natrl_forest_US], # 15 = 350 GB peak
        # cn.annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_annual_gain_AGC_BGC_natrl_forest_US], # 15 = 620 GB peak
        # # cn.stdev_annual_gain_AGC_BGC_natrl_forest_US_dir: [cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_US],
        #
        # # Young natural forest removals
        # cn.annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_annual_gain_AGC_natrl_forest_young], # 15 = 710 GB peak
        # cn.stdev_annual_gain_AGC_natrl_forest_young_dir: [cn.pattern_stdev_annual_gain_AGC_natrl_forest_young], # 15 = 700 GB peak
        #
        # # IPCC defaults forest removals
        # cn.age_cat_IPCC_dir: [cn.pattern_age_cat_IPCC], # 15 = 330 GB peak
        # cn.annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_annual_gain_AGB_IPCC_defaults], # 15 = 620 GB peak
        # cn.annual_gain_BGB_IPCC_defaults_dir: [cn.pattern_annual_gain_BGB_IPCC_defaults], # 15 = 620 GB peak
        # cn.stdev_annual_gain_AGB_IPCC_defaults_dir: [cn.pattern_stdev_annual_gain_AGB_IPCC_defaults], # 15 = 620 GB peak
        #
        # # Annual removals from all forest types
        # cn.annual_gain_AGC_all_types_dir: [cn.pattern_annual_gain_AGC_all_types], # 15 = XXX GB peak
        # cn.annual_gain_BGC_all_types_dir: [cn.pattern_annual_gain_BGC_all_types], # 15 > 550 GB peak
        # cn.annual_gain_AGC_BGC_all_types_dir: [cn.pattern_annual_gain_AGC_BGC_all_types], # 15 = XXX GB peak
        # cn.removal_forest_type_dir: [cn.pattern_removal_forest_type], # 15 = XXX GB peak
        cn.stdev_annual_gain_AGC_all_types_dir:
        [cn.pattern_stdev_annual_gain_AGC_all_types],  # 15 = XXX GB peak

        # # Gain year count
        # cn.gain_year_count_dir: [cn.pattern_gain_year_count], # 15 = XXX GB peak

        # # Gross removals from all forest types
        # cn.cumul_gain_AGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_all_types], # 15 = 630 GB peak
        # cn.cumul_gain_BGCO2_all_types_dir: [cn.pattern_cumul_gain_BGCO2_all_types], # 15 = XXX GB peak
        # cn.cumul_gain_AGCO2_BGCO2_all_types_dir: [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types], # 15 = XXX GB peak

        # # Carbon pool inputs
        # cn.elevation_processed_dir: [cn.pattern_elevation],
        # cn.precip_processed_dir: [cn.pattern_precip],
        # cn.bor_tem_trop_processed_dir: [cn.pattern_bor_tem_trop_processed],
        # cn.drivers_processed_dir: [cn.pattern_drivers],
        # cn.climate_zone_processed_dir: [cn.pattern_climate_zone],
        # cn.soil_C_full_extent_2000_dir: [cn.pattern_soil_C_full_extent_2000], # 15 = 430 GB peak
        cn.stdev_soil_C_full_extent_2000_dir:
        [cn.pattern_stdev_soil_C_full_extent],

        # Carbon pools in emissions year
        # cn.AGC_emis_year_dir: [cn.pattern_AGC_emis_year], # 14 = 590 GB peak
        # cn.BGC_emis_year_dir: [cn.pattern_BGC_emis_year], # 14 = > 520 GB peak
        # cn.deadwood_emis_year_2000_dir: [cn.pattern_deadwood_emis_year_2000], # 14 > 560 GB peak (error memory when using 15, so switched to 14)
        # cn.litter_emis_year_2000_dir: [cn.pattern_litter_emis_year_2000], # 14 = XXX GB peak
        # cn.soil_C_emis_year_2000_dir: [cn.pattern_soil_C_emis_year_2000], # 14 = XXX GB peak
        # cn.total_C_emis_year_dir: [cn.pattern_total_C_emis_year], # 14 = XXX GB peak

        # # Carbon pools in 2000
        # cn.AGC_2000_dir: [cn.pattern_AGC_2000],
        # cn.BGC_2000_dir: [cn.pattern_BGC_2000],
        # cn.deadwood_2000_dir: [cn.pattern_deadwood_2000],
        # cn.litter_2000_dir: [cn.pattern_litter_2000],
        # cn.total_C_2000_dir: [cn.pattern_total_C_2000],

        # # Net flux
        # cn.net_flux_dir: [cn.pattern_net_flux],  # 14 = XXX GB peak
        #
        # # Gross emissions from biomass and soil
        # cn.gross_emis_all_gases_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_co2_only_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_co2_only_all_drivers_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_non_co2_all_drivers_biomass_soil_dir: [cn.pattern_gross_emis_non_co2_all_drivers_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_commod_biomass_soil_dir: [cn.pattern_gross_emis_commod_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_shifting_ag_biomass_soil_dir: [cn.pattern_gross_emis_shifting_ag_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_forestry_biomass_soil_dir: [cn.pattern_gross_emis_forestry_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_wildfire_biomass_soil_dir: [cn.pattern_gross_emis_wildfire_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_urban_biomass_soil_dir: [cn.pattern_gross_emis_urban_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_no_driver_biomass_soil_dir: [cn.pattern_gross_emis_no_driver_biomass_soil], # 14 = XXX GB peak
        # cn.gross_emis_nodes_biomass_soil_dir: [cn.pattern_gross_emis_nodes_biomass_soil], # 14 = XXX GB peak

        # Gross emissions from soil only
        cn.gross_emis_all_gases_all_drivers_soil_only_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_soil_only],
        cn.gross_emis_co2_only_all_drivers_soil_only_dir:
        [cn.pattern_gross_emis_co2_only_all_drivers_soil_only],
        cn.gross_emis_non_co2_all_drivers_soil_only_dir:
        [cn.pattern_gross_emis_non_co2_all_drivers_soil_only],
        cn.gross_emis_commod_soil_only_dir:
        [cn.pattern_gross_emis_commod_soil_only],
        cn.gross_emis_shifting_ag_soil_only_dir:
        [cn.pattern_gross_emis_shifting_ag_soil_only]
        # cn.gross_emis_forestry_soil_only_dir: [cn.pattern_gross_emis_forestry_soil_only],
        # cn.gross_emis_wildfire_soil_only_dir: [cn.pattern_gross_emis_wildfire_soil_only],
        # cn.gross_emis_urban_soil_only_dir: [cn.pattern_gross_emis_urban_soil_only],
        # cn.gross_emis_no_driver_soil_only_dir: [cn.pattern_gross_emis_no_driver_soil_only],
        # cn.gross_emis_nodes_soil_only_dir: [cn.pattern_gross_emis_nodes_soil_only]
    }

    # Iterates through each set of tiles and gets statistics of it
    for key, values in download_dict.items():

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                tile_id_list)

        # List of all the tiles on the spot machine to be summarized (excludes pixel area tiles and tiles created by gdal_calc
        # (in case this script was already run on this spot machine and created output from gdal_calc)
        tile_list = uu.tile_list_spot_machine(".", ".tif")
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [
            i for i in tile_list
            if not ('hanson_2013' in i or 'value_per_pixel' in i)
        ]
        uu.print_log(tile_list)
        uu.print_log(
            "There are {} tiles to process".format(str(len(tile_list))) + "\n")

        # For multiprocessor use.
        processes = 14
        uu.print_log('Tile statistics max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(tile_statistics.create_tile_statistics,
                    sensit_type=sensit_type,
                    tile_stats_txt=tile_stats_txt), tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #     tile_statistics.create_tile_statistics(tile, sensit_type)

        # Copies the text file to the tile statistics folder on s3
        cmd = ['aws', 's3', 'cp', tile_stats_txt, cn.tile_stats_dir]
        uu.log_subprocess_output_full(cmd)

        # Spot machine can't store all the tiles, so this cleans it up
        uu.print_log("Deleting tiles...")
        for tile in tile_list:
            os.remove(tile)
            tile_short = tile[:-4]
            outname = '{0}_value_per_pixel.tif'.format(tile_short)
            os.remove(outname)
            uu.print_log("  {} deleted".format(tile))

    uu.print_log("Script complete. All tiles analyzed!")

Ejemplo n.º 3

Mostrar archivo

Archivo: mp_aggregate_results_to_4_km.py Proyecto: xiaotuxiaotu520/carbon-budget

def mp_aggregate_results_to_4_km(sensit_type,
                                 thresh,
                                 tile_id_list,
                                 std_net_flux=None,
                                 run_date=None,
                                 no_upload=None):

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # Checks whether the canopy cover argument is valid
    if thresh < 0 or thresh > 99:
        uu.exception_log(
            no_upload,
            'Invalid tcd. Please provide an integer between 0 and 99.')

    if uu.check_aws_creds():

        # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
        uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent
        uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                                sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                                cn.pattern_mangrove_biomass_2000,
                                cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Model outputs to process are:", download_dict)

    # List of output directories. Modified later for sensitivity analysis.
    # Output pattern is determined later.
    output_dir_list = [cn.output_aggreg_dir]

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through the types of tiles to be processed
    for dir, download_pattern in list(download_dict.items()):

        download_pattern_name = download_pattern[0]

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
        if uu.check_aws_creds():

            uu.s3_flexible_download(dir, download_pattern_name,
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

        # Gets an actual tile id to use as a dummy in creating the actual tile pattern
        local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir,
                                                    download_pattern_name)
        sample_tile_id = uu.get_tile_id(local_tile_list[0])

        # Renames the tiles according to the sensitivity analysis before creating dummy tiles.
        # The renaming function requires a whole tile name, so this passes a dummy time name that is then stripped a few
        # lines later.
        tile_id = sample_tile_id  # a dummy tile id (but it has to be a real tile id). It is removed later.
        output_pattern = uu.sensit_tile_rename(sensit_type, tile_id,
                                               download_pattern_name)
        pattern = output_pattern[9:-4]

        # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis
        if (sensit_type != 'std') & (sensit_type not in pattern):
            uu.print_log(
                "{} not a sensitivity analysis output. Skipping aggregation..."
                .format(pattern))
            uu.print_log("")

            continue

        # Lists the tiles of the particular type that is being iterates through.
        # Excludes all intermediate files
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [i for i in tile_list if not ('hanson_2013' in i)]
        tile_list = [i for i in tile_list if not ('rewindow' in i)]
        tile_list = [i for i in tile_list if not ('0_4deg' in i)]
        tile_list = [i for i in tile_list if not ('.ovr' in i)]

        # tile_list = ['00N_070W_cumul_gain_AGCO2_BGCO2_t_ha_all_forest_types_2001_15_biomass_swap.tif']  # test tiles

        uu.print_log("There are {0} tiles to process for pattern {1}".format(
            str(len(tile_list)), download_pattern) + "\n")
        uu.print_log("Processing:", dir, "; ", pattern)

        # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to windows of 400x400 pixels,
        # which is the resolution of the output tiles. This will allow the 30x30 m pixels in each window to be summed.
        # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12  # 12 processors = XXX GB peak
            else:
                processes = 16  # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload),
            tile_list)
        # Added these in response to error12: Cannot allocate memory error.
        # This fix was mentioned here: of https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        # Could also try this: https://stackoverflow.com/questions/42584525/python-multiprocessing-debugging-oserror-errno-12-cannot-allocate-memory
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.rewindow(til, no_upload)

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
        # and sums those values in each 400x400 pixel window.
        # The sum for each 400x400 pixel window is stored in a 2D array, which is then converted back into a raster at
        # 0.1x0.1 degree resolution (approximately 10m in the tropics).
        # Each pixel in that raster is the sum of the 30m pixels converted to value/pixel (instead of value/ha).
        # The 0.1x0.1 degree tile is output.
        # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 12  # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Conversion to per pixel and aggregate max processors=',
                     processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.aggregate,
                    thresh=thresh,
                    sensit_type=sensit_type,
                    no_upload=no_upload), tile_list)
        pool.close()
        pool.join()

        # # For single processor use
        # for tile in tile_list:
        #
        #     aggregate_results_to_4_km.aggregate(tile, thresh, sensit_type, no_upload)

        # Makes a vrt of all the output 10x10 tiles (10 km resolution)
        out_vrt = "{}_0_4deg.vrt".format(pattern)
        os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format(
            out_vrt, pattern))

        # Creates the output name for the 10km map
        out_pattern = uu.name_aggregated_output(download_pattern_name, thresh,
                                                sensit_type)
        uu.print_log(out_pattern)

        # Produces a single raster of all the 10x10 tiles (0.4 degree resolution)
        cmd = [
            'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0',
            '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt,
            '{}.tif'.format(out_pattern)
        ]
        uu.log_subprocess_output_full(cmd)

        # Adds metadata tags to output rasters
        uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern),
                                       sensit_type)

        # Units are different for annual removal factor, so metadata has to reflect that
        if 'annual_removal_factor' in out_pattern:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'scale=negative values are removals', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        else:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:

            uu.print_log("Tiles processed. Uploading to s3 now...")
            uu.upload_final_set(output_dir_list[0], out_pattern)

        # Cleans up the folder before starting on the next raster type
        vrtList = glob.glob('*vrt')
        for vrt in vrtList:
            os.remove(vrt)

        for tile_name in tile_list:
            tile_id = uu.get_tile_id(tile_name)
            # os.remove('{0}_{1}.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern))

    # Compares the net flux from the standard model and the sensitivity analysis in two ways.
    # This does not work for compariing the raw outputs of the biomass_swap and US_removals sensitivity models because their
    # extents are different from the standard model's extent (tropics and US tiles vs. global).
    # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to
    # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000.
    # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the
    # code below should work.
    if sensit_type not in [
            'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss'
    ]:

        if std_net_flux:

            uu.print_log(
                "Standard aggregated flux results provided. Creating comparison maps."
            )

            # Copies the standard model aggregation outputs to s3. Only net flux is used, though.
            uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type)

            # Identifies the standard model net flux map
            std_aggreg_flux = os.path.split(std_net_flux)[1]

            try:
                # Identifies the sensitivity model net flux map
                sensit_aggreg_flux = glob.glob(
                    'net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0]

                uu.print_log("Standard model net flux:", std_aggreg_flux)
                uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux)

            except:
                uu.print_log(
                    'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.'
                )

            uu.print_log(
                "Creating map of percent difference between standard and {} net flux"
                .format(sensit_type))
            aggregate_results_to_4_km.percent_diff(std_aggreg_flux,
                                                   sensit_aggreg_flux,
                                                   sensit_type, no_upload)

            uu.print_log(
                "Creating map of which pixels change sign and which stay the same between standard and {}"
                .format(sensit_type))
            aggregate_results_to_4_km.sign_change(std_aggreg_flux,
                                                  sensit_aggreg_flux,
                                                  sensit_type, no_upload)

            # If no_upload flag is not activated, output is uploaded
            if not no_upload:

                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_perc_diff)
                uu.upload_final_set(output_dir_list[0],
                                    cn.pattern_aggreg_sensit_sign_change)

        else:

            uu.print_log(
                "No standard aggregated flux results provided. Not creating comparison maps."
            )

Ejemplo n.º 4

Mostrar archivo

def main():

    # Create the output log
    uu.initiate_log()

    os.chdir(cn.docker_base_dir)

    # Files to download for this script.
    download_dict = {
        cn.gain_dir: [cn.pattern_gain],
        cn.annual_gain_AGB_IPCC_defaults_dir:
        [cn.pattern_annual_gain_AGB_IPCC_defaults]
    }

    # List of tiles that could be run. This list is only used to create the FIA region tiles if they don't already exist.
    tile_id_list = uu.tile_list_s3(cn.annual_gain_AGB_IPCC_defaults_dir)
    # tile_id_list = ["00N_000E", "00N_050W", "00N_060W", "00N_010E", "00N_020E", "00N_030E", "00N_040E", "10N_000E", "10N_010E", "10N_010W", "10N_020E", "10N_020W"] # test tiles
    # tile_id_list = ['50N_130W'] # test tiles

    # List of output directories and output file name patterns
    output_dir_list = [
        cn.US_annual_gain_AGB_natrl_forest_dir,
        cn.US_annual_gain_BGB_natrl_forest_dir
    ]
    output_pattern_list = [
        cn.pattern_US_annual_gain_AGB_natrl_forest,
        cn.pattern_US_annual_gain_BGB_natrl_forest
    ]

    # By definition, this script is for US-specific removals
    sensit_type = 'US_removals'

    # Counts how many processed FIA region tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_regions_tile_count = uu.count_tiles_s3(cn.FIA_regions_processed_dir)

    # Only creates FIA region tiles if they don't already exist on s3.
    if FIA_regions_tile_count == 16:
        uu.print_log("FIA region tiles already created. Copying to s3 now...")
        uu.s3_flexible_download(cn.FIA_regions_processed_dir,
                                cn.pattern_FIA_regions_processed,
                                cn.docker_base_dir, 'std', 'all')

    else:
        uu.print_log(
            "FIA region tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_regions_raw_dir, cn.name_FIA_regions_raw),
            cn.docker_base_dir, 'std')

        cmd = ['unzip', '-o', '-j', cn.name_FIA_regions_raw]
        # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        with process.stdout:
            uu.log_subprocess_output(process.stdout)

        # Converts the region shapefile to Hansen tiles
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(US_removal_rates.prep_FIA_regions, tile_id_list)

    # List of FIA region tiles on the spot machine. Only this list is used for the rest of the script.
    US_tile_list = uu.tile_list_spot_machine(
        cn.docker_base_dir, '{}.tif'.format(cn.pattern_FIA_regions_processed))
    US_tile_id_list = [i[0:8] for i in US_tile_list]
    # US_tile_id_list = ['50N_130W']    # For testing
    uu.print_log(US_tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(US_tile_id_list))) +
        "\n")

    # Counts how many processed forest age category tiles there are on s3 already. 16 tiles cover the continental US.
    US_age_tile_count = uu.count_tiles_s3(cn.US_forest_age_cat_processed_dir)

    # Only creates FIA forest age category tiles if they don't already exist on s3.
    if US_age_tile_count == 16:
        uu.print_log(
            "Forest age category tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.US_forest_age_cat_processed_dir,
                                cn.pattern_US_forest_age_cat_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "Southern forest age category tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.US_forest_age_cat_raw_dir,
                         cn.name_US_forest_age_cat_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest age category raster to Hansen tiles
        source_raster = cn.name_US_forest_age_cat_raw
        out_pattern = cn.pattern_US_forest_age_cat_processed
        dt = 'Int16'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt), US_tile_id_list)

        uu.upload_final_set(cn.US_forest_age_cat_processed_dir,
                            cn.pattern_US_forest_age_cat_processed)

    # Counts how many processed FIA forest group tiles there are on s3 already. 16 tiles cover the continental US.
    FIA_forest_group_tile_count = uu.count_tiles_s3(
        cn.FIA_forest_group_processed_dir)

    # Only creates FIA forest group tiles if they don't already exist on s3.
    if FIA_forest_group_tile_count == 16:
        uu.print_log(
            "FIA forest group tiles already created. Copying to spot machine now..."
        )
        uu.s3_flexible_download(cn.FIA_forest_group_processed_dir,
                                cn.pattern_FIA_forest_group_processed, '',
                                'std', US_tile_id_list)

    else:
        uu.print_log(
            "FIA forest group tiles do not exist. Creating tiles, then copying to s3 for future use..."
        )
        uu.s3_file_download(
            os.path.join(cn.FIA_forest_group_raw_dir,
                         cn.name_FIA_forest_group_raw), cn.docker_base_dir,
            'std')

        # Converts the national forest group raster to Hansen forest group tiles
        source_raster = cn.name_FIA_forest_group_raw
        out_pattern = cn.pattern_FIA_forest_group_processed
        dt = 'Byte'
        pool = multiprocessing.Pool(int(cn.count / 2))
        pool.map(
            partial(uu.mp_warp_to_Hansen,
                    source_raster=source_raster,
                    out_pattern=out_pattern,
                    dt=dt), US_tile_id_list)

        uu.upload_final_set(cn.FIA_forest_group_processed_dir,
                            cn.pattern_FIA_forest_group_processed)

    # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list
    for key, values in download_dict.items():
        dir = key
        pattern = values[0]
        uu.s3_flexible_download(dir, pattern, cn.docker_base_dir, sensit_type,
                                US_tile_id_list)

    # Table with US-specific removal rates
    cmd = [
        'aws', 's3', 'cp',
        os.path.join(cn.gain_spreadsheet_dir, cn.table_US_removal_rate),
        cn.docker_base_dir
    ]

    # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging
    process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
    with process.stdout:
        uu.log_subprocess_output(process.stdout)

    # Imports the table with the region-group-age AGB removal rates
    gain_table = pd.read_excel("{}".format(cn.table_US_removal_rate),
                               sheet_name="US_rates_for_model")

    # Converts gain table from wide to long, so each region-group-age category has its own row
    gain_table_group_region_by_age = pd.melt(
        gain_table,
        id_vars=['FIA_region_code', 'forest_group_code'],
        value_vars=['growth_young', 'growth_middle', 'growth_old'])
    gain_table_group_region_by_age = gain_table_group_region_by_age.dropna()

    # In the forest age category raster, each category has this value
    age_dict = {
        'growth_young': 1000,
        'growth_middle': 2000,
        'growth_old': 3000
    }

    # Creates a unique value for each forest group-region-age category in the table.
    # Although these rates are applied to all standard gain model pixels at first, they are not ultimately used for
    # pixels that have Hansen gain (see below).
    gain_table_group_region_age = gain_table_group_region_by_age.replace(
        {"variable": age_dict})
    gain_table_group_region_age[
        'age_cat'] = gain_table_group_region_age['variable'] * 10
    gain_table_group_region_age['group_region_age_combined'] = gain_table_group_region_age['age_cat'] + \
                                              gain_table_group_region_age['forest_group_code']*100 + \
                                              gain_table_group_region_age['FIA_region_code']
    # Converts the forest group-region-age codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region-age code and the value is the AGB removal rate.
    gain_table_group_region_age_dict = pd.Series(
        gain_table_group_region_age.value.values,
        index=gain_table_group_region_age.group_region_age_combined).to_dict()
    uu.print_log(gain_table_group_region_age_dict)

    # Creates a unique value for each forest group-region category using just young forest rates.
    # These are assigned to Hansen gain pixels, which automatically get the young forest rate, regardless of the
    # forest age category raster.
    gain_table_group_region = gain_table_group_region_age.drop(
        gain_table_group_region_age[
            gain_table_group_region_age.age_cat != 10000].index)
    gain_table_group_region['group_region_combined'] = gain_table_group_region['forest_group_code']*100 + \
                                                       gain_table_group_region['FIA_region_code']
    # Converts the forest group-region codes and corresponding gain rates to a dictionary,
    # where the key is the unique group-region code (youngest age category) and the value is the AGB removal rate.
    gain_table_group_region_dict = pd.Series(
        gain_table_group_region.value.values,
        index=gain_table_group_region.group_region_combined).to_dict()
    uu.print_log(gain_table_group_region_dict)

    # count/2 on a m4.16xlarge maxes out at about 230 GB of memory (processing 16 tiles at once), so it's okay on an m4.16xlarge
    pool = multiprocessing.Pool(int(cn.count / 2))
    pool.map(
        partial(
            US_removal_rates.US_removal_rate_calc,
            gain_table_group_region_age_dict=gain_table_group_region_age_dict,
            gain_table_group_region_dict=gain_table_group_region_dict,
            output_pattern_list=output_pattern_list,
            sensit_type=sensit_type), US_tile_id_list)
    pool.close()
    pool.join()

    # # For single processor use
    # for tile_id in US_tile_id_list:
    #
    #     US_removal_rates.US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_group_region_dict,
    #                                           output_pattern_list, sensit_type)

    # Uploads output tiles to s3
    for i in range(0, len(output_dir_list)):
        uu.upload_final_set(output_dir_list[i], output_pattern_list[i])

Ejemplo n.º 5

Mostrar archivo

Archivo: mp_plantation_preparation.py Proyecto: elizabethgoldman/carbon-budget

def main():

    parser = argparse.ArgumentParser(
        description='Create planted forest carbon gain rate tiles')
    parser.add_argument(
        '--gadm-tile-index',
        '-gi',
        required=True,
        help=
        'Shapefile of 1x1 degree tiles of countries that contain planted forests (i.e. countries with planted forests rasterized to 1x1 deg). If no shapefile, write None.'
    )
    parser.add_argument(
        '--planted-tile-index',
        '-pi',
        required=True,
        help=
        'Shapefile of 1x1 degree tiles of that contain planted forests (i.e. planted forest extent rasterized to 1x1 deg). If no shapefile, write None.'
    )
    args = parser.parse_args()

    # Creates the directory and shapefile names for the two possible arguments (index shapefiles)
    gadm_index = os.path.split(args.gadm_tile_index)
    gadm_index_path = gadm_index[0]
    gadm_index_shp = gadm_index[1]
    gadm_index_shp = gadm_index_shp[:-4]
    planted_index = os.path.split(args.planted_tile_index)
    planted_index_path = planted_index[0]
    planted_index_shp = planted_index[1]
    planted_index_shp = planted_index_shp[:-4]

    # Checks the validity of the two arguments. If either one is invalid, the script ends.
    if (gadm_index_path not in cn.gadm_plant_1x1_index_dir
            or planted_index_path not in cn.gadm_plant_1x1_index_dir):
        raise Exception(
            'Invalid inputs. Please provide None or s3 shapefile locations for both arguments.'
        )

    # List of all possible 10x10 Hansen tiles except for those at very extreme latitudes (not just WHRC biomass tiles)
    total_tile_list = uu.tile_list(cn.pixel_area_dir)
    print "Number of possible 10x10 tiles to evaluate:", len(total_tile_list)

    # Removes the latitude bands that don't have any planted forests in them according to Liz Goldman.
    # i.e., Liz Goldman said by Slack on 1/2/19 that the nothernmost planted forest is 69.5146 and the southernmost is -46.938968.
    # This creates a more focused list of 10x10 tiles to iterate through (removes ones that definitely don't have planted forest).
    # NOTE: If the planted forest gdb is updated, the list of latitudes to exclude below may need to be changed to not exclude certain latitude bands.
    planted_lat_tile_list = [
        tile for tile in total_tile_list if '90N' not in tile
    ]
    planted_lat_tile_list = [
        tile for tile in planted_lat_tile_list if '80N' not in tile
    ]
    planted_lat_tile_list = [
        tile for tile in planted_lat_tile_list if '50S' not in tile
    ]
    planted_lat_tile_list = [
        tile for tile in planted_lat_tile_list if '60S' not in tile
    ]
    planted_lat_tile_list = [
        tile for tile in planted_lat_tile_list if '70S' not in tile
    ]
    planted_lat_tile_list = [
        tile for tile in planted_lat_tile_list if '80S' not in tile
    ]
    # planted_lat_tile_list = ['10N_080W']

    print planted_lat_tile_list
    print "Number of 10x10 tiles to evaluate after extreme latitudes have been removed:", len(
        planted_lat_tile_list)

    # If a planted forest extent 1x1 tile index shapefile isn't supplied
    if 'None' in args.planted_tile_index:

        ### Entry point 1:
        # If no shapefile of 1x1 tiles for countries with planted forests is supplied, 1x1 tiles of country extents will be created.
        # This runs the process from the very beginning and will take a few days.
        if 'None' in args.gadm_tile_index:

            print "No GADM 1x1 tile index shapefile provided. Creating 1x1 planted forest country tiles from scratch..."

            # Downloads and unzips the GADM shapefile, which will be used to create 1x1 tiles of land areas
            uu.s3_file_download(cn.gadm_path, '.')
            cmd = ['unzip', cn.gadm_zip]
            subprocess.check_call(cmd)

            # Creates a new GADM shapefile with just the countries that have planted forests in them.
            # This limits creation of 1x1 rasters of land area on the countries that have planted forests rather than on all countries.
            # NOTE: If the planted forest gdb is updated and has new countries added to it, the planted forest country list
            # in constants_and_names.py must be updated, too.
            print "Creating shapefile of countries with planted forests..."
            os.system(
                '''ogr2ogr -sql "SELECT * FROM gadm_3_6_adm2_final WHERE iso IN ({0})" {1} gadm_3_6_adm2_final.shp'''
                .format(str(cn.plantation_countries)[1:-1], cn.gadm_iso))

            # Creates 1x1 degree tiles of countries that have planted forests in them.
            # I assume this can handle using 50 processors because it's not trying to upload files to s3 and the tiles are small.
            # This takes several days to run because it iterates through at least 250 10x10 tiles.
            # For multiprocessor use.
            num_of_processes = 50
            pool = Pool(num_of_processes)
            pool.map(plantation_preparation.rasterize_gadm_1x1,
                     planted_lat_tile_list)
            pool.close()
            pool.join()

            # # Creates 1x1 degree tiles of countries that have planted forests in them.
            # # For single processor use.
            # for tile in planted_lat_tile_list:
            #
            #     plantation_preparation.rasterize_gadm_1x1(tile)

            # Creates a shapefile of the boundaries of the 1x1 GADM tiles in countries with planted forests
            os.system('''gdaltindex {0}_{1}.shp GADM_*.tif'''.format(
                cn.pattern_gadm_1x1_index, uu.date))
            cmd = [
                'aws', 's3', 'cp', '.', cn.gadm_plant_1x1_index_dir,
                '--exclude', '*', '--include',
                '{}*'.format(cn.pattern_gadm_1x1_index), '--recursive'
            ]
            subprocess.check_call(cmd)

            # # Saves the 1x1 country extent tiles to s3
            # # Only use if the entire process can't run in one go on the spot machine
            # cmd = ['aws', 's3', 'cp', '.', 's3://gfw2-data/climate/carbon_model/temp_spotmachine_output/', '--exclude', '*', '--include', 'GADM_*.tif', '--recursive']
            # subprocess.check_call(cmd)

            # Delete the aux.xml files
            os.system('''rm GADM*.tif.*''')

            # List of all 1x1 degree countey extent tiles created
            gadm_list_1x1 = uu.tile_list_spot_machine(".", "GADM_")
            print "List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1
            print len(gadm_list_1x1)

        ### Entry point 2:
        # If a shapefile of the boundaries of 1x1 degree tiles of countries with planted forests is supplied,
        # a list of the 1x1 tiles is created from the shapefile.
        # This avoids creating the 1x1 country extent tiles all over again because the relevant tile extent are supplied
        # in the shapefile.
        elif cn.gadm_plant_1x1_index_dir in args.gadm_tile_index:

            print "Country extent 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest tiles..."

            # Copies the shapefile of 1x1 tiles of extent of countries with planted forests
            cmd = [
                'aws', 's3', 'cp', '{}/'.format(gadm_index_path), '.',
                '--recursive', '--exclude', '*', '--include',
                '{}*'.format(gadm_index_shp), '--recursive'
            ]
            subprocess.check_call(cmd)

            # Gets the attribute table of the country extent 1x1 tile shapefile
            gadm = glob.glob('{}*.dbf'.format(cn.pattern_gadm_1x1_index))[0]

            # Converts the attribute table to a dataframe
            dbf = Dbf5(gadm)
            df = dbf.to_dataframe()

            # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
            gadm_list_1x1 = df['location'].tolist()
            gadm_list_1x1 = [str(y) for y in gadm_list_1x1]
            print "List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", gadm_list_1x1
            print "There are", len(
                gadm_list_1x1), "1x1 country extent tiles to iterate through."

        # In case some other arguments are provided
        else:

            raise Exception(
                'Invalid GADM tile index shapefile provided. Please provide a valid shapefile.'
            )

        # Creates 1x1 degree tiles of plantation growth wherever there are plantations.
        # Because this is iterating through all 1x1 tiles in countries with planted forests, it first checks
        # whether each 1x1 tile intersects planted forests before creating a 1x1 planted forest tile for that
        # 1x1 country extent tile.
        # For multiprocessor use
        num_of_processes = 30
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_from_1x1_gadm,
                 gadm_list_1x1)
        pool.close()
        pool.join()

        # # Creates 1x1 degree tiles of plantation growth wherever there are plantations
        # # For single processor use
        # for tile in gadm_list_1x1:
        #
        #     plantation_preparation.create_1x1_plantation(tile)

        # Creates a shapefile in which each feature is the extent of a plantation extent tile.
        # This index shapefile can be used the next time this process is run if starting with Entry Point 3.
        os.system('''gdaltindex {0}_{1}.shp plant_*.tif'''.format(
            cn.pattern_plant_1x1_index, uu.date))
        cmd = [
            'aws', 's3', 'cp', '.', cn.gadm_plant_1x1_index_dir, '--exclude',
            '*', '--include', '{}*'.format(cn.pattern_plant_1x1_index),
            '--recursive'
        ]
        subprocess.check_call(cmd)

    ### Entry point 3
    # If a shapefile of the extents of 1x1 planted forest tiles is provided
    if cn.pattern_plant_1x1_index in args.planted_tile_index:

        print "Planted forest 1x1 tile index shapefile supplied. Using that to create 1x1 planted forest growth tiles..."

        # Copies the shapefile of 1x1 tiles of extent of planted forests
        cmd = [
            'aws', 's3', 'cp', '{}/'.format(planted_index_path), '.',
            '--recursive', '--exclude', '*', '--include',
            '{}*'.format(planted_index_shp), '--recursive'
        ]
        subprocess.check_call(cmd)

        # Gets the attribute table of the planted forest extent 1x1 tile shapefile
        gadm = glob.glob('{}*.dbf'.format(cn.pattern_plant_1x1_index))[0]

        # Converts the attribute table to a dataframe
        dbf = Dbf5(gadm)
        df = dbf.to_dataframe()

        # Converts the column of the dataframe with the names of the tiles (which contain their coordinates) to a list
        planted_list_1x1 = df['location'].tolist()
        planted_list_1x1 = [str(y) for y in planted_list_1x1]
        print "List of 1x1 degree tiles in countries that have planted forests, with defining coordinate in the northwest corner:", planted_list_1x1
        print "There are", len(
            planted_list_1x1
        ), "1x1 planted forest extent tiles to iterate through."

        # Creates 1x1 degree tiles of plantation growth wherever there are plantations.
        # Because this is iterating through only 1x1 tiles that are known to have planted forests (from a previous run
        # of this script), it does not need to check whether there are planted forests in this tile. It goes directly
        # to intersecting the planted forest table with the 1x1 tile.
        # For multiprocessor use
        # This works with 30 processors on an r4.16xlarge.
        num_of_processes = 50
        pool = Pool(num_of_processes)
        pool.map(plantation_preparation.create_1x1_plantation_from_1x1_planted,
                 planted_list_1x1)
        pool.close()
        pool.join()

    ### All entry points meet here: creation of 10x10 degree planted forest tiles from 1x1 degree planted forest tiles

    # Name of the vrt of 1x1 planted forest tiles
    plant_1x1_vrt = 'plant_1x1.vrt'

    # Creates a mosaic of all the 1x1 plantation growth rate tiles
    print "Creating vrt of 1x1 plantation growth rate tiles"
    os.system('gdalbuildvrt {} plant_*.tif'.format(plant_1x1_vrt))

    # Creates 10x10 degree tiles of plantation growth by iterating over the pixel area tiles that are in latitudes with planted forests
    # For multiprocessor use
    num_of_processes = 20
    pool = Pool(num_of_processes)
    pool.map(
        partial(plantation_preparation.create_10x10_plantation,
                plant_1x1_vrt=plant_1x1_vrt), planted_lat_tile_list)
    pool.close()
    pool.join()