def create_supplementary_outputs(tile_id, input_pattern, output_patterns,
                                 sensit_type):
    """Creates three derived versions of one per-hectare model output tile:
    per-pixel over the full model extent, per-hectare over forest extent,
    and per-pixel over forest extent.

    Args:
        tile_id: tile name or id; the canonical id is (re)extracted from it below.
        input_pattern: file pattern of the per-hectare full-extent input tile.
        output_patterns: list of output file patterns. Must be ordered:
            [per pixel full extent, per hectare forest extent,
             per pixel forest extent].
        sensit_type: sensitivity analysis type, passed through to the raster tags.

    Returns:
        None. Writes the three output GeoTiffs to the current directory.
    """

    # start time
    start = datetime.datetime.now()

    # Extracts the tile id from the argument (which may be a full tile name)
    tile_id = uu.get_tile_id(tile_id)

    # Names of inputs
    focal_tile = '{0}_{1}.tif'.format(tile_id, input_pattern)
    pixel_area = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id)
    tcd = '{0}_{1}.tif'.format(cn.pattern_tcd, tile_id)
    gain = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id)
    mangrove = '{0}_{1}.tif'.format(tile_id, cn.pattern_mangrove_biomass_2000)

    # Names of outputs.
    # Requires that output patterns be listed in main script in the correct order for here
    # (currently, per pixel full extent, per hectare forest extent, per pixel forest extent).
    per_pixel_full_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[0])
    per_hectare_forest_extent = '{0}_{1}.tif'.format(tile_id,
                                                     output_patterns[1])
    per_pixel_forest_extent = '{0}_{1}.tif'.format(tile_id, output_patterns[2])

    # Opens input tiles for rasterio
    in_src = rasterio.open(focal_tile)
    # Grabs metadata about the tif, like its location/projection/cellsize
    kwargs = in_src.meta
    # Grabs the windows of the tile (stripes) so we can iterate over the entire tif without running out of memory
    windows = in_src.block_windows(1)

    pixel_area_src = rasterio.open(pixel_area)
    tcd_src = rasterio.open(tcd)
    gain_src = rasterio.open(gain)

    # Not every tile has mangrove. Tracks absence explicitly with None instead
    # of relying on a NameError being swallowed by a bare except in the window
    # loop (the original behavior).
    mangrove_src = None
    try:
        mangrove_src = rasterio.open(mangrove)
        uu.print_log("    Mangrove tile found for {}".format(tile_id))
    except rasterio.errors.RasterioIOError:
        uu.print_log("    No mangrove tile found for {}".format(tile_id))

    uu.print_log("  Creating outputs for {}...".format(focal_tile))

    kwargs.update(driver='GTiff',
                  count=1,
                  compress='lzw',
                  nodata=0,
                  dtype='float32')

    # Opens output tiles, giving them the arguments of the input tiles
    per_pixel_full_extent_dst = rasterio.open(per_pixel_full_extent, 'w',
                                              **kwargs)
    per_hectare_forest_extent_dst = rasterio.open(per_hectare_forest_extent,
                                                  'w', **kwargs)
    per_pixel_forest_extent_dst = rasterio.open(per_pixel_forest_extent, 'w',
                                                **kwargs)

    # Adds metadata tags to the output rasters

    uu.add_rasterio_tags(per_pixel_full_extent_dst, sensit_type)
    per_pixel_full_extent_dst.update_tags(
        units='Mg CO2e/pixel over model duration (2001-20{})'.format(
            cn.loss_years))
    per_pixel_full_extent_dst.update_tags(
        source='per hectare full model extent tile')
    per_pixel_full_extent_dst.update_tags(
        extent=
        'Full model extent: ((TCD2000>0 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations'
    )

    uu.add_rasterio_tags(per_hectare_forest_extent_dst, sensit_type)
    per_hectare_forest_extent_dst.update_tags(
        units='Mg CO2e/hectare over model duration (2001-20{})'.format(
            cn.loss_years))
    per_hectare_forest_extent_dst.update_tags(
        source='per hectare full model extent tile')
    per_hectare_forest_extent_dst.update_tags(
        extent=
        'Forest extent: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations'
    )

    uu.add_rasterio_tags(per_pixel_forest_extent_dst, sensit_type)
    per_pixel_forest_extent_dst.update_tags(
        units='Mg CO2e/pixel over model duration (2001-20{})'.format(
            cn.loss_years))
    per_pixel_forest_extent_dst.update_tags(
        source='per hectare forest model extent tile')
    per_pixel_forest_extent_dst.update_tags(
        extent=
        'Forest extent: ((TCD2000>30 AND WHRC AGB2000>0) OR Hansen gain=1 OR mangrove AGB2000>0) NOT IN pre-2000 plantations'
    )

    # Net flux can be negative (sink) or positive (source), so that is tagged too
    if "net_flux" in focal_tile:
        per_pixel_full_extent_dst.update_tags(
            scale=
            'Negative values are net sinks. Positive values are net sources.')
        per_hectare_forest_extent_dst.update_tags(
            scale=
            'Negative values are net sinks. Positive values are net sources.')
        per_pixel_forest_extent_dst.update_tags(
            scale=
            'Negative values are net sinks. Positive values are net sources.')

    # Iterates across the windows of the input tiles
    for idx, window in windows:

        # Creates windows for each input tile
        in_window = in_src.read(1, window=window)
        pixel_area_window = pixel_area_src.read(1, window=window)
        tcd_window = tcd_src.read(1, window=window)
        gain_window = gain_src.read(1, window=window)

        # Uses an all-zero window where there is no mangrove tile
        if mangrove_src is not None:
            mangrove_window = mangrove_src.read(1, window=window)
        else:
            mangrove_window = np.zeros((window.height, window.width),
                                       dtype='uint8')

        # Output window for per pixel full extent raster (/ha -> /pixel)
        dst_window_per_pixel_full_extent = in_window * pixel_area_window / cn.m2_per_ha

        # Output window for per hectare forest extent raster
        # QCed this line before publication and then again afterwards in response to question from Lena Schulte-Uebbing at Wageningen Uni.
        dst_window_per_hectare_forest_extent = np.where(
            (tcd_window > cn.canopy_threshold) | (gain_window == 1) |
            (mangrove_window != 0), in_window, 0)

        # Output window for per pixel forest extent raster (/ha -> /pixel)
        dst_window_per_pixel_forest_extent = dst_window_per_hectare_forest_extent * pixel_area_window / cn.m2_per_ha

        # Writes arrays to output raster
        per_pixel_full_extent_dst.write_band(1,
                                             dst_window_per_pixel_full_extent,
                                             window=window)
        per_hectare_forest_extent_dst.write_band(
            1, dst_window_per_hectare_forest_extent, window=window)
        per_pixel_forest_extent_dst.write_band(
            1, dst_window_per_pixel_forest_extent, window=window)

    # Closes all datasets so file handles are released and writes are flushed
    in_src.close()
    pixel_area_src.close()
    tcd_src.close()
    gain_src.close()
    if mangrove_src is not None:
        mangrove_src.close()
    per_pixel_full_extent_dst.close()
    per_hectare_forest_extent_dst.close()
    per_pixel_forest_extent_dst.close()

    uu.print_log("  Output tiles created for {}...".format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, output_patterns[0])
# Ejemplo n.º 2
# 0
def _rewindow_with_gdalwarp(src_tile, dst_tile, xmin, ymin, xmax, ymax,
                            dstnodata=None):
    """Rewrites src_tile as dst_tile with tiled internal blocks via gdalwarp.

    Clips/aligns to the given bounding box at cn.Hansen_res resolution and
    writes LZW-compressed output with 160x160 internal blocks.
    dstnodata, if given, is passed to gdalwarp's -dstnodata flag.
    """
    cmd = ['gdalwarp', '-co', 'COMPRESS=LZW', '-overwrite']
    if dstnodata is not None:
        cmd += ['-dstnodata', dstnodata]
    cmd += [
        '-te',
        str(xmin),
        str(ymin),
        str(xmax),
        str(ymax), '-tap', '-tr',
        str(cn.Hansen_res),
        str(cn.Hansen_res), '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=160',
        '-co', 'BLOCKYSIZE=160', src_tile, dst_tile
    ]
    uu.log_subprocess_output_full(cmd)


def rewindow(tile, no_upload=None):
    """Rewindows a model output tile and its companion inputs (pixel area,
    tree cover density, Hansen gain, mangrove biomass) into block-tiled
    rasters so they can later be aggregated window by window.

    Args:
        tile: full name of the model output tile to rewindow.
        no_upload: accepted for interface compatibility with the
            multiprocessing caller; unused here.

    Returns:
        None. Writes '*_rewindow.tif' files for any input not already present.
    """

    # start time
    start = datetime.datetime.now()

    # NOTE(review): the message says 200x200 windows but the gdalwarp options
    # below create 160x160 internal blocks (0.04 deg at 0.00025 deg pixels)
    uu.print_log(
        "Rewindowing {} to 200x200 pixel windows (0.04 degree x 0.04 degree)..."
        .format(tile))

    # Extracts the tile id, tile type, and bounding box for the tile
    tile_id = uu.get_tile_id(tile)
    tile_type = uu.get_tile_type(tile)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # Names of the rewindowed rasters (intermediate outputs) and their sources
    input_rewindow = '{0}_{1}_rewindow.tif'.format(tile_id, tile_type)
    area_tile = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id)
    pixel_area_rewindow = '{0}_{1}_rewindow.tif'.format(
        cn.pattern_pixel_area, tile_id)
    tcd_tile = '{0}_{1}.tif'.format(cn.pattern_tcd, tile_id)
    tcd_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_tcd, tile_id)
    gain_tile = '{0}_{1}.tif'.format(cn.pattern_gain, tile_id)
    gain_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_gain, tile_id)
    mangrove_tile = '{0}_{1}.tif'.format(tile_id,
                                         cn.pattern_mangrove_biomass_2000)
    mangrove_tile_rewindow = '{0}_{1}_rewindow.tif'.format(
        tile_id, cn.pattern_mangrove_biomass_2000)

    # Only rewindows the necessary files if they haven't already been processed
    # (just in case this was run on the spot machine before)

    if not os.path.exists(input_rewindow):
        uu.print_log(
            "Model output for {} not rewindowed. Rewindowing...".format(
                tile_id))
        # The model output keeps its own nodata value (no -dstnodata here)
        _rewindow_with_gdalwarp(tile, input_rewindow, xmin, ymin, xmax, ymax)

    if not os.path.exists(tcd_rewindow):
        uu.print_log(
            "Canopy cover for {} not rewindowed. Rewindowing...".format(
                tile_id))
        _rewindow_with_gdalwarp(tcd_tile, tcd_rewindow, xmin, ymin, xmax,
                                ymax, dstnodata='0')
    else:
        uu.print_log("Canopy cover for {} already rewindowed.".format(tile_id))

    if not os.path.exists(pixel_area_rewindow):
        uu.print_log(
            "Pixel area for {} not rewindowed. Rewindowing...".format(tile_id))
        _rewindow_with_gdalwarp(area_tile, pixel_area_rewindow, xmin, ymin,
                                xmax, ymax, dstnodata='0')
    else:
        uu.print_log("Pixel area for {} already rewindowed.".format(tile_id))

    if not os.path.exists(gain_rewindow):
        uu.print_log(
            "Hansen gain for {} not rewindowed. Rewindowing...".format(
                tile_id))
        _rewindow_with_gdalwarp(gain_tile, gain_rewindow, xmin, ymin, xmax,
                                ymax, dstnodata='0')
    else:
        uu.print_log("Hansen gain for {} already rewindowed.".format(tile_id))

    # Mangrove is optional: only rewindowed if the source tile exists
    if os.path.exists(mangrove_tile):
        if not os.path.exists(mangrove_tile_rewindow):
            # Fixed: this log previously printed before checking whether the
            # rewindowed mangrove file already existed
            uu.print_log(
                "Mangrove for {} not rewindowed. Rewindowing...".format(
                    tile_id))
            _rewindow_with_gdalwarp(mangrove_tile, mangrove_tile_rewindow,
                                    xmin, ymin, xmax, ymax, dstnodata='0')
        else:
            uu.print_log(
                "Mangrove tile for {} already rewindowed.".format(tile_id))
    else:
        uu.print_log("No mangrove tile found for {}".format(tile_id))

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, '{}_rewindow'.format(tile_type))
def mp_aggregate_results_to_4_km(sensit_type,
                                 thresh,
                                 tile_id_list,
                                 std_net_flux=None,
                                 run_date=None,
                                 no_upload=None):
    """Aggregates the main per-hectare model outputs to 0.04x0.04 degree
    (~4 km) per-pixel-sum rasters, then (for applicable sensitivity runs)
    creates comparison maps against the standard model net flux.

    Args:
        sensit_type: model run type ('std' or a sensitivity analysis name).
        thresh: tree cover density threshold (integer 0-99) used to filter
            pixels included in the aggregation.
        tile_id_list: list of tile ids to process, or 'all'.
        std_net_flux: optional path of the standard model aggregated net flux
            raster, required for the sensitivity comparison maps.
        run_date: optional date string that replaces the date in the output
            directory names.
        no_upload: if truthy, uploads to s3 are skipped.

    Returns:
        None. Writes aggregated rasters (and comparison maps) locally and
        uploads them unless no_upload is set.
    """

    os.chdir(cn.docker_base_dir)

    # If a full model run is specified, the correct set of tiles for the particular script is listed
    if tile_id_list == 'all':
        # List of tiles to run in the model
        tile_id_list = uu.tile_list_s3(cn.net_flux_dir, sensit_type)

    uu.print_log(tile_id_list)
    uu.print_log(
        "There are {} tiles to process".format(str(len(tile_id_list))) + "\n")

    # Files to download for this script: {source directory: [file pattern]}
    download_dict = {
        cn.annual_gain_AGC_all_types_dir:
        [cn.pattern_annual_gain_AGC_all_types],
        cn.cumul_gain_AGCO2_BGCO2_all_types_dir:
        [cn.pattern_cumul_gain_AGCO2_BGCO2_all_types],
        cn.gross_emis_all_gases_all_drivers_biomass_soil_dir:
        [cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil],
        cn.net_flux_dir: [cn.pattern_net_flux]
    }

    # Checks whether the canopy cover argument is valid
    if thresh < 0 or thresh > 99:
        uu.exception_log(
            no_upload,
            'Invalid tcd. Please provide an integer between 0 and 99.')

    if uu.check_aws_creds():

        # Pixel area tiles-- necessary for calculating sum of pixels for any set of tiles
        uu.s3_flexible_download(cn.pixel_area_dir, cn.pattern_pixel_area,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        # Tree cover density, Hansen gain, and mangrove biomass tiles-- necessary for filtering sums to model extent
        uu.s3_flexible_download(cn.tcd_dir, cn.pattern_tcd, cn.docker_base_dir,
                                sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.gain_dir, cn.pattern_gain,
                                cn.docker_base_dir, sensit_type, tile_id_list)
        uu.s3_flexible_download(cn.mangrove_biomass_2000_dir,
                                cn.pattern_mangrove_biomass_2000,
                                cn.docker_base_dir, sensit_type, tile_id_list)

    uu.print_log("Model outputs to process are:", download_dict)

    # List of output directories. Modified later for sensitivity analysis.
    # Output pattern is determined later.
    output_dir_list = [cn.output_aggreg_dir]

    # If the model run isn't the standard one, the output directory is changed
    if sensit_type != 'std':
        uu.print_log(
            "Changing output directory and file name pattern based on sensitivity analysis"
        )
        output_dir_list = uu.alter_dirs(sensit_type, output_dir_list)

    # A date can optionally be provided by the full model script or a run of this script.
    # This replaces the date in constants_and_names.
    if run_date is not None:
        output_dir_list = uu.replace_output_dir_date(output_dir_list, run_date)

    # Iterates through the types of tiles to be processed.
    # (local renamed from `dir` so the builtin isn't shadowed)
    for download_dir, download_pattern in list(download_dict.items()):

        download_pattern_name = download_pattern[0]

        # Downloads input files or entire directories, depending on how many tiles are in the tile_id_list, if AWS credentials are found
        if uu.check_aws_creds():

            uu.s3_flexible_download(download_dir, download_pattern_name,
                                    cn.docker_base_dir, sensit_type,
                                    tile_id_list)

        # Gets an actual tile id to use as a dummy in creating the actual tile pattern
        local_tile_list = uu.tile_list_spot_machine(cn.docker_base_dir,
                                                    download_pattern_name)
        sample_tile_id = uu.get_tile_id(local_tile_list[0])

        # Renames the tiles according to the sensitivity analysis before creating dummy tiles.
        # The renaming function requires a whole tile name, so this passes a dummy tile name that is then stripped a few
        # lines later.
        tile_id = sample_tile_id  # a dummy tile id (but it has to be a real tile id). It is removed later.
        output_pattern = uu.sensit_tile_rename(sensit_type, tile_id,
                                               download_pattern_name)
        # Strips the 9-character "XXN_XXXW_" tile id prefix and ".tif" suffix
        pattern = output_pattern[9:-4]

        # For sensitivity analysis runs, only aggregates the tiles if they were created as part of the sensitivity analysis
        if (sensit_type != 'std') & (sensit_type not in pattern):
            uu.print_log(
                "{} not a sensitivity analysis output. Skipping aggregation..."
                .format(pattern))
            uu.print_log("")

            continue

        # Lists the tiles of the particular type that is being iterated through.
        # Excludes all intermediate files
        tile_list = uu.tile_list_spot_machine(".", "{}.tif".format(pattern))
        # from https://stackoverflow.com/questions/12666897/removing-an-item-from-list-matching-a-substring
        tile_list = [i for i in tile_list if not ('hanson_2013' in i)]
        tile_list = [i for i in tile_list if not ('rewindow' in i)]
        tile_list = [i for i in tile_list if not ('0_4deg' in i)]
        tile_list = [i for i in tile_list if not ('.ovr' in i)]

        # Fixed: logs the pattern name string rather than the raw one-element list
        uu.print_log("There are {0} tiles to process for pattern {1}".format(
            str(len(tile_list)), download_pattern_name) + "\n")
        uu.print_log("Processing:", download_dir, "; ", pattern)

        # Converts the 10x10 degree Hansen tiles that are in windows of 40000x1 pixels to square block windows,
        # which allows the 30x30 m pixels in each window to be summed.
        # For multiprocessor use. count/2 used about 400 GB of memory on an r4.16xlarge machine, so that was okay.
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 12  # 12 processors = XXX GB peak
            else:
                processes = 16  # 12 processors = 140 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Rewindow max processors=', processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.rewindow, no_upload=no_upload),
            tile_list)
        # close/join added in response to errno 12 (Cannot allocate memory):
        # https://stackoverflow.com/questions/26717120/python-cannot-allocate-memory-using-multiprocessing-pool
        pool.close()
        pool.join()

        # Converts the existing (per ha) values to per pixel values (e.g., emissions/ha to emissions/pixel)
        # and sums those values in each block window.
        # The sum for each window is stored in a 2D array, which is then converted back into a raster at
        # 0.04x0.04 degree resolution.
        # For multiprocessor use. This used about 450 GB of memory with count/2, it's okay on an r4.16xlarge
        if cn.count == 96:
            if sensit_type == 'biomass_swap':
                processes = 10  # 10 processors = XXX GB peak
            else:
                processes = 12  # 16 processors = 180 GB peak; 16 = XXX GB peak; 20 = >750 GB (maxed out)
        else:
            processes = 8
        uu.print_log('Conversion to per pixel and aggregate max processors=',
                     processes)
        pool = multiprocessing.Pool(processes)
        pool.map(
            partial(aggregate_results_to_4_km.aggregate,
                    thresh=thresh,
                    sensit_type=sensit_type,
                    no_upload=no_upload), tile_list)
        pool.close()
        pool.join()

        # Makes a vrt of all the output aggregated tiles
        out_vrt = "{}_0_4deg.vrt".format(pattern)
        os.system('gdalbuildvrt -tr 0.04 0.04 {0} *{1}_0_4deg*.tif'.format(
            out_vrt, pattern))

        # Creates the output name for the aggregated map
        out_pattern = uu.name_aggregated_output(download_pattern_name, thresh,
                                                sensit_type)
        uu.print_log(out_pattern)

        # Produces a single raster of all the aggregated tiles (0.04 degree resolution)
        cmd = [
            'gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0',
            '-co', 'COMPRESS=LZW', '-tr', '0.04', '0.04', out_vrt,
            '{}.tif'.format(out_pattern)
        ]
        uu.log_subprocess_output_full(cmd)

        # Adds metadata tags to output rasters
        uu.add_universal_metadata_tags('{0}.tif'.format(out_pattern),
                                       sensit_type)

        # Units are different for annual removal factor, so metadata has to reflect that
        if 'annual_removal_factor' in out_pattern:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg aboveground carbon/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'scale=negative values are removals', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        else:
            cmd = [
                'gdal_edit.py', '-mo',
                'units=Mg CO2e/yr/pixel, where pixels are 0.04x0.04 degrees',
                '-mo',
                'source=per hectare version of the same model output, aggregated from 0.00025x0.00025 degree pixels',
                '-mo', 'extent=Global', '-mo',
                'treecover_density_threshold={0} (only model pixels with canopy cover > {0} are included in aggregation'
                .format(thresh), '{0}.tif'.format(out_pattern)
            ]
            uu.log_subprocess_output_full(cmd)

        # If no_upload flag is not activated, output is uploaded
        if not no_upload:

            uu.print_log("Tiles processed. Uploading to s3 now...")
            uu.upload_final_set(output_dir_list[0], out_pattern)

        # Cleans up the folder before starting on the next raster type
        vrt_list = glob.glob('*vrt')
        for vrt in vrt_list:
            os.remove(vrt)

        for tile_name in tile_list:
            tile_id = uu.get_tile_id(tile_name)
            os.remove('{0}_{1}_rewindow.tif'.format(tile_id, pattern))
            os.remove('{0}_{1}_0_4deg.tif'.format(tile_id, pattern))

    # Compares the net flux from the standard model and the sensitivity analysis in two ways.
    # This does not work for comparing the raw outputs of the biomass_swap and US_removals sensitivity models because their
    # extents are different from the standard model's extent (tropics and US tiles vs. global).
    # Thus, in order to do this comparison, you need to clip the standard model net flux and US_removals net flux to
    # the outline of the US and clip the standard model net flux to the extent of JPL AGB2000.
    # Then, manually upload the clipped US_removals and biomass_swap net flux rasters to the spot machine and the
    # code below should work.
    if sensit_type not in [
            'std', 'biomass_swap', 'US_removals', 'legal_Amazon_loss'
    ]:

        if std_net_flux:

            uu.print_log(
                "Standard aggregated flux results provided. Creating comparison maps."
            )

            # Copies the standard model aggregation outputs to s3. Only net flux is used, though.
            uu.s3_file_download(std_net_flux, cn.docker_base_dir, sensit_type)

            # Identifies the standard model net flux map
            std_aggreg_flux = os.path.split(std_net_flux)[1]

            try:
                # Identifies the sensitivity model net flux map
                sensit_aggreg_flux = glob.glob(
                    'net_flux_Mt_CO2e_*{}*'.format(sensit_type))[0]

                uu.print_log("Standard model net flux:", std_aggreg_flux)
                uu.print_log("Sensitivity model net flux:", sensit_aggreg_flux)

            except IndexError:
                uu.print_log(
                    'Cannot do comparison. One of the input flux tiles is not valid. Verify that both net flux rasters are on the spot machine.'
                )

            else:
                # Fixed: comparison now runs only when the sensitivity raster
                # was found; previously execution fell through after the
                # except and crashed with a NameError on sensit_aggreg_flux
                uu.print_log(
                    "Creating map of percent difference between standard and {} net flux"
                    .format(sensit_type))
                aggregate_results_to_4_km.percent_diff(std_aggreg_flux,
                                                       sensit_aggreg_flux,
                                                       sensit_type, no_upload)

                uu.print_log(
                    "Creating map of which pixels change sign and which stay the same between standard and {}"
                    .format(sensit_type))
                aggregate_results_to_4_km.sign_change(std_aggreg_flux,
                                                      sensit_aggreg_flux,
                                                      sensit_type, no_upload)

                # If no_upload flag is not activated, output is uploaded
                if not no_upload:

                    uu.upload_final_set(output_dir_list[0],
                                        cn.pattern_aggreg_sensit_perc_diff)
                    uu.upload_final_set(output_dir_list[0],
                                        cn.pattern_aggreg_sensit_sign_change)

        else:

            uu.print_log(
                "No standard aggregated flux results provided. Not creating comparison maps."
            )
# Ejemplo n.º 4
# 0
def aggregate(tile, thresh, sensit_type, no_upload=None):
    """Sums the per-pixel values of one rewindowed 10x10 degree tile into a
    250x250 grid of 0.04x0.04 degree cells and writes that grid as a raster.

    Args:
        tile: name of the model output tile whose rewindowed version is read.
        thresh: tree cover density threshold; when > 0, 30m pixels at or below
            it (and without Hansen gain or mangrove) are excluded.
        sensit_type: sensitivity analysis type (kept for interface
            compatibility with the multiprocessing caller; unused here).
        no_upload: accepted for interface compatibility with the
            multiprocessing caller; unused here.

    Returns:
        None. Writes '{tile_id}_{tile_type}_0_4deg.tif'.
    """

    # start time
    start = datetime.datetime.now()

    # Extracts the tile id, tile type, and bounding box for the tile
    tile_id = uu.get_tile_id(tile)
    tile_type = uu.get_tile_type(tile)
    xmin, ymin, xmax, ymax = uu.coords(tile_id)

    # Name of inputs
    focal_tile_rewindow = '{0}_{1}_rewindow.tif'.format(tile_id, tile_type)
    pixel_area_rewindow = '{0}_{1}_rewindow.tif'.format(
        cn.pattern_pixel_area, tile_id)
    tcd_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_tcd, tile_id)
    gain_rewindow = '{0}_{1}_rewindow.tif'.format(cn.pattern_gain, tile_id)
    mangrove_rewindow = '{0}_{1}_rewindow.tif'.format(
        tile_id, cn.pattern_mangrove_biomass_2000)

    # Opens input tiles for rasterio
    in_src = rasterio.open(focal_tile_rewindow)
    pixel_area_src = rasterio.open(pixel_area_rewindow)
    tcd_src = rasterio.open(tcd_rewindow)
    gain_src = rasterio.open(gain_rewindow)

    # Not every tile has mangrove. Tracks absence explicitly with None instead
    # of relying on a NameError being swallowed by a bare except in the window
    # loop (the original behavior).
    mangrove_src = None
    try:
        mangrove_src = rasterio.open(mangrove_rewindow)
        uu.print_log("    Mangrove tile found for {}".format(tile_id))
    except rasterio.errors.RasterioIOError:
        uu.print_log("    No mangrove tile found for {}".format(tile_id))

    uu.print_log("  Converting {} to per-pixel values...".format(tile))

    # Grabs the windows of the tile in order to iterate over the entire tif without running out of memory
    windows = in_src.block_windows(1)

    # 2D array in which the 0.04x0.04 deg aggregated sums will be stored
    # (10 degrees / 250 cells = 0.04 degrees per cell)
    sum_array = np.zeros([250, 250], 'float32')

    out_raster = "{0}_{1}_0_4deg.tif".format(tile_id, tile_type)

    # Iterates across the block windows of the input tile
    # (window size was set by the rewindow step)
    for idx, window in windows:

        # Creates windows for each input tile
        in_window = in_src.read(1, window=window)
        pixel_area_window = pixel_area_src.read(1, window=window)
        tcd_window = tcd_src.read(1, window=window)
        gain_window = gain_src.read(1, window=window)

        # Uses an all-zero window where there is no mangrove tile
        if mangrove_src is not None:
            mangrove_window = mangrove_src.read(1, window=window)
        else:
            mangrove_window = np.zeros((window.height, window.width),
                                       dtype='uint8')

        # Applies the tree cover density threshold to the 30x30m pixels
        if thresh > 0:

            # QCed this line before publication and then again afterwards in response to question from Lena Schulte-Uebbing at Wageningen Uni.
            in_window = np.where((tcd_window > thresh) | (gain_window == 1) |
                                 (mangrove_window != 0), in_window, 0)

        # Calculates the per-pixel value from the input tile value (/ha to /pixel)
        per_pixel_value = in_window * pixel_area_window / cn.m2_per_ha

        # Sums the pixels to create a total value for the 0.04x0.04 deg cell
        non_zero_pixel_sum = np.sum(per_pixel_value)

        # Stores the sum at this window's (row, col) position in the output grid
        sum_array[idx[0], idx[1]] = non_zero_pixel_sum

    # Closes the input datasets now that all windows have been read
    in_src.close()
    pixel_area_src.close()
    tcd_src.close()
    gain_src.close()
    if mangrove_src is not None:
        mangrove_src.close()

    # Converts the annual carbon gain values to annual gain in megatonnes and makes negative (because removals are negative)
    if cn.pattern_annual_gain_AGC_all_types in tile_type:
        sum_array = sum_array / cn.tonnes_to_megatonnes * -1

    # Converts the cumulative CO2 gain values to annualized CO2 in megatonnes and makes negative (because removals are negative)
    if cn.pattern_cumul_gain_AGCO2_BGCO2_all_types in tile_type:
        sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes * -1

    # Converts the cumulative gross emissions all gases CO2e values to annualized gross emissions CO2e in megatonnes
    if cn.pattern_gross_emis_all_gases_all_drivers_biomass_soil in tile_type:
        sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes

    # Converts the cumulative net flux CO2 values to annualized net flux CO2 in megatonnes
    if cn.pattern_net_flux in tile_type:
        sum_array = sum_array / cn.loss_years / cn.tonnes_to_megatonnes

    uu.print_log("  Creating aggregated tile for {}...".format(tile))

    # Converts array to the same output type as the raster that is created below
    sum_array = np.float32(sum_array)

    # Creates a tile at 0.04x0.04 degree resolution (approximately 4.4x4.4 km at the equator)
    # where the values come from the 2D array populated above.
    # https://gis.stackexchange.com/questions/279953/numpy-array-to-gtiff-using-rasterio-without-source-raster
    # NOTE: update_tags() on this dataset did not persist tags to disk when it
    # was tried here, so metadata is applied afterwards with gdal_edit by the
    # calling script instead.
    with rasterio.open(out_raster,
                       'w',
                       driver='GTiff',
                       compress='lzw',
                       nodata=0,
                       dtype='float32',
                       count=1,
                       height=250,
                       width=250,
                       crs='EPSG:4326',
                       transform=from_origin(xmin, ymax, 0.04,
                                             0.04)) as aggregated:
        aggregated.write(sum_array, 1)

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, '{}_0_4deg'.format(tile_type))
# Ejemplo n.º 5
# 0
def create_tile_statistics(tile):
    """Calculates summary statistics for one tile and appends them to the tile statistics text file.

    Statistics include the count of data pixels (NoData excluded), mean, median,
    10th/25th/75th/90th percentiles, min, max, and the tile-wide sum after converting
    values from per-hectare to per-pixel (using the pixel-area tile).

    :param tile: file name of the tile to analyze (e.g. '<prefix>_<tile_id>_<pattern>.tif')
    :return: None. Writes a per-pixel tif alongside the input and appends one
             comma-separated row of statistics to cn.tile_stats.
    """

    # Extracts the tile id from the full tile name
    tile_id = uu.get_tile_id(tile)

    uu.print_log("Calculating tile statistics for {0}, tile id {1}...".format(tile, tile_id))

    # start time
    start = datetime.datetime.now()

    # Source: http://gis.stackexchange.com/questions/90726
    # Opens raster we're getting statistics on
    focus_tile = gdal.Open(tile)

    nodata = uu.get_raster_nodata_value(tile)
    uu.print_log("NoData value = {}".format(nodata))

    # Turns the raster into a numpy array
    tile_array = np.array(focus_tile.GetRasterBand(1).ReadAsArray())

    # Releases the gdal dataset now that the array has been read into memory
    focus_tile = None

    # Flattens the numpy array to a single dimension
    tile_array_flat = tile_array.flatten()

    # Removes NoData values from the array. NoData are generally either 0 or -9999.
    tile_array_flat_mask = tile_array_flat[tile_array_flat != nodata]

    ### For converting value/hectare to value/pixel
    # Tile with the area of each pixel in m2
    area_tile = '{0}_{1}.tif'.format(cn.pattern_pixel_area, tile_id)

    # Output file name
    tile_short = tile[:-4]
    outname = '{0}_value_per_pixel.tif'.format(tile_short)

    # Equation argument for converting emissions from per hectare to per pixel.
    # First, multiplies the per hectare emissions by the area of the pixel in m2, then divides by the number of m2 in a hectare.
    calc = '--calc=A*B/{}'.format(cn.m2_per_ha)

    # Argument for outputting file
    out = '--outfile={}'.format(outname)

    uu.print_log("Converting {} from /ha to /pixel...".format(tile))
    cmd = [
        'gdal_calc.py', '-A', tile, '-B', area_tile, calc, out,
        '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--overwrite'
    ]
    subprocess.check_call(cmd)
    uu.print_log("{} converted to /pixel".format(tile))

    uu.print_log("Converting value/pixel tile {} to numpy array...".format(tile))
    # Opens raster with value per pixel
    value_per_pixel = gdal.Open(outname)

    # Turns the per-pixel raster into a numpy array
    value_per_pixel_array = np.array(
        value_per_pixel.GetRasterBand(1).ReadAsArray())

    # Releases the gdal dataset now that the array has been read into memory
    value_per_pixel = None

    # Flattens the per-pixel numpy array to a single dimension
    value_per_pixel_array_flat = value_per_pixel_array.flatten()

    uu.print_log("Converted {} to numpy array".format(tile))

    # Statistics list, in output-row order:
    # [pattern, tile_id, tile name, pixel count, mean, median,
    #  10th/25th/75th/90th percentiles, min, max, per-pixel sum]
    stats = [None] * 13

    # Calculates basic tile info.
    # tile[9:-4] strips what appears to be a 9-character prefix and the '.tif'
    # extension to leave the tile pattern -- TODO confirm against actual tile names.
    stats[0] = tile[9:-4]
    stats[1] = tile_id
    stats[2] = tile
    stats[3] = tile_array_flat_mask.size

    # If there are no pixels with values in the tile (as determined by the length of the array when NoData values are removed),
    # the statistics are all N/A.
    if stats[3] == 0:

        stats[4:13] = ["N/A"] * 9

    # If there are pixels with values in the tile, the following statistics are calculated
    else:

        stats[4] = np.mean(tile_array_flat_mask, dtype=np.float64)
        stats[5] = np.median(tile_array_flat_mask)
        stats[6] = np.percentile(tile_array_flat_mask, 10)
        stats[7] = np.percentile(tile_array_flat_mask, 25)
        stats[8] = np.percentile(tile_array_flat_mask, 75)
        stats[9] = np.percentile(tile_array_flat_mask, 90)
        stats[10] = np.amin(tile_array_flat_mask)
        stats[11] = np.amax(tile_array_flat_mask)
        # NoData in the per-pixel tile is 0 (set by gdal_calc above), so summing
        # the unmasked flat array gives the same total as a masked sum.
        stats[12] = np.sum(value_per_pixel_array_flat)

    stats_no_brackets = ', '.join(map(str, stats))

    uu.print_log(stats_no_brackets)

    # Adds the tile's statistics to the txt file.
    # The 'with' block closes the file automatically.
    with open(cn.tile_stats, 'a+') as f:
        f.write(stats_no_brackets + '\r\n')

    # Prints information about the tile that was just processed
    uu.end_of_fx_summary(start, tile_id, stats[0])