Example #1
0
def calculate_percentile(raster_path, percentiles_list, workspace_dir,
                         result_pickle_path):
    """Calculate the percentile cutoffs of a raster and pickle the result.

    Parameters:
        raster_path (str): path to raster to calculate over; band 1 is used.
        percentiles_list (list): sorted list of increasing percentile
            cutoffs to calculate.
        workspace_dir (str): path to a directory where this function can
            create a temporary directory to work in. The temporary
            directory is removed before returning, including when an
            error is raised.
        result_pickle_path (str): path to the file that will store a
            pickled list of percentile threshold values in the same
            position as in `percentiles_list`.

    Returns:
        None.

    """
    churn_dir = tempfile.mkdtemp(dir=workspace_dir)
    try:
        LOGGER.debug('processing percentiles for %s', raster_path)
        heap_size = 2**28
        ffi_buffer_size = 2**10
        percentile_values_list = pygeoprocessing.raster_band_percentile(
            (raster_path, 1), churn_dir, percentiles_list, heap_size,
            ffi_buffer_size)
        # leaving the ``with`` block flushes and closes the file
        with open(result_pickle_path, 'wb') as pickle_file:
            pickle.dump(percentile_values_list, pickle_file)
    finally:
        # never leave the scratch heapfiles behind, even on failure
        shutil.rmtree(churn_dir)
Example #2
0
def calculate_percentile(
        raster_path, percentiles_list, workspace_dir, result_pickle_path):
    """Calculate percentile cutoffs and tail sums of a raster, then pickle.

    Parameters:
        raster_path (str): path to raster to calculate over; band 1 is used.
        percentiles_list (list): sorted list of increasing percentile
            cutoffs to calculate.
        workspace_dir (str): path to a directory where this function can
            create a temporary directory to work in. The temporary
            directory is removed before returning, including when an
            error is raised.
        result_pickle_path (str): path to the file that will store a
            pickled dictionary with the keys
            "percentiles_list" -- original value of `percentiles_list`
            "percentile_values_list" -- list of percentile threshold values
                in the same position as in `percentiles_list`.
            "percentile_sum_list" -- for each threshold value, the sum of
                all non-nodata pixel values strictly greater than that
                threshold, in the same position as in `percentiles_list`.

    Returns:
        None.

    """
    churn_dir = tempfile.mkdtemp(dir=workspace_dir)
    try:
        LOGGER.debug('processing percentiles for %s', raster_path)
        heap_size = 2**28
        ffi_buffer_size = 2**10
        result_dict = {
            'percentiles_list': percentiles_list,
            'percentile_sum_list': [0.] * len(percentiles_list),
            'percentile_values_list': (
                pygeoprocessing.raster_band_percentile(
                    (raster_path, 1), churn_dir, percentiles_list,
                    heap_size, ffi_buffer_size)),
        }
        LOGGER.debug('intermediate result_dict: %s', result_dict)
        LOGGER.debug('processing percentile sums for %s', raster_path)
        nodata_value = pygeoprocessing.get_raster_info(
            raster_path)['nodata'][0]
        for _, block_data in pygeoprocessing.iterblocks((raster_path, 1)):
            if nodata_value is None:
                # no nodata defined: every pixel counts as valid
                valid_mask = numpy.ones(block_data.shape, dtype=bool)
            else:
                valid_mask = ~numpy.isclose(block_data, nodata_value)
            # accumulate, per threshold, the valid values above it
            for index, percentile_value in enumerate(
                    result_dict['percentile_values_list']):
                mask = (block_data > percentile_value) & valid_mask
                result_dict['percentile_sum_list'][index] += (
                    numpy.sum(block_data[mask]))

        LOGGER.debug(
            'pickling percentile results of %s to %s', raster_path,
            result_pickle_path)
        with open(result_pickle_path, 'wb') as pickle_file:
            pickle.dump(result_dict, pickle_file)
    finally:
        # never leave the scratch heapfiles behind, even on failure
        shutil.rmtree(churn_dir)
def main():
    """Print the 0..100 integer percentiles of a hard-coded raster.

    Band 1 of the raster at ``path`` is processed. The scratch directory
    ``percentile_working_dir`` is created if needed and is removed again
    afterwards, including when the percentile calculation raises.
    """
    path = r"C:\Users\Becky\Documents\carbon_edge_model\error_regression_baccini_biomass.tif"
    percentile_working_dir = r"C:\Users\Becky\Documents\raster_calculations\percentile_working_dir"
    # The percentile calculation needs a scratch directory. An already
    # existing directory is fine; any other failure (e.g. permissions) is
    # surfaced here rather than silently ignored.
    os.makedirs(percentile_working_dir, exist_ok=True)
    try:
        # Arguments: (path, 1) selects the first band of the raster, the
        # second is the working directory, the third is the list of
        # percentiles wanted. A finer-grained alternative list used
        # previously:
        # [0, 1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75,
        #  80, 85, 90, 95, 99, 99.5, 99.9, 99.99, 99.999, 100]
        percentile_values_list = pygeoprocessing.raster_band_percentile(
            (path, 1),
            percentile_working_dir,
            list(range(0, 101, 1)))
    finally:
        # remove the temporary working directory
        shutil.rmtree(percentile_working_dir)
    print(percentile_values_list)
def calculate_percentiles(
        raster_path, percentile_list, target_percentile_pickle_path):
    """Calculate percentiles and save to a pickle file.

    Parameters:
        raster_path (str): path to raster; band 1 is used.
        percentile_list (list): list of increasing order percentile thresholds
            between the ranges 0-100.
        target_percentile_pickle_path (str): the result of the percentile
            function will be saved in a list that is pickled in this file.
            A temporary directory for heapfiles is created next to this
            file and removed before returning, including on error.

    Returns:
        None.

    """
    working_dir = os.path.dirname(target_percentile_pickle_path)
    heapfile_dir = tempfile.mkdtemp(dir=working_dir)
    try:
        percentile_values = pygeoprocessing.raster_band_percentile(
            (raster_path, 1), heapfile_dir, percentile_list)
        with open(target_percentile_pickle_path, 'wb') as pickle_file:
            pickle.dump(percentile_values, pickle_file)
    finally:
        # never leave the scratch heapfiles behind, even on failure
        shutil.rmtree(heapfile_dir)
Example #5
0
def _calculate_visual_quality(source_raster_path, working_dir, target_path):
    """Calculate visual quality based on a raster.

    Visual quality is based on the nearest-rank method for breaking pixel
    values from the source raster into percentiles.

    Args:
        source_raster_path (string): The path to a raster from which
            percentiles should be calculated. Nodata values and pixel values
            of 0 are ignored.
        working_dir (string): A directory where working files can be saved.
            A new temporary directory will be created within. This new
            temporary directory will be removed at the end of the function,
            including when an error is raised part-way through.
        target_path (string): The path to where the output raster will be
            written. Nodata pixels receive ``_BYTE_NODATA``, zero-valued
            pixels receive 0, and every other pixel receives the
            ``numpy.digitize`` bin index of its value against the
            0/25/50/75th percentile breaks.

    Returns:
        ``None``

    """
    # Using the nearest-rank method.
    LOGGER.info('Calculating visual quality')

    raster_info = pygeoprocessing.get_raster_info(source_raster_path)
    raster_nodata = raster_info['nodata'][0]

    def _mask_zeros(valuation_matrix):
        """Assign zeros to nodata, excluding them from percentile calc."""
        valid_mask = ~numpy.isclose(valuation_matrix, 0.0)
        if raster_nodata is not None:
            valid_mask &= ~utils.array_equals_nodata(
                valuation_matrix, raster_nodata)
        visual_quality = numpy.empty(valuation_matrix.shape,
                                     dtype=numpy.float64)
        visual_quality[:] = _VALUATION_NODATA
        visual_quality[valid_mask] = valuation_matrix[valid_mask]
        return visual_quality

    temp_dir = tempfile.mkdtemp(dir=working_dir,
                                prefix='visual_quality')
    try:
        # phase 1: calculate percentiles from the visible_structures raster
        LOGGER.info('Determining percentiles for %s',
                    os.path.basename(source_raster_path))

        masked_raster_path = os.path.join(temp_dir, 'zeros_masked.tif')
        pygeoprocessing.raster_calculator(
            [(source_raster_path, 1)], _mask_zeros, masked_raster_path,
            gdal.GDT_Float64, _VALUATION_NODATA,
            raster_driver_creation_tuple=FLOAT_GTIFF_CREATION_OPTIONS)

        percentile_values = pygeoprocessing.raster_band_percentile(
            (masked_raster_path, 1), temp_dir, [0., 25., 50., 75.])
    finally:
        # best-effort cleanup so a failed run does not leave the masked
        # raster and heapfiles behind
        shutil.rmtree(temp_dir, ignore_errors=True)

    # Phase 2: map values to their bins to indicate visual quality.
    percentile_bins = numpy.array(percentile_values)
    LOGGER.info('Mapping percentile breaks %s', percentile_bins)

    def _map_percentiles(valuation_matrix):
        """Classify each pixel into its percentile bin."""
        nonzero = (valuation_matrix != 0)
        nodata = utils.array_equals_nodata(valuation_matrix, raster_nodata)
        valid_indexes = (~nodata & nonzero)
        # NOTE(review): this array is int8 while the target band is
        # GDT_Byte; confirm _BYTE_NODATA is representable in int8.
        visual_quality = numpy.empty(valuation_matrix.shape,
                                     dtype=numpy.int8)
        visual_quality[:] = _BYTE_NODATA
        visual_quality[~nonzero & ~nodata] = 0
        visual_quality[valid_indexes] = numpy.digitize(
            valuation_matrix[valid_indexes], percentile_bins)
        return visual_quality

    pygeoprocessing.raster_calculator(
        [(source_raster_path, 1)], _map_percentiles, target_path,
        gdal.GDT_Byte, _BYTE_NODATA,
        raster_driver_creation_tuple=BYTE_GTIFF_CREATION_OPTIONS)
def _evaluate_expression(processed_raster_list_file_path,
                         symbol_to_path_band_map, args, workspace_dir):
    """Evaluate expression once rasters have been processed.

    Parameters:
        processed_raster_list_file_path (str): path to a pickle file holding
            the list of processed raster paths, in the same order as the
            keys of ``symbol_to_path_band_map``.
        symbol_to_path_band_map (dict): maps each expression symbol to a
            tuple whose second element is the band id. Modified in place so
            each symbol points at its processed raster.
        args (dict): calculation arguments. Modified in place:
            'churn_dir' and 'symbol_to_path_band_map' are set,
            'symbol_to_path_map' (required) and 'build_overview' (optional)
            are removed. 'expression', 'target_nodata' and
            'target_raster_path' are read; 'default_nan' and 'default_inf'
            are optional.
        workspace_dir (str): directory used as the churn directory and as
            the parent of the temporary percentile sort directory.

    Returns:
        None.

    Raises:
        KeyError: if ``args`` has no 'symbol_to_path_map' entry.

    """
    # local import: the cleanup below is the only use of shutil here
    import shutil

    LOGGER.debug(processed_raster_list_file_path)
    # NOTE: pickle must only be loaded from files this pipeline wrote itself
    with open(processed_raster_list_file_path,
              'rb') as processed_raster_list_file:
        processed_raster_path_list = pickle.load(processed_raster_list_file)

    # repoint every symbol at its processed raster, keeping the band id
    for symbol, raster_path in zip(symbol_to_path_band_map,
                                   processed_raster_path_list):
        path_band_id = symbol_to_path_band_map[symbol][1]
        symbol_to_path_band_map[symbol] = (raster_path, path_band_id)

    # this sets a common target sr, pixel size, and resample method .
    args.update({
        'churn_dir': workspace_dir,
        'symbol_to_path_band_map': symbol_to_path_band_map,
    })
    del args['symbol_to_path_map']
    if 'build_overview' in args:
        del args['build_overview']

    default_nan = args.get('default_nan')
    default_inf = args.get('default_inf')

    expression = args['expression']
    # search for percentile functions
    match_obj = re.match(r'(.*)(percentile\(([^,]*), ([^)]*)\))(.*)',
                         expression)
    if match_obj:
        base_raster_path_band = args['symbol_to_path_band_map'][
            match_obj.group(3)]
        percentile_threshold = float(match_obj.group(4))
        working_sort_directory = tempfile.mkdtemp(dir=workspace_dir)
        try:
            LOGGER.debug('doing percentile of %s to %s',
                         base_raster_path_band, percentile_threshold)
            percentile_val = pygeoprocessing.raster_band_percentile(
                base_raster_path_band, working_sort_directory,
                [percentile_threshold])[0]
        finally:
            # the sort directory only holds scratch files; remove it so
            # repeated evaluations do not pile up temp dirs
            shutil.rmtree(working_sort_directory, ignore_errors=True)
        # substitute the percentile() call with its numeric value
        # NOTE(review): '%f' keeps only six decimal places, so very small
        # percentile values are rounded (possibly to 0.000000).
        expression = '%s%f%s' % (match_obj.group(1), percentile_val,
                                 match_obj.group(5))
        LOGGER.debug('new expression: %s', expression)

    if not expression.startswith('mask(raster'):
        pygeoprocessing.symbolic.evaluate_raster_calculator_expression(
            expression,
            args['symbol_to_path_band_map'],
            args['target_nodata'],
            args['target_raster_path'],
            default_nan=default_nan,
            default_inf=default_inf)
    else:
        # parse out array
        arg_list = expression.split(',')
        # the first 1 to n-1 args must be integers
        mask_val_list = [int(val) for val in arg_list[1:-1]]
        # the last argument could be 'invert=?'
        if 'invert' in arg_list[-1]:
            invert = 'True' in arg_list[-1]
        else:
            # if it's not, it'll be another integer; strip the trailing
            # character (the closing parenthesis of the mask call)
            mask_val_list.append(int(arg_list[-1][:-1]))
            invert = False
        LOGGER.debug('mask raster %s by %s -> %s',
                     symbol_to_path_band_map['raster'], str(mask_val_list),
                     args['target_raster_path'])
        mask_raster_by_array(symbol_to_path_band_map['raster'],
                             numpy.array(mask_val_list),
                             args['target_raster_path'], invert)
def main():
    """Entry point.

    Schedules the world-borders and raster downloads and a masking task,
    computes the percentiles of band 1 of the downloaded raster, then builds
    a cumulative-sum curve (sum of valid pixel values at or above each
    percentile value). The curve is plotted to
    ``COUNTRY_WORKSPACES/Global_cdf.png`` and the threshold at 90% of
    ``cdf_array[2]`` is written to ``WORKSPACE_DIR/country_threshold.csv``.

    The per-country loop that previously followed an unconditional
    ``return`` was unreachable and referenced names that are not defined
    in this function, so it is not kept here.
    """
    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_wb_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')
    raster_path = os.path.join(WORKSPACE_DIR, os.path.basename(RASTER_URL))
    download_raster_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(RASTER_URL, raster_path),
        target_path_list=[raster_path],
        task_name='download raster')

    # mask out everything that's not a country
    # splitext keeps the leading dot on the extension, so it must not be
    # repeated in the format string
    masked_raster_path = os.path.join(
        WORKSPACE_DIR, '%s_masked%s' % os.path.splitext(
            os.path.basename(raster_path)))
    # we need to define this because otherwise no nodata value is defined
    mask_nodata = -1
    mask_task = task_graph.add_task(
        func=pygeoprocessing.mask_raster,
        args=(
            (raster_path, 1), world_borders_path, masked_raster_path),
        kwargs={
            'raster_driver_creation_tuple': GTIFF_CREATION_TUPLE_OPTIONS,
            'target_mask_value': mask_nodata,
        },
        target_path_list=[masked_raster_path],
        dependent_task_list=[download_wb_task, download_raster_task],
        task_name='mask raster')

    download_raster_task.join()
    raster_info = pygeoprocessing.get_raster_info(raster_path)
    country_name = "Global"

    # the figure is saved here, so the directory has to exist
    os.makedirs(COUNTRY_WORKSPACES, exist_ok=True)

    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    with open(country_threshold_table_path, 'w') as table_file:
        table_file.write('country,percentile at 90% max,pixel count\n')

        target_percentile_pickle_path = os.path.join(
            WORKSPACE_DIR, '%s.pkl' % (
                os.path.basename(os.path.splitext(raster_path)[0])))
        # NOTE(review): this task waits on the mask task but reads the
        # unmasked ``raster_path``; confirm that is intended.
        calculate_percentiles_task = task_graph.add_task(
            func=calculate_percentiles,
            args=(
                raster_path, PERCENTILE_LIST, target_percentile_pickle_path),
            target_path_list=[target_percentile_pickle_path],
            dependent_task_list=[mask_task],
            task_name='calculate percentiles')
        calculate_percentiles_task.join()
        with open(target_percentile_pickle_path, 'rb') as pickle_file:
            percentile_values = pickle.load(pickle_file)
        LOGGER.debug(
            "len percentile_values: %d len PERCENTILE_LIST: %d",
            len(percentile_values), len(PERCENTILE_LIST))

        cdf_array = [0.0] * len(percentile_values)

        nodata = raster_info['nodata'][0]
        valid_pixel_count = 0
        total_pixel_count = 0
        total_pixels = (
            raster_info['raster_size'][0] * raster_info['raster_size'][1])
        for _, data_block in pygeoprocessing.iterblocks(
                (raster_path, 1), largest_block=2**28):
            # count every block, including all-nodata ones, so the progress
            # log can actually reach 100%
            total_pixel_count += data_block.size
            if nodata is None:
                # no nodata defined: every pixel counts as valid
                valid_mask = numpy.ones(data_block.shape, dtype=bool)
            else:
                valid_mask = ~numpy.isclose(data_block, nodata)
            block_valid_count = numpy.count_nonzero(valid_mask)
            if block_valid_count == 0:
                continue
            valid_pixel_count += block_valid_count
            for index, percentile_value in enumerate(percentile_values):
                cdf_array[index] += numpy.sum((data_block[
                    valid_mask & (data_block >= percentile_value)]).astype(
                        numpy.float32))
            LOGGER.debug(
                '%.2f%% complete', (100.0*total_pixel_count)/total_pixels)
            LOGGER.debug('current cdf array: %s', cdf_array)

        # threshold is at 90% says Becky
        # NOTE(review): index 2 is a fixed position in the percentile list;
        # confirm which percentile it is meant to represent.
        threshold_limit = 0.9 * cdf_array[2]

        LOGGER.debug(cdf_array)
        reversed_percentiles = list(reversed(PERCENTILE_LIST))
        fig, ax = matplotlib.pyplot.subplots()
        ax.plot(reversed_percentiles, cdf_array)
        f = scipy.interpolate.interp1d(cdf_array, reversed_percentiles)
        try:
            cdf_threshold = f(threshold_limit)
        except ValueError:
            LOGGER.exception(
                "error when passing threshold_limit: %s\ncdf_array: %s",
                threshold_limit, cdf_array)
            # NOTE(review): this fallback is a sum, not a percentile
            cdf_threshold = cdf_array[2]

        ax.plot(
            [0, 100], [threshold_limit, threshold_limit], 'k:', linewidth=2)
        ax.plot(
            [cdf_threshold, cdf_threshold], [cdf_array[0], cdf_array[-1]],
            'k:', linewidth=2)

        ax.grid(True, linestyle='-.')
        ax.set_title(
            '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
                country_name, threshold_limit, cdf_threshold,
                valid_pixel_count))
        # y carries the accumulated sums, x the reversed percentiles
        ax.set_ylabel(
            'Sum of %s up to 100-percentile' % os.path.basename(raster_path))
        ax.set_xlabel('100-percentile')
        ax.tick_params(labelcolor='r', labelsize='medium', width=3)
        matplotlib.pyplot.autoscale(enable=True, tight=True)
        matplotlib.pyplot.savefig(
            os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
        # release the figure now that it is on disk
        matplotlib.pyplot.close(fig)
        table_file.write(
            '%s, %f, %d\n' % (country_name, cdf_threshold, valid_pixel_count))
Example #8
0
def main():
    """Entry point.

    Downloads the world borders vector, then for each processed country
    feature (currently only 'Canada') clips ``RASTER_PATH`` to the country,
    computes its percentiles and a cumulative-sum curve (sum of valid pixel
    values at or above each percentile value), saves a plot to
    ``COUNTRY_WORKSPACES/<country>_cdf.png`` and appends a row to
    ``WORKSPACE_DIR/country_threshold.csv``.
    """
    for dir_path in [WORKSPACE_DIR, COUNTRY_WORKSPACES]:
        # an existing directory is fine; other failures should surface
        os.makedirs(dir_path, exist_ok=True)

    task_graph = taskgraph.TaskGraph(WORKSPACE_DIR, -1, 5.0)
    world_borders_path = os.path.join(
        WORKSPACE_DIR, os.path.basename(WORLD_BORDERS_URL))
    download_task = task_graph.add_task(
        func=ecoshard.download_url,
        args=(WORLD_BORDERS_URL, world_borders_path),
        target_path_list=[world_borders_path],
        task_name='download world borders')

    download_task.join()

    world_borders_vector = gdal.OpenEx(world_borders_path, gdal.OF_VECTOR)
    world_borders_layer = world_borders_vector.GetLayer()

    wgs84_srs = osr.SpatialReference()
    wgs84_srs.ImportFromEPSG(4326)

    raster_info = pygeoprocessing.get_raster_info(RASTER_PATH)
    reversed_percentiles = list(reversed(PERCENTILE_LIST))

    country_threshold_table_path = os.path.join(
        WORKSPACE_DIR, 'country_threshold.csv')
    # the ``with`` block closes the table even if a country fails part-way
    with open(country_threshold_table_path, 'w') as table_file:
        table_file.write('country,percentile at 90% max,pixel count\n')
        for world_border_feature in world_borders_layer:
            country_name = world_border_feature.GetField('NAME')
            # only Canada is processed at the moment
            if country_name != 'Canada':
                continue
            LOGGER.debug(country_name)
            country_workspace = os.path.join(
                COUNTRY_WORKSPACES, country_name)
            os.makedirs(country_workspace, exist_ok=True)

            country_vector = os.path.join(
                country_workspace, '%s.gpkg' % country_name)
            country_vector_complete_token = os.path.join(
                country_workspace, '%s.COMPLETE' % country_name)
            extract_feature(
                world_borders_path, world_border_feature.GetFID(),
                wgs84_srs.ExportToWkt(), country_vector,
                country_vector_complete_token)

            country_raster_path = os.path.join(
                country_workspace, '%s_%s' % (
                    country_name, os.path.basename(RASTER_PATH)))

            # clip the raster to the country's bounding box and outline
            country_vector_info = pygeoprocessing.get_vector_info(
                country_vector)
            pygeoprocessing.warp_raster(
                RASTER_PATH, raster_info['pixel_size'], country_raster_path,
                'near', target_bb=country_vector_info['bounding_box'],
                vector_mask_options={'mask_vector_path': country_vector},
                working_dir=country_workspace)

            percentile_values = pygeoprocessing.raster_band_percentile(
                (country_raster_path, 1), country_workspace, PERCENTILE_LIST)
            # skip countries for which not every percentile came back
            if len(percentile_values) != len(PERCENTILE_LIST):
                continue
            LOGGER.debug(
                "len percentile_values: %d len PERCENTILE_LIST: %d",
                len(percentile_values), len(PERCENTILE_LIST))

            cdf_array = [0.0] * len(percentile_values)

            nodata = pygeoprocessing.get_raster_info(
                country_raster_path)['nodata'][0]
            pixel_count = 0
            for _, data_block in pygeoprocessing.iterblocks(
                    (country_raster_path, 1)):
                if nodata is None:
                    # no nodata defined: every pixel counts as valid
                    valid_mask = numpy.ones(data_block.shape, dtype=bool)
                else:
                    valid_mask = ~numpy.isclose(data_block, nodata)
                pixel_count += numpy.count_nonzero(valid_mask)
                for index, percentile_value in enumerate(percentile_values):
                    cdf_array[index] += numpy.sum(data_block[
                        valid_mask & (data_block >= percentile_value)])

            # threshold is at 90% says Becky
            # NOTE(review): index 2 is a fixed position in the percentile
            # list; confirm which percentile it is meant to represent.
            threshold_limit = 0.9 * cdf_array[2]

            LOGGER.debug(cdf_array)
            fig, ax = matplotlib.pyplot.subplots()
            ax.plot(reversed_percentiles, cdf_array)
            f = scipy.interpolate.interp1d(cdf_array, reversed_percentiles)
            try:
                cdf_threshold = f(threshold_limit)
            except ValueError:
                LOGGER.exception(
                    "error when passing threshold_limit: %s\ncdf_array: %s",
                    threshold_limit, cdf_array)
                # NOTE(review): this fallback is a sum, not a percentile
                cdf_threshold = cdf_array[2]

            ax.plot(
                [0, 100], [threshold_limit, threshold_limit], 'k:',
                linewidth=2)
            ax.plot(
                [cdf_threshold, cdf_threshold],
                [cdf_array[0], cdf_array[-1]], 'k:', linewidth=2)

            ax.grid(True, linestyle='-.')
            ax.set_title(
                '%s CDF. 90%% max at %.2f and %.2f%%\nn=%d' % (
                    country_name, threshold_limit, cdf_threshold,
                    pixel_count))
            # y carries the accumulated sums, x the reversed percentiles
            ax.set_ylabel(
                'Sum of %s up to 100-percentile' % os.path.basename(
                    RASTER_PATH))
            ax.set_xlabel('100-percentile')
            ax.tick_params(labelcolor='r', labelsize='medium', width=3)
            matplotlib.pyplot.autoscale(enable=True, tight=True)
            matplotlib.pyplot.savefig(
                os.path.join(COUNTRY_WORKSPACES, '%s_cdf.png' % country_name))
            # one figure per country: close it so figures do not pile up
            matplotlib.pyplot.close(fig)
            table_file.write(
                '%s, %f, %d\n' % (country_name, cdf_threshold, pixel_count))
            # flush per row so partial results survive a later failure
            table_file.flush()
Example #9
0
# Raster summarised by main().
_POTENTIAL_PATH = r"C:\Users\Becky\Documents\raster_calculations\aggregate_potential_ES_score_nspwng.tif"

# (report label, cutoff) pairs for the potential raster.  Every cutoff is one
# of the values the author recorded for this analysis:
# [0.0, 8.223874317755279e-18, 0.06352668319825519, 0.6784644064412253, 1.2982949910007597, 1.4329746715109062, 1.5756065342319365, 1.7761127919757702, 2.040984541853515, 2.344609197149186, 2.55102265792189, 2.8146687301480546, 5.87844488615983]
_POTENTIAL_CUTOFFS = (
    ('2 pct sum:', 2.8146687301480546),
    ('5 pct sum:', 2.55102265792189),
    ('10 pct sum:', 2.344609197149186),
    ('20 pct sum:', 2.040984541853515),
    ('30 pct sum:', 1.7761127919757702),
    ('40 pct sum:', 1.5756065342319365),
    ('50 pct sum:', 1.4329746715109062),
    ('60 pct sum:', 1.2982949910007597),
    ('70 pct sum:', 0.6784644064412253),
    ('80 pct sum:', 0.06352668319825519),
    ('90 pct sum:', 8.223874317755279e-18),
)

# Inputs of the "realized" stage, which the original main() kept after an
# early ``return`` (i.e. it never ran).  The cutoffs are the values recorded
# from raster_band_percentile on this raster for the percentiles
# [1, 12, 23, 34, 45, 56, 67, 78, 89, 94.5, 97.25]:
# [0.0, 0.0, 2.6564152339677546e-05, 0.00449669105901578, 0.026592994668002544, 0.08908325455615322, 0.21252896986988581, 0.4257240946680402, 0.8519801985470177, 1.1987215681382737, 1.54221074228756]
_REALIZED_PATH = r"C:\Users\Becky\Documents\raster_calculations\aggregate_realized_ES_score_nspntg_renorm_md5_f788b5b627aa06c4028a2277da9d8dc0.tif"
_REALIZED_CUTOFFS = (
    ('2.5 pct sum:', 1.54221074228756),
    ('5 pct sum:', 1.1987215681382737),
    ('10 pct sum:', 0.8519801985470177),
    ('20 pct sum:', 0.4257240946680402),
    ('30 pct sum:', 0.21252896986988581),
    ('40 pct sum:', 0.08908325455615322),
    ('50 pct sum:', 0.026592994668002544),
    ('60 pct sum:', 0.00449669105901578),
    ('70 pct sum:', 2.6564152339677546e-05),
)
# Recorded output of the realized stage:
#2.5 pct sum:                       77750003.43
#5 pct sum:                        130085623.90
#10 pct sum:                       209758688.42
#20 pct sum:                       304675563.91
#30 pct sum:                       352506707.61
#40 pct sum:                       375005156.25
#50 pct sum:                       383134918.72
#60 pct sum:                       385359011.24
#70 pct sum:                       385546722.25
#100 pct sum:                      385546979.30


def _nodata_mask(block_data, nodata_value):
    """Return a boolean array that is True where `block_data` is nodata.

    A raster without a nodata value (`nodata_value` is None) has no nodata
    pixels, so the mask is all False instead of letting numpy.isclose fail.
    """
    if nodata_value is None:
        return numpy.zeros(block_data.shape, dtype=bool)
    return numpy.isclose(block_data, nodata_value)


def _sum_pixels_above(raster_path, cutoffs):
    """Sum the non-nodata pixels of band 1 that exceed each cutoff.

    Parameters:
        raster_path (str): path to the raster to read block by block.
        cutoffs (sequence): numeric cutoffs; a pixel contributes to a
            cutoff's sum when its value is strictly greater than it.

    Returns:
        (cutoff_sums, nonzero_sum) where `cutoff_sums` is a list with one
        sum per entry of `cutoffs` (same order) and `nonzero_sum` is the sum
        of every non-nodata pixel that is != 0.

    """
    nodata_value = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    cutoff_sums = [0.0] * len(cutoffs)
    nonzero_sum = 0.0
    for _, block_data in pygeoprocessing.iterblocks((raster_path, 1)):
        # the valid mask is the same for every cutoff, so build it once
        valid_mask = ~_nodata_mask(block_data, nodata_value)
        for index, cutoff in enumerate(cutoffs):
            cutoff_sums[index] += numpy.sum(
                block_data[(block_data > cutoff) & valid_mask])
        nonzero_sum += numpy.sum(block_data[(block_data != 0) & valid_mask])
    return cutoff_sums, nonzero_sum


def _print_sum_stats(raster_path, labelled_cutoffs):
    """Print one sum per (label, cutoff) pair plus the '100 pct' total.

    Parameters:
        raster_path (str): path to the raster to summarise.
        labelled_cutoffs (sequence): (label, cutoff) pairs; each label is
            printed next to the sum of pixels greater than its cutoff.

    Returns:
        None.

    """
    labels = [label for label, _ in labelled_cutoffs]
    cutoffs = [cutoff for _, cutoff in labelled_cutoffs]
    cutoff_sums, nonzero_sum = _sum_pixels_above(raster_path, cutoffs)

    report = ['Pixel sum stats from %s\n' % (raster_path,)]
    # labels are left-justified in a 32 character column so that the values
    # line up; the last line is the sum over all nonzero valid pixels
    for label, value in zip(
            labels + ['100 pct sum:'], cutoff_sums + [nonzero_sum]):
        report.append('%-32s%14.2f\n' % (label, value))
    print(''.join(report))


def _print_pixel_count_stats(raster_path):
    """Print pixel counts and the valid nonzero sum of band 1.

    Not called by main(); this stage sat after an early ``return`` in the
    original.  It was run there on the realized raster and on
    CNC_workspace\\masked_nathab_esa_md5_40577bae3ef60519b1043bb8582a07af.tif
    with these recorded results:

        aggregate_realized_ES_score_nspntg_renorm_md5_f788b5b627aa06c4028a2277da9d8dc0
            total pixels:                    6531840000
            nonzero non-nodata pixel count:  1133004447
            nodata count:                    5118894498
        masked_nathab_esa_md5_40577bae3ef60519b1043bb8582a07af.tif
            total pixels:                    8398080000
            nonzero non-nodata pixel count:  1257421938
            nodata count:                             0
            sum:                                      0.00

    Author's note: 1/10 of 1257421938 is 125742194, the number of pixels at
    this resolution making up 10% of the remaining natural habitat land
    area.  For aggregate ES that corresponds to 125742194/1133004447 =
    0.1109812007648722, so the top 11th percentile means taking the 0.89.

    Parameters:
        raster_path (str): path to the raster to count.

    Returns:
        None.

    """
    nodata_value = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
    total_pixels = 0
    nonzero_count = 0
    nodata_count = 0
    running_sum = 0.0
    for _, block_data in pygeoprocessing.iterblocks((raster_path, 1)):
        nodata_mask = _nodata_mask(block_data, nodata_value)
        # pixels that are neither zero nor nodata
        valid_nonzero_mask = (block_data != 0) & ~nodata_mask
        nonzero_count += numpy.count_nonzero(valid_nonzero_mask)
        running_sum += numpy.sum(block_data[valid_nonzero_mask])
        nodata_count += numpy.count_nonzero(nodata_mask)
        total_pixels += block_data.size

    print(
        'Pixel count stats from %s\n' % (raster_path,) +
        '%-32s%11d\n' % ('total pixels:', total_pixels) +
        '%-32s%11d\n' % ('nonzero non-nodata pixel count:', nonzero_count) +
        '%-32s%11d\n' % ('nodata count:', nodata_count) +
        '%-32s%14.2f\n' % ('sum:', running_sum))


def _print_percentile_cutoffs(raster_path, working_dir, percentiles):
    """Print the values of band 1 at the given percentiles.

    Not called by main(); this stage sat after an early ``return`` in the
    original, where it was run with
    working_dir=r"C:\\Users\\Becky\\Documents\\raster_calculations\\percentile_working_dir"
    on the realized raster with [1, 12, 23, 34, 45, 56, 67, 78, 89, 94.5,
    97.25] and on aggregate_potential_ES_score_nspwpg.tif with
    [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99, 100].

    Parameters:
        raster_path (str): path to the raster to calculate over.
        working_dir (str): scratch directory handed to
            pygeoprocessing.raster_band_percentile.  It is created if
            missing and REMOVED (with all contents) afterwards.
        percentiles (list): sorted list of increasing percentile cutoffs.

    Returns:
        None.

    """
    if not os.path.isdir(working_dir):
        os.makedirs(working_dir)
    try:
        percentile_values_list = pygeoprocessing.raster_band_percentile(
            (raster_path, 1), working_dir, percentiles)
    finally:
        # clean up the scratch directory even if the calculation fails
        shutil.rmtree(working_dir)
    print(percentile_values_list)


def main():
    """Print the sums of the potential ES raster above each fixed cutoff.

    Only the "potential" stage runs, exactly as in the original where every
    later stage followed an early ``return``.  Those stages are kept as the
    uncalled helpers `_print_pixel_count_stats` and
    `_print_percentile_cutoffs`; the realized run is
    `_print_sum_stats(_REALIZED_PATH, _REALIZED_CUTOFFS)`.

    Returns:
        None.

    """
    # POTENTIAL
    _print_sum_stats(_POTENTIAL_PATH, _POTENTIAL_CUTOFFS)

    #Pixel sum stats from C:\Users\Becky\Documents\raster_calculations\aggregate_potential_ES_score_nspwpg.tif
    # This layer only had 5 services so isn't fully comparable to realized (no surrogate for non-wood foraged products, which should have just been all natural habitat for potential)
    # # [8.223874317755279e-18, 0.06277088660611055, 0.31905198201749124, 0.43141886583982053, 0.5513050308982201, 0.7021776828519225, 0.8801414329582294, 1.0867488999270096, 1.3572950878165897, 1.5653558772021574, 2.14759821821794, 4.87844488615983]
    #1 pct sum:                         34709125.64
    #5 pct sum:                        135799124.58
    #10 pct sum:                       237768933.24
    #20 pct sum:                       410918702.08
    #30 pct sum:                       549156818.78
    #40 pct sum:                       660915846.65
    #50 pct sum:                       749234063.39
    #60 pct sum:                       818545087.19
    #70 pct sum:                       871880286.70
    #80 pct sum:                       901438219.50
    #90 pct sum:                       903098652.38
    #100 pct sum:                      903098652.38